mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-13 15:11:39 +00:00
CellSPU: Fix bug 3056. Variadic extract_element was not implemented (nor was it
ever conceived to occur). llvm-svn: 59891
This commit is contained in:
parent
0da772db1c
commit
50e49b28f0
@ -591,13 +591,24 @@ SPUDAGToDAGISel::SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
|
||||
SDValue &Index) {
|
||||
if (!SelectAFormAddr(Op, N, Base, Index)
|
||||
&& !SelectDFormAddr(Op, N, Base, Index)) {
|
||||
// default form of a X-form address is r(r) in operands 0 and 1:
|
||||
// Default form of a X-form address is r(r) in operands 0 and 1:
|
||||
SDValue Op0 = N.getOperand(0);
|
||||
SDValue Op1 = N.getOperand(1);
|
||||
|
||||
if (Op0.getOpcode() == ISD::Register && Op1.getOpcode() == ISD::Register) {
|
||||
if ((Op0.getOpcode() == ISD::Register
|
||||
|| Op.getOpcode() == ISD::CopyFromReg)
|
||||
&& (Op1.getOpcode() == ISD::Register
|
||||
|| Op.getOpcode() == ISD::CopyFromReg)) {
|
||||
if (Op.getOpcode() == ISD::Register)
|
||||
Base = Op0;
|
||||
else
|
||||
Base = Op0.getOperand(1);
|
||||
|
||||
if (Op1.getOpcode() == ISD::Register)
|
||||
Index = Op1;
|
||||
else
|
||||
Index = Op1.getOperand(1);
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -171,7 +171,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
||||
// Expand the jumptable branches
|
||||
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
|
||||
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
|
||||
|
||||
// Custom lower SELECT_CC for most cases, but expand by default
|
||||
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
|
||||
|
||||
// SPU has no intrinsics for these particular operations:
|
||||
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
|
||||
@ -398,6 +404,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
||||
setTargetDAGCombine(ISD::ANY_EXTEND);
|
||||
|
||||
computeRegisterProperties();
|
||||
|
||||
// Set other properties:
|
||||
setSchedulingPreference(SchedulingForLatency);
|
||||
}
|
||||
|
||||
const char *
|
||||
@ -413,7 +422,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
|
||||
node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
|
||||
node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
|
||||
node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
|
||||
node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
|
||||
node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
|
||||
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
|
||||
node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
|
||||
node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
|
||||
@ -750,7 +759,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
||||
}
|
||||
|
||||
SDValue insertEltOp =
|
||||
DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
|
||||
DAG.getNode(SPUISD::SHUFFLE_MASK, stVecVT, insertEltPtr);
|
||||
SDValue vectorizeOp =
|
||||
DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
|
||||
|
||||
@ -1720,11 +1729,11 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
||||
/// which the Cell can operate. The code inspects V3 to ascertain whether the
|
||||
/// permutation vector, V3, is monotonically increasing with one "exception"
|
||||
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
|
||||
/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
|
||||
/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
|
||||
/// In either case, the net result is going to eventually invoke SHUFB to
|
||||
/// permute/shuffle the bytes from V1 and V2.
|
||||
/// \note
|
||||
/// INSERT_MASK is eventually selected as one of the C*D instructions, generate
|
||||
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
|
||||
/// control word for byte/halfword/word insertion. This takes care of a single
|
||||
/// element move from V2 into V1.
|
||||
/// \note
|
||||
@ -1782,9 +1791,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
|
||||
// Initialize temporary register to 0
|
||||
SDValue InitTempReg =
|
||||
DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
|
||||
// Copy register's contents as index in INSERT_MASK:
|
||||
// Copy register's contents as index in SHUFFLE_MASK:
|
||||
SDValue ShufMaskOp =
|
||||
DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
|
||||
DAG.getNode(SPUISD::SHUFFLE_MASK, V1.getValueType(),
|
||||
DAG.getTargetConstant(V2Elt, MVT::i32),
|
||||
DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
|
||||
// Use shuffle mask in SHUFB synthetic instruction:
|
||||
@ -2050,11 +2059,10 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
|
||||
MVT VT = Op.getValueType();
|
||||
SDValue N = Op.getOperand(0);
|
||||
SDValue Elt = Op.getOperand(1);
|
||||
SDValue ShufMask[16];
|
||||
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
|
||||
|
||||
assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
|
||||
SDValue retval;
|
||||
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
|
||||
// Constant argument:
|
||||
int EltNo = (int) C->getZExtValue();
|
||||
|
||||
// sanity checks:
|
||||
@ -2102,6 +2110,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
|
||||
assert(prefslot_begin != -1 && prefslot_end != -1 &&
|
||||
"LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
|
||||
|
||||
unsigned int ShufBytes[16];
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
// zero fill upper part of preferred slot, don't care about the
|
||||
// other slots:
|
||||
@ -2112,20 +2121,138 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
|
||||
? 0x80
|
||||
: elt_byte + (i - prefslot_begin));
|
||||
|
||||
ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
|
||||
ShufBytes[i] = mask_val;
|
||||
} else
|
||||
ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
|
||||
ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
|
||||
}
|
||||
|
||||
SDValue ShufMaskVec =
|
||||
DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
|
||||
SDValue ShufMask[4];
|
||||
for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
|
||||
unsigned bidx = i / 4;
|
||||
unsigned int bits = ((ShufBytes[bidx] << 24) |
|
||||
(ShufBytes[bidx+1] << 16) |
|
||||
(ShufBytes[bidx+2] << 8) |
|
||||
ShufBytes[bidx+3]);
|
||||
ShufMask[i] = DAG.getConstant(bits, MVT::i32);
|
||||
}
|
||||
|
||||
SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||
&ShufMask[0],
|
||||
sizeof(ShufMask) / sizeof(ShufMask[0]));
|
||||
|
||||
return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
|
||||
retval = DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
|
||||
DAG.getNode(SPUISD::SHUFB, N.getValueType(),
|
||||
N, N, ShufMaskVec));
|
||||
} else {
|
||||
// Variable index: Rotate the requested element into slot 0, then replicate
|
||||
// slot 0 across the vector
|
||||
MVT VecVT = N.getValueType();
|
||||
if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
|
||||
cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
|
||||
abort();
|
||||
}
|
||||
|
||||
// Make life easier by making sure the index is zero-extended to i32
|
||||
if (Elt.getValueType() != MVT::i32)
|
||||
Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
|
||||
|
||||
// Scale the index to a bit/byte shift quantity
|
||||
APInt scaleFactor =
|
||||
APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
|
||||
SDValue vecShift;
|
||||
|
||||
switch (VT.getSimpleVT()) {
|
||||
default:
|
||||
cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
|
||||
abort();
|
||||
/*NOTREACHED*/
|
||||
case MVT::i8: {
|
||||
// Don't need to scale, but we do need to correct for where bytes go in
|
||||
// slot 0:
|
||||
SDValue prefSlot = DAG.getNode(ISD::SUB, MVT::i32,
|
||||
Elt, DAG.getConstant(3, MVT::i32));
|
||||
SDValue corrected = DAG.getNode(ISD::ADD, MVT::i32, prefSlot,
|
||||
DAG.getConstant(16, MVT::i32));
|
||||
|
||||
SDValue shiftAmt = DAG.getNode(ISD::SELECT_CC, MVT::i32,
|
||||
prefSlot, DAG.getConstant(0, MVT::i32),
|
||||
prefSlot, // trueval
|
||||
corrected, // falseval
|
||||
DAG.getCondCode(ISD::SETGT));
|
||||
vecShift = DAG.getNode(SPUISD::ROTBYTES_LEFT, VecVT, N, shiftAmt);
|
||||
break;
|
||||
}
|
||||
case MVT::i16: {
|
||||
// Scale the index to bytes, subtract for preferred slot:
|
||||
Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
|
||||
DAG.getConstant(scaleFactor.logBase2(), MVT::i32));
|
||||
SDValue prefSlot = DAG.getNode(ISD::SUB, MVT::i32,
|
||||
Elt, DAG.getConstant(2, MVT::i32));
|
||||
SDValue corrected = DAG.getNode(ISD::ADD, MVT::i32, prefSlot,
|
||||
DAG.getConstant(16, MVT::i32));
|
||||
|
||||
SDValue shiftAmt = DAG.getNode(ISD::SELECT_CC, MVT::i32,
|
||||
prefSlot, DAG.getConstant(0, MVT::i32),
|
||||
prefSlot, // trueval
|
||||
corrected, // falseval
|
||||
DAG.getCondCode(ISD::SETGT));
|
||||
vecShift = DAG.getNode(SPUISD::ROTBYTES_LEFT, VecVT, N, shiftAmt);
|
||||
break;
|
||||
}
|
||||
case MVT::i32:
|
||||
case MVT::f32:
|
||||
case MVT::i64:
|
||||
case MVT::f64:
|
||||
// Simple left shift to slot 0
|
||||
Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
|
||||
DAG.getConstant(scaleFactor.logBase2(), MVT::i32));
|
||||
vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
|
||||
break;
|
||||
}
|
||||
|
||||
// Replicate slot 0 across the entire vector (for consistency with the
|
||||
// notion of a unified register set)
|
||||
SDValue replicate;
|
||||
|
||||
switch (VT.getSimpleVT()) {
|
||||
default:
|
||||
cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
|
||||
abort();
|
||||
/*NOTREACHED*/
|
||||
case MVT::i8: {
|
||||
SDValue factor = DAG.getConstant(0x03030303, MVT::i32);
|
||||
replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
|
||||
factor, factor);
|
||||
break;
|
||||
}
|
||||
case MVT::i16: {
|
||||
SDValue factor = DAG.getConstant(0x02030203, MVT::i32);
|
||||
replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
|
||||
factor, factor);
|
||||
break;
|
||||
}
|
||||
case MVT::i32:
|
||||
case MVT::f32: {
|
||||
SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
|
||||
replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
|
||||
factor, factor);
|
||||
break;
|
||||
}
|
||||
case MVT::i64:
|
||||
case MVT::f64: {
|
||||
SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
|
||||
SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
|
||||
replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
|
||||
loFactor, hiFactor);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
retval = DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
|
||||
DAG.getNode(SPUISD::SHUFB, VecVT, vecShift, vecShift, replicate));
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
|
||||
@ -2145,7 +2272,7 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
|
||||
DAG.getNode(SPUISD::SHUFB, VT,
|
||||
DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
|
||||
VecOp,
|
||||
DAG.getNode(SPUISD::INSERT_MASK, VT,
|
||||
DAG.getNode(SPUISD::SHUFFLE_MASK, VT,
|
||||
DAG.getNode(ISD::ADD, PtrVT,
|
||||
PtrBase,
|
||||
DAG.getConstant(CN->getZExtValue(),
|
||||
@ -2614,8 +2741,39 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// LowerOperation - Provide custom lowering hooks for some operations.
|
||||
///
|
||||
//! Lower ISD::SELECT_CC
|
||||
/*!
|
||||
ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
|
||||
SELB instruction.
|
||||
|
||||
\note Need to revisit this in the future: if the code path through the true
|
||||
and false value computations is longer than the latency of a branch (6
|
||||
cycles), then it would be more advantageous to branch and insert a new basic
|
||||
block and branch on the condition. However, this code does not make that
|
||||
assumption, given the simplistic uses so far.
|
||||
*/
|
||||
|
||||
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
  // Lowers ISD::SELECT_CC into a SETCC that produces the selection mask,
  // followed by an SPUISD::SELB that picks between the two values.
  //
  // Op's operands are read as (lhs, rhs, trueval, falseval, condition);
  // the returned node has Op's value type.
  MVT VT = Op.getValueType();
  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  SDValue trueval = Op.getOperand(2);
  SDValue falseval = Op.getOperand(3);
  SDValue condition = Op.getOperand(4);

  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
  // with another "cannot select select_cc" assert:

  SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition);

  // NOTE: SELB's operands are ($rA, $rB, $rC). It takes bits from $rA where
  // the corresponding bit of $rC is 0 and bits from $rB where it is 1. SPU
  // comparisons yield an all-ones mask when the predicate holds, so the
  // "true" value has to be passed as $rB and the "false" value as $rA.
  return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
}
|
||||
|
||||
//! Custom (target-specific) lowering entry point
|
||||
/*!
|
||||
This is where LLVM's DAG selection process calls to do target-specific
|
||||
lowering of nodes.
|
||||
*/
|
||||
SDValue
|
||||
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
||||
{
|
||||
@ -2704,13 +2862,19 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
||||
case ISD::FDIV:
|
||||
if (VT == MVT::f32 || VT == MVT::v4f32)
|
||||
return LowerFDIVf32(Op, DAG);
|
||||
// else if (Op.getValueType() == MVT::f64)
|
||||
// return LowerFDIVf64(Op, DAG);
|
||||
#if 0
|
||||
// This is probably a libcall
|
||||
else if (Op.getValueType() == MVT::f64)
|
||||
return LowerFDIVf64(Op, DAG);
|
||||
#endif
|
||||
else
|
||||
assert(0 && "Calling FDIV on unsupported MVT");
|
||||
|
||||
case ISD::CTPOP:
|
||||
return LowerCTPOP(Op, DAG);
|
||||
|
||||
case ISD::SELECT_CC:
|
||||
return LowerSELECT_CC(Op, DAG);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
@ -2967,7 +3131,7 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
|
||||
#if 0
|
||||
case CALL:
|
||||
case SHUFB:
|
||||
case INSERT_MASK:
|
||||
case SHUFFLE_MASK:
|
||||
case CNTB:
|
||||
#endif
|
||||
|
||||
|
@ -37,7 +37,7 @@ namespace llvm {
|
||||
LDRESULT, ///< Load result (value, chain)
|
||||
CALL, ///< CALL instruction
|
||||
SHUFB, ///< Vector shuffle (permute)
|
||||
INSERT_MASK, ///< Insert element shuffle mask
|
||||
SHUFFLE_MASK, ///< Shuffle mask
|
||||
CNTB, ///< Count leading ones in bytes
|
||||
PROMOTE_SCALAR, ///< Promote scalar->vector
|
||||
EXTRACT_ELT0, ///< Extract element 0
|
||||
|
@ -272,51 +272,51 @@ def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp),
|
||||
def CBD :
|
||||
RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src),
|
||||
"cbd\t$rT, $src", ShuffleOp,
|
||||
[(set (v16i8 VECREG:$rT), (SPUvecinsmask dform2_addr:$src))]>;
|
||||
[(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
|
||||
|
||||
def CBX : RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
|
||||
"cbx\t$rT, $src", ShuffleOp,
|
||||
[(set (v16i8 VECREG:$rT), (SPUvecinsmask xform_addr:$src))]>;
|
||||
[(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
|
||||
|
||||
def CHD : RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src),
|
||||
"chd\t$rT, $src", ShuffleOp,
|
||||
[(set (v8i16 VECREG:$rT), (SPUvecinsmask dform2_addr:$src))]>;
|
||||
[(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
|
||||
|
||||
def CHX : RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
|
||||
"chx\t$rT, $src", ShuffleOp,
|
||||
[(set (v8i16 VECREG:$rT), (SPUvecinsmask xform_addr:$src))]>;
|
||||
[(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
|
||||
|
||||
def CWD : RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src),
|
||||
"cwd\t$rT, $src", ShuffleOp,
|
||||
[(set (v4i32 VECREG:$rT), (SPUvecinsmask dform2_addr:$src))]>;
|
||||
[(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
|
||||
|
||||
def CWDf32 : RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src),
|
||||
"cwd\t$rT, $src", ShuffleOp,
|
||||
[(set (v4f32 VECREG:$rT), (SPUvecinsmask dform2_addr:$src))]>;
|
||||
[(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
|
||||
|
||||
def CWX : RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
|
||||
"cwx\t$rT, $src", ShuffleOp,
|
||||
[(set (v4i32 VECREG:$rT), (SPUvecinsmask xform_addr:$src))]>;
|
||||
[(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
|
||||
|
||||
def CWXf32 : RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
|
||||
"cwx\t$rT, $src", ShuffleOp,
|
||||
[(set (v4f32 VECREG:$rT), (SPUvecinsmask xform_addr:$src))]>;
|
||||
[(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
|
||||
|
||||
def CDD : RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src),
|
||||
"cdd\t$rT, $src", ShuffleOp,
|
||||
[(set (v2i64 VECREG:$rT), (SPUvecinsmask dform2_addr:$src))]>;
|
||||
[(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
|
||||
|
||||
def CDDf64 : RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src),
|
||||
"cdd\t$rT, $src", ShuffleOp,
|
||||
[(set (v2f64 VECREG:$rT), (SPUvecinsmask dform2_addr:$src))]>;
|
||||
[(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
|
||||
|
||||
def CDX : RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
|
||||
"cdx\t$rT, $src", ShuffleOp,
|
||||
[(set (v2i64 VECREG:$rT), (SPUvecinsmask xform_addr:$src))]>;
|
||||
[(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
|
||||
|
||||
def CDXf64 : RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
|
||||
"cdx\t$rT, $src", ShuffleOp,
|
||||
[(set (v2f64 VECREG:$rT), (SPUvecinsmask xform_addr:$src))]>;
|
||||
[(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Constant formation:
|
||||
@ -1647,14 +1647,23 @@ multiclass SelectBits
|
||||
|
||||
defm SELB : SelectBits;
|
||||
|
||||
class SPUselbPat<ValueType vectype, SPUInstr inst>:
|
||||
class SPUselbPatVec<ValueType vectype, SPUInstr inst>:
|
||||
Pat<(SPUselb (vectype VECREG:$rA), (vectype VECREG:$rB), (vectype VECREG:$rC)),
|
||||
(inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
|
||||
|
||||
def : SPUselbPat<v16i8, SELBv16i8>;
|
||||
def : SPUselbPat<v8i16, SELBv8i16>;
|
||||
def : SPUselbPat<v4i32, SELBv4i32>;
|
||||
def : SPUselbPat<v2i64, SELBv2i64>;
|
||||
def : SPUselbPatVec<v16i8, SELBv16i8>;
|
||||
def : SPUselbPatVec<v8i16, SELBv8i16>;
|
||||
def : SPUselbPatVec<v4i32, SELBv4i32>;
|
||||
def : SPUselbPatVec<v2i64, SELBv2i64>;
|
||||
|
||||
class SPUselbPatReg<RegisterClass rclass, SPUInstr inst>:
|
||||
Pat<(SPUselb rclass:$rA, rclass:$rB, rclass:$rC),
|
||||
(inst rclass:$rA, rclass:$rB, rclass:$rC)>;
|
||||
|
||||
def : SPUselbPatReg<R8C, SELBr8>;
|
||||
def : SPUselbPatReg<R16C, SELBr16>;
|
||||
def : SPUselbPatReg<R32C, SELBr32>;
|
||||
def : SPUselbPatReg<R64C, SELBr64>;
|
||||
|
||||
class SelectConditional<RegisterClass rclass, SPUInstr inst>:
|
||||
Pat<(select rclass:$rCond, rclass:$rTrue, rclass:$rFalse),
|
||||
@ -1811,8 +1820,8 @@ def : SHUFBVecPat1<v4f32, v16i8, SHUFBv16i8>;
|
||||
def : SHUFBVecPat1<v2f64, v16i8, SHUFBv16i8>;
|
||||
|
||||
// Shuffle mask is a v4i32 vector:
|
||||
def : SHUFBVecPat1<v16i8, v4i32, SHUFBv4i32>;
|
||||
def : SHUFBVecPat1<v8i16, v4i32, SHUFBv4i32>;
|
||||
def : SHUFBVecPat1<v4i32, v4i32, SHUFBv4i32>;
|
||||
def : SHUFBVecPat1<v2i64, v4i32, SHUFBv4i32>;
|
||||
def : SHUFBVecPat1<v4f32, v4i32, SHUFBv4i32>;
|
||||
def : SHUFBVecPat1<v2f64, v4i32, SHUFBv4i32>;
|
||||
@ -1939,7 +1948,9 @@ multiclass ShiftLeftQuadByBits
|
||||
def v16i8: SHLQBIVecInst<v16i8>;
|
||||
def v8i16: SHLQBIVecInst<v8i16>;
|
||||
def v4i32: SHLQBIVecInst<v4i32>;
|
||||
def v4f32: SHLQBIVecInst<v4f32>;
|
||||
def v2i64: SHLQBIVecInst<v2i64>;
|
||||
def v2f64: SHLQBIVecInst<v2f64>;
|
||||
}
|
||||
|
||||
defm SHLQBI : ShiftLeftQuadByBits;
|
||||
@ -1960,7 +1971,9 @@ multiclass ShiftLeftQuadByBitsImm
|
||||
def v16i8 : SHLQBIIVecInst<v16i8>;
|
||||
def v8i16 : SHLQBIIVecInst<v8i16>;
|
||||
def v4i32 : SHLQBIIVecInst<v4i32>;
|
||||
def v4f32 : SHLQBIIVecInst<v4f32>;
|
||||
def v2i64 : SHLQBIIVecInst<v2i64>;
|
||||
def v2f64 : SHLQBIIVecInst<v2f64>;
|
||||
}
|
||||
|
||||
defm SHLQBII : ShiftLeftQuadByBitsImm;
|
||||
@ -1982,7 +1995,9 @@ multiclass ShiftLeftQuadBytes
|
||||
def v16i8: SHLQBYVecInst<v16i8>;
|
||||
def v8i16: SHLQBYVecInst<v8i16>;
|
||||
def v4i32: SHLQBYVecInst<v4i32>;
|
||||
def v4f32: SHLQBYVecInst<v4f32>;
|
||||
def v2i64: SHLQBYVecInst<v2i64>;
|
||||
def v2f64: SHLQBYVecInst<v2f64>;
|
||||
def r128: SHLQBYInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
|
||||
[(set GPRC:$rT, (SPUshlquad_l_bytes GPRC:$rA, R32C:$rB))]>;
|
||||
}
|
||||
@ -2003,7 +2018,9 @@ multiclass ShiftLeftQuadBytesImm
|
||||
def v16i8: SHLQBYIVecInst<v16i8>;
|
||||
def v8i16: SHLQBYIVecInst<v8i16>;
|
||||
def v4i32: SHLQBYIVecInst<v4i32>;
|
||||
def v4f32: SHLQBYIVecInst<v4f32>;
|
||||
def v2i64: SHLQBYIVecInst<v2i64>;
|
||||
def v2f64: SHLQBYIVecInst<v2f64>;
|
||||
def r128: SHLQBYIInst<(outs GPRC:$rT), (ins GPRC:$rA, u7imm_i32:$val),
|
||||
[(set GPRC:$rT,
|
||||
(SPUshlquad_l_bytes GPRC:$rA, (i32 uimm7:$val)))]>;
|
||||
|
@ -16,7 +16,7 @@ def SDT_SPUCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
|
||||
|
||||
// SPU_GenControl: Type profile for generating control words for insertions
|
||||
def SPU_GenControl : SDTypeProfile<1, 1, []>;
|
||||
def SPUvecinsmask : SDNode<"SPUISD::INSERT_MASK", SPU_GenControl, []>;
|
||||
def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>;
|
||||
|
||||
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq,
|
||||
[SDNPHasChain, SDNPOutFlag]>;
|
||||
|
@ -1,10 +1,36 @@
|
||||
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
|
||||
; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s
|
||||
; RUN: grep shufb %t1.s | count 27
|
||||
; RUN: grep lqa %t1.s | count 27
|
||||
; RUN: grep lqd %t2.s | count 27
|
||||
; RUN: grep space %t1.s | count 8
|
||||
; RUN: grep byte %t1.s | count 424
|
||||
; RUN: grep shufb %t1.s | count 39
|
||||
; RUN: grep ilhu %t1.s | count 31
|
||||
; RUN: grep iohl %t1.s | count 31
|
||||
; RUN: grep lqa %t1.s | count 10
|
||||
; RUN: grep shlqbyi %t1.s | count 8
|
||||
; RUN: grep selb %t1.s | count 4
|
||||
; RUN: grep cgti %t1.s | count 4
|
||||
; RUN: grep 515 %t1.s | count 5
|
||||
; RUN: grep 1029 %t1.s | count 2
|
||||
; RUN: grep 1543 %t1.s | count 2
|
||||
; RUN: grep 2057 %t1.s | count 2
|
||||
; RUN: grep 2571 %t1.s | count 2
|
||||
; RUN: grep 3085 %t1.s | count 2
|
||||
; RUN: grep 3599 %t1.s | count 2
|
||||
; RUN: grep 32768 %t1.s | count 1
|
||||
; RUN: grep 32769 %t1.s | count 1
|
||||
; RUN: grep 32770 %t1.s | count 1
|
||||
; RUN: grep 32771 %t1.s | count 1
|
||||
; RUN: grep 32772 %t1.s | count 1
|
||||
; RUN: grep 32773 %t1.s | count 1
|
||||
; RUN: grep 32774 %t1.s | count 1
|
||||
; RUN: grep 32775 %t1.s | count 1
|
||||
; RUN: grep 32776 %t1.s | count 1
|
||||
; RUN: grep 32777 %t1.s | count 1
|
||||
; RUN: grep 32778 %t1.s | count 1
|
||||
; RUN: grep 32779 %t1.s | count 1
|
||||
; RUN: grep 32780 %t1.s | count 1
|
||||
; RUN: grep 32781 %t1.s | count 1
|
||||
; RUN: grep 32782 %t1.s | count 1
|
||||
; RUN: grep 32783 %t1.s | count 1
|
||||
; RUN: grep 32896 %t1.s | count 24
|
||||
|
||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
||||
target triple = "spu"
|
||||
|
||||
@ -175,3 +201,79 @@ entry:
|
||||
%a = extractelement <16 x i8> %v, i32 15
|
||||
ret i8 %a
|
||||
}
|
||||
|
||||
;;--------------------------------------------------------------------------
|
||||
;; extract element, variable index:
|
||||
;;--------------------------------------------------------------------------
|
||||
|
||||
define i8 @extract_varadic_i8(i32 %i) nounwind readnone {
|
||||
entry:
|
||||
%0 = extractelement <16 x i8> < i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, i32 %i
|
||||
ret i8 %0
|
||||
}
|
||||
|
||||
define i8 @extract_varadic_i8_1(<16 x i8> %v, i32 %i) nounwind readnone {
|
||||
entry:
|
||||
%0 = extractelement <16 x i8> %v, i32 %i
|
||||
ret i8 %0
|
||||
}
|
||||
|
||||
define i16 @extract_varadic_i16(i32 %i) nounwind readnone {
|
||||
entry:
|
||||
%0 = extractelement <8 x i16> < i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i32 %i
|
||||
ret i16 %0
|
||||
}
|
||||
|
||||
define i16 @extract_varadic_i16_1(<8 x i16> %v, i32 %i) nounwind readnone {
|
||||
entry:
|
||||
%0 = extractelement <8 x i16> %v, i32 %i
|
||||
ret i16 %0
|
||||
}
|
||||
|
||||
define i32 @extract_varadic_i32(i32 %i) nounwind readnone {
|
||||
entry:
|
||||
%0 = extractelement <4 x i32> < i32 0, i32 1, i32 2, i32 3>, i32 %i
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
define i32 @extract_varadic_i32_1(<4 x i32> %v, i32 %i) nounwind readnone {
|
||||
entry:
|
||||
%0 = extractelement <4 x i32> %v, i32 %i
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
define float @extract_varadic_f32(i32 %i) nounwind readnone {
|
||||
entry:
|
||||
%0 = extractelement <4 x float> < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >, i32 %i
|
||||
ret float %0
|
||||
}
|
||||
|
||||
define float @extract_varadic_f32_1(<4 x float> %v, i32 %i) nounwind readnone {
|
||||
entry:
|
||||
%0 = extractelement <4 x float> %v, i32 %i
|
||||
ret float %0
|
||||
}
|
||||
|
||||
define i64 @extract_varadic_i64(i32 %i) nounwind readnone {
|
||||
entry:
|
||||
%0 = extractelement <2 x i64> < i64 0, i64 1>, i32 %i
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @extract_varadic_i64_1(<2 x i64> %v, i32 %i) nounwind readnone {
|
||||
entry:
|
||||
%0 = extractelement <2 x i64> %v, i32 %i
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define double @extract_varadic_f64(i32 %i) nounwind readnone {
|
||||
entry:
|
||||
%0 = extractelement <2 x double> < double 1.000000e+00, double 2.000000e+00>, i32 %i
|
||||
ret double %0
|
||||
}
|
||||
|
||||
define double @extract_varadic_f64_1(<2 x double> %v, i32 %i) nounwind readnone {
|
||||
entry:
|
||||
%0 = extractelement <2 x double> %v, i32 %i
|
||||
ret double %0
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user