mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-20 10:42:36 +00:00
128-bit sign extension and vector shift cleanups, contributed by Ken Werner
(IBM). llvm-svn: 79949
This commit is contained in:
parent
01d74b3fed
commit
ee51c50e21
@ -322,6 +322,9 @@ namespace {
|
||||
/// target-specific node if it hasn't already been changed.
|
||||
SDNode *Select(SDValue Op);
|
||||
|
||||
//! Emit the instruction sequence for i128 sext
|
||||
SDNode *SelectSEXTi128(SDValue &Op, EVT OpVT);
|
||||
|
||||
//! Emit the instruction sequence for i64 shl
|
||||
SDNode *SelectSHLi64(SDValue &Op, EVT OpVT);
|
||||
|
||||
@ -833,6 +836,10 @@ SPUDAGToDAGISel::Select(SDValue Op) {
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (Opc == ISD::SIGN_EXTEND) {
|
||||
if (OpVT == MVT::i128) {
|
||||
return SelectSEXTi128(Op, OpVT);
|
||||
}
|
||||
} else if (Opc == ISD::SHL) {
|
||||
if (OpVT == MVT::i64) {
|
||||
return SelectSHLi64(Op, OpVT);
|
||||
@ -956,6 +963,58 @@ SPUDAGToDAGISel::Select(SDValue Op) {
|
||||
return SelectCode(Op);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Emit the instruction sequence for i64 -> i128 sign extend. The basic
|
||||
* algorithm is to duplicate the sign bit using rotmai to generate at
|
||||
* least one byte full of sign bits. Then propagate the "sign-byte" into
|
||||
* the leftmost words and the i64 into the rightmost words using shufb.
|
||||
*
|
||||
* @param Op The sext operand
|
||||
* @param OpVT The type to extend to
|
||||
* @return The SDNode with the entire instruction sequence
|
||||
*/
|
||||
SDNode *
|
||||
SPUDAGToDAGISel::SelectSEXTi128(SDValue &Op, EVT OpVT)
|
||||
{
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
|
||||
// Type to extend from
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
EVT Op0VT = Op0.getValueType();
|
||||
|
||||
assert((OpVT == MVT::i128 && Op0VT == MVT::i64) &&
|
||||
"LowerSIGN_EXTEND: input and/or output operand have wrong size");
|
||||
|
||||
// Create shuffle mask
|
||||
unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
|
||||
unsigned mask2 = 0x01020304; // byte 8 - 11
|
||||
unsigned mask3 = 0x05060708; // byte 12 - 15
|
||||
SDValue shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
|
||||
CurDAG->getConstant(mask1, MVT::i32),
|
||||
CurDAG->getConstant(mask1, MVT::i32),
|
||||
CurDAG->getConstant(mask2, MVT::i32),
|
||||
CurDAG->getConstant(mask3, MVT::i32));
|
||||
SDNode *shufMaskLoad = emitBuildVector(shufMask);
|
||||
|
||||
// Word wise arithmetic right shift to generate at least one byte
|
||||
// that contains sign bits.
|
||||
SDNode *PromoteScalar = SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
|
||||
MVT::v2i64, Op0, Op0));
|
||||
SDNode *sraVal = SelectCode(CurDAG->getNode(ISD::SRA, dl, MVT::v2i64,
|
||||
SDValue(PromoteScalar, 0),
|
||||
CurDAG->getConstant(31, MVT::i32)));
|
||||
|
||||
// Shuffle bytes - Copy the sign bits into the upper 64 bits
|
||||
// and the input value into the lower 64 bits.
|
||||
SDNode *extShuffle = SelectCode(CurDAG->getNode(SPUISD::SHUFB, dl,
|
||||
MVT::v2i64, Op0,
|
||||
SDValue(sraVal, 0),
|
||||
SDValue(shufMaskLoad, 0)));
|
||||
|
||||
return SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, MVT::i128,
|
||||
SDValue(extShuffle, 0)));
|
||||
}
|
||||
|
||||
/*!
|
||||
* Emit the instruction sequence for i64 left shifts. The basic algorithm
|
||||
* is to fill the bottom two word slots with zeros so that zeros are shifted
|
||||
|
@ -350,6 +350,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
||||
// Custom lower i128 -> i64 truncates
|
||||
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
|
||||
|
||||
// Custom lower i64 -> i128 sign extend
|
||||
setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
|
||||
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
|
||||
@ -511,9 +514,6 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
|
||||
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
|
||||
node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
|
||||
node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
|
||||
node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
|
||||
node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
|
||||
node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
|
||||
node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
|
||||
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
|
||||
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
|
||||
@ -2610,6 +2610,45 @@ static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
|
||||
return SDValue(); // Leave the truncate unmolested
|
||||
}
|
||||
|
||||
//! Custom lower ISD::SIGN_EXTEND
|
||||
static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
|
||||
{
|
||||
// Type to extend to
|
||||
EVT VT = Op.getValueType();
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
|
||||
// Type to extend from
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
EVT Op0VT = Op0.getValueType();
|
||||
|
||||
assert((VT == MVT::i128 && Op0VT == MVT::i64) &&
|
||||
"LowerSIGN_EXTEND: input and/or output operand have wrong size");
|
||||
|
||||
// Create shuffle mask
|
||||
unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
|
||||
unsigned mask2 = 0x01020304; // byte 8 - 11
|
||||
unsigned mask3 = 0x05060708; // byte 12 - 15
|
||||
SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
|
||||
DAG.getConstant(mask1, MVT::i32),
|
||||
DAG.getConstant(mask1, MVT::i32),
|
||||
DAG.getConstant(mask2, MVT::i32),
|
||||
DAG.getConstant(mask3, MVT::i32));
|
||||
|
||||
// Word wise arithmetic right shift to generate a byte that contains sign bits
|
||||
SDValue sraVal = DAG.getNode(ISD::SRA,
|
||||
dl,
|
||||
MVT::v2i64,
|
||||
DAG.getNode(SPUISD::PREFSLOT2VEC, dl, MVT::v2i64, Op0, Op0),
|
||||
DAG.getConstant(31, MVT::i32));
|
||||
|
||||
// shuffle bytes - copies the sign bits into the upper 64 bits
|
||||
// and the input value into the lower 64 bits
|
||||
SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, MVT::v2i64,
|
||||
Op0, sraVal, shufMask);
|
||||
|
||||
return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
|
||||
}
|
||||
|
||||
//! Custom (target-specific) lowering entry point
|
||||
/*!
|
||||
This is where LLVM's DAG selection process calls to do target-specific
|
||||
@ -2702,6 +2741,9 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
||||
|
||||
case ISD::TRUNCATE:
|
||||
return LowerTRUNCATE(Op, DAG);
|
||||
|
||||
case ISD::SIGN_EXTEND:
|
||||
return LowerSIGN_EXTEND(Op, DAG);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
@ -2864,9 +2906,6 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
|
||||
}
|
||||
case SPUISD::SHLQUAD_L_BITS:
|
||||
case SPUISD::SHLQUAD_L_BYTES:
|
||||
case SPUISD::VEC_SHL:
|
||||
case SPUISD::VEC_SRL:
|
||||
case SPUISD::VEC_SRA:
|
||||
case SPUISD::ROTBYTES_LEFT: {
|
||||
SDValue Op1 = N->getOperand(1);
|
||||
|
||||
@ -2994,9 +3033,6 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
|
||||
case SPUISD::VEC2PREFSLOT:
|
||||
case SPUISD::SHLQUAD_L_BITS:
|
||||
case SPUISD::SHLQUAD_L_BYTES:
|
||||
case SPUISD::VEC_SHL:
|
||||
case SPUISD::VEC_SRL:
|
||||
case SPUISD::VEC_SRA:
|
||||
case SPUISD::VEC_ROTL:
|
||||
case SPUISD::VEC_ROTR:
|
||||
case SPUISD::ROTBYTES_LEFT:
|
||||
|
@ -43,9 +43,6 @@ namespace llvm {
|
||||
VEC2PREFSLOT, ///< Extract element 0
|
||||
SHLQUAD_L_BITS, ///< Rotate quad left, by bits
|
||||
SHLQUAD_L_BYTES, ///< Rotate quad left, by bytes
|
||||
VEC_SHL, ///< Vector shift left
|
||||
VEC_SRL, ///< Vector shift right (logical)
|
||||
VEC_SRA, ///< Vector shift right (arithmetic)
|
||||
VEC_ROTL, ///< Vector rotate left
|
||||
VEC_ROTR, ///< Vector rotate right
|
||||
ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
|
||||
|
@ -87,9 +87,9 @@ def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>;
|
||||
def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>;
|
||||
|
||||
// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
|
||||
def SPUvec_shl: SDNode<"SPUISD::VEC_SHL", SPUvecshift_type, []>;
|
||||
def SPUvec_srl: SDNode<"SPUISD::VEC_SRL", SPUvecshift_type, []>;
|
||||
def SPUvec_sra: SDNode<"SPUISD::VEC_SRA", SPUvecshift_type, []>;
|
||||
def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>;
|
||||
def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>;
|
||||
def SPUvec_sra: SDNode<"ISD::SRA", SPUvecshift_type, []>;
|
||||
|
||||
def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>;
|
||||
def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>;
|
||||
|
17
test/CodeGen/CellSPU/sext128.ll
Normal file
17
test/CodeGen/CellSPU/sext128.ll
Normal file
@ -0,0 +1,17 @@
|
||||
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
|
||||
; RUN: grep {long.*269488144} %t1.s | count 2
|
||||
; RUN: grep {long.*16909060} %t1.s | count 1
|
||||
; RUN: grep {long.*84281096} %t1.s | count 1
|
||||
; RUN: grep {rotmai} %t1.s | count 1
|
||||
; RUN: grep {lqa} %t1.s | count 1
|
||||
; RUN: grep {shufb} %t1.s | count 1
|
||||
|
||||
; ModuleID = 'sext128.bc'
|
||||
target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:128:128-v128:128:128-a0:0:128-s0:128:128"
|
||||
target triple = "spu"
|
||||
|
||||
define i128 @sext_i64_i128(i64 %a) {
|
||||
entry:
|
||||
%0 = sext i64 %a to i128
|
||||
ret i128 %0
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user