mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-04 01:11:44 +00:00
Have SPU handle halfvec stores aligned by 8 bytes.
llvm-svn: 110576
This commit is contained in:
parent
72463de287
commit
e2c0e66ff1
@ -470,6 +470,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
|||||||
|
|
||||||
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
|
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
|
||||||
|
|
||||||
|
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
|
||||||
|
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
|
||||||
|
|
||||||
setShiftAmountType(MVT::i32);
|
setShiftAmountType(MVT::i32);
|
||||||
setBooleanContents(ZeroOrNegativeOneBooleanContent);
|
setBooleanContents(ZeroOrNegativeOneBooleanContent);
|
||||||
|
|
||||||
@ -518,6 +521,8 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
|
|||||||
node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
|
node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
|
||||||
node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
|
node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
|
||||||
node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
|
node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
|
||||||
|
node_names[(unsigned) SPUISD::HALF2VEC] = "SPUISD::HALF2VEC";
|
||||||
|
node_names[(unsigned) SPUISD::VEC2HALF] = "SPUISD::VEC2HALF";
|
||||||
}
|
}
|
||||||
|
|
||||||
std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
|
std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
|
||||||
@ -738,12 +743,14 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
||||||
DebugLoc dl = Op.getDebugLoc();
|
DebugLoc dl = Op.getDebugLoc();
|
||||||
unsigned alignment = SN->getAlignment();
|
unsigned alignment = SN->getAlignment();
|
||||||
|
const bool isVec = VT.isVector();
|
||||||
|
EVT eltTy = isVec ? VT.getVectorElementType(): VT;
|
||||||
|
|
||||||
switch (SN->getAddressingMode()) {
|
switch (SN->getAddressingMode()) {
|
||||||
case ISD::UNINDEXED: {
|
case ISD::UNINDEXED: {
|
||||||
// The vector type we really want to load from the 16-byte chunk.
|
// The vector type we really want to load from the 16-byte chunk.
|
||||||
EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
|
EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
|
||||||
VT, (128 / VT.getSizeInBits()));
|
eltTy, (128 / eltTy.getSizeInBits()));
|
||||||
|
|
||||||
SDValue alignLoadVec;
|
SDValue alignLoadVec;
|
||||||
SDValue basePtr = SN->getBasePtr();
|
SDValue basePtr = SN->getBasePtr();
|
||||||
@ -752,7 +759,6 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
|
|
||||||
if (alignment == 16) {
|
if (alignment == 16) {
|
||||||
ConstantSDNode *CN;
|
ConstantSDNode *CN;
|
||||||
|
|
||||||
// Special cases for a known aligned load to simplify the base pointer
|
// Special cases for a known aligned load to simplify the base pointer
|
||||||
// and insertion byte:
|
// and insertion byte:
|
||||||
if (basePtr.getOpcode() == ISD::ADD
|
if (basePtr.getOpcode() == ISD::ADD
|
||||||
@ -776,6 +782,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
|
insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
|
||||||
basePtr,
|
basePtr,
|
||||||
DAG.getConstant(0, PtrVT));
|
DAG.getConstant(0, PtrVT));
|
||||||
|
basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
|
||||||
|
basePtr,
|
||||||
|
DAG.getConstant(0, PtrVT));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Unaligned load: must be more pessimistic about addressing modes:
|
// Unaligned load: must be more pessimistic about addressing modes:
|
||||||
@ -812,8 +821,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
DAG.getConstant(0, PtrVT));
|
DAG.getConstant(0, PtrVT));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Re-emit as a v16i8 vector load
|
// Load the memory to which to store.
|
||||||
alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
|
alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr,
|
||||||
SN->getSrcValue(), SN->getSrcValueOffset(),
|
SN->getSrcValue(), SN->getSrcValueOffset(),
|
||||||
SN->isVolatile(), SN->isNonTemporal(), 16);
|
SN->isVolatile(), SN->isNonTemporal(), 16);
|
||||||
|
|
||||||
@ -844,11 +853,19 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
SDValue insertEltOp =
|
SDValue insertEltOp;
|
||||||
DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
|
SDValue vectorizeOp;
|
||||||
SDValue vectorizeOp =
|
if (isVec)
|
||||||
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
|
{
|
||||||
|
// FIXME: this works only if the vector is 64bit!
|
||||||
|
insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v2i64, insertEltOffs);
|
||||||
|
vectorizeOp = DAG.getNode(SPUISD::HALF2VEC, dl, vecVT, theValue);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
|
||||||
|
vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
|
||||||
|
}
|
||||||
result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
|
result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
|
||||||
vectorizeOp, alignLoadVec,
|
vectorizeOp, alignLoadVec,
|
||||||
DAG.getNode(ISD::BIT_CONVERT, dl,
|
DAG.getNode(ISD::BIT_CONVERT, dl,
|
||||||
|
@ -54,6 +54,8 @@ namespace llvm {
|
|||||||
ADD64_MARKER, ///< i64 addition marker
|
ADD64_MARKER, ///< i64 addition marker
|
||||||
SUB64_MARKER, ///< i64 subtraction marker
|
SUB64_MARKER, ///< i64 subtraction marker
|
||||||
MUL64_MARKER, ///< i64 multiply marker
|
MUL64_MARKER, ///< i64 multiply marker
|
||||||
|
HALF2VEC, ///< Promote 64 bit vector to 128 bits
|
||||||
|
VEC2HALF, ///< Extract first 64 bits from 128 bit vector
|
||||||
LAST_SPUISD ///< Last user-defined instruction
|
LAST_SPUISD ///< Last user-defined instruction
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -1468,6 +1468,9 @@ class ORCvtGPRCVec:
|
|||||||
class ORCvtVecGPRC:
|
class ORCvtVecGPRC:
|
||||||
ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
|
ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
|
||||||
|
|
||||||
|
class ORCvtVecVec:
|
||||||
|
ORCvtForm<(outs VECREG:$rT), (ins VECREG:$rA)>;
|
||||||
|
|
||||||
multiclass BitwiseOr
|
multiclass BitwiseOr
|
||||||
{
|
{
|
||||||
def v16i8: ORVecInst<v16i8>;
|
def v16i8: ORVecInst<v16i8>;
|
||||||
@ -1514,6 +1517,13 @@ multiclass BitwiseOr
|
|||||||
def f32_v4f32: ORExtractElt<R32FP>;
|
def f32_v4f32: ORExtractElt<R32FP>;
|
||||||
def f64_v2f64: ORExtractElt<R64FP>;
|
def f64_v2f64: ORExtractElt<R64FP>;
|
||||||
|
|
||||||
|
// half <-> full vector mappings
|
||||||
|
def v2i32_v4i32: ORCvtVecVec;
|
||||||
|
def v4i32_v2i32: ORCvtVecVec;
|
||||||
|
def v2f32_v4f32: ORCvtVecVec;
|
||||||
|
def v4f32_v2f32: ORCvtVecVec;
|
||||||
|
|
||||||
|
|
||||||
// Conversion from vector to GPRC
|
// Conversion from vector to GPRC
|
||||||
def i128_vec: ORCvtVecGPRC;
|
def i128_vec: ORCvtVecGPRC;
|
||||||
|
|
||||||
@ -1623,6 +1633,18 @@ def : Pat<(SPUvec2prefslot (v2f32 VECREG:$rA)),
|
|||||||
def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)),
|
def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)),
|
||||||
(ORf64_v2f64 VECREG:$rA)>;
|
(ORf64_v2f64 VECREG:$rA)>;
|
||||||
|
|
||||||
|
// Conversions between 64 bit and 128 bit vectors.
|
||||||
|
|
||||||
|
def : Pat<(v4i32 (SPUhalf2vec (v2i32 VECREG:$rA))),
|
||||||
|
(ORv4i32_v2i32 (v2i32 VECREG:$rA))>;
|
||||||
|
def : Pat<(v4f32 (SPUhalf2vec (v2f32 VECREG:$rA))),
|
||||||
|
(ORv4f32_v2f32 (v2f32 VECREG:$rA))>;
|
||||||
|
|
||||||
|
def : Pat<(v2i32 (SPUvec2half (v4i32 VECREG:$rA))),
|
||||||
|
(ORv2i32_v4i32 VECREG:$rA)>;
|
||||||
|
def : Pat<(v2f32 (SPUvec2half (v4f32 VECREG:$rA))),
|
||||||
|
(ORv2f32_v4f32 VECREG:$rA)>;
|
||||||
|
|
||||||
// Load Register: This is an assembler alias for a bitwise OR of a register
|
// Load Register: This is an assembler alias for a bitwise OR of a register
|
||||||
// against itself. It's here because it brings some clarity to assembly
|
// against itself. It's here because it brings some clarity to assembly
|
||||||
// language output.
|
// language output.
|
||||||
|
@ -117,6 +117,12 @@ def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>;
|
|||||||
def SPU_vec_demote : SDTypeProfile<1, 1, []>;
|
def SPU_vec_demote : SDTypeProfile<1, 1, []>;
|
||||||
def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
|
def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
|
||||||
|
|
||||||
|
def SPU_half_2_vec : SDTypeProfile<1, 1, []>;
|
||||||
|
def SPUhalf2vec: SDNode<"SPUISD::HALF2VEC", SPU_half_2_vec, []>;
|
||||||
|
|
||||||
|
def SPU_vec_2_half : SDTypeProfile<1, 1, []>;
|
||||||
|
def SPUvec2half: SDNode<"SPUISD::VEC2HALF", SPU_vec_2_half, []>;
|
||||||
|
|
||||||
// Address high and low components, used for [r+r] type addressing
|
// Address high and low components, used for [r+r] type addressing
|
||||||
def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>;
|
def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>;
|
||||||
def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>;
|
def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>;
|
||||||
|
@ -587,6 +587,7 @@ SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
|
|||||||
case SPU::LQDr32: return SPU::LQXr32;
|
case SPU::LQDr32: return SPU::LQXr32;
|
||||||
case SPU::LQDr128: return SPU::LQXr128;
|
case SPU::LQDr128: return SPU::LQXr128;
|
||||||
case SPU::LQDv16i8: return SPU::LQXv16i8;
|
case SPU::LQDv16i8: return SPU::LQXv16i8;
|
||||||
|
case SPU::LQDv4i32: return SPU::LQXv4i32;
|
||||||
case SPU::LQDv4f32: return SPU::LQXv4f32;
|
case SPU::LQDv4f32: return SPU::LQXv4f32;
|
||||||
case SPU::STQDr32: return SPU::STQXr32;
|
case SPU::STQDr32: return SPU::STQXr32;
|
||||||
case SPU::STQDr128: return SPU::STQXr128;
|
case SPU::STQDr128: return SPU::STQXr128;
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
; RUN: llc < %s -march=cellspu -o %t1.s
|
; RUN: llc < %s -march=cellspu -o %t1.s
|
||||||
; RUN: grep lqx %t1.s | count 4
|
; RUN: grep lqx %t1.s | count 3
|
||||||
; RUN: grep il %t1.s | grep -v file | count 7
|
; RUN: grep il %t1.s | grep -v file | count 5
|
||||||
; RUN: grep stqx %t1.s | count 2
|
; RUN: grep stqx %t1.s | count 1
|
||||||
|
|
||||||
define i32 @bigstack() nounwind {
|
define i32 @bigstack() nounwind {
|
||||||
entry:
|
entry:
|
||||||
|
@ -61,3 +61,15 @@ define %vec @test_insert(){
|
|||||||
ret %vec %rv
|
ret %vec %rv
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define void @test_unaligned_store() {
|
||||||
|
;CHECK: cdd $3, 8($3)
|
||||||
|
;CHECK: lqd
|
||||||
|
;CHECK: shufb
|
||||||
|
;CHECK: stqd
|
||||||
|
%data = alloca [4 x float], align 16 ; <[4 x float]*> [#uses=1]
|
||||||
|
%ptr = getelementptr [4 x float]* %data, i32 0, i32 2 ; <float*> [#uses=1]
|
||||||
|
%vptr = bitcast float* %ptr to <2 x float>* ; <[1 x <2 x float>]*> [#uses=1]
|
||||||
|
store <2 x float> undef, <2 x float>* %vptr
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user