mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-04 18:06:49 +00:00
[Power9] Exploit move and splat instructions for build_vector improvement
This patch corresponds to the review at https://reviews.llvm.org/D21135 and exploits the following instructions in order to improve some build_vector and extractelement patterns: mtvsrws, lxvwsx, mtvsrdd, mfvsrld. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282246 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e2c1cbe138
commit
a04f9019ef
@ -328,10 +328,12 @@ void PPCInstPrinter::printU7ImmOperand(const MCInst *MI, unsigned OpNo,
|
|||||||
O << (unsigned int)Value;
|
O << (unsigned int)Value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Operands of BUILD_VECTOR are signed and we use this to print operands
|
||||||
|
// of XXSPLTIB which are unsigned. So we simply truncate to 8 bits and
|
||||||
|
// print as unsigned.
|
||||||
void PPCInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
|
void PPCInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
|
||||||
raw_ostream &O) {
|
raw_ostream &O) {
|
||||||
unsigned int Value = MI->getOperand(OpNo).getImm();
|
unsigned char Value = MI->getOperand(OpNo).getImm();
|
||||||
assert(Value <= 255 && "Invalid u8imm argument!");
|
|
||||||
O << (unsigned int)Value;
|
O << (unsigned int)Value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -672,6 +672,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
|||||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
|
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
|
||||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
|
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (Subtarget.isISA3_0() && Subtarget.hasDirectMove())
|
||||||
|
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Legal);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Subtarget.hasQPX()) {
|
if (Subtarget.hasQPX()) {
|
||||||
@ -7079,6 +7082,16 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
|
|||||||
return DAG.getNode(ISD::BITCAST, dl, VT, T);
|
return DAG.getNode(ISD::BITCAST, dl, VT, T);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool isNonConstSplatBV(BuildVectorSDNode *BVN, EVT Type) {
|
||||||
|
if (BVN->getValueType(0) != Type)
|
||||||
|
return false;
|
||||||
|
auto OpZero = BVN->getOperand(0);
|
||||||
|
for (int i = 1, e = BVN->getNumOperands(); i < e; i++)
|
||||||
|
if (BVN->getOperand(i) != OpZero)
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// If this is a case we can't handle, return null and let the default
|
// If this is a case we can't handle, return null and let the default
|
||||||
// expansion code take care of it. If we CAN select this case, and if it
|
// expansion code take care of it. If we CAN select this case, and if it
|
||||||
// selects to a single instruction, return Op. Otherwise, if we can codegen
|
// selects to a single instruction, return Op. Otherwise, if we can codegen
|
||||||
@ -7200,8 +7213,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||||||
bool HasAnyUndefs;
|
bool HasAnyUndefs;
|
||||||
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
|
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
|
||||||
HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
|
HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
|
||||||
SplatBitSize > 32)
|
SplatBitSize > 32) {
|
||||||
|
// We can splat a non-const value on CPU's that implement ISA 3.0
|
||||||
|
// in two ways: LXVWSX (load and splat) and MTVSRWS(move and splat).
|
||||||
|
auto OpZero = BVN->getOperand(0);
|
||||||
|
bool CanLoadAndSplat = OpZero.getOpcode() == ISD::LOAD &&
|
||||||
|
BVN->isOnlyUserOf(OpZero.getNode());
|
||||||
|
if (Subtarget.isISA3_0() &&
|
||||||
|
isNonConstSplatBV(BVN, MVT::v4i32) && !CanLoadAndSplat)
|
||||||
|
return Op;
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
}
|
||||||
|
|
||||||
unsigned SplatBits = APSplatBits.getZExtValue();
|
unsigned SplatBits = APSplatBits.getZExtValue();
|
||||||
unsigned SplatUndef = APSplatUndef.getZExtValue();
|
unsigned SplatUndef = APSplatUndef.getZExtValue();
|
||||||
@ -7219,6 +7241,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||||||
return Op;
|
return Op;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We have XXSPLTIB for constant splats one byte wide
|
||||||
|
if (Subtarget.isISA3_0() && Op.getValueType() == MVT::v16i8)
|
||||||
|
return Op;
|
||||||
|
|
||||||
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
|
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
|
||||||
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
|
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
|
||||||
(32-SplatBitSize));
|
(32-SplatBitSize));
|
||||||
@ -7462,6 +7488,18 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
|
|||||||
if (Subtarget.hasVSX()) {
|
if (Subtarget.hasVSX()) {
|
||||||
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
|
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
|
||||||
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
|
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
|
||||||
|
|
||||||
|
// If the source for the shuffle is a scalar_to_vector that came from a
|
||||||
|
// 32-bit load, it will have used LXVWSX so we don't need to splat again.
|
||||||
|
if (Subtarget.isISA3_0() &&
|
||||||
|
((isLittleEndian && SplatIdx == 3) ||
|
||||||
|
(!isLittleEndian && SplatIdx == 0))) {
|
||||||
|
SDValue Src = V1.getOperand(0);
|
||||||
|
if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
|
||||||
|
Src.getOperand(0).getOpcode() == ISD::LOAD &&
|
||||||
|
Src.getOperand(0).hasOneUse())
|
||||||
|
return V1;
|
||||||
|
}
|
||||||
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
|
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
|
||||||
SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
|
SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
|
||||||
DAG.getConstant(SplatIdx, dl, MVT::i32));
|
DAG.getConstant(SplatIdx, dl, MVT::i32));
|
||||||
|
@ -1059,6 +1059,13 @@ class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
|
|||||||
let Inst{31} = XT{5};
|
let Inst{31} = XT{5};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class XX3Form_Zero<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
|
||||||
|
InstrItinClass itin, list<dag> pattern>
|
||||||
|
: XX3Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
|
||||||
|
let XA = XT;
|
||||||
|
let XB = XT;
|
||||||
|
}
|
||||||
|
|
||||||
class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
|
class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
|
||||||
InstrItinClass itin, list<dag> pattern>
|
InstrItinClass itin, list<dag> pattern>
|
||||||
: I<opcode, OOL, IOL, asmstr, itin> {
|
: I<opcode, OOL, IOL, asmstr, itin> {
|
||||||
|
@ -312,6 +312,7 @@ def immZExt16 : PatLeaf<(imm), [{
|
|||||||
// field. Used by instructions like 'ori'.
|
// field. Used by instructions like 'ori'.
|
||||||
return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
|
return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
|
||||||
}], LO16>;
|
}], LO16>;
|
||||||
|
def immSExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
|
||||||
|
|
||||||
// imm16Shifted* - These match immediates where the low 16-bits are zero. There
|
// imm16Shifted* - These match immediates where the low 16-bits are zero. There
|
||||||
// are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are
|
// are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are
|
||||||
|
@ -767,6 +767,10 @@ let Uses = [RM] in {
|
|||||||
"xxlxor $XT, $XA, $XB", IIC_VecGeneral,
|
"xxlxor $XT, $XA, $XB", IIC_VecGeneral,
|
||||||
[(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>;
|
[(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>;
|
||||||
} // isCommutable
|
} // isCommutable
|
||||||
|
let isCodeGenOnly = 1 in
|
||||||
|
def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins),
|
||||||
|
"xxlxor $XT, $XT, $XT", IIC_VecGeneral,
|
||||||
|
[(set v4i32:$XT, (v4i32 immAllZerosV))]>;
|
||||||
|
|
||||||
// Permutation Instructions
|
// Permutation Instructions
|
||||||
def XXMRGHW : XX3Form<60, 18,
|
def XXMRGHW : XX3Form<60, 18,
|
||||||
@ -1315,8 +1319,7 @@ let Predicates = [HasDirectMove] in {
|
|||||||
|
|
||||||
let Predicates = [IsISA3_0, HasDirectMove] in {
|
let Predicates = [IsISA3_0, HasDirectMove] in {
|
||||||
def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA),
|
def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA),
|
||||||
"mtvsrws $XT, $rA", IIC_VecGeneral,
|
"mtvsrws $XT, $rA", IIC_VecGeneral, []>;
|
||||||
[]>;
|
|
||||||
|
|
||||||
def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
|
def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
|
||||||
"mtvsrdd $XT, $rA, $rB", IIC_VecGeneral,
|
"mtvsrdd $XT, $rA, $rB", IIC_VecGeneral,
|
||||||
@ -1880,6 +1883,10 @@ def AlignValues {
|
|||||||
dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC);
|
dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Materialize a zero-vector of long long
|
||||||
|
def : Pat<(v2i64 immAllZerosV),
|
||||||
|
(v2i64 (XXLXORz))>;
|
||||||
|
|
||||||
// The following VSX instructions were introduced in Power ISA 3.0
|
// The following VSX instructions were introduced in Power ISA 3.0
|
||||||
def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">;
|
def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">;
|
||||||
let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
||||||
@ -2310,4 +2317,40 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||||||
(STXVX $rS, xoaddr:$dst)>;
|
(STXVX $rS, xoaddr:$dst)>;
|
||||||
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
|
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
|
||||||
(STXVX $rS, xoaddr:$dst)>;
|
(STXVX $rS, xoaddr:$dst)>;
|
||||||
|
|
||||||
|
def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
|
||||||
|
(v4i32 (LXVWSX xoaddr:$src))>;
|
||||||
|
def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
|
||||||
|
(v4f32 (LXVWSX xoaddr:$src))>;
|
||||||
|
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
|
||||||
|
(v4i32 (MTVSRWS $A))>;
|
||||||
|
def : Pat<(v16i8 (build_vector immSExt8:$A, immSExt8:$A, immSExt8:$A,
|
||||||
|
immSExt8:$A, immSExt8:$A, immSExt8:$A,
|
||||||
|
immSExt8:$A, immSExt8:$A, immSExt8:$A,
|
||||||
|
immSExt8:$A, immSExt8:$A, immSExt8:$A,
|
||||||
|
immSExt8:$A, immSExt8:$A, immSExt8:$A,
|
||||||
|
immSExt8:$A)),
|
||||||
|
(v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
|
||||||
|
def : Pat<(v16i8 immAllOnesV),
|
||||||
|
(v16i8 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>;
|
||||||
|
def : Pat<(v8i16 immAllOnesV),
|
||||||
|
(v8i16 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>;
|
||||||
|
def : Pat<(v4i32 immAllOnesV),
|
||||||
|
(v4i32 (XXSPLTIB 255))>;
|
||||||
|
def : Pat<(v2i64 immAllOnesV),
|
||||||
|
(v2i64 (XXSPLTIB 255))>;
|
||||||
} // end HasP9Vector, AddedComplexity
|
} // end HasP9Vector, AddedComplexity
|
||||||
|
|
||||||
|
let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
|
||||||
|
def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
|
||||||
|
(v2i64 (MTVSRDD $rB, $rA))>;
|
||||||
|
def : Pat<(i64 (extractelt v2i64:$A, 0)),
|
||||||
|
(i64 (MFVSRLD $A))>;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in {
|
||||||
|
def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
|
||||||
|
(v2i64 (MTVSRDD $rB, $rA))>;
|
||||||
|
def : Pat<(i64 (extractelt v2i64:$A, 1)),
|
||||||
|
(i64 (MFVSRLD $A))>;
|
||||||
|
}
|
||||||
|
167
test/CodeGen/PowerPC/power9-moves-and-splats.ll
Normal file
167
test/CodeGen/PowerPC/power9-moves-and-splats.ll
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
||||||
|
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \
|
||||||
|
; RUN: --check-prefix=CHECK-BE
|
||||||
|
|
||||||
|
@Globi = external global i32, align 4
|
||||||
|
@Globf = external global float, align 4
|
||||||
|
|
||||||
|
define <2 x i64> @test1(i64 %a, i64 %b) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test1
|
||||||
|
; CHECK: mtvsrdd 34, 4, 3
|
||||||
|
; CHECK-BE-LABEL: test1
|
||||||
|
; CHECK-BE: mtvsrdd 34, 3, 4
|
||||||
|
%vecins = insertelement <2 x i64> undef, i64 %a, i32 0
|
||||||
|
%vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1
|
||||||
|
ret <2 x i64> %vecins1
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @test2(<2 x i64> %a) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test2
|
||||||
|
; CHECK: mfvsrld 3, 34
|
||||||
|
%0 = extractelement <2 x i64> %a, i32 0
|
||||||
|
ret i64 %0
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @test3(<2 x i64> %a) {
|
||||||
|
entry:
|
||||||
|
; CHECK-BE-LABEL: test3
|
||||||
|
; CHECK-BE: mfvsrld 3, 34
|
||||||
|
%0 = extractelement <2 x i64> %a, i32 1
|
||||||
|
ret i64 %0
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x i32> @test4(i32* nocapture readonly %in) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test4
|
||||||
|
; CHECK: lxvwsx 34, 0, 3
|
||||||
|
; CHECK-NOT: xxspltw
|
||||||
|
; CHECK-BE-LABEL: test4
|
||||||
|
; CHECK-BE: lxvwsx 34, 0, 3
|
||||||
|
; CHECK-BE-NOT: xxspltw
|
||||||
|
%0 = load i32, i32* %in, align 4
|
||||||
|
%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
|
||||||
|
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||||
|
ret <4 x i32> %splat.splat
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x float> @test5(float* nocapture readonly %in) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test5
|
||||||
|
; CHECK: lxvwsx 34, 0, 3
|
||||||
|
; CHECK-NOT: xxspltw
|
||||||
|
; CHECK-BE-LABEL: test5
|
||||||
|
; CHECK-BE: lxvwsx 34, 0, 3
|
||||||
|
; CHECK-BE-NOT: xxspltw
|
||||||
|
%0 = load float, float* %in, align 4
|
||||||
|
%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
|
||||||
|
%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
|
||||||
|
ret <4 x float> %splat.splat
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x i32> @test6() {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test6
|
||||||
|
; CHECK: addis
|
||||||
|
; CHECK: ld [[TOC:[0-9]+]], .LC0
|
||||||
|
; CHECK: lxvwsx 34, 0, 3
|
||||||
|
; CHECK-NOT: xxspltw
|
||||||
|
; CHECK-BE-LABEL: test6
|
||||||
|
; CHECK-BE: addis
|
||||||
|
; CHECK-BE: ld [[TOC:[0-9]+]], .LC0
|
||||||
|
; CHECK-BE: lxvwsx 34, 0, 3
|
||||||
|
; CHECK-BE-NOT: xxspltw
|
||||||
|
%0 = load i32, i32* @Globi, align 4
|
||||||
|
%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
|
||||||
|
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||||
|
ret <4 x i32> %splat.splat
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x float> @test7() {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test7
|
||||||
|
; CHECK: addis
|
||||||
|
; CHECK: ld [[TOC:[0-9]+]], .LC1
|
||||||
|
; CHECK: lxvwsx 34, 0, 3
|
||||||
|
; CHECK-NOT: xxspltw
|
||||||
|
; CHECK-BE-LABEL: test7
|
||||||
|
; CHECK-BE: addis
|
||||||
|
; CHECK-BE: ld [[TOC:[0-9]+]], .LC1
|
||||||
|
; CHECK-BE: lxvwsx 34, 0, 3
|
||||||
|
; CHECK-BE-NOT: xxspltw
|
||||||
|
%0 = load float, float* @Globf, align 4
|
||||||
|
%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
|
||||||
|
%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
|
||||||
|
ret <4 x float> %splat.splat
|
||||||
|
}
|
||||||
|
|
||||||
|
define <16 x i8> @test8() {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test8
|
||||||
|
; CHECK: xxlxor 34, 34, 34
|
||||||
|
; CHECK-BE-LABEL: test8
|
||||||
|
; CHECK-BE: xxlxor 34, 34, 34
|
||||||
|
ret <16 x i8> zeroinitializer
|
||||||
|
}
|
||||||
|
|
||||||
|
define <16 x i8> @test9() {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test9
|
||||||
|
; CHECK: xxspltib 34, 1
|
||||||
|
; CHECK-BE-LABEL: test9
|
||||||
|
; CHECK-BE: xxspltib 34, 1
|
||||||
|
ret <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
||||||
|
}
|
||||||
|
|
||||||
|
define <16 x i8> @test10() {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test10
|
||||||
|
; CHECK: xxspltib 34, 127
|
||||||
|
; CHECK-BE-LABEL: test10
|
||||||
|
; CHECK-BE: xxspltib 34, 127
|
||||||
|
ret <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>
|
||||||
|
}
|
||||||
|
|
||||||
|
define <16 x i8> @test11() {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test11
|
||||||
|
; CHECK: xxspltib 34, 128
|
||||||
|
; CHECK-BE-LABEL: test11
|
||||||
|
; CHECK-BE: xxspltib 34, 128
|
||||||
|
ret <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
|
||||||
|
}
|
||||||
|
|
||||||
|
define <16 x i8> @test12() {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test12
|
||||||
|
; CHECK: xxspltib 34, 255
|
||||||
|
; CHECK-BE-LABEL: test12
|
||||||
|
; CHECK-BE: xxspltib 34, 255
|
||||||
|
ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
||||||
|
}
|
||||||
|
|
||||||
|
define <16 x i8> @test13() {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test13
|
||||||
|
; CHECK: xxspltib 34, 129
|
||||||
|
; CHECK-BE-LABEL: test13
|
||||||
|
; CHECK-BE: xxspltib 34, 129
|
||||||
|
ret <16 x i8> <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x i32> @test14(<4 x i32> %a, i32* nocapture readonly %b) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test14
|
||||||
|
; CHECK: lwz [[LD:[0-9]+]],
|
||||||
|
; CHECK: mtvsrws 34, [[LD]]
|
||||||
|
; CHECK-BE-LABEL: test14
|
||||||
|
; CHECK-BE: lwz [[LD:[0-9]+]],
|
||||||
|
; CHECK-BE: mtvsrws 34, [[LD]]
|
||||||
|
%0 = load i32, i32* %b, align 4
|
||||||
|
%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
|
||||||
|
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||||
|
%1 = add i32 %0, 5
|
||||||
|
store i32 %1, i32* %b, align 4
|
||||||
|
ret <4 x i32> %splat.splat
|
||||||
|
}
|
@ -17,16 +17,16 @@
|
|||||||
; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-LE-NOVSX
|
; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-LE-NOVSX
|
||||||
|
|
||||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||||
; RUN: -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-P9 \
|
; RUN: -mcpu=pwr9 -ppc-vsr-nums-as-vr < %s | FileCheck %s \
|
||||||
; RUN: --implicit-check-not xxswapd
|
; RUN: -check-prefix=CHECK-P9 --implicit-check-not xxswapd
|
||||||
|
|
||||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||||
; RUN: -mcpu=pwr9 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX \
|
; RUN: -mcpu=pwr9 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX \
|
||||||
; RUN: --implicit-check-not xxswapd
|
; RUN: --implicit-check-not xxswapd
|
||||||
|
|
||||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||||
; RUN: -mcpu=pwr9 -mattr=-power9-vector < %s | FileCheck %s \
|
; RUN: -mcpu=pwr9 -mattr=-power9-vector -mattr=-direct-move < %s | \
|
||||||
; RUN: -check-prefix=CHECK-LE
|
; RUN: FileCheck %s -check-prefix=CHECK-LE
|
||||||
|
|
||||||
@x = common global <1 x i128> zeroinitializer, align 16
|
@x = common global <1 x i128> zeroinitializer, align 16
|
||||||
@y = common global <1 x i128> zeroinitializer, align 16
|
@y = common global <1 x i128> zeroinitializer, align 16
|
||||||
@ -55,8 +55,10 @@ define <1 x i128> @v1i128_increment_by_one(<1 x i128> %a) nounwind {
|
|||||||
; CHECK-LE: blr
|
; CHECK-LE: blr
|
||||||
|
|
||||||
; CHECK-P9-LABEL: @v1i128_increment_by_one
|
; CHECK-P9-LABEL: @v1i128_increment_by_one
|
||||||
; CHECK-P9: lxvx
|
; CHECK-P9-DAG: li [[R1:r[0-9]+]], 1
|
||||||
; CHECK-P9: vadduqm 2, 2, 3
|
; CHECK-P9-DAG: li [[R2:r[0-9]+]], 0
|
||||||
|
; CHECK-P9: mtvsrdd [[V1:v[0-9]+]], [[R2]], [[R1]]
|
||||||
|
; CHECK-P9: vadduqm v2, v2, [[V1]]
|
||||||
; CHECK-P9: blr
|
; CHECK-P9: blr
|
||||||
|
|
||||||
; CHECK-BE-LABEL: @v1i128_increment_by_one
|
; CHECK-BE-LABEL: @v1i128_increment_by_one
|
||||||
@ -232,8 +234,8 @@ define <1 x i128> @call_v1i128_increment_by_val() nounwind {
|
|||||||
; CHECK-LE: blr
|
; CHECK-LE: blr
|
||||||
|
|
||||||
; CHECK-P9-LABEL: @call_v1i128_increment_by_val
|
; CHECK-P9-LABEL: @call_v1i128_increment_by_val
|
||||||
; CHECK-P9-DAG: lxvx 34
|
; CHECK-P9-DAG: lxvx v2
|
||||||
; CHECK-P9-DAG: lxvx 35
|
; CHECK-P9-DAG: lxvx v3
|
||||||
; CHECK-P9: bl v1i128_increment_by_val
|
; CHECK-P9: bl v1i128_increment_by_val
|
||||||
; CHECK-P9: blr
|
; CHECK-P9: blr
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user