From 536c57b8dbb18e8042cd469065f1ee282ea2624c Mon Sep 17 00:00:00 2001 From: Tony Jiang Date: Mon, 12 Jun 2017 18:24:36 +0000 Subject: [PATCH] [PowerPC] Match vec_revb builtins to P9 instructions. Power9 has instructions that will reverse the bytes within an element for all sizes (half-word, word, double-word and quad-word). These can be used for the vec_revb builtins in altivec.h. However, we implement these to match vector shuffle nodes as that will cover both the builtins and vector shuffles that occur in the SDAG through other means. Differential Revision: https://reviews.llvm.org/D33690 llvm-svn: 305214 --- lib/Target/PowerPC/PPCISelLowering.cpp | 76 +++++++++++++++++++++++--- lib/Target/PowerPC/PPCISelLowering.h | 21 +++++++ lib/Target/PowerPC/PPCInstrInfo.td | 5 ++ lib/Target/PowerPC/PPCInstrVSX.td | 10 ++++ test/CodeGen/PowerPC/vec_revb.ll | 54 ++++++++++++++++++ 5 files changed, 159 insertions(+), 7 deletions(-) create mode 100644 test/CodeGen/PowerPC/vec_revb.ll diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 8880e9d88fb..7f9ea78c4b1 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1128,6 +1128,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; case PPCISD::XXINSERT: return "PPCISD::XXINSERT"; + case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE"; case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; case PPCISD::VECSHL: return "PPCISD::VECSHL"; case PPCISD::CMPB: return "PPCISD::CMPB"; @@ -1610,22 +1611,34 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { return true; } -// Check that the mask is shuffling N byte elements. -static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width) { +/// Check that the mask is shuffling N byte elements. Within each N byte +/// element of the mask, the indices could be either in increasing or +/// decreasing order as long as they are consecutive. +/// \param[in] N: the shuffle vector SD Node to analyze +/// \param[in] Width: the element width in bytes, could be 2/4/8/16 (HalfWord/ +/// Word/DoubleWord/QuadWord). +/// \param[in] StepLen: the delta indices number among the N byte element, if +/// the mask is in increasing/decreasing order then it is 1/-1. +/// \return true iff the mask is shuffling N byte elements. +static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width, + int StepLen) { assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) && "Unexpected element width."); + assert((StepLen == 1 || StepLen == -1) && "Unexpected element width."); unsigned NumOfElem = 16 / Width; unsigned MaskVal[16]; // Width is never greater than 16 for (unsigned i = 0; i < NumOfElem; ++i) { MaskVal[0] = N->getMaskElt(i * Width); - if (MaskVal[0] % Width) { + if ((StepLen == 1) && (MaskVal[0] % Width)) { + return false; + } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) { return false; } for (unsigned int j = 1; j < Width; ++j) { MaskVal[j] = N->getMaskElt(i * Width + j); - if (MaskVal[j] != MaskVal[j-1] + 1) { + if (MaskVal[j] != MaskVal[j-1] + StepLen) { return false; } } @@ -1636,7 +1649,7 @@ static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width) { bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE) { - if (!isNByteElemShuffleMask(N, 4)) + if (!isNByteElemShuffleMask(N, 4, 1)) return false; // Now we look at mask elements 0,4,8,12 @@ -1713,7 +1726,7 @@ bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE) { assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); // Ensure each byte index of the word is consecutive. - if (!isNByteElemShuffleMask(N, 4)) + if (!isNByteElemShuffleMask(N, 4, 1)) return false; // Now we look at mask elements 0,4,8,12, which are the beginning of words. @@ -1771,6 +1784,35 @@ bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, } } +bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) { + assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); + + if (!isNByteElemShuffleMask(N, Width, -1)) + return false; + + for (int i = 0; i < 16; i += Width) + if (N->getMaskElt(i) != i + Width - 1) + return false; + + return true; +} + +bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 2); +} + +bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 4); +} + +bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 8); +} + +bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 16); +} + /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap /// if the inputs to the instruction should be swapped and set \p DM to the /// value for the immediate. @@ -1784,7 +1826,7 @@ bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); // Ensure each byte index of the double word is consecutive. - if (!isNByteElemShuffleMask(N, 8)) + if (!isNByteElemShuffleMask(N, 8, 1)) return false; unsigned M0 = N->getMaskElt(0) / 8; @@ -7859,6 +7901,26 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI); } + if (Subtarget.hasP9Vector()) { + if (PPC::isXXBRHShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); + SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord); + } else if (PPC::isXXBRWShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord); + } else if (PPC::isXXBRDShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); + SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord); + } else if (PPC::isXXBRQShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1); + SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord); + } + } + if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 678d92898a0..a5108727bb4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -86,6 +86,10 @@ namespace llvm { /// XXINSERT, + /// XXREVERSE - The PPC VSX reverse instruction + /// + XXREVERSE, + /// VECSHL - The PPC VSX shift left instruction /// VECSHL, @@ -458,6 +462,23 @@ namespace llvm { /// for a XXSLDWI instruction. bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE); + + /// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRH instruction. + bool isXXBRHShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRW instruction. + bool isXXBRWShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRD instruction. + bool isXXBRDShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRQ instruction. + bool isXXBRQShuffleMask(ShuffleVectorSDNode *N); + /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable /// for a XXPERMDI instruction. bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 37506239c58..47d59c25392 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -53,6 +53,10 @@ def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> ]>; +def SDT_PPCVecReverse: SDTypeProfile<1, 1, [ SDTCisVec<0>, + SDTCisVec<1> +]>; + def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> ]>; @@ -174,6 +178,7 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; def PPCxxinsert : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>; +def PPCxxreverse : SDNode<"PPCISD::XXREVERSE", SDT_PPCVecReverse, []>; def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>; def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index e214d26c063..9cfc897cdb3 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -2340,6 +2340,16 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc, []>; def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>; + // Vector Reverse + def : Pat<(v8i16 (PPCxxreverse v8i16 :$A)), + (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; + def : Pat<(v4i32 (PPCxxreverse v4i32 :$A)), + (v4i32 (XXBRW $A))>; + def : Pat<(v2i64 (PPCxxreverse v2i64 :$A)), + (v2i64 (XXBRD $A))>; + def : Pat<(v1i128 (PPCxxreverse v1i128 :$A)), + (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; + // Vector Permute def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc, IIC_VecPerm, []>; diff --git a/test/CodeGen/PowerPC/vec_revb.ll b/test/CodeGen/PowerPC/vec_revb.ll new file mode 100644 index 00000000000..c09164bae13 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_revb.ll @@ -0,0 +1,54 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s + +define <8 x i16> @testXXBRH(<8 x i16> %a) { +; CHECK-LABEL: testXXBRH: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xxbrh 34, 34 +; CHECK-NEXT: blr + +entry: + %0 = bitcast <8 x i16> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> + %2 = bitcast <16 x i8> %1 to <8 x i16> + ret <8 x i16> %2 +} + +define <4 x i32> @testXXBRW(<4 x i32> %a) { +; CHECK-LABEL: testXXBRW: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xxbrw 34, 34 +; CHECK-NEXT: blr + +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +} + +define <2 x double> @testXXBRD(<2 x double> %a) { +; CHECK-LABEL: testXXBRD: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xxbrd 34, 34 +; CHECK-NEXT: blr + +entry: + %0 = bitcast <2 x double> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> + %2 = bitcast <16 x i8> %1 to <2 x double> + ret <2 x double> %2 +} + +define <1 x i128> @testXXBRQ(<1 x i128> %a) { +; CHECK-LABEL: testXXBRQ: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xxbrq 34, 34 +; CHECK-NEXT: blr + +entry: + %0 = bitcast <1 x i128> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> + %2 = bitcast <16 x i8> %1 to <1 x i128> + ret <1 x i128> %2 +}