mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-04 01:11:44 +00:00
Add lowering of ARM 4-element shuffles to multiple instructions via perfectshuffle-generated table.
llvm-svn: 79624
This commit is contained in:
parent
838740d897
commit
218db4a01c
@ -17,6 +17,7 @@
|
||||
#include "ARMConstantPoolValue.h"
|
||||
#include "ARMISelLowering.h"
|
||||
#include "ARMMachineFunctionInfo.h"
|
||||
#include "ARMPerfectShuffle.h"
|
||||
#include "ARMRegisterInfo.h"
|
||||
#include "ARMSubtarget.h"
|
||||
#include "ARMTargetMachine.h"
|
||||
@ -2488,6 +2489,26 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
||||
bool
|
||||
ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
|
||||
EVT VT) const {
|
||||
if (VT.getVectorNumElements() == 4 &&
|
||||
(VT.is128BitVector() || VT.is64BitVector())) {
|
||||
unsigned PFIndexes[4];
|
||||
for (unsigned i = 0; i != 4; ++i) {
|
||||
if (M[i] < 0)
|
||||
PFIndexes[i] = 8;
|
||||
else
|
||||
PFIndexes[i] = M[i];
|
||||
}
|
||||
|
||||
// Compute the index in the perfect shuffle table.
|
||||
unsigned PFTableIndex =
|
||||
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
|
||||
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
|
||||
unsigned Cost = (PFEntry >> 30);
|
||||
|
||||
if (Cost <= 4)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ReverseVEXT;
|
||||
unsigned Imm;
|
||||
|
||||
@ -2498,10 +2519,84 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
|
||||
isVEXTMask(M, VT, ReverseVEXT, Imm));
|
||||
}
|
||||
|
||||
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
|
||||
/// the specified operations to build the shuffle.
|
||||
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
|
||||
SDValue RHS, SelectionDAG &DAG,
|
||||
DebugLoc dl) {
|
||||
unsigned OpNum = (PFEntry >> 26) & 0x0F;
|
||||
unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
|
||||
unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
|
||||
|
||||
enum {
|
||||
OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
|
||||
OP_VREV,
|
||||
OP_VDUP0,
|
||||
OP_VDUP1,
|
||||
OP_VDUP2,
|
||||
OP_VDUP3,
|
||||
OP_VEXT1,
|
||||
OP_VEXT2,
|
||||
OP_VEXT3,
|
||||
OP_VUZPL, // VUZP, left result
|
||||
OP_VUZPR, // VUZP, right result
|
||||
OP_VZIPL, // VZIP, left result
|
||||
OP_VZIPR, // VZIP, right result
|
||||
OP_VTRNL, // VTRN, left result
|
||||
OP_VTRNR // VTRN, right result
|
||||
};
|
||||
|
||||
if (OpNum == OP_COPY) {
|
||||
if (LHSID == (1*9+2)*9+3) return LHS;
|
||||
assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
|
||||
return RHS;
|
||||
}
|
||||
|
||||
SDValue OpLHS, OpRHS;
|
||||
OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
|
||||
OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
|
||||
EVT VT = OpLHS.getValueType();
|
||||
|
||||
switch (OpNum) {
|
||||
default: llvm_unreachable("Unknown shuffle opcode!");
|
||||
case OP_VREV:
|
||||
return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
|
||||
case OP_VDUP0:
|
||||
case OP_VDUP1:
|
||||
case OP_VDUP2:
|
||||
case OP_VDUP3:
|
||||
return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
|
||||
OpLHS, DAG.getConstant(OpNum-OP_VDUP0+1, MVT::i32));
|
||||
case OP_VEXT1:
|
||||
case OP_VEXT2:
|
||||
case OP_VEXT3:
|
||||
return DAG.getNode(ARMISD::VEXT, dl, VT,
|
||||
OpLHS, OpRHS,
|
||||
DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
|
||||
case OP_VUZPL:
|
||||
case OP_VUZPR:
|
||||
return DAG.getNode(VT.is64BitVector() ? ARMISD::VUZP16 : ARMISD::VUZP32,
|
||||
dl, DAG.getVTList(VT, VT),
|
||||
OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
|
||||
case OP_VZIPL:
|
||||
case OP_VZIPR:
|
||||
return DAG.getNode(VT.is64BitVector() ? ARMISD::VZIP16 : ARMISD::VZIP32,
|
||||
dl, DAG.getVTList(VT, VT),
|
||||
OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
|
||||
case OP_VTRNL:
|
||||
case OP_VTRNR:
|
||||
return DAG.getNode(VT.is64BitVector() ? ARMISD::VTRN16 : ARMISD::VTRN32,
|
||||
dl, DAG.getVTList(VT, VT),
|
||||
OpLHS, OpRHS).getValue(0);
|
||||
}
|
||||
}
|
||||
|
||||
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
|
||||
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
|
||||
SDValue V1 = Op.getOperand(0);
|
||||
SDValue V2 = Op.getOperand(1);
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
EVT VT = Op.getValueType();
|
||||
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
|
||||
SmallVector<int, 8> ShuffleMask;
|
||||
|
||||
// Convert shuffles that are directly supported on NEON to target-specific
|
||||
@ -2514,11 +2609,10 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
|
||||
|
||||
if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
|
||||
int Lane = SVN->getSplatIndex();
|
||||
SDValue Op0 = SVN->getOperand(0);
|
||||
if (Lane == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) {
|
||||
return DAG.getNode(ARMISD::VDUP, dl, VT, Op0.getOperand(0));
|
||||
if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
|
||||
return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
|
||||
}
|
||||
return DAG.getNode(ARMISD::VDUPLANE, dl, VT, SVN->getOperand(0),
|
||||
return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
|
||||
DAG.getConstant(Lane, MVT::i32));
|
||||
}
|
||||
|
||||
@ -2534,11 +2628,32 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
|
||||
}
|
||||
|
||||
if (isVREVMask(ShuffleMask, VT, 64))
|
||||
return DAG.getNode(ARMISD::VREV64, dl, VT, SVN->getOperand(0));
|
||||
return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
|
||||
if (isVREVMask(ShuffleMask, VT, 32))
|
||||
return DAG.getNode(ARMISD::VREV32, dl, VT, SVN->getOperand(0));
|
||||
return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
|
||||
if (isVREVMask(ShuffleMask, VT, 16))
|
||||
return DAG.getNode(ARMISD::VREV16, dl, VT, SVN->getOperand(0));
|
||||
return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
|
||||
|
||||
if (VT.getVectorNumElements() == 4 &&
|
||||
(VT.is128BitVector() || VT.is64BitVector())) {
|
||||
unsigned PFIndexes[4];
|
||||
for (unsigned i = 0; i != 4; ++i) {
|
||||
if (ShuffleMask[i] < 0)
|
||||
PFIndexes[i] = 8;
|
||||
else
|
||||
PFIndexes[i] = ShuffleMask[i];
|
||||
}
|
||||
|
||||
// Compute the index in the perfect shuffle table.
|
||||
unsigned PFTableIndex =
|
||||
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
|
||||
|
||||
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
|
||||
unsigned Cost = (PFEntry >> 30);
|
||||
|
||||
if (Cost <= 4)
|
||||
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
@ -131,7 +131,17 @@ namespace llvm {
|
||||
VEXT, // extract
|
||||
VREV64, // reverse elements within 64-bit doublewords
|
||||
VREV32, // reverse elements within 32-bit words
|
||||
VREV16 // reverse elements within 16-bit halfwords
|
||||
VREV16, // reverse elements within 16-bit halfwords
|
||||
|
||||
VZIP32,
|
||||
VZIP16,
|
||||
VZIP8,
|
||||
VUZP32,
|
||||
VUZP16,
|
||||
VUZP8,
|
||||
VTRN32,
|
||||
VTRN16,
|
||||
VTRN8
|
||||
};
|
||||
}
|
||||
|
||||
|
6586
lib/Target/ARM/ARMPerfectShuffle.h
Normal file
6586
lib/Target/ARM/ARMPerfectShuffle.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -444,8 +444,6 @@ int main() {
|
||||
}
|
||||
|
||||
|
||||
#define GENERATE_ALTIVEC
|
||||
|
||||
#ifdef GENERATE_ALTIVEC
|
||||
|
||||
///===---------------------------------------------------------------------===//
|
||||
@ -498,3 +496,76 @@ vsldoi<2> the_vsldoi2("vsldoi8" , OP_VSLDOI8);
|
||||
vsldoi<3> the_vsldoi3("vsldoi12", OP_VSLDOI12);
|
||||
|
||||
#endif
|
||||
|
||||
#define GENERATE_NEON
|
||||
|
||||
#ifdef GENERATE_NEON
|
||||
enum {
|
||||
OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
|
||||
OP_VREV,
|
||||
OP_VDUP0,
|
||||
OP_VDUP1,
|
||||
OP_VDUP2,
|
||||
OP_VDUP3,
|
||||
OP_VEXT1,
|
||||
OP_VEXT2,
|
||||
OP_VEXT3,
|
||||
OP_VUZPL, // VUZP, left result
|
||||
OP_VUZPR, // VUZP, right result
|
||||
OP_VZIPL, // VZIP, left result
|
||||
OP_VZIPR, // VZIP, right result
|
||||
OP_VTRNL, // VTRN, left result
|
||||
OP_VTRNR // VTRN, right result
|
||||
};
|
||||
|
||||
struct vrev : public Operator {
|
||||
vrev() : Operator(0x1032, "vrev", OP_VREV) {}
|
||||
} the_vrev;
|
||||
|
||||
template<unsigned Elt>
|
||||
struct vdup : public Operator {
|
||||
vdup(const char *N, unsigned Opc)
|
||||
: Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {}
|
||||
};
|
||||
|
||||
vdup<0> the_vdup0("vdup0", OP_VDUP0);
|
||||
vdup<1> the_vdup1("vdup1", OP_VDUP1);
|
||||
vdup<2> the_vdup2("vdup2", OP_VDUP2);
|
||||
vdup<3> the_vdup3("vdup3", OP_VDUP3);
|
||||
|
||||
template<unsigned N>
|
||||
struct vext : public Operator {
|
||||
vext(const char *Name, unsigned Opc)
|
||||
: Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) {
|
||||
}
|
||||
};
|
||||
|
||||
vext<1> the_vext1("vext1", OP_VEXT1);
|
||||
vext<2> the_vext2("vext2", OP_VEXT2);
|
||||
vext<3> the_vext3("vext3", OP_VEXT3);
|
||||
|
||||
struct vuzpl : public Operator {
|
||||
vuzpl() : Operator(0x1032, "vuzpl", OP_VUZPL, 2) {}
|
||||
} the_vuzpl;
|
||||
|
||||
struct vuzpr : public Operator {
|
||||
vuzpr() : Operator(0x4602, "vuzpr", OP_VUZPR, 2) {}
|
||||
} the_vuzpr;
|
||||
|
||||
struct vzipl : public Operator {
|
||||
vzipl() : Operator(0x6273, "vzipl", OP_VZIPL, 2) {}
|
||||
} the_vzipl;
|
||||
|
||||
struct vzipr : public Operator {
|
||||
vzipr() : Operator(0x4051, "vzipr", OP_VZIPR, 2) {}
|
||||
} the_vzipr;
|
||||
|
||||
struct vtrnl : public Operator {
|
||||
vtrnl() : Operator(0x5173, "vtrnl", OP_VTRNL, 2) {}
|
||||
} the_vtrnl;
|
||||
|
||||
struct vtrnr : public Operator {
|
||||
vtrnr() : Operator(0x4062, "vtrnr", OP_VTRNR, 2) {}
|
||||
} the_vtrnr;
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user