ARM NEON: Handle v16i8 and v8i16 reverse shuffles

Lower reverse shuffles to a vrev64 and a vext instruction instead of the default
legalization of storing and loading to the stack. This is important because we
generate reverse shuffles in the loop vectorizer when we reverse store to an
array.

  uint8_t Arr[N];
  for (i = 0; i < N; ++i)
    Arr[N - i - 1] = ...

radar://13171760

llvm-svn: 174929
This commit is contained in:
Arnold Schwaighofer 2013-02-12 01:58:32 +00:00
parent 0422258982
commit 1ecca5fd68
2 changed files with 64 additions and 1 deletions

View File

@ -4294,6 +4294,21 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return true;
}
/// \return true if this is a reverse operation on an vector.
static bool isReverseMask(ArrayRef<int> M, EVT VT) {
unsigned NumElts = VT.getVectorNumElements();
// Make sure the mask has the right size.
if (NumElts != M.size())
return false;
// Look for <15, ..., 3, -1, 1, 0>.
for (unsigned i = 0; i != NumElts; ++i)
if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
return false;
return true;
}
// If N is an integer constant that can be moved into a register in one
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
@ -4689,7 +4704,8 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isVZIPMask(M, VT, WhichResult) ||
isVTRN_v_undef_Mask(M, VT, WhichResult) ||
isVUZP_v_undef_Mask(M, VT, WhichResult) ||
isVZIP_v_undef_Mask(M, VT, WhichResult));
isVZIP_v_undef_Mask(M, VT, WhichResult) ||
((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
@ -4793,6 +4809,23 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
&VTBLMask[0], 8));
}
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
SelectionDAG &DAG) {
DebugLoc DL = Op.getDebugLoc();
SDValue OpLHS = Op.getOperand(0);
EVT VT = OpLHS.getValueType();
assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
"Expect an v8i16/v16i8 type");
OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
// For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now,
// extract the first 8 bytes into the top double word and the last 8 bytes
// into the bottom double word. The v8i16 case is similar.
unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
DAG.getConstant(ExtractNum, MVT::i32));
}
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@ -4930,6 +4963,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
if (VT == MVT::v8i8) {
SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
if (NewOp.getNode())

View File

@ -133,3 +133,30 @@ define i16 @foldBuildVectors() {
%3 = extractelement <8 x i16> %2, i32 0
ret i16 %3
}
; Test that we are generating vrev and vext for reverse shuffles of v8i16
; shuffles.
; CHECK: reverse_v8i16
define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) {
%v0 = load <8 x i16>* %loadaddr
; CHECK: vrev64.16
; CHECK: vext.16
%v1 = shufflevector <8 x i16> %v0, <8 x i16> undef,
<8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
store <8 x i16> %v1, <8 x i16>* %storeaddr
ret void
}
; Test that we are generating vrev and vext for reverse shuffles of v16i8
; shuffles.
; CHECK: reverse_v16i8
define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) {
%v0 = load <16 x i8>* %loadaddr
; CHECK: vrev64.8
; CHECK: vext.8
%v1 = shufflevector <16 x i8> %v0, <16 x i8> undef,
<16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8,
i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
store <16 x i8> %v1, <16 x i8>* %storeaddr
ret void
}