Implement Neon VTRN instructions.

For now, anyway, these are selected directly from the intrinsics produced by the frontend.  If it is more
convenient to have a custom DAG node for using these to implement shuffles,
we can add that later.

llvm-svn: 78459
This commit is contained in:
Bob Wilson 2009-08-08 05:53:00 +00:00
parent 591187332c
commit 935ee0c122
2 changed files with 48 additions and 0 deletions

View File

@ -1448,6 +1448,33 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
N->getOperand(4), N->getOperand(5) };
return CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 7);
}
// Manual selection for chain-less intrinsic nodes. Only the NEON vtrni
// intrinsic is handled here; every other intrinsic ID leaves this case via
// the trailing break.
case ISD::INTRINSIC_WO_CHAIN: {
// Operand 0 is read as a constant holding the intrinsic ID.
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
// Type of the node's first result; it picks the opcode below and is also
// used as the type of both results of the node that gets created.
MVT VT = N->getValueType(0);
unsigned Opc = 0;
// Match intrinsics that return multiple values.
switch (IntNo) {
default: break;
case Intrinsic::arm_neon_vtrni:
// Choose the VTRN opcode from the vector type: the "d" opcodes are used
// for the v8i8/v4i16/v2i32/v2f32 types and the "q" opcodes for the
// v16i8/v8i16/v4i32/v4f32 types. Float vectors share the 32-bit-element
// opcode with the integer vectors of the same element count.
switch (VT.getSimpleVT()) {
// Any other vector type is not handled: return NULL without creating a
// node. NOTE(review): what a NULL return means to the caller is not
// visible in this hunk -- confirm against Select's contract.
default: return NULL;
case MVT::v8i8: Opc = ARM::VTRNd8; break;
case MVT::v4i16: Opc = ARM::VTRNd16; break;
case MVT::v2f32:
case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VTRNq8; break;
case MVT::v8i16: Opc = ARM::VTRNq16; break;
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VTRNq32; break;
}
// Build the target node with two results, both of type VT, taking the
// intrinsic's operands 1 and 2 as its inputs.
return CurDAG->getTargetNode(Opc, dl, VT, VT, N->getOperand(1),
N->getOperand(2));
}
break;
}
}
return SelectCode(Op);

View File

@ -1917,6 +1917,27 @@ class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
def VREV16d8 : VREV16D<0b00, "vrev16.8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>;
// VTRN : Vector Transpose
// VTRND: VTRN form whose operands are all in the DPR register class.
//   op19_18   - 2-bit field forwarded to N2V; each def that instantiates this
//               class passes a different value per element size.
//   OpcodeStr - mnemonic text; the asm string is OpcodeStr followed by
//               "\t$dst1, $dst2".
// The instruction has two outputs and two inputs, and the constraint string
// ties each input to the corresponding output ($src1 = $dst1, $src2 = $dst2),
// which is why only the $dst operands appear in the asm string.
// The pattern list is empty ([]), so no selection pattern is attached here.
class VTRND<bits<2> op19_18, string OpcodeStr>
: N2V<0b11, 0b11, op19_18, 0b10, 0b00001, 0, 0, (outs DPR:$dst1, DPR:$dst2),
(ins DPR:$src1, DPR:$src2), NoItinerary,
!strconcat(OpcodeStr, "\t$dst1, $dst2"),
"$src1 = $dst1, $src2 = $dst2", []>;
// VTRNQ: VTRN form whose operands are all in the QPR register class.
//   op19_18   - 2-bit field forwarded to N2V; each def that instantiates this
//               class passes a different value per element size.
//   OpcodeStr - mnemonic text; the asm string is OpcodeStr followed by
//               "\t$dst1, $dst2".
// The N2V argument following 0b00001 is 1 here.
// NOTE(review): presumably that argument is the encoding's Q bit -- confirm
// against the N2V class definition.
// Both inputs are tied to the matching outputs ($src1 = $dst1,
// $src2 = $dst2), and the pattern list is empty ([]).
class VTRNQ<bits<2> op19_18, string OpcodeStr>
: N2V<0b11, 0b11, op19_18, 0b10, 0b00001, 1, 0, (outs QPR:$dst1, QPR:$dst2),
(ins QPR:$src1, QPR:$src2), NoItinerary,
!strconcat(OpcodeStr, "\t$dst1, $dst2"),
"$src1 = $dst1, $src2 = $dst2", []>;
// Concrete VTRN instructions. The "d" records instantiate VTRND and the "q"
// records instantiate VTRNQ; within each group op19_18 is 0b00 for "vtrn.8",
// 0b01 for "vtrn.16" and 0b10 for "vtrn.32".
def VTRNd8 : VTRND<0b00, "vtrn.8">;
def VTRNd16 : VTRND<0b01, "vtrn.16">;
def VTRNd32 : VTRND<0b10, "vtrn.32">;
def VTRNq8 : VTRNQ<0b00, "vtrn.8">;
def VTRNq16 : VTRNQ<0b01, "vtrn.16">;
def VTRNq32 : VTRNQ<0b10, "vtrn.32">;
//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//