Patch summary (free-form preamble; everything before the first "diff --git"
line is outside the hunks).

What the patch does, as shown by the hunks below:
  * ARMISelDAGToDAG.cpp: the vtrn selection case additionally accepts
    Intrinsic::arm_neon_vtrnf, and two new cases map the vuzpi/vuzpf and
    vzipi/vzipf intrinsics onto the VUZP*/VZIP* opcodes chosen by vector type.
  * ARMInstrNEON.td: the VTRND/VTRNQ classes are replaced by the shared
    N2VDShuffle/N2VQShuffle classes, which take op11_7 as a parameter, and
    VUZP/VZIP defs are added on top of them.

NOTE(review): this patch was recovered from a copy collapsed onto three lines.
  * Line breaks were rebuilt to agree with each hunk's @@ line counts;
    indentation and blank-line placement are conventional reconstructions,
    not the original bytes -- confirm against the upstream commit before
    applying.
  * The "<bits<2>" template heads of N2VDShuffle, N2VQShuffle, VTRND and VTRNQ
    were missing and have been restored (op19_18 is always given a two-bit
    literal by the defs).
  * The N2VLInt hunk-header text and the N3VD context line still lack their
    leading template parameters; those are not recoverable from this copy.

diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index bfaf6786ba9..8e73a43b2d9 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1459,6 +1459,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
     default: break;
 
     case Intrinsic::arm_neon_vtrni:
+    case Intrinsic::arm_neon_vtrnf:
       switch (VT.getSimpleVT()) {
       default: return NULL;
       case MVT::v8i8: Opc = ARM::VTRNd8; break;
@@ -1472,6 +1473,38 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
       }
       return CurDAG->getTargetNode(Opc, dl, VT, VT, N->getOperand(1),
                                    N->getOperand(2));
+
+    case Intrinsic::arm_neon_vuzpi:
+    case Intrinsic::arm_neon_vuzpf:
+      switch (VT.getSimpleVT()) {
+      default: return NULL;
+      case MVT::v8i8: Opc = ARM::VUZPd8; break;
+      case MVT::v4i16: Opc = ARM::VUZPd16; break;
+      case MVT::v2f32:
+      case MVT::v2i32: Opc = ARM::VUZPd32; break;
+      case MVT::v16i8: Opc = ARM::VUZPq8; break;
+      case MVT::v8i16: Opc = ARM::VUZPq16; break;
+      case MVT::v4f32:
+      case MVT::v4i32: Opc = ARM::VUZPq32; break;
+      }
+      return CurDAG->getTargetNode(Opc, dl, VT, VT, N->getOperand(1),
+                                   N->getOperand(2));
+
+    case Intrinsic::arm_neon_vzipi:
+    case Intrinsic::arm_neon_vzipf:
+      switch (VT.getSimpleVT()) {
+      default: return NULL;
+      case MVT::v8i8: Opc = ARM::VZIPd8; break;
+      case MVT::v4i16: Opc = ARM::VZIPd16; break;
+      case MVT::v2f32:
+      case MVT::v2i32: Opc = ARM::VZIPd32; break;
+      case MVT::v16i8: Opc = ARM::VZIPq8; break;
+      case MVT::v8i16: Opc = ARM::VZIPq16; break;
+      case MVT::v4f32:
+      case MVT::v4i32: Opc = ARM::VZIPq32; break;
+      }
+      return CurDAG->getTargetNode(Opc, dl, VT, VT, N->getOperand(1),
+                                   N->getOperand(2));
     }
     break;
   }
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index a69ef1289fe..afa42d57ec2 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -364,6 +364,18 @@ class N2VLInt op21_16, bits<4> op11_8, bit op7,
         (ins DPR:$src), NoItinerary, !strconcat(OpcodeStr, "\t$dst, $src"), "",
         [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;
 
+// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
+class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr>
+  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$dst1, DPR:$dst2),
+        (ins DPR:$src1, DPR:$src2), NoItinerary,
+        !strconcat(OpcodeStr, "\t$dst1, $dst2"),
+        "$src1 = $dst1, $src2 = $dst2", []>;
+class N2VQShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr>
+  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2),
+        (ins QPR:$src1, QPR:$src2), NoItinerary,
+        !strconcat(OpcodeStr, "\t$dst1, $dst2"),
+        "$src1 = $dst1, $src2 = $dst2", []>;
+
 // Basic 3-register operations, both double- and quad-register.
 class N3VD op21_20, bits<4> op11_8, bit op4,
            string OpcodeStr, ValueType ResTy, ValueType OpTy,
@@ -1919,24 +1931,33 @@ def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>;
 
 // VTRN : Vector Transpose
 
-class VTRND<bits<2> op19_18, string OpcodeStr>
-  : N2V<0b11, 0b11, op19_18, 0b10, 0b00001, 0, 0, (outs DPR:$dst1, DPR:$dst2),
-        (ins DPR:$src1, DPR:$src2), NoItinerary,
-        !strconcat(OpcodeStr, "\t$dst1, $dst2"),
-        "$src1 = $dst1, $src2 = $dst2", []>;
-class VTRNQ<bits<2> op19_18, string OpcodeStr>
-  : N2V<0b11, 0b11, op19_18, 0b10, 0b00001, 1, 0, (outs QPR:$dst1, QPR:$dst2),
-        (ins QPR:$src1, QPR:$src2), NoItinerary,
-        !strconcat(OpcodeStr, "\t$dst1, $dst2"),
-        "$src1 = $dst1, $src2 = $dst2", []>;
+def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn.8">;
+def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn.16">;
+def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn.32">;
 
-def VTRNd8 : VTRND<0b00, "vtrn.8">;
-def VTRNd16 : VTRND<0b01, "vtrn.16">;
-def VTRNd32 : VTRND<0b10, "vtrn.32">;
+def VTRNq8 : N2VQShuffle<0b00, 0b00001, "vtrn.8">;
+def VTRNq16 : N2VQShuffle<0b01, 0b00001, "vtrn.16">;
+def VTRNq32 : N2VQShuffle<0b10, 0b00001, "vtrn.32">;
 
-def VTRNq8 : VTRNQ<0b00, "vtrn.8">;
-def VTRNq16 : VTRNQ<0b01, "vtrn.16">;
-def VTRNq32 : VTRNQ<0b10, "vtrn.32">;
+// VUZP : Vector Unzip (Deinterleave)
+
+def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp.8">;
+def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp.16">;
+def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp.32">;
+
+def VUZPq8 : N2VQShuffle<0b00, 0b00010, "vuzp.8">;
+def VUZPq16 : N2VQShuffle<0b01, 0b00010, "vuzp.16">;
+def VUZPq32 : N2VQShuffle<0b10, 0b00010, "vuzp.32">;
+
+// VZIP : Vector Zip (Interleave)
+
+def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip.8">;
+def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip.16">;
+def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip.32">;
+
+def VZIPq8 : N2VQShuffle<0b00, 0b00011, "vzip.8">;
+def VZIPq16 : N2VQShuffle<0b01, 0b00011, "vzip.16">;
+def VZIPq32 : N2VQShuffle<0b10, 0b00011, "vzip.32">;
 
 //===----------------------------------------------------------------------===//
 // NEON instructions for single-precision FP math