diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 9b0a91b4e04..f39e00e4bae 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -119,8 +119,9 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI, } MIB.addReg(D0, getKillRegState(SrcIsKill)) - .addReg(D1, getKillRegState(SrcIsKill)) - .addReg(D2, getKillRegState(SrcIsKill)); + .addReg(D1, getKillRegState(SrcIsKill)); + if (NumRegs > 2) + MIB.addReg(D2, getKillRegState(SrcIsKill)); if (NumRegs > 3) MIB.addReg(D3, getKillRegState(SrcIsKill)); MIB = AddDefaultPred(MIB); @@ -224,6 +225,48 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MI.eraseFromParent(); } + case ARM::VST1q8Pseudo: + ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break; + case ARM::VST1q16Pseudo: + ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break; + case ARM::VST1q32Pseudo: + ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break; + case ARM::VST1q64Pseudo: + ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break; + case ARM::VST1q8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break; + case ARM::VST1q16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break; + case ARM::VST1q32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break; + case ARM::VST1q64Pseudo_UPD: + ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break; + + case ARM::VST2d8Pseudo: + ExpandVST(MBBI, ARM::VST2d8, false, SingleSpc, 2); break; + case ARM::VST2d16Pseudo: + ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break; + case ARM::VST2d32Pseudo: + ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break; + case ARM::VST2q8Pseudo: + ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break; + case ARM::VST2q16Pseudo: + ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break; + case ARM::VST2q32Pseudo: + ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break; + case ARM::VST2d8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break; + case ARM::VST2d16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break; + case ARM::VST2d32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break; + case ARM::VST2q8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break; + case ARM::VST2q16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break; + case ARM::VST2q32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break; + case ARM::VST3d8Pseudo: ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break; case ARM::VST3d16Pseudo: diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index c25966a8a70..4f4f4d08b41 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1256,16 +1256,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector Ops; + SmallVector Ops; Ops.push_back(MemAddr); Ops.push_back(Align); - // FIXME: This is a temporary flag to distinguish VSTs that have been - // converted to pseudo instructions. - bool usePseudoInstrs = (NumVecs >= 3); - if (is64BitVector) { - if (NumVecs >= 2) { + if (NumVecs == 1) { + Ops.push_back(N->getOperand(3)); + } else { SDValue RegSeq; SDValue V0 = N->getOperand(0+3); SDValue V1 = N->getOperand(1+3); @@ -1282,124 +1280,61 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, : N->getOperand(3+3); RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); } - if (usePseudoInstrs) - Ops.push_back(RegSeq); - else { - - // Now extract the D registers back out. - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, - RegSeq)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, - RegSeq)); - if (NumVecs > 2) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, - RegSeq)); - if (NumVecs > 3) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, - RegSeq)); - } - } else { - Ops.push_back(N->getOperand(3)); + Ops.push_back(RegSeq); } Ops.push_back(Pred); Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); unsigned Opc = DOpcodes[OpcodeIndex]; - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), - usePseudoInstrs ? 6 : NumVecs+5); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6); } - EVT RegVT = GetNEONSubregVT(VT); if (NumVecs <= 2) { - // Quad registers are directly supported for VST1 and VST2, - // storing pairs of D regs. + // Quad registers are directly supported for VST1 and VST2. unsigned Opc = QOpcodes0[OpcodeIndex]; - if (NumVecs == 2) { - // First extract the pair of Q registers. + if (NumVecs == 1) { + Ops.push_back(N->getOperand(3)); + } else { + // Form a QQ register. SDValue Q0 = N->getOperand(3); SDValue Q1 = N->getOperand(4); - - // Form a QQ register. - SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0); - - // Now extract the D registers back out. - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - QQ)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - QQ)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT, - QQ)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT, - QQ)); - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4); - } else { - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - N->getOperand(Vec+3))); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - N->getOperand(Vec+3))); - } - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), - 5 + 2 * NumVecs); + Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0)); } + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6); } // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. // Form the QQQQ REG_SEQUENCE. - SDValue V[8]; - for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { - V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - N->getOperand(Vec+3)); - V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - N->getOperand(Vec+3)); - } - if (NumVecs == 3) - V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - - SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], - V[4], V[5], V[6], V[7]), 0); + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) + : N->getOperand(3+3); + SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); // Store the even D registers. - assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); Ops.push_back(Reg0); // post-access address offset - if (usePseudoInstrs) - Ops.push_back(RegSeq); - else - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl, - RegVT, RegSeq)); + Ops.push_back(RegSeq); Ops.push_back(Pred); Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); unsigned Opc = QOpcodes0[OpcodeIndex]; SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), - usePseudoInstrs ? 7 : NumVecs+6); + MVT::Other, Ops.data(), 7); Chain = SDValue(VStA, 1); // Store the odd D registers. Ops[0] = SDValue(VStA, 0); // MemAddr - if (usePseudoInstrs) - Ops[6] = Chain; - else { - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl, - RegVT, RegSeq); - Ops[NumVecs+5] = Chain; - } + Ops[6] = Chain; Opc = QOpcodes1[OpcodeIndex]; SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), - usePseudoInstrs ? 7 : NumVecs+6); + MVT::Other, Ops.data(), 7); Chain = SDValue(VStB, 1); ReplaceUses(SDValue(N, 0), Chain); return NULL; @@ -2267,15 +2202,16 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vst1: { unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, ARM::VST1d32, ARM::VST1d64 }; - unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, - ARM::VST1q32, ARM::VST1q64 }; + unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo, + ARM::VST1q32Pseudo, ARM::VST1q64Pseudo }; return SelectVST(N, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst2: { - unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, - ARM::VST2d32, ARM::VST1q64 }; - unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 }; + unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo, + ARM::VST2d32Pseudo, ARM::VST1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, + ARM::VST2q32Pseudo }; return SelectVST(N, 2, DOpcodes, QOpcodes, 0); } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index bfd98a8c133..88d606c0f54 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -490,6 +490,12 @@ let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { // Classes for VST* pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. +class VSTQPseudo + : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "">; +class VSTQWBPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST, + "$addr.addr = $wb">; class VSTQQPseudo : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">; class VSTQQWBPseudo @@ -520,6 +526,11 @@ def VST1q16 : VST1Q<0b0100, "16">; def VST1q32 : VST1Q<0b1000, "32">; def VST1q64 : VST1Q<0b1100, "64">; +def VST1q8Pseudo : VSTQPseudo; +def VST1q16Pseudo : VSTQPseudo; +def VST1q32Pseudo : VSTQPseudo; +def VST1q64Pseudo : VSTQPseudo; + // ...with address register writeback: class VST1DWB op7_4, string Dt> : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb), @@ -540,6 +551,11 @@ def VST1q16_UPD : VST1QWB<0b0100, "16">; def VST1q32_UPD : VST1QWB<0b1000, "32">; def VST1q64_UPD : VST1QWB<0b1100, "64">; +def VST1q8Pseudo_UPD : VSTQWBPseudo; +def VST1q16Pseudo_UPD : VSTQWBPseudo; +def VST1q32Pseudo_UPD : VSTQWBPseudo; +def VST1q64Pseudo_UPD : VSTQWBPseudo; + // ...with 3 registers (some of these are only for the disassembler): class VST1D3 op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), @@ -610,6 +626,14 @@ def VST2q8 : VST2Q<0b0000, "8">; def VST2q16 : VST2Q<0b0100, "16">; def VST2q32 : VST2Q<0b1000, "32">; +def VST2d8Pseudo : VSTQPseudo; +def VST2d16Pseudo : VSTQPseudo; +def VST2d32Pseudo : VSTQPseudo; + +def VST2q8Pseudo : VSTQQPseudo; +def VST2q16Pseudo : VSTQQPseudo; +def VST2q32Pseudo : VSTQQPseudo; + // ...with address register writeback: class VST2DWB op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), @@ -631,6 +655,14 @@ def VST2q8_UPD : VST2QWB<0b0000, "8">; def VST2q16_UPD : VST2QWB<0b0100, "16">; def VST2q32_UPD : VST2QWB<0b1000, "32">; +def VST2d8Pseudo_UPD : VSTQWBPseudo; +def VST2d16Pseudo_UPD : VSTQWBPseudo; +def VST2d32Pseudo_UPD : VSTQWBPseudo; + +def VST2q8Pseudo_UPD : VSTQQWBPseudo; +def VST2q16Pseudo_UPD : VSTQQWBPseudo; +def VST2q32Pseudo_UPD : VSTQQWBPseudo; + // ...with double-spaced registers (for disassembly only): def VST2b8 : VST2D<0b1001, 0b0000, "8">; def VST2b16 : VST2D<0b1001, 0b0100, "16">; diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index 254ac23db57..5142ebdf9e8 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -178,13 +178,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VST1q8: - case ARM::VST1q16: - case ARM::VST1q32: - case ARM::VST1q64: - case ARM::VST2d8: - case ARM::VST2d16: - case ARM::VST2d32: case ARM::VST2LNd8: case ARM::VST2LNd16: case ARM::VST2LNd32: @@ -192,13 +185,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 2; return true; - case ARM::VST2q8: - case ARM::VST2q16: - case ARM::VST2q32: - FirstOpnd = 2; - NumRegs = 4; - return true; - case ARM::VST2LNq16: case ARM::VST2LNq32: FirstOpnd = 2;