From eee70a1f65f5ecf89caae49f6a422ec9d104ce7d Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 17 Feb 2017 22:14:51 +0000 Subject: [PATCH] [Hexagon] Start using regmasks on calls Reapply r295371 with a fix for the Windows bot failures. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295504 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonBlockRanges.cpp | 63 +++++++++++++++++-- lib/Target/Hexagon/HexagonCopyToCombine.cpp | 26 ++++---- lib/Target/Hexagon/HexagonDepInstrInfo.td | 6 -- lib/Target/Hexagon/HexagonFrameLowering.cpp | 34 +++++++--- lib/Target/Hexagon/HexagonHardwareLoops.cpp | 24 +++---- lib/Target/Hexagon/HexagonISelLowering.cpp | 51 ++++++++------- lib/Target/Hexagon/HexagonISelLowering.h | 2 +- lib/Target/Hexagon/HexagonInstrInfo.cpp | 20 ++++-- lib/Target/Hexagon/HexagonMCInstLower.cpp | 5 +- lib/Target/Hexagon/HexagonPseudo.td | 35 +++++------ lib/Target/Hexagon/HexagonRegisterInfo.cpp | 12 +++- lib/Target/Hexagon/HexagonRegisterInfo.h | 3 +- lib/Target/Hexagon/HexagonRegisterInfo.td | 17 ++--- lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 57 +++++++++++++++-- lib/Target/Hexagon/HexagonVLIWPacketizer.h | 1 + lib/Target/Hexagon/RDFDeadCode.cpp | 12 +++- lib/Target/Hexagon/RDFRegisters.cpp | 15 ++++- lib/Target/Hexagon/RDFRegisters.h | 4 ++ .../Hexagon/avoid-predspill-calleesaved.ll | 1 - test/CodeGen/Hexagon/compound.ll | 4 +- 20 files changed, 273 insertions(+), 119 deletions(-) diff --git a/lib/Target/Hexagon/HexagonBlockRanges.cpp b/lib/Target/Hexagon/HexagonBlockRanges.cpp index 52d1b1c65cd..04621f4f440 100644 --- a/lib/Target/Hexagon/HexagonBlockRanges.cpp +++ b/lib/Target/Hexagon/HexagonBlockRanges.cpp @@ -306,6 +306,8 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, LastUse[R] = LastDef[R] = IndexType::None; }; + RegisterSet Defs, Clobbers; + for (auto &In : B) { if (In.isDebugValue()) continue; @@ -324,19 +326,68 @@ void 
HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, closeRange(S); } } - // Process defs. + // Process defs and clobbers. + Defs.clear(); + Clobbers.clear(); for (auto &Op : In.operands()) { if (!Op.isReg() || !Op.isDef() || Op.isUndef()) continue; RegisterRef R = { Op.getReg(), Op.getSubReg() }; - if (TargetRegisterInfo::isPhysicalRegister(R.Reg) && Reserved[R.Reg]) - continue; for (auto S : expandToSubRegs(R, MRI, TRI)) { - if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None) - closeRange(S); - LastDef[S] = Index; + if (TargetRegisterInfo::isPhysicalRegister(S.Reg) && Reserved[S.Reg]) + continue; + if (Op.isDead()) + Clobbers.insert(S); + else + Defs.insert(S); } } + + for (auto &Op : In.operands()) { + if (!Op.isRegMask()) + continue; + const uint32_t *BM = Op.getRegMask(); + for (unsigned PR = 1, N = TRI.getNumRegs(); PR != N; ++PR) { + // Skip registers that have subregisters. A register is preserved + // iff its bit is set in the regmask, so if R1:0 was preserved, both + // R1 and R0 would also be present. + if (MCSubRegIterator(PR, &TRI, false).isValid()) + continue; + if (Reserved[PR]) + continue; + if (BM[PR/32] & (1u << (PR%32))) + continue; + RegisterRef R = { PR, 0 }; + if (!Defs.count(R)) + Clobbers.insert(R); + } + } +#ifndef NDEBUG + for (RegisterRef R : Defs) + assert(!Clobbers.count(R)); + for (RegisterRef R : Clobbers) + assert(!Defs.count(R)); +#endif + // Update maps for defs. + for (RegisterRef S : Defs) { + // Defs should already be expanded into subregs. + assert(!TargetRegisterInfo::isPhysicalRegister(S.Reg) || + !MCSubRegIterator(S.Reg, &TRI, false).isValid()); + if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None) + closeRange(S); + LastDef[S] = Index; + } + // Update maps for clobbers. + for (RegisterRef S : Clobbers) { + // Clobbers should already be expanded into subregs. 
+ assert(!TargetRegisterInfo::isPhysicalRegister(S.Reg) || + !MCSubRegIterator(S.Reg, &TRI, false).isValid()); + if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None) + closeRange(S); + // Create a single-instruction range. + LastDef[S] = LastUse[S] = Index; + closeRange(S); + } } // Collect live-on-exit. diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 36080997ec6..5f375f8dc74 100644 --- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -440,17 +440,21 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { // Put instructions that last defined integer or double registers into the // map. - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - MachineOperand &Op = MI.getOperand(I); - if (!Op.isReg() || !Op.isDef() || !Op.getReg()) - continue; - unsigned Reg = Op.getReg(); - if (Hexagon::DoubleRegsRegClass.contains(Reg)) { - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { - LastDef[*SubRegs] = &MI; - } - } else if (Hexagon::IntRegsRegClass.contains(Reg)) - LastDef[Reg] = &MI; + for (MachineOperand &Op : MI.operands()) { + if (Op.isReg()) { + if (!Op.isDef() || !Op.getReg()) + continue; + unsigned Reg = Op.getReg(); + if (Hexagon::DoubleRegsRegClass.contains(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + LastDef[*SubRegs] = &MI; + } else if (Hexagon::IntRegsRegClass.contains(Reg)) + LastDef[Reg] = &MI; + } else if (Op.isRegMask()) { + for (unsigned Reg : Hexagon::IntRegsRegClass) + if (Op.clobbersPhysReg(Reg)) + LastDef[Reg] = &MI; + } } } } diff --git a/lib/Target/Hexagon/HexagonDepInstrInfo.td b/lib/Target/Hexagon/HexagonDepInstrInfo.td index 4c4a2788855..650261d859c 100644 --- a/lib/Target/Hexagon/HexagonDepInstrInfo.td +++ b/lib/Target/Hexagon/HexagonDepInstrInfo.td @@ -4665,7 +4665,6 @@ let Defs = [PC, R31]; let BaseOpcode = "J2_call"; let 
isPredicable = 1; let hasSideEffects = 1; -let Defs = VolatileV3.Regs; let isExtendable = 1; let opExtendable = 0; let isExtentSigned = 1; @@ -4690,7 +4689,6 @@ let Defs = [PC, R31]; let BaseOpcode = "J2_call"; let hasSideEffects = 1; let isTaken = Inst{12}; -let Defs = VolatileV3.Regs; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 1; @@ -4710,7 +4708,6 @@ let prefersSlot3 = 1; let Uses = [R29]; let Defs = [PC, R31]; let hasSideEffects = 1; -let Defs = VolatileV3.Regs; } def J2_callrf : HInst< (outs), @@ -4729,7 +4726,6 @@ let Uses = [R29]; let Defs = [PC, R31]; let hasSideEffects = 1; let isTaken = Inst{12}; -let Defs = VolatileV3.Regs; } def J2_callrt : HInst< (outs), @@ -4747,7 +4743,6 @@ let Uses = [R29]; let Defs = [PC, R31]; let hasSideEffects = 1; let isTaken = Inst{12}; -let Defs = VolatileV3.Regs; } def J2_callt : HInst< (outs), @@ -4766,7 +4761,6 @@ let Defs = [PC, R31]; let BaseOpcode = "J2_call"; let hasSideEffects = 1; let isTaken = Inst{12}; -let Defs = VolatileV3.Regs; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 1; diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 55aee261a6d..0e2380f4316 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -301,16 +301,30 @@ static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR, // the frame creation/destruction instructions. if (MO.isFI()) return true; - if (!MO.isReg()) - continue; - unsigned R = MO.getReg(); - // Virtual registers will need scavenging, which then may require - // a stack slot. - if (TargetRegisterInfo::isVirtualRegister(R)) - return true; - for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S) - if (CSR[*S]) + if (MO.isReg()) { + unsigned R = MO.getReg(); + // Virtual registers will need scavenging, which then may require + // a stack slot. 
+ if (TargetRegisterInfo::isVirtualRegister(R)) return true; + for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S) + if (CSR[*S]) + return true; + continue; + } + if (MO.isRegMask()) { + // A regmask would normally have all callee-saved registers marked + // as preserved, so this check would not be needed, but in case of + // ever having other regmasks (for other calling conventions), + // make sure they would be processed correctly. + const uint32_t *BM = MO.getRegMask(); + for (int x = CSR.find_first(); x >= 0; x = CSR.find_next(x)) { + unsigned R = x; + // If this regmask does not preserve a CSR, a frame will be needed. + if (!(BM[R/32] & (1u << (R%32)))) + return true; + } + } } } return false; @@ -1651,7 +1665,7 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B, // Dead defs are recorded in Clobbers, but are not automatically removed // from the live set. for (auto &C : Clobbers) - if (C.second->isDead()) + if (C.second->isReg() && C.second->isDead()) LPR.removeReg(C.first); } diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 07e05c4b18d..86a8089401c 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -100,6 +100,7 @@ namespace { MachineRegisterInfo *MRI; MachineDominatorTree *MDT; const HexagonInstrInfo *TII; + const HexagonRegisterInfo *TRI; #ifndef NDEBUG static int Counter; #endif @@ -381,7 +382,9 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { MLI = &getAnalysis(); MRI = &MF.getRegInfo(); MDT = &getAnalysis(); - TII = MF.getSubtarget().getInstrInfo(); + const HexagonSubtarget &HST = MF.getSubtarget(); + TII = HST.getInstrInfo(); + TRI = HST.getRegisterInfo(); for (auto &L : *MLI) if (!L->getParentLoop()) { @@ -960,24 +963,21 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, /// \brief Return true if the operation is invalid within hardware loop. 
bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI, bool IsInnerHWLoop) const { - // Call is not allowed because the callee may use a hardware loop except for // the case when the call never returns. if (MI->getDesc().isCall()) return !TII->doesNotReturn(*MI); // Check if the instruction defines a hardware loop register. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - unsigned R = MO.getReg(); - if (IsInnerHWLoop && (R == Hexagon::LC0 || R == Hexagon::SA0 || - R == Hexagon::LC1 || R == Hexagon::SA1)) + using namespace Hexagon; + static const unsigned Regs01[] = { LC0, SA0, LC1, SA1 }; + static const unsigned Regs1[] = { LC1, SA1 }; + auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, array_lengthof(Regs01)) + : makeArrayRef(Regs1, array_lengthof(Regs1)); + for (unsigned R : CheckRegs) + if (MI->modifiesRegister(R, TRI)) return true; - if (!IsInnerHWLoop && (R == Hexagon::LC1 || R == Hexagon::SA1)) - return true; - } + return false; } diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index e87e1e6a7e0..e67b2e1424d 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -644,11 +644,11 @@ bool HexagonTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { /// LowerCallResult - Lower the result values of an ISD::CALL into the /// appropriate copies out of appropriate physical registers. This assumes that -/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call +/// Chain/Glue are the input chain/glue to use, and that TheCall is the call /// being lowered. Returns a SDNode with the same number of values as the /// ISD::CALL. 
SDValue HexagonTargetLowering::LowerCallResult( - SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, + SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, const SmallVectorImpl &OutVals, SDValue Callee) const { @@ -671,21 +671,24 @@ SDValue HexagonTargetLowering::LowerCallResult( // predicate register as the call result. auto &MRI = DAG.getMachineFunction().getRegInfo(); SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), - MVT::i32, InFlag); + MVT::i32, Glue); // FR0 = (Value, Chain, Glue) unsigned PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR, FR0.getValue(0), FR0.getValue(2)); // TPR = (Chain, Glue) - RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1, - TPR.getValue(1)); + // Don't glue this CopyFromReg, because it copies from a virtual + // register. If it is glued to the call, InstrEmitter will add it + // as an implicit def to the call (EmitMachineNode). + RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1); + Glue = TPR.getValue(1); } else { RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), - RVLocs[i].getValVT(), InFlag); + RVLocs[i].getValVT(), Glue); + Glue = RetVal.getValue(2); } InVals.push_back(RetVal.getValue(0)); Chain = RetVal.getValue(1); - InFlag = RetVal.getValue(2); } return Chain; @@ -840,16 +843,17 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); + SDValue Glue; if (!IsTailCall) { SDValue C = DAG.getConstant(NumBytes, dl, PtrVT, true); Chain = DAG.getCALLSEQ_START(Chain, C, dl); + Glue = Chain.getValue(1); } // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. 
// The Glue is necessary since all emitted instructions must be // stuck together. - SDValue Glue; if (!IsTailCall) { for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, @@ -902,6 +906,10 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); } + const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + if (Glue.getNode()) Ops.push_back(Glue); @@ -1571,9 +1579,10 @@ HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) SDValue HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain, - GlobalAddressSDNode *GA, SDValue *InFlag, EVT PtrVT, unsigned ReturnReg, + GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags) const { - MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDLoc dl(GA); SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, @@ -1585,23 +1594,21 @@ HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain, // 2. Callee which in this case is the Global address value. // 3. Registers live into the call.In this case its R0, as we // have just one argument to be passed. - // 4. InFlag if there is any. + // 4. Glue. // Note: The order is important. 
- if (InFlag) { - SDValue Ops[] = { Chain, TGA, - DAG.getRegister(Hexagon::R0, PtrVT), *InFlag }; - Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops); - } else { - SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT)}; - Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops); - } + const auto &HRI = *Subtarget.getRegisterInfo(); + const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C); + assert(Mask && "Missing call preserved mask for calling convention"); + SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT), + DAG.getRegisterMask(Mask), Glue }; + Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops); // Inform MFI that function has calls. MFI.setAdjustsStack(true); - SDValue Flag = Chain.getValue(1); - return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag); + Glue = Chain.getValue(1); + return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue); } // @@ -1694,7 +1701,7 @@ HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag); InFlag = Chain.getValue(1); - return GetDynamicTLSAddr(DAG, Chain, GA, &InFlag, PtrVT, + return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT, Hexagon::R0, HexagonII::MO_GDPLT); } diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index a8ed29e585d..792234b5072 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -163,7 +163,7 @@ namespace HexagonISD { SDValue LowerToTLSLocalExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) const; SDValue GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain, - GlobalAddressSDNode *GA, SDValue *InFlag, EVT PtrVT, + GlobalAddressSDNode *GA, SDValue InFlag, EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags) const; SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp 
b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 01405bf6b9a..6b74c2fd618 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1413,12 +1413,22 @@ bool HexagonInstrInfo::DefinesPredicate( auto &HRI = getRegisterInfo(); for (unsigned oper = 0; oper < MI.getNumOperands(); ++oper) { MachineOperand MO = MI.getOperand(oper); - if (MO.isReg() && MO.isDef()) { + if (MO.isReg()) { + if (!MO.isDef()) + continue; const TargetRegisterClass* RC = HRI.getMinimalPhysRegClass(MO.getReg()); if (RC == &Hexagon::PredRegsRegClass) { Pred.push_back(MO); return true; } + continue; + } else if (MO.isRegMask()) { + for (unsigned PR : Hexagon::PredRegsRegClass) { + if (!MI.modifiesRegister(PR, &HRI)) + continue; + Pred.push_back(MO); + return true; + } } } return false; @@ -3009,10 +3019,12 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &MI, bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI, unsigned PredReg) const { - for (unsigned opNum = 0; opNum < MI.getNumOperands(); opNum++) { - const MachineOperand &MO = MI.getOperand(opNum); + for (const MachineOperand &MO : MI.operands()) { + // Predicate register must be explicitly defined. + if (MO.isRegMask() && MO.clobbersPhysReg(PredReg)) + return false; if (MO.isReg() && MO.isDef() && MO.isImplicit() && (MO.getReg() == PredReg)) - return false; // Predicate register must be explicitly defined. 
+ return false; } // Hexagon Programmer's Reference says that decbin, memw_locked, and diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp index 9d8c29463bf..7189b5a52c4 100644 --- a/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -111,9 +111,12 @@ void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, default: MI->print(errs()); llvm_unreachable("unknown operand type"); + case MachineOperand::MO_RegisterMask: + continue; case MachineOperand::MO_Register: // Ignore all implicit register operands. - if (MO.isImplicit()) continue; + if (MO.isImplicit()) + continue; MCO = MCOperand::createReg(MO.getReg()); break; case MachineOperand::MO_FPImmediate: { diff --git a/lib/Target/Hexagon/HexagonPseudo.td b/lib/Target/Hexagon/HexagonPseudo.td index 1ef31c55cc6..5a720e79456 100644 --- a/lib/Target/Hexagon/HexagonPseudo.td +++ b/lib/Target/Hexagon/HexagonPseudo.td @@ -141,13 +141,12 @@ defm J2_loop1 : LOOP_ri<"loop1">; let isCall = 1, hasSideEffects = 1, isPredicable = 0, isExtended = 0, isExtendable = 1, opExtendable = 0, isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2 in -class T_Call +class T_Call : JInst<(outs), (ins a30_2Imm:$dst), "call " # ExtStr # "$dst", [], "", J_tc_2early_SLOT23> { let BaseOpcode = "call"; bits<24> dst; - let Defs = !if (CSR, VolatileV3.Regs, []); let IClass = 0b0101; let Inst{27-25} = 0b101; let Inst{24-16,13-1} = dst{23-2}; @@ -156,11 +155,11 @@ class T_Call let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = [R16], isPredicable = 0 in -def CALLProfile : T_Call<1, "">; +def CALLProfile : T_Call<"">; let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = [PC, R31, R6, R7, P0] in -def PS_call_stk : T_Call<0, "">; +def PS_call_stk : T_Call<"">; let isCall = 1, hasSideEffects = 1, cofMax1 = 1 in class JUMPR_MISC_CALLR; // Call, no return. 
} -let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, +let isCall = 1, hasSideEffects = 1, isExtended = 0, isExtendable = 1, opExtendable = 0, isCodeGenOnly = 1, BaseOpcode = "PS_call_nr", isExtentSigned = 1, opExtentAlign = 2, Itinerary = J_tc_2early_SLOT23 in @@ -375,43 +374,43 @@ let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, // Restore registers and dealloc frame before a tail call. let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in { - def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<0, "">, PredRel; + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel; let isExtended = 1, opExtendable = 0 in - def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT : T_Call<0, "">, PredRel; + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT : T_Call<"">, PredRel; let Defs = [R14, R15, R28, R29, R30, R31, PC] in { - def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC : T_Call<0, "">, PredRel; + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC : T_Call<"">, PredRel; let isExtended = 1, opExtendable = 0 in - def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC : T_Call<0, "">, PredRel; + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC : T_Call<"">, PredRel; } } // Save registers function call. 
let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in { - def SAVE_REGISTERS_CALL_V4 : T_Call<0, "">, PredRel; + def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel; let isExtended = 1, opExtendable = 0 in - def SAVE_REGISTERS_CALL_V4_EXT : T_Call<0, "">, PredRel; + def SAVE_REGISTERS_CALL_V4_EXT : T_Call<"">, PredRel; let Defs = [P0] in - def SAVE_REGISTERS_CALL_V4STK : T_Call<0, "">, PredRel; + def SAVE_REGISTERS_CALL_V4STK : T_Call<"">, PredRel; let Defs = [P0], isExtended = 1, opExtendable = 0 in - def SAVE_REGISTERS_CALL_V4STK_EXT : T_Call<0, "">, PredRel; + def SAVE_REGISTERS_CALL_V4STK_EXT : T_Call<"">, PredRel; let Defs = [R14, R15, R28] in - def SAVE_REGISTERS_CALL_V4_PIC : T_Call<0, "">, PredRel; + def SAVE_REGISTERS_CALL_V4_PIC : T_Call<"">, PredRel; let Defs = [R14, R15, R28], isExtended = 1, opExtendable = 0 in - def SAVE_REGISTERS_CALL_V4_EXT_PIC : T_Call<0, "">, PredRel; + def SAVE_REGISTERS_CALL_V4_EXT_PIC : T_Call<"">, PredRel; let Defs = [R14, R15, R28, P0] in - def SAVE_REGISTERS_CALL_V4STK_PIC : T_Call<0, "">, PredRel; + def SAVE_REGISTERS_CALL_V4STK_PIC : T_Call<"">, PredRel; let Defs = [R14, R15, R28, P0], isExtended = 1, opExtendable = 0 in - def SAVE_REGISTERS_CALL_V4STK_EXT_PIC : T_Call<0, "">, PredRel; + def SAVE_REGISTERS_CALL_V4STK_EXT_PIC : T_Call<"">, PredRel; } // Vector load/store pseudos diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 42568db4a13..a6af751d5f5 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -36,6 +36,9 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#define GET_REGINFO_TARGET_DESC +#include "HexagonGenRegisterInfo.inc" + using namespace llvm; HexagonRegisterInfo::HexagonRegisterInfo() @@ -134,6 +137,12 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } +const uint32_t *HexagonRegisterInfo::getCallPreservedMask( + const MachineFunction 
&MF, CallingConv::ID) const { + return HexagonCSR_RegMask; +} + + BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); @@ -284,6 +293,3 @@ unsigned HexagonRegisterInfo::getFirstCallerSavedNonParamReg() const { return Hexagon::R6; } - -#define GET_REGINFO_TARGET_DESC -#include "HexagonGenRegisterInfo.inc" diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index 1fb295b5bd8..8a3f175b848 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -35,7 +35,8 @@ public: /// Code Generation virtual methods... const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; - + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td index 5f813aafe58..a7b371fabbd 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -301,16 +301,7 @@ def V62Regs : RegisterClass<"Hexagon", [i32], 32, PKTCOUNTLO, PKTCOUNTHI, PKTCOUNT, UTIMERLO, UTIMERHI, UTIMER)>; -def VolatileV3 { - list Regs = [D0, D1, D2, D3, D4, D5, D6, D7, - R28, R31, - P0, P1, P2, P3, - M0, M1, - LC0, LC1, SA0, SA1, USR, USR_OVF, CS0, CS1, - V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, - V12, V13, V14, V15, V16, V17, V18, V19, V20, V21, - V22, V23, V24, V25, V26, V27, V28, V29, V30, V31, - W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, - W12, W13, W14, W15, - Q0, Q1, Q2, Q3]; -} + +def HexagonCSR + : CalleeSavedRegs<(add R16, R17, R18, R19, R20, R21, R22, R23, + R24, R25, R26, R27)>; diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 6a79ec956a5..1c904f2b7cd 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ 
b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -720,6 +720,8 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI, // %R9 = ZXTH %R12, %D6, %R12 // S2_storerh_io %R8, 2, %R12; mem:ST2[%scevgep343] for (auto &MO : PacketMI.operands()) { + if (MO.isRegMask() && MO.clobbersPhysReg(DepReg)) + return false; if (!MO.isReg() || !MO.isDef() || !MO.isImplicit()) continue; unsigned R = MO.getReg(); @@ -759,9 +761,12 @@ bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr &MI, } static bool isImplicitDependency(const MachineInstr &I, unsigned DepReg) { - for (auto &MO : I.operands()) + for (auto &MO : I.operands()) { + if (MO.isRegMask() && MO.clobbersPhysReg(DepReg)) + return true; if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit()) return true; + } return false; } @@ -1173,6 +1178,36 @@ bool HexagonPacketizerList::hasControlDependence(const MachineInstr &I, (J.isBranch() || J.isCall() || J.isBarrier()); } +bool HexagonPacketizerList::hasRegMaskDependence(const MachineInstr &I, + const MachineInstr &J) { + // Adding I to a packet that has J. + + // Regmasks are not reflected in the scheduling dependency graph, so + // we need to check them manually. This code assumes that regmasks only + // occur on calls, and the problematic case is when we add an instruction + // defining a register R to a packet that has a call that clobbers R via + // a regmask. Those cannot be packetized together, because the call will + // be executed last. That's also a reason why it is ok to add a call + // clobbering R to a packet that defines R. + + // Look for regmasks in J. + for (const MachineOperand &OpJ : J.operands()) { + if (!OpJ.isRegMask()) + continue; + assert((J.isCall() || HII->isTailCall(J)) && "Regmask on a non-call"); + for (const MachineOperand &OpI : I.operands()) { + if (OpI.isReg()) { + if (OpJ.clobbersPhysReg(OpI.getReg())) + return true; + } else if (OpI.isRegMask()) { + // Both are regmasks.
Assume that they intersect. + return true; + } + } + } + return false; +} + bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr &I, const MachineInstr &J) { bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J); @@ -1219,6 +1254,14 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { if (Dependence) return false; + // Regmasks are not accounted for in the scheduling graph, so we need + // to explicitly check for dependencies caused by them. They should only + // appear on calls, so it's not too pessimistic to reject all regmask + // dependencies. + Dependence = hasRegMaskDependence(I, J); + if (Dependence) + return false; + // V4 allows dual stores. It does not allow second store, if the first // store is not in SLOT0. New value store, new value jump, dealloc_return // and memop always take SLOT0. Arch spec 3.4.4.2. @@ -1467,13 +1510,19 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // R0 = ... ; SUI // Those cannot be packetized together, since the call will observe // the effect of the assignment to R0. - if (DepType == SDep::Anti && J.isCall()) { + if ((DepType == SDep::Anti || DepType == SDep::Output) && J.isCall()) { // Check if I defines any volatile register. We should also check // registers that the call may read, but these happen to be a // subset of the volatile register set. - for (const MCPhysReg *P = J.getDesc().ImplicitDefs; P && *P; ++P) { - if (!I.modifiesRegister(*P, HRI)) + for (const MachineOperand &Op : I.operands()) { + if (Op.isReg() && Op.isDef()) { + unsigned R = Op.getReg(); + if (!J.readsRegister(R, HRI) && !J.modifiesRegister(R, HRI)) + continue; + } else if (!Op.isRegMask()) { + // If I has a regmask assume dependency. 
continue; + } FoundSequentialDependence = true; break; } diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h index 6d708722979..3f28dc5b79c 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.h +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h @@ -112,6 +112,7 @@ protected: void reserveResourcesForConstExt(); bool hasDeadDependence(const MachineInstr &I, const MachineInstr &J); bool hasControlDependence(const MachineInstr &I, const MachineInstr &J); + bool hasRegMaskDependence(const MachineInstr &I, const MachineInstr &J); bool hasV4SpecificDependence(const MachineInstr &I, const MachineInstr &J); bool producesStall(const MachineInstr &MI); }; diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp index 63177d51cad..9aa8ad68e07 100644 --- a/lib/Target/Hexagon/RDFDeadCode.cpp +++ b/lib/Target/Hexagon/RDFDeadCode.cpp @@ -62,9 +62,19 @@ bool DeadCodeElimination::isLiveInstr(const MachineInstr *MI) const { return true; if (MI->isPHI()) return false; - for (auto &Op : MI->operands()) + for (auto &Op : MI->operands()) { if (Op.isReg() && MRI.isReserved(Op.getReg())) return true; + if (Op.isRegMask()) { + const uint32_t *BM = Op.getRegMask(); + for (unsigned R = 0, RN = DFG.getTRI().getNumRegs(); R != RN; ++R) { + if (BM[R/32] & (1u << (R%32))) + continue; + if (MRI.isReserved(R)) + return true; + } + } + } return false; } diff --git a/lib/Target/Hexagon/RDFRegisters.cpp b/lib/Target/Hexagon/RDFRegisters.cpp index 74d6ba53be7..e1589f02efe 100644 --- a/lib/Target/Hexagon/RDFRegisters.cpp +++ b/lib/Target/Hexagon/RDFRegisters.cpp @@ -70,18 +70,27 @@ std::set PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const { assert(isRegMaskId(Reg) || TargetRegisterInfo::isPhysicalRegister(Reg)); if (isRegMaskId(Reg)) { // XXX SLOW - // XXX Add other regmasks to the set. 
const uint32_t *MB = getRegMaskBits(Reg); for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) { if (MB[i/32] & (1u << (i%32))) continue; AS.insert(i); } + for (const uint32_t *RM : RegMasks) { + RegisterId MI = getRegMaskId(RM); + if (MI != Reg && aliasMM(RegisterRef(Reg), RegisterRef(MI))) + AS.insert(MI); + } return AS; } for (MCRegAliasIterator AI(Reg, &TRI, false); AI.isValid(); ++AI) AS.insert(*AI); + for (const uint32_t *RM : RegMasks) { + RegisterId MI = getRegMaskId(RM); + if (aliasRM(RegisterRef(Reg), RegisterRef(MI))) + AS.insert(MI); + } return AS; } @@ -153,10 +162,10 @@ bool PhysicalRegisterInfo::aliasRM(RegisterRef RR, RegisterRef RM) const { // is a superset of the lane mask from the register class, check the regmask // bit directly. if (RR.Mask == LaneBitmask::getAll()) - return Preserved; + return !Preserved; const TargetRegisterClass *RC = RegInfos[RR.Reg].RegClass; if (RC != nullptr && (RR.Mask & RC->LaneMask) == RC->LaneMask) - return Preserved; + return !Preserved; // Otherwise, check all subregisters whose lane mask overlaps the given // mask. For each such register, if it is preserved by the regmask, then diff --git a/lib/Target/Hexagon/RDFRegisters.h b/lib/Target/Hexagon/RDFRegisters.h index e344f479e4c..76e929d8b83 100644 --- a/lib/Target/Hexagon/RDFRegisters.h +++ b/lib/Target/Hexagon/RDFRegisters.h @@ -51,6 +51,10 @@ namespace rdf { return F - Map.begin() + 1; } + typedef typename std::vector::const_iterator const_iterator; + const_iterator begin() const { return Map.begin(); } + const_iterator end() const { return Map.end(); } + private: std::vector Map; }; diff --git a/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll b/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll index 561013b174d..906a877b91e 100644 --- a/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll +++ b/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll @@ -7,7 +7,6 @@ ; without adding an extra spill of that register. 
; ; CHECK: PredSpill: -; CHECK: memd(r29{{.*}}) = r17:16 ; CHECK-DAG: r{{[0-9]+}} = p0 ; CHECK-DAG: p0 = r{{[0-9]+}} ; CHECK-NOT: = memw(r29 diff --git a/test/CodeGen/Hexagon/compound.ll b/test/CodeGen/Hexagon/compound.ll index f8d36b8b77d..a3bd52f9719 100644 --- a/test/CodeGen/Hexagon/compound.ll +++ b/test/CodeGen/Hexagon/compound.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s +; RUN: llc -march=hexagon -filetype=obj -ifcvt-limit=0 -o - %s | llvm-objdump -d - | FileCheck %s ; CHECK: p0 = cmp.gt(r0,#-1); if (!p0.new) jump:nt @@ -14,4 +14,4 @@ ret void y: call void @b() ret void -} \ No newline at end of file +}