//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // The purpose of this pass is to employ a canonical code transformation so // that code compiled with slightly different IR passes can be diffed more // effectively than otherwise. This is done by renaming vregs in a given // LiveRange in a canonical way. This pass also does a pseudo-scheduling to // move defs closer to their use inorder to reduce diffs caused by slightly // different schedules. // // Basic Usage: // // llc -o - -run-pass mir-canonicalizer example.mir // // Reorders instructions canonically. // Renames virtual register operands canonically. // Strips certain MIR artifacts (optionally). // //===----------------------------------------------------------------------===// #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include using namespace llvm; namespace llvm { extern char &MIRCanonicalizerID; } // namespace llvm #define DEBUG_TYPE "mir-canonicalizer" static cl::opt CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), cl::value_desc("N"), cl::desc("Function number to canonicalize.")); static cl::opt CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"), cl::desc("BasicBlock number to canonicalize.")); namespace { class MIRCanonicalizer : public MachineFunctionPass { public: static char ID; MIRCanonicalizer() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "Rename register operands in a canonical ordering."; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } bool runOnMachineFunction(MachineFunction &MF) override; }; } // end anonymous namespace enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate }; class TypedVReg { VRType type; unsigned reg; public: TypedVReg(unsigned reg) : type(RSE_Reg), reg(reg) {} TypedVReg(VRType type) : type(type), reg(~0U) { assert(type != RSE_Reg && "Expected a non-register type."); } bool isReg() const { return type == RSE_Reg; } bool isFrameIndex() const { return type == RSE_FrameIndex; } bool isCandidate() const { return type == RSE_NewCandidate; } VRType getType() const { return type; } unsigned getReg() const { assert(this->isReg() && "Expected a virtual or physical register."); return reg; } }; char MIRCanonicalizer::ID; char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", "Rename Register Operands Canonically", false, false); INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", "Rename Register Operands Canonically", false, false); static std::vector GetRPOList(MachineFunction &MF) { ReversePostOrderTraversal RPOT(&*MF.begin()); std::vector RPOList; for (auto MBB : RPOT) { RPOList.push_back(MBB); } return RPOList; } // Set a dummy vreg. We use this vregs register class to generate throw-away // vregs that are used to skip vreg numbers so that vreg numbers line up. static unsigned GetDummyVReg(const MachineFunction &MF) { for (auto &MBB : MF) { for (auto &MI : MBB) { for (auto &MO : MI.operands()) { if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; return MO.getReg(); } } } return ~0U; } static bool rescheduleCanonically(MachineBasicBlock *MBB) { bool Changed = false; // Calculates the distance of MI from the begining of its parent BB. auto getInstrIdx = [](const MachineInstr &MI) { unsigned i = 0; for (auto &CurMI : *MI.getParent()) { if (&CurMI == &MI) return i; i++; } return ~0U; }; // Pre-Populate vector of instructions to reschedule so that we don't // clobber the iterator. std::vector Instructions; for (auto &MI : *MBB) { Instructions.push_back(&MI); } for (auto *II : Instructions) { if (II->getNumOperands() == 0) continue; MachineOperand &MO = II->getOperand(0); if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); MachineInstr *Def = II; unsigned Distance = ~0U; MachineInstr *UseToBringDefCloserTo = nullptr; MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) { MachineInstr *UseInst = UO.getParent(); const unsigned DefLoc = getInstrIdx(*Def); const unsigned UseLoc = getInstrIdx(*UseInst); const unsigned Delta = (UseLoc - DefLoc); if (UseInst->getParent() != Def->getParent()) continue; if (DefLoc >= UseLoc) continue; if (Delta < Distance) { Distance = Delta; UseToBringDefCloserTo = UseInst; } } const auto BBE = MBB->instr_end(); MachineBasicBlock::iterator DefI = BBE; MachineBasicBlock::iterator UseI = BBE; for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) { if (DefI != BBE && UseI != BBE) break; if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo)) continue; if (&*BBI == Def) { DefI = BBI; continue; } if (&*BBI == UseToBringDefCloserTo) { UseI = BBI; continue; } } if (DefI == BBE || UseI == BBE) continue; DEBUG({ dbgs() << "Splicing "; DefI->dump(); dbgs() << " right before: "; UseI->dump(); }); Changed = true; MBB->splice(UseI, MBB, DefI); } return Changed; } /// Here we find our candidates. What makes an interesting candidate? /// An candidate for a canonicalization tree root is normally any kind of /// instruction that causes side effects such as a store to memory or a copy to /// a physical register or a return instruction. We use these as an expression /// tree root that we walk inorder to build a canonical walk which should result /// in canoncal vreg renaming. static std::vector populateCandidates(MachineBasicBlock *MBB) { std::vector Candidates; MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { MachineInstr *MI = &*II; bool DoesMISideEffect = false; if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) { const unsigned Dst = MI->getOperand(0).getReg(); DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst); for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { if (DoesMISideEffect) break; DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent()); } } if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect) continue; DEBUG(dbgs() << "Found Candidate: "; MI->dump();); Candidates.push_back(MI); } return Candidates; } void doCandidateWalk(std::vector &VRegs, std::queue &RegQueue, std::vector &VisitedMIs, const MachineBasicBlock *MBB) { const MachineFunction &MF = *MBB->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); while (!RegQueue.empty()) { auto TReg = RegQueue.front(); RegQueue.pop(); if (TReg.isFrameIndex()) { DEBUG(dbgs() << "Popping frame index.\n";); VRegs.push_back(TypedVReg(RSE_FrameIndex)); continue; } assert(TReg.isReg() && "Expected vreg or physreg."); unsigned Reg = TReg.getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { DEBUG({ dbgs() << "Popping vreg "; MRI.def_begin(Reg)->dump(); dbgs() << "\n"; }); if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) { return TR.isReg() && TR.getReg() == Reg; })) { VRegs.push_back(TypedVReg(Reg)); } } else { DEBUG(dbgs() << "Popping physreg.\n";); VRegs.push_back(TypedVReg(Reg)); continue; } for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) { MachineInstr *Def = RI->getParent(); if (Def->getParent() != MBB) continue; if (llvm::any_of(VisitedMIs, [&](const MachineInstr *VMI) { return Def == VMI; })) { break; } DEBUG({ dbgs() << "\n========================\n"; dbgs() << "Visited MI: "; Def->dump(); dbgs() << "BB Name: " << Def->getParent()->getName() << "\n"; dbgs() << "\n========================\n"; }); VisitedMIs.push_back(Def); for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) { MachineOperand &MO = Def->getOperand(I); if (MO.isFI()) { DEBUG(dbgs() << "Pushing frame index.\n";); RegQueue.push(TypedVReg(RSE_FrameIndex)); } if (!MO.isReg()) continue; RegQueue.push(TypedVReg(MO.getReg())); } } } } // TODO: Work to remove this in the future. One day when we have named vregs // we should be able to form the canonical name based on some characteristic // we see in that point of the expression tree (like if we were to name based // on some sort of value numbering scheme). static void SkipVRegs(unsigned &VRegGapIndex, MachineRegisterInfo &MRI, const TargetRegisterClass *RC) { const unsigned VR_GAP = (++VRegGapIndex * 1000); DEBUG({ dbgs() << "Adjusting per-BB VR_GAP for BB" << VRegGapIndex << " to " << VR_GAP << "\n"; }); unsigned I = MRI.createVirtualRegister(RC); const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; while (I != E) { I = MRI.createVirtualRegister(RC); } } static std::map GetVRegRenameMap(const std::vector &VRegs, const std::vector &renamedInOtherBB, MachineRegisterInfo &MRI, const TargetRegisterClass *RC) { std::map VRegRenameMap; unsigned LastRenameReg = MRI.createVirtualRegister(RC); bool FirstCandidate = true; for (auto &vreg : VRegs) { if (vreg.isFrameIndex()) { // We skip one vreg for any frame index because there is a good chance // (especially when comparing SelectionDAG to GlobalISel generated MIR) // that in the other file we are just getting an incoming vreg that comes // from a copy from a frame index. So it's safe to skip by one. LastRenameReg = MRI.createVirtualRegister(RC); DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); continue; } else if (vreg.isCandidate()) { // After the first candidate, for every subsequent candidate, we skip mod // 10 registers so that the candidates are more likely to start at the // same vreg number making it more likely that the canonical walk from the // candidate insruction. We don't need to skip from the first candidate of // the BasicBlock because we already skip ahead several vregs for each BB. while (LastRenameReg % 10) { if (!FirstCandidate) break; LastRenameReg = MRI.createVirtualRegister(RC); DEBUG({ dbgs() << "Skipping rename for new candidate " << LastRenameReg << "\n"; }); } FirstCandidate = false; continue; } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) { LastRenameReg = MRI.createVirtualRegister(RC); DEBUG({ dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n"; }); continue; } auto Reg = vreg.getReg(); if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { DEBUG(dbgs() << "Vreg " << Reg << " already renamed in other BB.\n";); continue; } auto Rename = MRI.createVirtualRegister(MRI.getRegClass(Reg)); LastRenameReg = Rename; if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { DEBUG(dbgs() << "Mapping vreg ";); if (MRI.reg_begin(Reg) != MRI.reg_end()) { DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); } else { DEBUG(dbgs() << Reg;); } DEBUG(dbgs() << " to ";); if (MRI.reg_begin(Rename) != MRI.reg_end()) { DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); } else { DEBUG(dbgs() << Rename;); } DEBUG(dbgs() << "\n";); VRegRenameMap.insert(std::pair(Reg, Rename)); } } return VRegRenameMap; } static bool doVRegRenaming(std::vector &RenamedInOtherBB, const std::map &VRegRenameMap, MachineRegisterInfo &MRI) { bool Changed = false; for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) { auto VReg = I->first; auto Rename = I->second; RenamedInOtherBB.push_back(Rename); std::vector RenameMOs; for (auto &MO : MRI.reg_operands(VReg)) { RenameMOs.push_back(&MO); } for (auto *MO : RenameMOs) { Changed = true; MO->setReg(Rename); if (!MO->isDef()) MO->setIsKill(false); } } return Changed; } static bool doDefKillClear(MachineBasicBlock *MBB) { bool Changed = false; for (auto &MI : *MBB) { for (auto &MO : MI.operands()) { if (!MO.isReg()) continue; if (!MO.isDef() && MO.isKill()) { Changed = true; MO.setIsKill(false); } if (MO.isDef() && MO.isDead()) { Changed = true; MO.setIsDead(false); } } } return Changed; } static bool runOnBasicBlock(MachineBasicBlock *MBB, std::vector &bbNames, std::vector &renamedInOtherBB, unsigned &basicBlockNum, unsigned &VRegGapIndex) { if (CanonicalizeBasicBlockNumber != ~0U) { if (CanonicalizeBasicBlockNumber != basicBlockNum++) return false; DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() << "\n";); } if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { DEBUG({ dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName() << "\n"; }); return false; } DEBUG({ dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; dbgs() << "\n\n================================================\n\n"; }); bool Changed = false; MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); const unsigned DummyVReg = GetDummyVReg(MF); const TargetRegisterClass *DummyRC = (DummyVReg == ~0U) ? nullptr : MRI.getRegClass(DummyVReg); if (!DummyRC) return false; bbNames.push_back(MBB->getName()); DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); Changed |= rescheduleCanonically(MBB); DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); std::vector Candidates = populateCandidates(MBB); std::vector VisitedMIs; std::copy(Candidates.begin(), Candidates.end(), std::back_inserter(VisitedMIs)); std::vector VRegs; for (auto candidate : Candidates) { VRegs.push_back(TypedVReg(RSE_NewCandidate)); std::queue RegQueue; // Here we walk the vreg operands of a non-root node along our walk. // The root nodes are the original candidates (stores normally). // These are normally not the root nodes (except for the case of copies to // physical registers). for (unsigned i = 1; i < candidate->getNumOperands(); i++) { if (candidate->mayStore() || candidate->isBranch()) break; MachineOperand &MO = candidate->getOperand(i); if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) continue; DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); RegQueue.push(TypedVReg(MO.getReg())); } // Here we walk the root candidates. We start from the 0th operand because // the root is normally a store to a vreg. for (unsigned i = 0; i < candidate->getNumOperands(); i++) { if (!candidate->mayStore() && !candidate->isBranch()) break; MachineOperand &MO = candidate->getOperand(i); // TODO: Do we want to only add vregs here? if (!MO.isReg() && !MO.isFI()) continue; DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) : TypedVReg(RSE_FrameIndex)); } doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB); } // If we have populated no vregs to rename then bail. // The rest of this function does the vreg remaping. if (VRegs.size() == 0) return Changed; // Skip some vregs, so we can recon where we'll land next. SkipVRegs(VRegGapIndex, MRI, DummyRC); auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, DummyRC); Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI); Changed |= doDefKillClear(MBB); DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";); DEBUG(dbgs() << "\n\n================================================\n\n"); return Changed; } bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { static unsigned functionNum = 0; if (CanonicalizeFunctionNumber != ~0U) { if (CanonicalizeFunctionNumber != functionNum++) return false; DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() << "\n";); } // we need a valid vreg to create a vreg type for skipping all those // stray vreg numbers so reach alignment/canonical vreg values. std::vector RPOList = GetRPOList(MF); DEBUG( dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n"; dbgs() << "\n\n================================================\n\n"; dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; for (auto MBB : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs() << "\n\n================================================\n\n"; ); std::vector BBNames; std::vector RenamedInOtherBB; unsigned GapIdx = 0; unsigned BBNum = 0; bool Changed = false; for (auto MBB : RPOList) Changed |= runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx); return Changed; }