From 35da384515ca7cbfb6571934d91c71f974dfd47e Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Sat, 31 Jan 2015 07:49:41 +0000 Subject: [PATCH] Change more of the guts of CodeGenRegister's RegUnit tracking to be based on bit vectors. This is a continuation of my prior work to move some of the inner workings for CodeGenRegister to use bit vectors when computing about register units. This is highly beneficial to TableGen runtime on targets with large, dense register files. This patch represents a ~40% runtime reduction over and above my earlier improvement on a stress test of this case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227678 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/TableGen/CodeGenRegisters.cpp | 83 +++++++++----------------- utils/TableGen/CodeGenRegisters.h | 15 ++--- utils/TableGen/RegisterInfoEmitter.cpp | 16 ++--- 3 files changed, 45 insertions(+), 69 deletions(-) diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp index 1cf260d0ea1..850540ba5f4 100644 --- a/utils/TableGen/CodeGenRegisters.cpp +++ b/utils/TableGen/CodeGenRegisters.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/IntEqClasses.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Debug.h" @@ -109,7 +108,6 @@ CodeGenRegister::CodeGenRegister(Record *R, unsigned Enum) EnumValue(Enum), CostPerUse(R->getValueAsInt("CostPerUse")), CoveredBySubRegs(R->getValueAsBit("CoveredBySubRegs")), - NumNativeRegUnits(0), SubRegsComplete(false), SuperRegsComplete(false), TopoSig(~0u) @@ -155,7 +153,7 @@ namespace { // Iterate over all register units in a set of registers. class RegUnitIterator { CodeGenRegister::Set::const_iterator RegI, RegE; - CodeGenRegister::RegUnitList::const_iterator UnitI, UnitE; + CodeGenRegister::RegUnitList::iterator UnitI, UnitE; public: RegUnitIterator(const CodeGenRegister::Set &Regs): @@ -193,44 +191,23 @@ protected: }; } // namespace -// Merge two RegUnitLists maintaining the order and removing duplicates. -// Overwrites MergedRU in the process. -static void mergeRegUnits(CodeGenRegister::RegUnitList &MergedRU, - const CodeGenRegister::RegUnitList &RRU) { - CodeGenRegister::RegUnitList LRU = MergedRU; - MergedRU.clear(); - std::set_union(LRU.begin(), LRU.end(), RRU.begin(), RRU.end(), - std::back_inserter(MergedRU)); -} - // Return true of this unit appears in RegUnits. static bool hasRegUnit(CodeGenRegister::RegUnitList &RegUnits, unsigned Unit) { - return std::count(RegUnits.begin(), RegUnits.end(), Unit); + return RegUnits.test(Unit); } // Inherit register units from subregisters. // Return true if the RegUnits changed. bool CodeGenRegister::inheritRegUnits(CodeGenRegBank &RegBank) { - unsigned OldNumUnits = RegUnits.size(); - - SparseBitVector<> NewUnits; - for (unsigned RU : RegUnits) - NewUnits.set(RU); - + bool changed = false; for (SubRegMap::const_iterator I = SubRegs.begin(), E = SubRegs.end(); I != E; ++I) { CodeGenRegister *SR = I->second; // Merge the subregister's units into this register's RegUnits. - for (unsigned RU : SR->RegUnits) - NewUnits.set(RU); + changed |= (RegUnits |= SR->RegUnits); } - RegUnits.clear(); - RegUnits.reserve(NewUnits.count()); - for (unsigned RU : NewUnits) - RegUnits.push_back(RU); - - return OldNumUnits != RegUnits.size(); + return changed; } const CodeGenRegister::SubRegMap & @@ -376,14 +353,8 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { // sub-registers, the other registers won't contribute any more units. for (unsigned i = 0, e = ExplicitSubRegs.size(); i != e; ++i) { CodeGenRegister *SR = ExplicitSubRegs[i]; - // Explicit sub-registers are usually disjoint, so this is a good way of - // computing the union. We may pick up a few duplicates that will be - // eliminated below. - unsigned N = RegUnits.size(); - RegUnits.append(SR->RegUnits.begin(), SR->RegUnits.end()); - std::inplace_merge(RegUnits.begin(), RegUnits.begin() + N, RegUnits.end()); + RegUnits |= SR->RegUnits; } - RegUnits.erase(std::unique(RegUnits.begin(), RegUnits.end()), RegUnits.end()); // Absent any ad hoc aliasing, we create one register unit per leaf register. // These units correspond to the maximal cliques in the register overlap @@ -402,19 +373,19 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { // Create a RegUnit representing this alias edge, and add it to both // registers. unsigned Unit = RegBank.newRegUnit(this, AR); - RegUnits.push_back(Unit); - AR->RegUnits.push_back(Unit); + RegUnits.set(Unit); + AR->RegUnits.set(Unit); } // Finally, create units for leaf registers without ad hoc aliases. Note that // a leaf register with ad hoc aliases doesn't get its own unit - it isn't // necessary. This means the aliasing leaf registers can share a single unit. if (RegUnits.empty()) - RegUnits.push_back(RegBank.newRegUnit(this)); + RegUnits.set(RegBank.newRegUnit(this)); // We have now computed the native register units. More may be adopted later // for balancing purposes. - NumNativeRegUnits = RegUnits.size(); + NativeRegUnits = RegUnits; return SubRegs; } @@ -548,7 +519,7 @@ CodeGenRegister::addSubRegsPreOrder(SetVector &OSet, // Get the sum of this register's unit weights. unsigned CodeGenRegister::getWeight(const CodeGenRegBank &RegBank) const { unsigned Weight = 0; - for (RegUnitList::const_iterator I = RegUnits.begin(), E = RegUnits.end(); + for (RegUnitList::iterator I = RegUnits.begin(), E = RegUnits.end(); I != E; ++I) { Weight += RegBank.getRegUnit(*I).Weight; } @@ -1424,9 +1395,10 @@ static void computeUberWeights(std::vector &UberSets, // Find singular determinants. for (CodeGenRegister::Set::iterator RegI = I->Regs.begin(), RegE = I->Regs.end(); RegI != RegE; ++RegI) { - if ((*RegI)->getRegUnits().size() == 1 - && (*RegI)->getWeight(RegBank) == I->Weight) - mergeRegUnits(I->SingularDeterminants, (*RegI)->getRegUnits()); + if ((*RegI)->getRegUnits().count() == 1 + && (*RegI)->getWeight(RegBank) == I->Weight) { + I->SingularDeterminants |= (*RegI)->getRegUnits(); + } } } } @@ -1444,13 +1416,14 @@ static void computeUberWeights(std::vector &UberSets, static bool normalizeWeight(CodeGenRegister *Reg, std::vector &UberSets, std::vector &RegSets, - std::set &NormalRegs, + SparseBitVector<> &NormalRegs, CodeGenRegister::RegUnitList &NormalUnits, CodeGenRegBank &RegBank) { - bool Changed = false; - if (!NormalRegs.insert(Reg->EnumValue).second) - return Changed; + if (NormalRegs.test(Reg->EnumValue)) + return false; + NormalRegs.set(Reg->EnumValue); + bool Changed = false; const CodeGenRegister::SubRegMap &SRM = Reg->getSubRegs(); for (CodeGenRegister::SubRegMap::const_iterator SRI = SRM.begin(), SRE = SRM.end(); SRI != SRE; ++SRI) { @@ -1474,8 +1447,8 @@ static bool normalizeWeight(CodeGenRegister *Reg, // A register unit's weight can be adjusted only if it is the singular unit // for this register, has not been used to normalize a subregister's set, // and has not already been used to singularly determine this UberRegSet. - unsigned AdjustUnit = Reg->getRegUnits().front(); - if (Reg->getRegUnits().size() != 1 + unsigned AdjustUnit = *Reg->getRegUnits().begin(); + if (Reg->getRegUnits().count() != 1 || hasRegUnit(NormalUnits, AdjustUnit) || hasRegUnit(UberSet->SingularDeterminants, AdjustUnit)) { // We don't have an adjustable unit, so adopt a new one. @@ -1493,7 +1466,7 @@ static bool normalizeWeight(CodeGenRegister *Reg, } // Mark these units normalized so superregisters can't change their weights. - mergeRegUnits(NormalUnits, Reg->getRegUnits()); + NormalUnits |= Reg->getRegUnits(); return Changed; } @@ -1518,7 +1491,7 @@ void CodeGenRegBank::computeRegUnitWeights() { Changed = false; for (auto &Reg : Registers) { CodeGenRegister::RegUnitList NormalUnits; - std::set NormalRegs; + SparseBitVector<> NormalRegs; Changed |= normalizeWeight(&Reg, UberSets, RegSets, NormalRegs, NormalUnits, *this); } @@ -1781,7 +1754,7 @@ void CodeGenRegBank::computeRegUnitLaneMasks() { for (auto &Register : Registers) { // Create an initial lane mask for all register units. const auto &RegUnits = Register.getRegUnits(); - CodeGenRegister::RegUnitLaneMaskList RegUnitLaneMasks(RegUnits.size(), 0); + CodeGenRegister::RegUnitLaneMaskList RegUnitLaneMasks(RegUnits.count(), 0); // Iterate through SubRegisters. typedef CodeGenRegister::SubRegMap SubRegMap; const SubRegMap &SubRegs = Register.getSubRegs(); @@ -1798,12 +1771,14 @@ void CodeGenRegBank::computeRegUnitLaneMasks() { // Distribute LaneMask to Register Units touched. for (const auto &SUI : SubRegister->getRegUnits()) { bool Found = false; - for (size_t u = 0, ue = RegUnits.size(); u < ue; ++u) { - if (SUI == RegUnits[u]) { + unsigned u = 0; + for (unsigned RU : RegUnits) { + if (SUI == RU) { RegUnitLaneMasks[u] |= LaneMask; assert(!Found); Found = true; } + ++u; } assert(Found); } diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h index 87a0dcf7af3..3fbf6e162c4 100644 --- a/utils/TableGen/CodeGenRegisters.h +++ b/utils/TableGen/CodeGenRegisters.h @@ -19,6 +19,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SparseBitVector.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/TableGen/Record.h" @@ -197,23 +198,23 @@ namespace llvm { } // List of register units in ascending order. - typedef SmallVector RegUnitList; + typedef SparseBitVector<> RegUnitList; typedef SmallVector RegUnitLaneMaskList; // How many entries in RegUnitList are native? - unsigned NumNativeRegUnits; + RegUnitList NativeRegUnits; // Get the list of register units. // This is only valid after computeSubRegs() completes. const RegUnitList &getRegUnits() const { return RegUnits; } ArrayRef getRegUnitLaneMasks() const { - return makeArrayRef(RegUnitLaneMasks).slice(0, NumNativeRegUnits); + return makeArrayRef(RegUnitLaneMasks).slice(0, NativeRegUnits.count()); } // Get the native register units. This is a prefix of getRegUnits(). - ArrayRef getNativeRegUnits() const { - return makeArrayRef(RegUnits).slice(0, NumNativeRegUnits); + RegUnitList getNativeRegUnits() const { + return NativeRegUnits; } void setRegUnitLaneMasks(const RegUnitLaneMaskList &LaneMasks) { @@ -225,8 +226,8 @@ namespace llvm { bool inheritRegUnits(CodeGenRegBank &RegBank); // Adopt a register unit for pressure tracking. - // A unit is adopted iff its unit number is >= NumNativeRegUnits. - void adoptRegUnit(unsigned RUID) { RegUnits.push_back(RUID); } + // A unit is adopted iff its unit number is >= NativeRegUnits.count(). + void adoptRegUnit(unsigned RUID) { RegUnits.set(RUID); } // Get the sum of this register's register unit weights. unsigned getWeight(const CodeGenRegBank &RegBank) const; diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp index 1c3de4a2c2b..115f1efbd2d 100644 --- a/utils/TableGen/RegisterInfoEmitter.cpp +++ b/utils/TableGen/RegisterInfoEmitter.cpp @@ -573,11 +573,11 @@ typedef SmallVector MaskVec; // Differentially encode a sequence of numbers into V. The starting value and // terminating 0 are not added to V, so it will have the same size as List. static -DiffVec &diffEncode(DiffVec &V, unsigned InitVal, ArrayRef List) { +DiffVec &diffEncode(DiffVec &V, unsigned InitVal, SparseBitVector<> List) { assert(V.empty() && "Clear DiffVec before diffEncode."); uint16_t Val = uint16_t(InitVal); - for (unsigned i = 0; i != List.size(); ++i) { - uint16_t Cur = List[i]; + + for (uint16_t Cur : List) { V.push_back(Cur - Val); Val = Cur; } @@ -856,13 +856,13 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, // // Check the neighboring registers for arithmetic progressions. unsigned ScaleA = ~0u, ScaleB = ~0u; - ArrayRef RUs = Reg.getNativeRegUnits(); + SparseBitVector<> RUs = Reg.getNativeRegUnits(); if (I != Regs.begin() && - std::prev(I)->getNativeRegUnits().size() == RUs.size()) - ScaleB = RUs.front() - std::prev(I)->getNativeRegUnits().front(); + std::prev(I)->getNativeRegUnits().count() == RUs.count()) + ScaleB = *RUs.begin() - *std::prev(I)->getNativeRegUnits().begin(); if (std::next(I) != Regs.end() && - std::next(I)->getNativeRegUnits().size() == RUs.size()) - ScaleA = std::next(I)->getNativeRegUnits().front() - RUs.front(); + std::next(I)->getNativeRegUnits().count() == RUs.count()) + ScaleA = *std::next(I)->getNativeRegUnits().begin() - *RUs.begin(); unsigned Scale = std::min(ScaleB, ScaleA); // Default the scale to 0 if it can't be encoded in 4 bits. if (Scale >= 16)