Fix PR23384 (part 2 of 3) NFC

Summary:
The patch moves the LSR cost comparison into the target part (TTI), so that targets can override the default lexicographical comparison.

Reviewers: qcolombet

Differential Revision: http://reviews.llvm.org/D30561

From: Evgeny Stupachenko <evstupac@gmail.com>


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304750 91177308-0d34-0410-b5e6-96231b3b80d8
Evgeny Stupachenko 2017-06-05 23:37:00 +00:00
parent 83def7c691
commit 17e210d01a
5 changed files with 106 additions and 72 deletions
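At a glance, the shape of the change: LoopStrengthReduce's Cost::operator< becomes Cost::isLess(Other, TTI), which (apart from the InsnsCost command-line override) defers to the new TargetTransformInfo::isLSRCostLess hook, so a backend can impose its own ordering on candidate formulae. Below is a minimal standalone model of that dispatch; the names DefaultTTI and InsnsFirstTTI are illustrative and not part of the patch.

#include <cassert>

// Cut-down stand-in for TargetTransformInfo::LSRCost (two fields only).
struct LSRCost {
  unsigned Insns;
  unsigned NumRegs;
};

// Stand-in for the TTI layer: the default ranks by register count; a
// hypothetical target overrides the hook to rank instruction count first.
struct DefaultTTI {
  virtual ~DefaultTTI() = default;
  virtual bool isLSRCostLess(const LSRCost &C1, const LSRCost &C2) const {
    return C1.NumRegs < C2.NumRegs; // simplified default policy
  }
};

struct InsnsFirstTTI : DefaultTTI {
  bool isLSRCostLess(const LSRCost &C1, const LSRCost &C2) const override {
    if (C1.Insns != C2.Insns)
      return C1.Insns < C2.Insns; // target-specific policy
    return DefaultTTI::isLSRCostLess(C1, C2);
  }
};

// Stand-in for Cost::isLess: LSR no longer compares the fields itself,
// it delegates the decision to whichever TTI it was handed.
static bool isLess(const LSRCost &A, const LSRCost &B, const DefaultTTI &TTI) {
  return TTI.isLSRCostLess(A, B);
}

int main() {
  LSRCost A{/*Insns=*/5, /*NumRegs=*/2};
  LSRCost B{/*Insns=*/2, /*NumRegs=*/3};
  assert(isLess(A, B, DefaultTTI{}));     // default: A uses fewer registers
  assert(!isLess(A, B, InsnsFirstTTI{})); // override: B needs fewer instructions
  return 0;
}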

--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h

@@ -267,6 +267,19 @@ public:
   /// incurs significant execution cost.
   bool isLoweredToCall(const Function *F) const;
 
+  struct LSRCost {
+    /// TODO: Some of these could be merged. Also, a lexical ordering
+    /// isn't always optimal.
+    unsigned Insns;
+    unsigned NumRegs;
+    unsigned AddRecCost;
+    unsigned NumIVMuls;
+    unsigned NumBaseAdds;
+    unsigned ImmCost;
+    unsigned SetupCost;
+    unsigned ScaleCost;
+  };
+
   /// Parameters that control the generic loop unrolling transformation.
   struct UnrollingPreferences {
     /// The cost threshold for the unrolled loop. Should be relative to the
@@ -385,6 +398,10 @@ public:
                              bool HasBaseReg, int64_t Scale,
                              unsigned AddrSpace = 0) const;
 
+  /// \brief Return true if LSR cost of C1 is lower than C2.
+  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                     TargetTransformInfo::LSRCost &C2) const;
+
   /// \brief Return true if the target supports masked load/store
   /// AVX2 and AVX-512 targets allow masks for consecutive load and store
   bool isLegalMaskedStore(Type *DataType) const;
@@ -809,6 +826,8 @@ public:
                                      int64_t BaseOffset, bool HasBaseReg,
                                      int64_t Scale,
                                      unsigned AddrSpace) = 0;
+  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                             TargetTransformInfo::LSRCost &C2) = 0;
   virtual bool isLegalMaskedStore(Type *DataType) = 0;
   virtual bool isLegalMaskedLoad(Type *DataType) = 0;
   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
@@ -996,6 +1015,10 @@ public:
     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                       Scale, AddrSpace);
   }
+  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                     TargetTransformInfo::LSRCost &C2) override {
+    return Impl.isLSRCostLess(C1, C2);
+  }
   bool isLegalMaskedStore(Type *DataType) override {
     return Impl.isLegalMaskedStore(DataType);
   }

--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h

@@ -229,6 +229,13 @@ public:
     return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
   }
 
+  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
+    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
+                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
+           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
+                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
+  }
+
   bool isLegalMaskedStore(Type *DataType) { return false; }
 
   bool isLegalMaskedLoad(Type *DataType) { return false; }
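A note on the default above: std::tie compares lexicographically, so the first field that differs decides the order (NumRegs dominates, then AddRecCost, and so on), and Insns is deliberately absent; LSR weighs it separately under its InsnsCost flag, as the Cost::isLess hunk below shows. A self-contained illustration of that behavior follows; the struct mirrors the new TargetTransformInfo::LSRCost, but this is not LLVM code.

#include <cassert>
#include <tuple>

struct LSRCost { // mirrors TargetTransformInfo::LSRCost from this patch
  unsigned Insns, NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ImmCost,
      SetupCost, ScaleCost;
};

// Same field order as TargetTransformInfoImplBase::isLSRCostLess above.
static bool defaultLess(const LSRCost &C1, const LSRCost &C2) {
  return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                  C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
         std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                  C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}

int main() {
  // A has a larger immediate cost and more instructions, but fewer
  // registers; NumRegs is compared first, so A still ranks as cheaper.
  LSRCost A{/*Insns=*/9, /*NumRegs=*/2, 0, 0, 0, /*ImmCost=*/64, 0, 0};
  LSRCost B{/*Insns=*/1, /*NumRegs=*/3, 0, 0, 0, /*ImmCost=*/0, 0, 0};
  assert(defaultLess(A, B));
  return 0;
}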

--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h

@@ -117,6 +117,10 @@ public:
     return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace);
   }
 
+  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
+    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
+  }
+
   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                            bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
     TargetLoweringBase::AddrMode AM;

--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp

@@ -133,6 +133,10 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                           Scale, AddrSpace);
 }
 
+bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
+  return TTIImpl->isLSRCostLess(C1, C2);
+}
+
 bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
   return TTIImpl->isLegalMaskedStore(DataType);
 }

--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp

@@ -950,39 +950,37 @@ namespace {
 
 /// This class is used to measure and compare candidate formulae.
 class Cost {
-  /// TODO: Some of these could be merged. Also, a lexical ordering
-  /// isn't always optimal.
-  unsigned Insns;
-  unsigned NumRegs;
-  unsigned AddRecCost;
-  unsigned NumIVMuls;
-  unsigned NumBaseAdds;
-  unsigned ImmCost;
-  unsigned SetupCost;
-  unsigned ScaleCost;
+  TargetTransformInfo::LSRCost C;
 
 public:
-  Cost()
-    : Insns(0), NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0),
-      ImmCost(0), SetupCost(0), ScaleCost(0) {}
+  Cost() {
+    C.Insns = 0;
+    C.NumRegs = 0;
+    C.AddRecCost = 0;
+    C.NumIVMuls = 0;
+    C.NumBaseAdds = 0;
+    C.ImmCost = 0;
+    C.SetupCost = 0;
+    C.ScaleCost = 0;
+  }
 
-  bool operator<(const Cost &Other) const;
+  bool isLess(Cost &Other, const TargetTransformInfo &TTI);
 
   void Lose();
 
 #ifndef NDEBUG
   // Once any of the metrics loses, they must all remain losers.
   bool isValid() {
-    return ((Insns | NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
-             | ImmCost | SetupCost | ScaleCost) != ~0u)
-      || ((Insns & NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
-           & ImmCost & SetupCost & ScaleCost) == ~0u);
+    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
+             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
+        || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
+             & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);
   }
 #endif
 
   bool isLoser() {
     assert(isValid() && "invalid cost");
-    return NumRegs == ~0u;
+    return C.NumRegs == ~0u;
   }
 
   void RateFormula(const TargetTransformInfo &TTI,
@@ -1170,10 +1168,10 @@ void Cost::RateRegister(const SCEV *Reg,
       }
 
       // Otherwise, it will be an invariant with respect to Loop L.
-      ++NumRegs;
+      ++C.NumRegs;
       return;
     }
-    AddRecCost += 1; /// TODO: This should be a function of the stride.
+    C.AddRecCost += 1; /// TODO: This should be a function of the stride.
 
     // Add the step value register, if it needs one.
     // TODO: The non-affine case isn't precisely modeled here.
@@ -1185,7 +1183,7 @@ void Cost::RateRegister(const SCEV *Reg,
       }
     }
   }
-  ++NumRegs;
+  ++C.NumRegs;
 
   // Rough heuristic; favor registers which don't require extra setup
   // instructions in the preheader.
@@ -1194,9 +1192,9 @@ void Cost::RateRegister(const SCEV *Reg,
       !(isa<SCEVAddRecExpr>(Reg) &&
         (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
          isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
-    ++SetupCost;
+    ++C.SetupCost;
 
-  NumIVMuls += isa<SCEVMulExpr>(Reg) &&
-               SE.hasComputableLoopEvolution(Reg, L);
+  C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
+                 SE.hasComputableLoopEvolution(Reg, L);
 }
 
@@ -1229,9 +1227,9 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
                        SmallPtrSetImpl<const SCEV *> *LoserRegs) {
   assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
   // Tally up the registers.
-  unsigned PrevAddRecCost = AddRecCost;
-  unsigned PrevNumRegs = NumRegs;
-  unsigned PrevNumBaseAdds = NumBaseAdds;
+  unsigned PrevAddRecCost = C.AddRecCost;
+  unsigned PrevNumRegs = C.NumRegs;
+  unsigned PrevNumBaseAdds = C.NumBaseAdds;
   if (const SCEV *ScaledReg = F.ScaledReg) {
     if (VisitedRegs.count(ScaledReg)) {
       Lose();
@@ -1256,28 +1254,28 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
   if (NumBaseParts > 1)
     // Do not count the base and a possible second register if the target
     // allows to fold 2 registers.
-    NumBaseAdds +=
+    C.NumBaseAdds +=
         NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
-  NumBaseAdds += (F.UnfoldedOffset != 0);
+  C.NumBaseAdds += (F.UnfoldedOffset != 0);
 
   // Accumulate non-free scaling amounts.
-  ScaleCost += getScalingFactorCost(TTI, LU, F, *L);
+  C.ScaleCost += getScalingFactorCost(TTI, LU, F, *L);
 
   // Tally up the non-zero immediates.
   for (const LSRFixup &Fixup : LU.Fixups) {
     int64_t O = Fixup.Offset;
     int64_t Offset = (uint64_t)O + F.BaseOffset;
     if (F.BaseGV)
-      ImmCost += 64; // Handle symbolic values conservatively.
+      C.ImmCost += 64; // Handle symbolic values conservatively.
                      // TODO: This should probably be the pointer size.
     else if (Offset != 0)
-      ImmCost += APInt(64, Offset, true).getMinSignedBits();
+      C.ImmCost += APInt(64, Offset, true).getMinSignedBits();
 
     // Check with target if this offset with this instruction is
     // specifically not supported.
     if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) &&
         !TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
-      NumBaseAdds++;
+      C.NumBaseAdds++;
   }
 
   // If we don't count instruction cost exit here.
@@ -1289,13 +1287,13 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
   // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
   // additional instruction (at least fill).
   unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1;
-  if (NumRegs > TTIRegNum) {
+  if (C.NumRegs > TTIRegNum) {
     // Cost already exceeded TTIRegNum, then only newly added register can add
     // new instructions.
     if (PrevNumRegs > TTIRegNum)
-      Insns += (NumRegs - PrevNumRegs);
+      C.Insns += (C.NumRegs - PrevNumRegs);
     else
-      Insns += (NumRegs - TTIRegNum);
+      C.Insns += (C.NumRegs - TTIRegNum);
   }
 
   // If ICmpZero formula ends with not 0, it could not be replaced by
@@ -1308,56 +1306,54 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
   // For {-10, +, 1}:
   // i = i + 1;
   if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd())
-    Insns++;
+    C.Insns++;
   // Each new AddRec adds 1 instruction to calculation.
-  Insns += (AddRecCost - PrevAddRecCost);
+  C.Insns += (C.AddRecCost - PrevAddRecCost);
 
   // BaseAdds adds instructions for unfolded registers.
   if (LU.Kind != LSRUse::ICmpZero)
-    Insns += NumBaseAdds - PrevNumBaseAdds;
+    C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
   assert(isValid() && "invalid cost");
 }
 
 /// Set this cost to a losing value.
 void Cost::Lose() {
-  Insns = ~0u;
-  NumRegs = ~0u;
-  AddRecCost = ~0u;
-  NumIVMuls = ~0u;
-  NumBaseAdds = ~0u;
-  ImmCost = ~0u;
-  SetupCost = ~0u;
-  ScaleCost = ~0u;
+  C.Insns = ~0u;
+  C.NumRegs = ~0u;
+  C.AddRecCost = ~0u;
+  C.NumIVMuls = ~0u;
+  C.NumBaseAdds = ~0u;
+  C.ImmCost = ~0u;
+  C.SetupCost = ~0u;
+  C.ScaleCost = ~0u;
 }
 
 /// Choose the lower cost.
-bool Cost::operator<(const Cost &Other) const {
-  if (InsnsCost.getNumOccurrences() > 0 && InsnsCost && Insns != Other.Insns)
-    return Insns < Other.Insns;
-  return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
-                  ImmCost, SetupCost) <
-         std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
-                  Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost,
-                  Other.SetupCost);
+bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) {
+  if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
+      C.Insns != Other.C.Insns)
+    return C.Insns < Other.C.Insns;
+  return TTI.isLSRCostLess(C, Other.C);
 }
 
 void Cost::print(raw_ostream &OS) const {
   if (InsnsCost)
-    OS << Insns << " instruction" << (Insns == 1 ? " " : "s ");
-  OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
-  if (AddRecCost != 0)
-    OS << ", with addrec cost " << AddRecCost;
-  if (NumIVMuls != 0)
-    OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
-  if (NumBaseAdds != 0)
-    OS << ", plus " << NumBaseAdds << " base add"
-       << (NumBaseAdds == 1 ? "" : "s");
-  if (ScaleCost != 0)
-    OS << ", plus " << ScaleCost << " scale cost";
-  if (ImmCost != 0)
-    OS << ", plus " << ImmCost << " imm cost";
-  if (SetupCost != 0)
-    OS << ", plus " << SetupCost << " setup cost";
+    OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
+  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
+  if (C.AddRecCost != 0)
+    OS << ", with addrec cost " << C.AddRecCost;
+  if (C.NumIVMuls != 0)
+    OS << ", plus " << C.NumIVMuls << " IV mul"
+       << (C.NumIVMuls == 1 ? "" : "s");
+  if (C.NumBaseAdds != 0)
+    OS << ", plus " << C.NumBaseAdds << " base add"
+       << (C.NumBaseAdds == 1 ? "" : "s");
+  if (C.ScaleCost != 0)
+    OS << ", plus " << C.ScaleCost << " scale cost";
+  if (C.ImmCost != 0)
+    OS << ", plus " << C.ImmCost << " imm cost";
+  if (C.SetupCost != 0)
+    OS << ", plus " << C.SetupCost << " setup cost";
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -4112,7 +4108,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
         Cost CostBest;
         Regs.clear();
         CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
-        if (CostF < CostBest)
+        if (CostF.isLess(CostBest, TTI))
           std::swap(F, Best);
         DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
               dbgs() << "\n"
@@ -4580,7 +4576,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
     NewCost = CurCost;
     NewRegs = CurRegs;
     NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU);
-    if (NewCost < SolutionCost) {
+    if (NewCost.isLess(SolutionCost, TTI)) {
       Workspace.push_back(&F);
       if (Workspace.size() != Uses.size()) {
         SolveRecurse(Solution, SolutionCost, Workspace, NewCost,