mirror of
https://github.com/RPCS3/llvm.git
Fix PR23384 (part 2 of 3) NFC
Summary: The patch moves the LSR cost comparison into the target (TTI) layer.

Reviewers: qcolombet

Differential Revision: http://reviews.llvm.org/D30561

From: Evgeny Stupachenko <evstupac@gmail.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304750 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
83def7c691
commit
17e210d01a
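
The new TTI hook lets an individual backend replace the default, purely lexicographic LSR cost comparison with its own policy. Below is a minimal sketch of what such an override could look like; the target name MyTargetTTIImpl and the weighting choice are illustrative assumptions, not part of this patch.

// Sketch only: a backend-specific override of the hook added by this patch.
// "MyTargetTTIImpl" is a placeholder; a real target would derive from
// BasicTTIImplBase the same way existing targets do.
#include "llvm/Analysis/TargetTransformInfo.h"
#include <tuple>

namespace llvm {

class MyTargetTTIImpl /* : public BasicTTIImplBase<MyTargetTTIImpl> */ {
public:
  // Shadows the default in TargetTransformInfoImplBase; reached through
  // TargetTransformInfo::isLSRCostLess via the Concept/Model wrappers below.
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) {
    // Illustrative policy: rank by instruction count before register count,
    // then fall back to the remaining metrics lexicographically.
    return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
                    C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
                    C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }
};

} // namespace llvm
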
@@ -267,6 +267,19 @@ public:
   /// incurs significant execution cost.
   bool isLoweredToCall(const Function *F) const;
 
+  struct LSRCost {
+    /// TODO: Some of these could be merged. Also, a lexical ordering
+    /// isn't always optimal.
+    unsigned Insns;
+    unsigned NumRegs;
+    unsigned AddRecCost;
+    unsigned NumIVMuls;
+    unsigned NumBaseAdds;
+    unsigned ImmCost;
+    unsigned SetupCost;
+    unsigned ScaleCost;
+  };
+
   /// Parameters that control the generic loop unrolling transformation.
   struct UnrollingPreferences {
     /// The cost threshold for the unrolled loop. Should be relative to the
@@ -385,6 +398,10 @@ public:
                              bool HasBaseReg, int64_t Scale,
                              unsigned AddrSpace = 0) const;
 
+  /// \brief Return true if LSR cost of C1 is lower than C2.
+  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                     TargetTransformInfo::LSRCost &C2) const;
+
   /// \brief Return true if the target supports masked load/store
   /// AVX2 and AVX-512 targets allow masks for consecutive load and store
   bool isLegalMaskedStore(Type *DataType) const;
@@ -809,6 +826,8 @@ public:
                                      int64_t BaseOffset, bool HasBaseReg,
                                      int64_t Scale,
                                      unsigned AddrSpace) = 0;
+  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                             TargetTransformInfo::LSRCost &C2) = 0;
   virtual bool isLegalMaskedStore(Type *DataType) = 0;
   virtual bool isLegalMaskedLoad(Type *DataType) = 0;
   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
@@ -996,6 +1015,10 @@ public:
     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                       Scale, AddrSpace);
   }
+  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                     TargetTransformInfo::LSRCost &C2) override {
+    return Impl.isLSRCostLess(C1, C2);
+  }
   bool isLegalMaskedStore(Type *DataType) override {
     return Impl.isLegalMaskedStore(DataType);
   }
@@ -229,6 +229,13 @@ public:
     return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
   }
 
+  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
+    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
+                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
+           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
+                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
+  }
+
   bool isLegalMaskedStore(Type *DataType) { return false; }
 
   bool isLegalMaskedLoad(Type *DataType) { return false; }
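
The default implementation above preserves the ordering LSR used before this patch: a strict lexicographic comparison via std::tie, so NumRegs dominates AddRecCost, which dominates NumIVMuls, and so on. Note that Insns is deliberately not part of the default tuple; it is only consulted when the InsnsCost option is enabled (see Cost::isLess further down). A small self-contained illustration of that tie-breaking behaviour, using a stand-in struct rather than the real TTI types:

// Stand-alone sketch of the lexicographic ordering used by the default
// isLSRCostLess. LSRCostLike is an assumed stand-in for TTI::LSRCost.
#include <cassert>
#include <tuple>

struct LSRCostLike {
  unsigned Insns, NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ImmCost,
      SetupCost, ScaleCost;
};

static bool defaultLess(const LSRCostLike &C1, const LSRCostLike &C2) {
  return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                  C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
         std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                  C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}

int main() {
  LSRCostLike A{/*Insns=*/9, /*NumRegs=*/2, 0, 0, 0, 0, 0, 0};
  LSRCostLike B{/*Insns=*/1, /*NumRegs=*/3, 0, 0, 0, 0, 0, 0};
  // Fewer registers wins regardless of the other fields; Insns is ignored
  // by the default comparison.
  assert(defaultLess(A, B));
  return 0;
}
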
@@ -117,6 +117,10 @@ public:
     return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace);
   }
 
+  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
+    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
+  }
+
   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                            bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
     TargetLoweringBase::AddrMode AM;
@@ -133,6 +133,10 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                           Scale, AddrSpace);
 }
 
+bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
+  return TTIImpl->isLSRCostLess(C1, C2);
+}
+
 bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
   return TTIImpl->isLegalMaskedStore(DataType);
 }
@@ -950,39 +950,37 @@ namespace {
 
 /// This class is used to measure and compare candidate formulae.
 class Cost {
-  /// TODO: Some of these could be merged. Also, a lexical ordering
-  /// isn't always optimal.
-  unsigned Insns;
-  unsigned NumRegs;
-  unsigned AddRecCost;
-  unsigned NumIVMuls;
-  unsigned NumBaseAdds;
-  unsigned ImmCost;
-  unsigned SetupCost;
-  unsigned ScaleCost;
+  TargetTransformInfo::LSRCost C;
 
 public:
-  Cost()
-    : Insns(0), NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0),
-      ImmCost(0), SetupCost(0), ScaleCost(0) {}
+  Cost() {
+    C.Insns = 0;
+    C.NumRegs = 0;
+    C.AddRecCost = 0;
+    C.NumIVMuls = 0;
+    C.NumBaseAdds = 0;
+    C.ImmCost = 0;
+    C.SetupCost = 0;
+    C.ScaleCost = 0;
+  }
 
-  bool operator<(const Cost &Other) const;
+  bool isLess(Cost &Other, const TargetTransformInfo &TTI);
 
   void Lose();
 
 #ifndef NDEBUG
   // Once any of the metrics loses, they must all remain losers.
   bool isValid() {
-    return ((Insns | NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
-             | ImmCost | SetupCost | ScaleCost) != ~0u)
-      || ((Insns & NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
-           & ImmCost & SetupCost & ScaleCost) == ~0u);
+    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
+             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
+      || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
+           & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);
   }
 #endif
 
   bool isLoser() {
     assert(isValid() && "invalid cost");
-    return NumRegs == ~0u;
+    return C.NumRegs == ~0u;
   }
 
   void RateFormula(const TargetTransformInfo &TTI,
@@ -1170,10 +1168,10 @@ void Cost::RateRegister(const SCEV *Reg,
       }
 
       // Otherwise, it will be an invariant with respect to Loop L.
-      ++NumRegs;
+      ++C.NumRegs;
       return;
     }
-    AddRecCost += 1; /// TODO: This should be a function of the stride.
+    C.AddRecCost += 1; /// TODO: This should be a function of the stride.
 
     // Add the step value register, if it needs one.
     // TODO: The non-affine case isn't precisely modeled here.
@@ -1185,7 +1183,7 @@ void Cost::RateRegister(const SCEV *Reg,
       }
     }
   }
-  ++NumRegs;
+  ++C.NumRegs;
 
   // Rough heuristic; favor registers which don't require extra setup
   // instructions in the preheader.
@@ -1194,9 +1192,9 @@ void Cost::RateRegister(const SCEV *Reg,
       !(isa<SCEVAddRecExpr>(Reg) &&
         (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
          isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
-    ++SetupCost;
+    ++C.SetupCost;
 
-  NumIVMuls += isa<SCEVMulExpr>(Reg) &&
+  C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
                SE.hasComputableLoopEvolution(Reg, L);
 }
@@ -1229,9 +1227,9 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
                        SmallPtrSetImpl<const SCEV *> *LoserRegs) {
   assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
   // Tally up the registers.
-  unsigned PrevAddRecCost = AddRecCost;
-  unsigned PrevNumRegs = NumRegs;
-  unsigned PrevNumBaseAdds = NumBaseAdds;
+  unsigned PrevAddRecCost = C.AddRecCost;
+  unsigned PrevNumRegs = C.NumRegs;
+  unsigned PrevNumBaseAdds = C.NumBaseAdds;
   if (const SCEV *ScaledReg = F.ScaledReg) {
     if (VisitedRegs.count(ScaledReg)) {
       Lose();
@@ -1256,28 +1254,28 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
   if (NumBaseParts > 1)
     // Do not count the base and a possible second register if the target
     // allows to fold 2 registers.
-    NumBaseAdds +=
+    C.NumBaseAdds +=
         NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
-  NumBaseAdds += (F.UnfoldedOffset != 0);
+  C.NumBaseAdds += (F.UnfoldedOffset != 0);
 
   // Accumulate non-free scaling amounts.
-  ScaleCost += getScalingFactorCost(TTI, LU, F, *L);
+  C.ScaleCost += getScalingFactorCost(TTI, LU, F, *L);
 
   // Tally up the non-zero immediates.
   for (const LSRFixup &Fixup : LU.Fixups) {
     int64_t O = Fixup.Offset;
     int64_t Offset = (uint64_t)O + F.BaseOffset;
     if (F.BaseGV)
-      ImmCost += 64; // Handle symbolic values conservatively.
+      C.ImmCost += 64; // Handle symbolic values conservatively.
                      // TODO: This should probably be the pointer size.
     else if (Offset != 0)
-      ImmCost += APInt(64, Offset, true).getMinSignedBits();
+      C.ImmCost += APInt(64, Offset, true).getMinSignedBits();
 
     // Check with target if this offset with this instruction is
     // specifically not supported.
     if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) &&
         !TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
-      NumBaseAdds++;
+      C.NumBaseAdds++;
   }
 
   // If we don't count instruction cost exit here.
@@ -1289,13 +1287,13 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
   // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
   // additional instruction (at least fill).
   unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1;
-  if (NumRegs > TTIRegNum) {
+  if (C.NumRegs > TTIRegNum) {
     // Cost already exceeded TTIRegNum, then only newly added register can add
     // new instructions.
     if (PrevNumRegs > TTIRegNum)
-      Insns += (NumRegs - PrevNumRegs);
+      C.Insns += (C.NumRegs - PrevNumRegs);
     else
-      Insns += (NumRegs - TTIRegNum);
+      C.Insns += (C.NumRegs - TTIRegNum);
   }
 
   // If ICmpZero formula ends with not 0, it could not be replaced by
@@ -1308,56 +1306,54 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
   // For {-10, +, 1}:
   // i = i + 1;
   if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd())
-    Insns++;
+    C.Insns++;
   // Each new AddRec adds 1 instruction to calculation.
-  Insns += (AddRecCost - PrevAddRecCost);
+  C.Insns += (C.AddRecCost - PrevAddRecCost);
 
   // BaseAdds adds instructions for unfolded registers.
   if (LU.Kind != LSRUse::ICmpZero)
-    Insns += NumBaseAdds - PrevNumBaseAdds;
+    C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
   assert(isValid() && "invalid cost");
 }
 
 /// Set this cost to a losing value.
 void Cost::Lose() {
-  Insns = ~0u;
-  NumRegs = ~0u;
-  AddRecCost = ~0u;
-  NumIVMuls = ~0u;
-  NumBaseAdds = ~0u;
-  ImmCost = ~0u;
-  SetupCost = ~0u;
-  ScaleCost = ~0u;
+  C.Insns = ~0u;
+  C.NumRegs = ~0u;
+  C.AddRecCost = ~0u;
+  C.NumIVMuls = ~0u;
+  C.NumBaseAdds = ~0u;
+  C.ImmCost = ~0u;
+  C.SetupCost = ~0u;
+  C.ScaleCost = ~0u;
 }
 
 /// Choose the lower cost.
-bool Cost::operator<(const Cost &Other) const {
-  if (InsnsCost.getNumOccurrences() > 0 && InsnsCost && Insns != Other.Insns)
-    return Insns < Other.Insns;
-  return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
-                  ImmCost, SetupCost) <
-         std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
-                  Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost,
-                  Other.SetupCost);
+bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) {
+  if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
+      C.Insns != Other.C.Insns)
+    return C.Insns < Other.C.Insns;
+  return TTI.isLSRCostLess(C, Other.C);
 }
 
 void Cost::print(raw_ostream &OS) const {
   if (InsnsCost)
-    OS << Insns << " instruction" << (Insns == 1 ? " " : "s ");
-  OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
-  if (AddRecCost != 0)
-    OS << ", with addrec cost " << AddRecCost;
-  if (NumIVMuls != 0)
-    OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
-  if (NumBaseAdds != 0)
-    OS << ", plus " << NumBaseAdds << " base add"
-       << (NumBaseAdds == 1 ? "" : "s");
-  if (ScaleCost != 0)
-    OS << ", plus " << ScaleCost << " scale cost";
-  if (ImmCost != 0)
-    OS << ", plus " << ImmCost << " imm cost";
-  if (SetupCost != 0)
-    OS << ", plus " << SetupCost << " setup cost";
+    OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
+  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
+  if (C.AddRecCost != 0)
+    OS << ", with addrec cost " << C.AddRecCost;
+  if (C.NumIVMuls != 0)
+    OS << ", plus " << C.NumIVMuls << " IV mul"
+       << (C.NumIVMuls == 1 ? "" : "s");
+  if (C.NumBaseAdds != 0)
+    OS << ", plus " << C.NumBaseAdds << " base add"
+       << (C.NumBaseAdds == 1 ? "" : "s");
+  if (C.ScaleCost != 0)
+    OS << ", plus " << C.ScaleCost << " scale cost";
+  if (C.ImmCost != 0)
+    OS << ", plus " << C.ImmCost << " imm cost";
+  if (C.SetupCost != 0)
+    OS << ", plus " << C.SetupCost << " setup cost";
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -4112,7 +4108,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
         Cost CostBest;
         Regs.clear();
         CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
-        if (CostF < CostBest)
+        if (CostF.isLess(CostBest, TTI))
          std::swap(F, Best);
        DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
              dbgs() << "\n"
@@ -4580,7 +4576,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
     NewCost = CurCost;
     NewRegs = CurRegs;
     NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU);
-    if (NewCost < SolutionCost) {
+    if (NewCost.isLess(SolutionCost, TTI)) {
       Workspace.push_back(&F);
       if (Workspace.size() != Uses.size()) {
         SolveRecurse(Solution, SolutionCost, Workspace, NewCost,