[LSR] Canonicalize reg1 + ... + regN into reg1 + ... + 1*regN.
This commit introduces a canonical representation for the formulae. Basically, as soon as a formula has more than one base register, the scaled register field is used for one of them. The register put into the scaled register is preferably a loop variant. The commit refactors how the formulae are built in order to produce such a representation. This yields a more accurate, but still perfectible, cost model.

<rdar://problem/16731508>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209230 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent 8598e290c3
commit 50d4008b47
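To make the new invariant concrete before reading the diff, here is a minimal, self-contained sketch of the canonical form (an illustration only — the real Formula lives in LoopStrengthReduce.cpp and holds SCEV expressions, not strings; all names below are hypothetical):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Simplified stand-in for LSR's Formula; only the fields that matter for
    // the canonical representation are kept.
    struct ToyFormula {
      std::vector<const char *> BaseRegs; // "base" registers
      const char *ScaledReg = nullptr;    // register multiplied by Scale
      int64_t Scale = 0;

      // Invariant from the patch:
      // 1. BaseRegs.size() > 1 implies ScaledReg != nullptr, and
      // 2. ScaledReg != nullptr implies Scale != 1 || !BaseRegs.empty().
      bool isCanonical() const {
        if (ScaledReg)
          return Scale != 1 || !BaseRegs.empty();
        return BaseRegs.size() <= 1;
      }

      // Mirror of Formula::Canonicalize: once there is more than one base
      // register, move one of them into the scaled slot with Scale 1. (The
      // real code additionally prefers a loop-variant register for ScaledReg.)
      void canonicalize() {
        if (isCanonical())
          return;
        ScaledReg = BaseRegs.back();
        BaseRegs.pop_back();
        Scale = 1;
      }
    };

    int main() {
      // reg1 + reg2 + reg3 must become reg1 + reg2 + 1*reg3.
      ToyFormula F;
      F.BaseRegs = {"reg1", "reg2", "reg3"};
      F.canonicalize();
      assert(F.isCanonical() && F.Scale == 1 && F.BaseRegs.size() == 2);
      // A lone register stays as "reg", never "1*reg" (invariant #2).
      ToyFormula G;
      G.BaseRegs = {"reg1"};
      assert(G.isCanonical());
      return 0;
    }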
@@ -238,7 +238,15 @@ struct Formula {
   int64_t Scale;
 
   /// BaseRegs - The list of "base" registers for this use. When this is
-  /// non-empty,
+  /// non-empty. The canonical representation of a formula is
+  /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
+  /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
+  /// #1 enforces that the scaled register is always used when at least two
+  /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
+  /// #2 enforces that 1 * reg is reg.
+  /// This invariant can be temporarily broken while building a formula.
+  /// However, every formula inserted into the LSRInstance must be in canonical
+  /// form.
   SmallVector<const SCEV *, 4> BaseRegs;
 
   /// ScaledReg - The 'scaled' register for this use. This should be non-null
@@ -256,6 +264,12 @@ struct Formula {
   void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
 
+  bool isCanonical() const;
+
+  void Canonicalize();
+
+  bool Unscale();
+
   size_t getNumRegs() const;
   Type *getType() const;
 
@@ -346,6 +360,52 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
     BaseRegs.push_back(Sum);
     HasBaseReg = true;
   }
+  Canonicalize();
 }
+
+/// \brief Check whether or not this formula satisfies the canonical
+/// representation.
+/// \see Formula::BaseRegs.
+bool Formula::isCanonical() const {
+  if (ScaledReg)
+    return Scale != 1 || !BaseRegs.empty();
+  return BaseRegs.size() <= 1;
+}
+
+/// \brief Helper method to morph a formula into its canonical representation.
+/// \see Formula::BaseRegs.
+/// Every formula having more than one base register must use the ScaledReg
+/// field. Otherwise, we would have to do special cases everywhere in LSR
+/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
+/// On the other hand, 1*reg should be canonicalized into reg.
+void Formula::Canonicalize() {
+  if (isCanonical())
+    return;
+  // So far we did not need this case. This is easy to implement but it is
+  // useless to maintain dead code. Besides, it could hurt compile time.
+  assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
+  // Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.
+  ScaledReg = BaseRegs.back();
+  BaseRegs.pop_back();
+  Scale = 1;
+  size_t BaseRegsSize = BaseRegs.size();
+  size_t Try = 0;
+  // If ScaledReg is an invariant, try to find a variant expression.
+  while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg))
+    std::swap(ScaledReg, BaseRegs[Try++]);
+}
+
+/// \brief Get rid of the scale in the formula.
+/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
+/// \return true if it was possible to get rid of the scale, false otherwise.
+/// \note After this operation the formula may not be in the canonical form.
+bool Formula::Unscale() {
+  if (Scale != 1)
+    return false;
+  Scale = 0;
+  BaseRegs.push_back(ScaledReg);
+  ScaledReg = nullptr;
+  return true;
+}
 
 /// getNumRegs - Return the total number of register operands used by this
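Unscale is the inverse operation of Canonicalize. Continuing the ToyFormula sketch from above (same assumptions; an illustration, not the patch's code), the round-trip looks like this:

    // Turn reg1 + 1*reg2 back into reg1 + reg2. Generators that only
    // understand base registers call this first and re-canonicalize after.
    bool unscale(ToyFormula &F) {
      if (F.Scale != 1)
        return false;          // only a scale of exactly 1 folds away
      F.Scale = 0;
      F.BaseRegs.push_back(F.ScaledReg);
      F.ScaledReg = nullptr;
      return true;             // the result may be temporarily non-canonical
    }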
@@ -776,9 +836,18 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
 namespace {
 class LSRUse;
 }
-// Check if it is legal to fold 2 base registers.
-static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
-                             const Formula &F);
+
+/// \brief Check if the addressing mode defined by \p F is completely
+/// folded in \p LU at isel time.
+/// This includes address-mode folding and special icmp tricks.
+/// This function returns true if \p LU can accommodate what \p F
+/// defines and up to 1 base + 1 scaled + offset.
+/// In other words, if \p F has several base registers, this function may
+/// still return true. Therefore, users still need to account for
+/// additional base registers and/or unfolded offsets to derive an
+/// accurate cost model.
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+                                 const LSRUse &LU, const Formula &F);
 // Get the cost of the scaling factor used in F for LU.
 static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
                                      const LSRUse &LU, const Formula &F);
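One way to read this contract (a hand-rolled illustration with assumed values, not code from the patch): complete foldability covers at most one base register, one scaled register, and an offset, so each further base register costs one explicit add — which is exactly the accounting RateFormula performs in the next hunk.

    // Illustration: adds needed to materialize a formula outside the
    // addressing mode. For reg1 + reg2 + 4*reg3 + 16 (three registers) on a
    // target that folds base + scale*index + imm, one add remains for reg2.
    unsigned extraAdds(unsigned NumRegs, bool AMCompletelyFolded,
                       bool HasUnfoldedOffset) {
      unsigned Adds = 0;
      if (NumRegs > 1)
        Adds += NumRegs - (AMCompletelyFolded ? 2 : 1);
      if (HasUnfoldedOffset)
        ++Adds; // a leftover immediate needs its own add, too
      return Adds; // extraAdds(3, true, false) == 1 for the example above
    }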
@@ -922,6 +991,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
                        ScalarEvolution &SE, DominatorTree &DT,
                        const LSRUse &LU,
                        SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+  assert(F.isCanonical() && "Cost is accurate only for canonical formula");
   // Tally up the registers.
   if (const SCEV *ScaledReg = F.ScaledReg) {
     if (VisitedRegs.count(ScaledReg)) {
@@ -945,11 +1015,13 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
   }
 
   // Determine how many (unfolded) adds we'll need inside the loop.
-  size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
+  size_t NumBaseParts = F.getNumRegs();
   if (NumBaseParts > 1)
     // Do not count the base and a possible second register if the target
    // allows to fold 2 registers.
-    NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F));
+    NumBaseAdds +=
+        NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
+  NumBaseAdds += (F.UnfoldedOffset != 0);
 
   // Accumulate non-free scaling amounts.
   ScaleCost += getScalingFactorCost(TTI, LU, F);
@@ -1210,7 +1282,10 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
 
 /// InsertFormula - If the given formula has not yet been inserted, add it to
 /// the list, and return true. Return false otherwise.
+/// The formula must be in canonical form.
 bool LSRUse::InsertFormula(const Formula &F) {
+  assert(F.isCanonical() && "Invalid canonical representation");
+
   if (!Formulae.empty() && RigidFormula)
     return false;
 
@@ -1236,6 +1311,8 @@ bool LSRUse::InsertFormula(const Formula &F) {
 
   // Record registers now being used by this use.
   Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+  if (F.ScaledReg)
+    Regs.insert(F.ScaledReg);
 
   return true;
 }
@@ -1302,12 +1379,10 @@ void LSRUse::dump() const {
 }
 #endif
 
-/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
-/// be completely folded into the user instruction at isel time. This includes
-/// address-mode folding and special icmp tricks.
-static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
-                       Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
-                       bool HasBaseReg, int64_t Scale) {
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+                                 LSRUse::KindType Kind, Type *AccessTy,
+                                 GlobalValue *BaseGV, int64_t BaseOffset,
+                                 bool HasBaseReg, int64_t Scale) {
   switch (Kind) {
   case LSRUse::Address:
     return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
@@ -1358,10 +1433,11 @@ static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
   llvm_unreachable("Invalid LSRUse Kind!");
 }
 
-static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
-                       int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
-                       GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
-                       int64_t Scale) {
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+                                 int64_t MinOffset, int64_t MaxOffset,
+                                 LSRUse::KindType Kind, Type *AccessTy,
+                                 GlobalValue *BaseGV, int64_t BaseOffset,
+                                 bool HasBaseReg, int64_t Scale) {
   // Check for overflow.
   if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
       (MinOffset > 0))
@@ -1372,9 +1448,41 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
     return false;
   MaxOffset = (uint64_t)BaseOffset + MaxOffset;
 
-  return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
-                    Scale) &&
-         isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
+  return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
+                              HasBaseReg, Scale) &&
+         isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
+                              HasBaseReg, Scale);
 }
+
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+                                 int64_t MinOffset, int64_t MaxOffset,
+                                 LSRUse::KindType Kind, Type *AccessTy,
+                                 const Formula &F) {
+  // For the purpose of isAMCompletelyFolded either having a canonical formula
+  // or a scale not equal to zero is correct.
+  // Problems may arise from non-canonical formulae having a scale == 0.
+  // Strictly speaking it would be best to just rely on canonical formulae.
+  // However, when we generate the scaled formulae, we first check that the
+  // scaling factor is profitable before computing the actual ScaledReg for
+  // compile time's sake.
+  assert((F.isCanonical() || F.Scale != 0));
+  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
+                              F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
+}
+
+/// isLegalUse - Test whether we know how to expand the current formula.
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+                       int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+                       GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
+                       int64_t Scale) {
+  // We know how to expand completely foldable formulae.
+  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+                              BaseOffset, HasBaseReg, Scale) ||
+         // Or formulae that use a base register produced by a sum of base
+         // registers.
+         (Scale == 1 &&
+          isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
+                               BaseGV, BaseOffset, true, 0));
+}
 
 static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
@@ -1384,36 +1492,23 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
                        F.BaseOffset, F.HasBaseReg, F.Scale);
 }
 
-static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
-                             const Formula &F) {
-  // If F is used as an Addressing Mode, it may fold one Base plus one
-  // scaled register. If the scaled register is nil, do as if another
-  // element of the base regs is a 1-scaled register.
-  // This is possible if BaseRegs has at least 2 registers.
-
-  // If this is not an address calculation, this is not an addressing mode
-  // use.
-  if (LU.Kind != LSRUse::Address)
-    return false;
-
-  // F is already scaled.
-  if (F.Scale != 0)
-    return false;
-
-  // We need to keep one register for the base and one to scale.
-  if (F.BaseRegs.size() < 2)
-    return false;
-
-  return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
-                    F.BaseGV, F.BaseOffset, F.HasBaseReg, 1);
-}
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+                                 const LSRUse &LU, const Formula &F) {
+  return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
+                              LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
+                              F.Scale);
+}
 
 static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
                                      const LSRUse &LU, const Formula &F) {
   if (!F.Scale)
     return 0;
-  assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
-                    LU.AccessTy, F) && "Illegal formula in use.");
+
+  // If the use is not completely folded in that instruction, we will have to
+  // pay an extra cost only for scale != 1.
+  if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
+                            LU.AccessTy, F))
+    return F.Scale != 1;
 
   switch (LU.Kind) {
   case LSRUse::Address: {
@@ -1432,12 +1527,10 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
     return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
   }
   case LSRUse::ICmpZero:
-    // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
-    // Therefore, return 0 in case F.Scale == -1.
-    return F.Scale != -1;
-
   case LSRUse::Basic:
   case LSRUse::Special:
+    // The use is completely folded, i.e., everything is folded into the
+    // instruction.
     return 0;
   }
 
@@ -1462,7 +1555,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
     HasBaseReg = true;
   }
 
-  return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
+  return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
+                              HasBaseReg, Scale);
 }
 
 static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
@@ -1487,8 +1581,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
   // base and a scale.
   int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
 
-  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
-                    BaseOffset, HasBaseReg, Scale);
+  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+                              BaseOffset, HasBaseReg, Scale);
 }
 
 namespace {
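After this split, legality for expansion is a strict superset of complete foldability. A condensed sketch of the new layering (boolean parameters stand in for the real TTI queries; the names are illustrative, not the patch's API):

    // A formula is legal to expand if it folds entirely into the instruction,
    // or if Scale == 1 and the scaled register can be treated as one more
    // base register that an explicit add will materialize later.
    bool sketchIsLegalUse(bool FoldsCompletely, long Scale,
                          bool FoldsAsBasePlusOffset) {
      return FoldsCompletely || (Scale == 1 && FoldsAsBasePlusOffset);
    }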
@@ -1644,8 +1738,19 @@ class LSRInstance {
 
   void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                               unsigned Depth = 0);
+
+  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
+                                  const Formula &Base, unsigned Depth,
+                                  size_t Idx, bool IsScaledReg = false);
   void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
+  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+                                   const Formula &Base, size_t Idx,
+                                   bool IsScaledReg = false);
   void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+                                   const Formula &Base,
+                                   const SmallVectorImpl<int64_t> &Worklist,
+                                   size_t Idx, bool IsScaledReg = false);
   void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
   void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
   void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
@@ -2148,24 +2253,26 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
   // the uses will have all its uses outside the loop, for example.
   if (LU.Kind != Kind)
     return false;
-  // Conservatively assume HasBaseReg is true for now.
-  if (NewOffset < LU.MinOffset) {
-    if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
-                          LU.MaxOffset - NewOffset, HasBaseReg))
-      return false;
-    NewMinOffset = NewOffset;
-  } else if (NewOffset > LU.MaxOffset) {
-    if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
-                          NewOffset - LU.MinOffset, HasBaseReg))
-      return false;
-    NewMaxOffset = NewOffset;
-  }
-
   // Check for a mismatched access type, and fall back conservatively as needed.
   // TODO: Be less conservative when the type is similar and can use the same
   // addressing modes.
   if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
     NewAccessTy = Type::getVoidTy(AccessTy->getContext());
 
+  // Conservatively assume HasBaseReg is true for now.
+  if (NewOffset < LU.MinOffset) {
+    if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
+                          LU.MaxOffset - NewOffset, HasBaseReg))
+      return false;
+    NewMinOffset = NewOffset;
+  } else if (NewOffset > LU.MaxOffset) {
+    if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
+                          NewOffset - LU.MinOffset, HasBaseReg))
+      return false;
+    NewMaxOffset = NewOffset;
+  }
+
   // Update the use.
   LU.MinOffset = NewMinOffset;
   LU.MaxOffset = NewMaxOffset;
@@ -2994,6 +3101,9 @@ void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
 /// InsertFormula - If the given formula has not yet been inserted, add it to
 /// the list, and return true. Return false otherwise.
 bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
+  // Do not insert a formula that we will not be able to expand.
+  assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
+         "Formula is illegal");
   if (!LU.InsertFormula(F))
     return false;
 
@@ -3149,84 +3259,104 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
   return S;
 }
 
+/// \brief Helper function for LSRInstance::GenerateReassociations.
+void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
+                                             const Formula &Base,
+                                             unsigned Depth, size_t Idx,
+                                             bool IsScaledReg) {
+  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+  SmallVector<const SCEV *, 8> AddOps;
+  const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
+  if (Remainder)
+    AddOps.push_back(Remainder);
+
+  if (AddOps.size() == 1)
+    return;
+
+  for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
+                                                     JE = AddOps.end();
+       J != JE; ++J) {
+
+    // Loop-variant "unknown" values are uninteresting; we won't be able to
+    // do anything meaningful with them.
+    if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
+      continue;
+
+    // Don't pull a constant into a register if the constant could be folded
+    // into an immediate field.
+    if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+                         LU.AccessTy, *J, Base.getNumRegs() > 1))
+      continue;
+
+    // Collect all operands except *J.
+    SmallVector<const SCEV *, 8> InnerAddOps(
+        ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+    InnerAddOps.append(std::next(J),
+                       ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+
+    // Don't leave just a constant behind in a register if the constant could
+    // be folded into an immediate field.
+    if (InnerAddOps.size() == 1 &&
+        isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+                         LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
+      continue;
+
+    const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
+    if (InnerSum->isZero())
+      continue;
+    Formula F = Base;
+
+    // Add the remaining pieces of the add back into the new formula.
+    const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
+    if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
+        TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+                                InnerSumSC->getValue()->getZExtValue())) {
+      F.UnfoldedOffset =
+          (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
+      if (IsScaledReg)
+        F.ScaledReg = nullptr;
+      else
+        F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
+    } else if (IsScaledReg)
+      F.ScaledReg = InnerSum;
+    else
+      F.BaseRegs[Idx] = InnerSum;
+
+    // Add J as its own register, or an unfolded immediate.
+    const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
+    if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+        TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+                                SC->getValue()->getZExtValue()))
+      F.UnfoldedOffset =
+          (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
+    else
+      F.BaseRegs.push_back(*J);
+    // We may have changed the number of registers in base regs, adjust the
+    // formula accordingly.
+    F.Canonicalize();
+
+    if (InsertFormula(LU, LUIdx, F))
+      // If that formula hadn't been seen before, recurse to find more like
+      // it.
+      GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
+  }
+}
+
 /// GenerateReassociations - Split out subexpressions from adds and the bases of
 /// addrecs.
 void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
-                                         Formula Base,
-                                         unsigned Depth) {
+                                         Formula Base, unsigned Depth) {
+  assert(Base.isCanonical() && "Input must be in the canonical form");
   // Arbitrarily cap recursion to protect compile time.
-  if (Depth >= 3) return;
+  if (Depth >= 3)
+    return;
 
-  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
-    const SCEV *BaseReg = Base.BaseRegs[i];
+  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
 
-    SmallVector<const SCEV *, 8> AddOps;
-    const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
-    if (Remainder)
-      AddOps.push_back(Remainder);
-
-    if (AddOps.size() == 1) continue;
-
-    for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
-         JE = AddOps.end(); J != JE; ++J) {
-
-      // Loop-variant "unknown" values are uninteresting; we won't be able to
-      // do anything meaningful with them.
-      if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
-        continue;
-
-      // Don't pull a constant into a register if the constant could be folded
-      // into an immediate field.
-      if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
-                           LU.AccessTy, *J, Base.getNumRegs() > 1))
-        continue;
-
-      // Collect all operands except *J.
-      SmallVector<const SCEV *, 8> InnerAddOps(
-          ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
-      InnerAddOps.append(std::next(J),
-                         ((const SmallVector<const SCEV *, 8> &)AddOps).end());
-
-      // Don't leave just a constant behind in a register if the constant could
-      // be folded into an immediate field.
-      if (InnerAddOps.size() == 1 &&
-          isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
-                           LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
-        continue;
-
-      const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
-      if (InnerSum->isZero())
-        continue;
-      Formula F = Base;
-
-      // Add the remaining pieces of the add back into the new formula.
-      const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
-      if (InnerSumSC &&
-          SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
-          TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
-                                  InnerSumSC->getValue()->getZExtValue())) {
-        F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
-                           InnerSumSC->getValue()->getZExtValue();
-        F.BaseRegs.erase(F.BaseRegs.begin() + i);
-      } else
-        F.BaseRegs[i] = InnerSum;
-
-      // Add J as its own register, or an unfolded immediate.
-      const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
-      if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
-          TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
-                                  SC->getValue()->getZExtValue()))
-        F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
-                           SC->getValue()->getZExtValue();
-      else
-        F.BaseRegs.push_back(*J);
-
-      if (InsertFormula(LU, LUIdx, F))
-        // If that formula hadn't been seen before, recurse to find more like
-        // it.
-        GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
-    }
-  }
+  if (Base.Scale == 1)
+    GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
+                               /* Idx */ -1, /* IsScaledReg */ true);
 }
 
 /// GenerateCombinations - Generate a formula consisting of all of the
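The same dispatch shape recurs in all three generators this patch splits into Impl helpers: visit every base register, then visit the scaled register when the formula is in the reg1 + ... + 1*regN canonical form. A condensed sketch of that pattern (hypothetical callback interface, not the actual one):

    #include <cstddef>
    #include <functional>

    template <typename FormulaT>
    void forEachRegSlot(const FormulaT &Base,
                        const std::function<void(size_t, bool)> &Visit) {
      for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
        Visit(i, /*IsScaledReg=*/false);
      if (Base.Scale == 1)                      // canonical 1*regN slot
        Visit(/*Idx=*/size_t(-1), /*IsScaledReg=*/true);
    }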
@@ -3234,8 +3364,12 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
 void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
                                        Formula Base) {
   // This method is only interesting on a plurality of registers.
-  if (Base.BaseRegs.size() <= 1) return;
+  if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)
+    return;
 
+  // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
+  // processing the formula.
+  Base.Unscale();
   Formula F = Base;
   F.BaseRegs.clear();
   SmallVector<const SCEV *, 4> Ops;
@@ -3255,29 +3389,87 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
   // rather than proceed with zero in a register.
   if (!Sum->isZero()) {
     F.BaseRegs.push_back(Sum);
+    F.Canonicalize();
     (void)InsertFormula(LU, LUIdx, F);
   }
 }
 
+/// \brief Helper function for LSRInstance::GenerateSymbolicOffsets.
+void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+                                              const Formula &Base, size_t Idx,
+                                              bool IsScaledReg) {
+  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+  GlobalValue *GV = ExtractSymbol(G, SE);
+  if (G->isZero() || !GV)
+    return;
+  Formula F = Base;
+  F.BaseGV = GV;
+  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
+    return;
+  if (IsScaledReg)
+    F.ScaledReg = G;
+  else
+    F.BaseRegs[Idx] = G;
+  (void)InsertFormula(LU, LUIdx, F);
+}
+
 /// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
 void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
                                           Formula Base) {
   // We can't add a symbolic offset if the address already contains one.
   if (Base.BaseGV) return;
 
-  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
-    const SCEV *G = Base.BaseRegs[i];
-    GlobalValue *GV = ExtractSymbol(G, SE);
-    if (G->isZero() || !GV)
-      continue;
-    Formula F = Base;
-    F.BaseGV = GV;
-    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
-      continue;
-    F.BaseRegs[i] = G;
-    (void)InsertFormula(LU, LUIdx, F);
-  }
+  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
+  if (Base.Scale == 1)
+    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
+                                /* IsScaledReg */ true);
 }
 
+/// \brief Helper function for LSRInstance::GenerateConstantOffsets.
+void LSRInstance::GenerateConstantOffsetsImpl(
+    LSRUse &LU, unsigned LUIdx, const Formula &Base,
+    const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
+  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+  for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
+                                                E = Worklist.end();
+       I != E; ++I) {
+    Formula F = Base;
+    F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
+    if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
+                   LU.AccessTy, F)) {
+      // Add the offset to the base register.
+      const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
+      // If it cancelled out, drop the base register, otherwise update it.
+      if (NewG->isZero()) {
+        if (IsScaledReg) {
+          F.Scale = 0;
+          F.ScaledReg = nullptr;
+        } else
+          F.DeleteBaseReg(F.BaseRegs[Idx]);
+        F.Canonicalize();
+      } else if (IsScaledReg)
+        F.ScaledReg = NewG;
+      else
+        F.BaseRegs[Idx] = NewG;
+
+      (void)InsertFormula(LU, LUIdx, F);
+    }
+  }
+
+  int64_t Imm = ExtractImmediate(G, SE);
+  if (G->isZero() || Imm == 0)
+    return;
+  Formula F = Base;
+  F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
+  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
+    return;
+  if (IsScaledReg)
+    F.ScaledReg = G;
+  else
+    F.BaseRegs[Idx] = G;
+  (void)InsertFormula(LU, LUIdx, F);
+}
+
 /// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets.
@@ -3290,38 +3482,11 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
   if (LU.MaxOffset != LU.MinOffset)
     Worklist.push_back(LU.MaxOffset);
 
-  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
-    const SCEV *G = Base.BaseRegs[i];
-
-    for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
-         E = Worklist.end(); I != E; ++I) {
-      Formula F = Base;
-      F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
-      if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
-                     LU.AccessTy, F)) {
-        // Add the offset to the base register.
-        const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
-        // If it cancelled out, drop the base register, otherwise update it.
-        if (NewG->isZero()) {
-          std::swap(F.BaseRegs[i], F.BaseRegs.back());
-          F.BaseRegs.pop_back();
-        } else
-          F.BaseRegs[i] = NewG;
-
-        (void)InsertFormula(LU, LUIdx, F);
-      }
-    }
-
-    int64_t Imm = ExtractImmediate(G, SE);
-    if (G->isZero() || Imm == 0)
-      continue;
-    Formula F = Base;
-    F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
-    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
-      continue;
-    F.BaseRegs[i] = G;
-    (void)InsertFormula(LU, LUIdx, F);
-  }
+  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
+  if (Base.Scale == 1)
+    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
+                                /* IsScaledReg */ true);
 }
 
 /// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
@@ -3421,7 +3586,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
   if (!IntTy) return;
 
   // If this Formula already has a scaled register, we can't add another one.
-  if (Base.Scale != 0) return;
+  // Try to unscale the formula to generate a better scale.
+  if (Base.Scale != 0 && !Base.Unscale())
+    return;
+
+  assert(Base.Scale == 0 && "Unscale did not do its job!");
 
   // Check each interesting stride.
   for (SmallSetVector<int64_t, 8>::const_iterator
@@ -3462,6 +3631,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
       Formula F = Base;
       F.ScaledReg = Quotient;
       F.DeleteBaseReg(F.BaseRegs[i]);
+      // The canonical representation of 1*reg is reg, which is already in
+      // Base. In that case, do not try to insert the formula, it will be
+      // rejected anyway.
+      if (F.Scale == 1 && F.BaseRegs.empty())
+        continue;
       (void)InsertFormula(LU, LUIdx, F);
     }
   }
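Why unscale first: flattening reg1 + 1*reg2 back to reg1 + reg2 lets the stride loop in the hunk above consider a real scale for a register that was only nominally scaled. An illustrative trace (register names and the factor are assumed values):

    // Assumed-value trace of GenerateScales after this change:
    //   canonical input : BaseRegs = {reg1}, ScaledReg = reg2, Scale = 1
    //   after Unscale() : BaseRegs = {reg1, reg2}, Scale = 0
    //   stride factor 4 : if SCEV divides reg2 evenly, the loop can now form
    //                     BaseRegs = {reg1}, ScaledReg = reg2 / 4, Scale = 4,
    //   a formula the old early return (Base.Scale != 0) could never reach.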
@@ -3626,7 +3800,12 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
 
     // TODO: Use a more targeted data structure.
     for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
-      const Formula &F = LU.Formulae[L];
+      Formula F = LU.Formulae[L];
+      // FIXME: The code for the scaled and unscaled registers looks
+      // very similar but slightly different. Investigate if they
+      // could be merged. That way, we would not have to unscale the
+      // Formula.
+      F.Unscale();
       // Use the immediate in the scaled register.
       if (F.ScaledReg == OrigReg) {
         int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
@@ -3652,6 +3831,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
           continue;
 
         // OK, looks good.
+        NewF.Canonicalize();
         (void)InsertFormula(LU, LUIdx, NewF);
       } else {
         // Use the immediate in a base register.
@@ -3685,6 +3865,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
           goto skip_formula;
 
         // Ok, looks good.
+        NewF.Canonicalize();
        (void)InsertFormula(LU, LUIdx, NewF);
        break;
      skip_formula:;
@@ -3938,7 +4119,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
     for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
          E = LU.Formulae.end(); I != E; ++I) {
       const Formula &F = *I;
-      if (F.BaseOffset == 0 || F.Scale != 0)
+      if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
         continue;
 
       LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
@@ -4399,25 +4580,34 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
                                  Loops, SE, DT);
 
     if (LU.Kind == LSRUse::ICmpZero) {
-      // An interesting way of "folding" with an icmp is to use a negated
-      // scale, which we'll implement by inserting it into the other operand
-      // of the icmp.
-      assert(F.Scale == -1 &&
-             "The only scale supported by ICmpZero uses is -1!");
-      ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
+      // Expand ScaleReg as if it was part of the base regs.
+      if (F.Scale == 1)
+        Ops.push_back(
+            SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)));
+      else {
+        // An interesting way of "folding" with an icmp is to use a negated
+        // scale, which we'll implement by inserting it into the other operand
+        // of the icmp.
+        assert(F.Scale == -1 &&
+               "The only scale supported by ICmpZero uses is -1!");
+        ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
+      }
     } else {
       // Otherwise just expand the scaled register and an explicit scale,
       // which is expected to be matched as part of the address.
 
       // Flush the operand list to suppress SCEVExpander hoisting address modes.
-      if (!Ops.empty() && LU.Kind == LSRUse::Address) {
+      // Unless the addressing mode will not be folded.
+      if (!Ops.empty() && LU.Kind == LSRUse::Address &&
+          isAMCompletelyFolded(TTI, LU, F)) {
         Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
         Ops.clear();
         Ops.push_back(SE.getUnknown(FullV));
       }
       ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP));
-      ScaledS = SE.getMulExpr(ScaledS,
-                              SE.getConstant(ScaledS->getType(), F.Scale));
+      if (F.Scale != 1)
+        ScaledS =
+            SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
       Ops.push_back(ScaledS);
     }
   }
@@ -4495,7 +4685,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
       }
       CI->setOperand(1, ICmpScaledV);
     } else {
-      assert(F.Scale == 0 &&
+      // A scale of 1 means that the scale has been expanded as part of the
+      // base regs.
+      assert((F.Scale == 0 || F.Scale == 1) &&
             "ICmp does not support folding a global value and "
             "a scale at the same time!");
      Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
@@ -1,6 +1,9 @@
 ; RUN: opt -S -loop-reduce < %s | FileCheck %s
 ; Complex addressing mode are costly.
 ; Make loop-reduce prefer unscaled accesses.
+; On X86, reg1 + 1*reg2 has the same cost as reg1 + 8*reg2.
+; Therefore, LSR currently prefers to fold as much computation as possible
+; in the addressing mode.
 ; <rdar://problem/16730541>
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx"
@@ -18,8 +21,8 @@ for.body: ; preds = %for.body, %entry
   %tmp = add nsw i64 %indvars.iv, -1
   %arrayidx = getelementptr inbounds double* %b, i64 %tmp
   %tmp1 = load double* %arrayidx, align 8
-; The induction variable should carry the scaling factor: 1 * 8 = 8.
-; CHECK: [[IVNEXT]] = add nuw nsw i64 [[IV]], 8
+; The induction variable should carry the scaling factor: 1.
+; CHECK: [[IVNEXT]] = add nuw nsw i64 [[IV]], 1
   %indvars.iv.next = add i64 %indvars.iv, 1
   %arrayidx2 = getelementptr inbounds double* %c, i64 %indvars.iv.next
   %tmp2 = load double* %arrayidx2, align 8
@@ -27,8 +30,8 @@ for.body: ; preds = %for.body, %entry
   %arrayidx4 = getelementptr inbounds double* %a, i64 %indvars.iv
   store double %mul, double* %arrayidx4, align 8
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-; Comparison should be 19 * 8 = 152.
-; CHECK: icmp eq i32 {{%[^,]+}}, 152
+; Comparison should be 19 * 1 = 19.
+; CHECK: icmp eq i32 {{%[^,]+}}, 19
   %exitcond = icmp eq i32 %lftr.wideiv, 20
   br i1 %exitcond, label %for.end, label %for.body
@@ -5,7 +5,7 @@
 
 ; CHECK-LABEL: count_up
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq $8,
+; CHECK: incq
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @count_up(double* %d, i64 %n) nounwind {
@@ -71,7 +71,7 @@ return:
 
 ; CHECK-LABEL: count_up_signed
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq $8,
+; CHECK: incq
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @count_up_signed(double* %d, i64 %n) nounwind {
@@ -242,7 +242,7 @@ return:
 
 ; CHECK-LABEL: another_count_down_signed
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq $-8,
+; CHECK: decq
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @another_count_down_signed(double* %d, i64 %n) nounwind {