mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-07 11:51:13 +00:00
[SystemZ] Implementation of getUnrollingPreferences().
This commit enables more unrolling for SystemZ by implementing the SystemZTargetTransformInfo::getUnrollingPreferences() method. It has been found that it is better to only unroll moderately, so the DefaultUnrollRuntimeCount has been moved into UnrollingPreferences in order to set this to a lower value for SystemZ (4). Reviewers: Evgeny Stupachenko, Ulrich Weigand. https://reviews.llvm.org/D24451 llvm-svn: 282570
This commit is contained in:
parent
8fe0e6eb01
commit
3c5fa71cd5
@ -264,6 +264,8 @@ public:
|
||||
/// transformation will select an unrolling factor based on the current cost
|
||||
/// threshold and other factors.
|
||||
unsigned Count;
|
||||
/// Default unroll count for loops with run-time trip count.
|
||||
unsigned DefaultUnrollRuntimeCount;
|
||||
// Set the maximum unrolling factor. The unrolling factor may be selected
|
||||
// using the appropriate cost threshold, but may not exceed this number
|
||||
// (set to UINT_MAX to disable). This does not apply in cases where the
|
||||
|
@ -238,6 +238,63 @@ SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
|
||||
return TTI::PSK_Software;
|
||||
}
|
||||
|
||||
/// Fill in \p UP with SystemZ-specific unrolling preferences for loop \p L.
///
/// The loop body is scanned once to find out whether it contains a call
/// and how many stores it performs. The store count bounds the unroll
/// factor; a loop with a call is limited to full unrolling, while any other
/// loop additionally gets partial and runtime unrolling enabled.
void SystemZTTIImpl::getUnrollingPreferences(Loop *L,
                                             TTI::UnrollingPreferences &UP) {
  bool ContainsCall = false;
  unsigned StoreCount = 0;

  for (auto &Block : L->blocks()) {
    for (auto &Inst : *Block) {
      if (isa<CallInst>(&Inst) || isa<InvokeInst>(&Inst)) {
        ImmutableCallSite Site(&Inst);
        const Function *Callee = Site.getCalledFunction();
        if (!Callee) {
          // Indirect call: no known callee, so treat it as a real call.
          ContainsCall = true;
        } else {
          if (isLoweredToCall(Callee))
            ContainsCall = true;
          // memcpy and memset intrinsics are each counted as one store.
          const auto IID = Callee->getIntrinsicID();
          if (IID == Intrinsic::memcpy || IID == Intrinsic::memset)
            ++StoreCount;
        }
      }

      if (isa<StoreInst>(&Inst)) {
        // Type of the store's first operand.
        Type *StoredTy = Inst.getOperand(0)->getType();
        const bool IsIntOrFP =
            StoredTy->isIntegerTy() || StoredTy->isFloatingPointTy();
        // 128 bit fp/int stores get split, so they count as two stores.
        const bool IsSplit =
            IsIntOrFP && getDataLayout().getTypeSizeInBits(StoredTy) == 128;
        StoreCount += IsSplit ? 2 : 1;
      }
    }
  }

  // The z13 processor will run out of store tags if too many stores are
  // fed into it too quickly, so the unroll factor is capped such that the
  // unrolled loop has at most 12 stores. With no stores there is no cap;
  // with more than 12 stores the integer division yields 0.
  unsigned UnrollLimit = UINT_MAX;
  if (StoreCount != 0)
    UnrollLimit = 12 / StoreCount;

  if (ContainsCall) {
    // A loop with calls may only be fully unrolled: MaxCount of 1 rules
    // out the other kinds, and full unrolling is bounded by the store cap.
    UP.MaxCount = 1;
    UP.FullUnrollMaxCount = UnrollLimit;
    return;
  }

  UP.MaxCount = UnrollLimit;
  if (UP.MaxCount <= 1)
    return; // Nothing to gain; leave the remaining preferences untouched.

  // Allow partial and runtime trip count unrolling.
  UP.Runtime = true;
  UP.Partial = true;

  UP.PartialThreshold = 75;
  UP.DefaultUnrollRuntimeCount = 4;

  // Allow expensive instructions in the pre-header of the loop.
  UP.AllowExpensiveTripCount = true;

  UP.Force = true;
}
|
||||
|
||||
unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
|
||||
if (!Vector)
|
||||
// Discount the stack pointer. Also leave out %r0, since it can't
|
||||
|
@ -50,6 +50,8 @@ public:
|
||||
|
||||
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
|
||||
|
||||
/// Set SystemZ loop-unrolling preferences in \p UP for loop \p L: the
/// unroll factor is capped by the number of stores in the loop, loops
/// containing calls are limited to full unrolling, and other loops get
/// partial and runtime trip count unrolling enabled.
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
|
||||
|
||||
/// @}
|
||||
|
||||
/// \name Vector TTI Implementations
|
||||
|
@ -102,10 +102,6 @@ static cl::opt<unsigned> PragmaUnrollThreshold(
|
||||
/// code expansion would result.
|
||||
static const unsigned NoThreshold = UINT_MAX;
|
||||
|
||||
/// Default unroll count for loops with run-time trip count if
|
||||
/// -unroll-count is not set
|
||||
static const unsigned DefaultUnrollRuntimeCount = 8;
|
||||
|
||||
/// Gather the various unrolling parameters based on the defaults, compiler
|
||||
/// flags, TTI overrides and user specified parameters.
|
||||
static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
|
||||
@ -122,6 +118,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
|
||||
UP.PartialThreshold = UP.Threshold;
|
||||
UP.PartialOptSizeThreshold = 0;
|
||||
UP.Count = 0;
|
||||
UP.DefaultUnrollRuntimeCount = 8;
|
||||
UP.MaxCount = UINT_MAX;
|
||||
UP.FullUnrollMaxCount = UINT_MAX;
|
||||
UP.Partial = false;
|
||||
@ -803,7 +800,7 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
|
||||
// largest power-of-two factor that satisfies the threshold limit.
|
||||
// As we'll create fixup loop, do the type of unrolling only if
|
||||
// remainder loop is allowed.
|
||||
UP.Count = DefaultUnrollRuntimeCount;
|
||||
UP.Count = UP.DefaultUnrollRuntimeCount;
|
||||
UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
|
||||
while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
|
||||
UP.Count >>= 1;
|
||||
@ -852,7 +849,7 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
|
||||
return false;
|
||||
}
|
||||
if (UP.Count == 0)
|
||||
UP.Count = DefaultUnrollRuntimeCount;
|
||||
UP.Count = UP.DefaultUnrollRuntimeCount;
|
||||
UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
|
||||
|
||||
// Reduce unroll count to be the largest power-of-two factor of
|
||||
|
Loading…
Reference in New Issue
Block a user