From 3c5fa71cd52563bdf1f877046892ad6b4ad14e7d Mon Sep 17 00:00:00 2001
From: Jonas Paulsson
Date: Wed, 28 Sep 2016 09:41:38 +0000
Subject: [PATCH] [SystemZ] Implementation of getUnrollingPreferences().

This commit enables more unrolling for SystemZ by implementing the
SystemZTargetTransformInfo::getUnrollingPreferences() method.

It has been found that it is better to only unroll moderately, so the
DefaultUnrollRuntimeCount has been moved into UnrollingPreferences in order
to set this to a lower value for SystemZ (4).

Reviewers: Evgeny Stupachenko, Ulrich Weigand.
https://reviews.llvm.org/D24451

llvm-svn: 282570
---
 include/llvm/Analysis/TargetTransformInfo.h   |  2 +
 .../SystemZ/SystemZTargetTransformInfo.cpp    | 57 +++++++++++++++++++
 .../SystemZ/SystemZTargetTransformInfo.h      |  2 +
 lib/Transforms/Scalar/LoopUnrollPass.cpp      |  9 +--
 4 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 88b1b28b96e..f191ed58499 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -264,6 +264,8 @@ public:
     /// transformation will select an unrolling factor based on the current cost
     /// threshold and other factors.
     unsigned Count;
+    /// Default unroll count for loops with run-time trip count.
+    unsigned DefaultUnrollRuntimeCount;
     // Set the maximum unrolling factor. The unrolling factor may be selected
     // using the appropriate cost threshold, but may not exceed this number
     // (set to UINT_MAX to disable). This does not apply in cases where the
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 5ff5b21f49b..b10c0e09a0d 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -238,6 +238,63 @@ SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
   return TTI::PSK_Software;
 }
 
+void SystemZTTIImpl::getUnrollingPreferences(Loop *L,
+                                             TTI::UnrollingPreferences &UP) {
+  // Find out if L contains a call, what the machine instruction count
+  // estimate is, and how many stores there are.
+  bool HasCall = false;
+  unsigned NumStores = 0;
+  for (auto &BB : L->blocks())
+    for (auto &I : *BB) {
+      if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
+        ImmutableCallSite CS(&I);
+        if (const Function *F = CS.getCalledFunction()) {
+          if (isLoweredToCall(F))
+            HasCall = true;
+          if (F->getIntrinsicID() == Intrinsic::memcpy ||
+              F->getIntrinsicID() == Intrinsic::memset)
+            NumStores++;
+        } else { // indirect call.
+          HasCall = true;
+        }
+      }
+      if (isa<StoreInst>(&I)) {
+        NumStores++;
+        Type *MemAccessTy = I.getOperand(0)->getType();
+        if((MemAccessTy->isIntegerTy() || MemAccessTy->isFloatingPointTy()) &&
+           (getDataLayout().getTypeSizeInBits(MemAccessTy) == 128))
+          NumStores++; // 128 bit fp/int stores get split.
+      }
+    }
+
+  // The z13 processor will run out of store tags if too many stores
+  // are fed into it too quickly. Therefore make sure there are not
+  // too many stores in the resulting unrolled loop.
+  unsigned const Max = (NumStores ? (12 / NumStores) : UINT_MAX);
+
+  if (HasCall) {
+    // Only allow full unrolling if loop has any calls.
+    UP.FullUnrollMaxCount = Max;
+    UP.MaxCount = 1;
+    return;
+  }
+
+  UP.MaxCount = Max;
+  if (UP.MaxCount <= 1)
+    return;
+
+  // Allow partial and runtime trip count unrolling.
+  UP.Partial = UP.Runtime = true;
+
+  UP.PartialThreshold = 75;
+  UP.DefaultUnrollRuntimeCount = 4;
+
+  // Allow expensive instructions in the pre-header of the loop.
+  UP.AllowExpensiveTripCount = true;
+
+  UP.Force = true;
+}
+
 unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
   if (!Vector)
     // Discount the stack pointer. Also leave out %r0, since it can't
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 9ae736d8413..a870dd9ea01 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -50,6 +50,8 @@ public:
 
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
 
+  void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
   /// @}
 
   /// \name Vector TTI Implementations
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index dbbffee96e2..a8442e64832 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -102,10 +102,6 @@ static cl::opt<unsigned> PragmaUnrollThreshold(
 /// code expansion would result.
 static const unsigned NoThreshold = UINT_MAX;
 
-/// Default unroll count for loops with run-time trip count if
-/// -unroll-count is not set
-static const unsigned DefaultUnrollRuntimeCount = 8;
-
 /// Gather the various unrolling parameters based on the defaults, compiler
 /// flags, TTI overrides and user specified parameters.
 static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
@@ -122,6 +118,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
   UP.PartialThreshold = UP.Threshold;
   UP.PartialOptSizeThreshold = 0;
   UP.Count = 0;
+  UP.DefaultUnrollRuntimeCount = 8;
   UP.MaxCount = UINT_MAX;
   UP.FullUnrollMaxCount = UINT_MAX;
   UP.Partial = false;
@@ -803,7 +800,7 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
         // largest power-of-two factor that satisfies the threshold limit.
         // As we'll create fixup loop, do the type of unrolling only if
         // remainder loop is allowed.
-        UP.Count = DefaultUnrollRuntimeCount;
+        UP.Count = UP.DefaultUnrollRuntimeCount;
         UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
         while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
           UP.Count >>= 1;
@@ -852,7 +849,7 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
     return false;
   }
   if (UP.Count == 0)
-    UP.Count = DefaultUnrollRuntimeCount;
+    UP.Count = UP.DefaultUnrollRuntimeCount;
   UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
 
   // Reduce unroll count to be the largest power-of-two factor of