Mark that SpeculativeExecution preserves Globals Alias Analysis.

A few benchmarks with many accesses to global variables in their hot loops have regressed significantly since r266399, which added the SpeculativeExecution pass to the default pipeline. The problem is that this pass doesn't mark Globals Alias Analysis as preserved. Globals Alias Analysis is computed in a module pass, whereas SpeculativeExecution is a function pass, and many of the passes that depend on Globals Alias Analysis to optimize these benchmarks are also function passes. As such, the Globals Alias Analysis information cannot be recomputed between SpeculativeExecution and the following function passes that need it. SpeculativeExecution doesn't invalidate Globals Alias Analysis, so mark it as preserved to fix those performance regressions. Differential Revision: http://reviews.llvm.org/D19806 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268370 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-11 13:44:28 +00:00 · 2016-05-03 08:33:26 +00:00 · 2016-05-03 08:33:26 +00:00 · e3e438a4cd
commit e3e438a4cd
parent 6386c04b9e
2 changed files with 28 additions and 0 deletions
--- a/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -62,6 +62,7 @@
 //===----------------------------------------------------------------------===//

 #include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Instructions.h"
@@ -138,6 +139,7 @@ INITIALIZE_PASS_END(SpeculativeExecution, "speculative-execution",

 void SpeculativeExecution::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetTransformInfoWrapperPass>();
+  AU.addPreserved<GlobalsAAWrapperPass>();
 }

 bool SpeculativeExecution::runOnFunction(Function &F) {
--- a/test/Transforms/PhaseOrdering/globalaa-retained.ll
+++ b/test/Transforms/PhaseOrdering/globalaa-retained.ll
@@ -0,0 +1,26 @@
+; RUN: opt -O3 -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+@v = internal unnamed_addr global i32 0, align 4
+@p = common global i32* null, align 8
+
+; Function Attrs: norecurse nounwind
+define void @f(i32 %n) {
+entry:
+  %0 = load i32, i32* @v, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @v, align 4
+  %1 = load i32*, i32** @p, align 8
+  store i32 %n, i32* %1, align 4
+  %2 = load i32, i32* @v, align 4
+  %inc1 = add nsw i32 %2, 1
+  store i32 %inc1, i32* @v, align 4
+  ret void
+}
+
+; Check that variable v is loaded only once after optimization, which should
+; prove that GlobalsAA survives until the optimization that can use it to
+; optimize away the duplicate loads/stores of variable v.
+; CHECK:     load i32, i32* @v, align 4
+; CHECK-NOT: load i32, i32* @v, align 4