From 84f83c27273f5345a31ca9877dad2da12d0a0e75 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Tue, 13 Mar 2012 18:07:41 +0000 Subject: [PATCH] enhance jump threading to preserve TBAA information when PRE'ing loads, fixing rdar://11039258, an issue that came up when inspecting clang's bootstrapped codegen. llvm-svn: 152635 --- include/llvm/Analysis/Loads.h | 8 +++- lib/Analysis/Loads.cpp | 16 +++++-- lib/Transforms/Scalar/JumpThreading.cpp | 13 ++++- test/Transforms/JumpThreading/thread-loads.ll | 48 +++++++++++++++++-- 4 files changed, 76 insertions(+), 9 deletions(-) diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h index 1574262dd6d..5f0aefbeb01 100644 --- a/include/llvm/Analysis/Loads.h +++ b/include/llvm/Analysis/Loads.h @@ -20,6 +20,7 @@ namespace llvm { class AliasAnalysis; class TargetData; +class MDNode; /// isSafeToLoadUnconditionally - Return true if we know that executing a load /// from this value cannot trap. If it is not obviously safe to load from the @@ -41,10 +42,15 @@ bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, /// MaxInstsToScan specifies the maximum instructions to scan in the block. /// If it is set to 0, it will scan the whole block. You can also optionally /// specify an alias analysis implementation, which makes this more precise. +/// +/// If TBAATag is non-null and a load or store is found, the TBAA tag from the +/// load or store is recorded there. If there is no TBAA tag or if no access +/// is found, it is left unmodified. Value *FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan = 6, - AliasAnalysis *AA = 0); + AliasAnalysis *AA = 0, + MDNode **TBAATag = 0); } diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 0e6bcbfae4f..873a27543dd 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -17,6 +17,7 @@ #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Operator.h" using namespace llvm; @@ -160,10 +161,15 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, /// MaxInstsToScan specifies the maximum instructions to scan in the block. If /// it is set to 0, it will scan the whole block. You can also optionally /// specify an alias analysis implementation, which makes this more precise. +/// +/// If TBAATag is non-null and a load or store is found, the TBAA tag from the +/// load or store is recorded there. If there is no TBAA tag or if no access +/// is found, it is left unmodified. Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, - AliasAnalysis *AA) { + AliasAnalysis *AA, + MDNode **TBAATag) { if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; // If we're using alias analysis to disambiguate get the size of *Ptr. @@ -191,15 +197,19 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // (This is true even if the load is volatile or atomic, although // those cases are unlikely.) if (LoadInst *LI = dyn_cast(Inst)) - if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) + if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) { + if (TBAATag) *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa); return LI; + } if (StoreInst *SI = dyn_cast(Inst)) { // If this is a store through Ptr, the value is available! // (This is true even if the store is volatile or atomic, although // those cases are unlikely.) - if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) + if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) { + if (TBAATag) *TBAATag = SI->getMetadata(LLVMContext::MD_tbaa); return SI->getOperand(0); + } // If Ptr is an alloca and this is a store to a different alloca, ignore // the store. This is a trivial form of alias analysis that is important diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 4ef5298dc0c..429b61b6e50 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -857,6 +857,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { if (BBIt != LoadBB->begin()) return false; + // If all of the loads and stores that feed the value have the same TBAA tag, + // then we can propagate it onto any newly inserted loads. + MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa); SmallPtrSet PredsScanned; typedef SmallVector, 8> AvailablePredsTy; @@ -875,11 +878,16 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Scan the predecessor to see if the value is available in the pred. BBIt = PredBB->end(); - Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6); + MDNode *ThisTBAATag = 0; + Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6, + 0, &ThisTBAATag); if (!PredAvailable) { OneUnavailablePred = PredBB; continue; } + + // If tbaa tags disagree or are not present, forget about them. + if (TBAATag != ThisTBAATag) TBAATag = 0; // If so, this load is partially redundant. Remember this info so that we // can create a PHI node. @@ -939,6 +947,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { LI->getAlignment(), UnavailablePred->getTerminator()); NewVal->setDebugLoc(LI->getDebugLoc()); + if (TBAATag) + NewVal->setMetadata(LLVMContext::MD_tbaa, TBAATag); + AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal)); } diff --git a/test/Transforms/JumpThreading/thread-loads.ll b/test/Transforms/JumpThreading/thread-loads.ll index cce23ea319c..78d36e7053c 100644 --- a/test/Transforms/JumpThreading/thread-loads.ll +++ b/test/Transforms/JumpThreading/thread-loads.ll @@ -1,12 +1,12 @@ ; RUN: opt < %s -jump-threading -S | FileCheck %s -; rdar://6402033 -; Test that we can thread through the block with the partially redundant load (%2). target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin7" -define i32 @foo(i32* %P) nounwind { -; CHECK: foo +; Test that we can thread through the block with the partially redundant load (%2). +; rdar://6402033 +define i32 @test1(i32* %P) nounwind { +; CHECK: @test1 entry: %0 = tail call i32 (...)* @f1() nounwind ; [#uses=1] %1 = icmp eq i32 %0, 0 ; [#uses=1] @@ -39,3 +39,43 @@ bb3: ; preds = %bb1 declare i32 @f1(...) declare i32 @f2(...) + + +;; Check that we preserve TBAA information. +; rdar://11039258 + +define i32 @test2(i32* %P) nounwind { +; CHECK: @test2 +entry: + %0 = tail call i32 (...)* @f1() nounwind ; [#uses=1] + %1 = icmp eq i32 %0, 0 ; [#uses=1] + br i1 %1, label %bb1, label %bb + +bb: ; preds = %entry +; CHECK: bb1.thread: +; CHECK: store{{.*}}, !tbaa !0 +; CHECK: br label %bb3 + store i32 42, i32* %P, align 4, !tbaa !0 + br label %bb1 + +bb1: ; preds = %entry, %bb + %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ] + %2 = load i32* %P, align 4, !tbaa !0 + %3 = icmp sgt i32 %2, 36 + br i1 %3, label %bb3, label %bb2 + +bb2: ; preds = %bb1 + %4 = tail call i32 (...)* @f2() nounwind + ret i32 %res.0 + +bb3: ; preds = %bb1 +; CHECK: bb3: +; CHECK: %res.01 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ] +; CHECK: ret i32 %res.01 + ret i32 %res.0 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +