Reapply Move "auto-init" instructions to the dominator of their users

Original patch (50b2a113db197a97f60ad2aace8b7382dc9b8c31) ignored the
fact that -ftrivial-auto-var-init could affect function parameters with
the sret attribute.
Just do not move instruction that don't affect alloca.
Also add missing test case for volatile instruction.

Differential Revision: https://reviews.llvm.org/D148507
This commit is contained in:
serge-sans-paille 2023-04-17 10:13:31 +02:00
parent 53791896de
commit afa13ba18d
No known key found for this signature in database
GPG Key ID: 7B24DA8C9551659F
21 changed files with 725 additions and 0 deletions

View File

@ -0,0 +1,29 @@
//===- MoveAutoInit.h - Move insts marked as auto-init Pass --*- C++ -*-======//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass moves instructions marked as auto-init closer to their use if
// profitable, generally because it moves them under a guard, potentially
// skipping the overhead of the auto-init under some execution paths.
//
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H
#define LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H
#include "llvm/IR/PassManager.h"
namespace llvm {
class MoveAutoInitPass : public PassInfoMixin<MoveAutoInitPass> {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H

View File

@ -245,6 +245,7 @@
#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/Transforms/Utils/Mem2Reg.h"
#include "llvm/Transforms/Utils/MetaRenamer.h"
#include "llvm/Transforms/Utils/MoveAutoInit.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/PredicateInfo.h"
#include "llvm/Transforms/Utils/RelLookupTableConverter.h"

View File

@ -124,6 +124,7 @@
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
#include "llvm/Transforms/Utils/Mem2Reg.h"
#include "llvm/Transforms/Utils/MoveAutoInit.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
@ -696,6 +697,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(MemCpyOptPass());
FPM.addPass(DSEPass());
FPM.addPass(MoveAutoInitPass());
FPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
@ -1809,6 +1812,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// Nuke dead stores.
MainFPM.addPass(DSEPass());
MainFPM.addPass(MoveAutoInitPass());
MainFPM.addPass(MergedLoadStoreMotionPass());
LoopPassManager LPM;

View File

@ -331,6 +331,7 @@ FUNCTION_PASS("mem2reg", PromotePass())
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
FUNCTION_PASS("mergeicmps", MergeICmpsPass())
FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass())
FUNCTION_PASS("move-auto-init", MoveAutoInitPass())
FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
FUNCTION_PASS("newgvn", NewGVNPass())
FUNCTION_PASS("jump-threading", JumpThreadingPass())

View File

@ -58,6 +58,7 @@ add_llvm_component_library(LLVMTransformUtils
MetaRenamer.cpp
MisExpect.cpp
ModuleUtils.cpp
MoveAutoInit.cpp
NameAnonGlobals.cpp
PredicateInfo.cpp
PromoteMemoryToRegister.cpp

View File

@ -0,0 +1,233 @@
//===-- MoveAutoInit.cpp - move auto-init inst closer to their use site----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass moves instruction maked as auto-init closer to the basic block that
// use it, eventually removing it from some control path of the function.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/MoveAutoInit.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
#define DEBUG_TYPE "move-auto-init"
STATISTIC(NumMoved, "Number of instructions moved");
static cl::opt<unsigned> MoveAutoInitThreshold(
"move-auto-init-threshold", cl::Hidden, cl::init(128),
cl::desc("Maximum instructions to analyze per moved initialization"));
static bool hasAutoInitMetadata(const Instruction &I) {
return I.hasMetadata(LLVMContext::MD_annotation) &&
any_of(I.getMetadata(LLVMContext::MD_annotation)->operands(),
[](const MDOperand &Op) {
return cast<MDString>(Op.get())->getString() == "auto-init";
});
}
static std::optional<MemoryLocation> writeToAlloca(const Instruction &I) {
MemoryLocation ML;
if (auto *MI = dyn_cast<MemIntrinsic>(&I))
ML = MemoryLocation::getForDest(MI);
else if (auto *SI = dyn_cast<StoreInst>(&I))
ML = MemoryLocation::get(SI);
else
assert(false && "memory location set");
if (isa<AllocaInst>(getUnderlyingObject(ML.Ptr)))
return ML;
else
return {};
}
/// Finds a BasicBlock in the CFG where instruction `I` can be moved to while
/// not changing the Memory SSA ordering and being guarded by at least one
/// condition.
static BasicBlock *usersDominator(const MemoryLocation &ML, Instruction *I,
DominatorTree &DT, MemorySSA &MSSA) {
BasicBlock *CurrentDominator = nullptr;
MemoryUseOrDef &IMA = *MSSA.getMemoryAccess(I);
BatchAAResults AA(MSSA.getAA());
SmallPtrSet<MemoryAccess *, 8> Visited;
auto AsMemoryAccess = [](User *U) { return cast<MemoryAccess>(U); };
SmallVector<MemoryAccess *> WorkList(map_range(IMA.users(), AsMemoryAccess));
while (!WorkList.empty()) {
MemoryAccess *MA = WorkList.pop_back_val();
if (!Visited.insert(MA).second)
continue;
if (Visited.size() > MoveAutoInitThreshold)
return nullptr;
bool FoundClobberingUser = false;
if (auto *M = dyn_cast<MemoryUseOrDef>(MA)) {
Instruction *MI = M->getMemoryInst();
// If this memory instruction may not clobber `I`, we can skip it.
// LifetimeEnd is a valid user, but we do not want it in the user
// dominator.
if (AA.getModRefInfo(MI, ML) != ModRefInfo::NoModRef &&
!MI->isLifetimeStartOrEnd() && MI != I) {
FoundClobberingUser = true;
CurrentDominator = CurrentDominator
? DT.findNearestCommonDominator(CurrentDominator,
MI->getParent())
: MI->getParent();
}
}
if (!FoundClobberingUser) {
auto UsersAsMemoryAccesses = map_range(MA->users(), AsMemoryAccess);
append_range(WorkList, UsersAsMemoryAccesses);
}
}
return CurrentDominator;
}
static bool runMoveAutoInit(Function &F, DominatorTree &DT, MemorySSA &MSSA) {
BasicBlock &EntryBB = F.getEntryBlock();
SmallVector<std::pair<Instruction *, BasicBlock *>> JobList;
//
// Compute movable instructions.
//
for (Instruction &I : EntryBB) {
if (!hasAutoInitMetadata(I))
continue;
std::optional<MemoryLocation> ML = writeToAlloca(I);
if (!ML)
continue;
if (I.isVolatile())
continue;
BasicBlock *UsersDominator = usersDominator(ML.value(), &I, DT, MSSA);
if (!UsersDominator)
continue;
if (UsersDominator == &EntryBB)
continue;
// Traverse the CFG to detect cycles `UsersDominator` would be part of.
SmallPtrSet<BasicBlock *, 8> TransitiveSuccessors;
SmallVector<BasicBlock *> WorkList(successors(UsersDominator));
bool HasCycle = false;
while (!WorkList.empty()) {
BasicBlock *CurrBB = WorkList.pop_back_val();
if (CurrBB == UsersDominator)
// No early exit because we want to compute the full set of transitive
// successors.
HasCycle = true;
for (BasicBlock *Successor : successors(CurrBB)) {
if (!TransitiveSuccessors.insert(Successor).second)
continue;
WorkList.push_back(Successor);
}
}
// Don't insert if that could create multiple execution of I,
// but we can insert it in the non back-edge predecessors, if it exists.
if (HasCycle) {
BasicBlock *UsersDominatorHead = UsersDominator;
while (BasicBlock *UniquePredecessor =
UsersDominatorHead->getUniquePredecessor())
UsersDominatorHead = UniquePredecessor;
if (UsersDominatorHead == &EntryBB)
continue;
BasicBlock *DominatingPredecessor = nullptr;
for (BasicBlock *Pred : predecessors(UsersDominatorHead)) {
// If one of the predecessor of the dominator also transitively is a
// successor, moving to the dominator would do the inverse of loop
// hoisting, and we don't want that.
if (TransitiveSuccessors.count(Pred))
continue;
DominatingPredecessor =
DominatingPredecessor
? DT.findNearestCommonDominator(DominatingPredecessor, Pred)
: Pred;
}
if (!DominatingPredecessor || DominatingPredecessor == &EntryBB)
continue;
UsersDominator = DominatingPredecessor;
}
// CatchSwitchInst blocks can only have one instruction, so they are not
// good candidates for insertion.
while (isa<CatchSwitchInst>(UsersDominator->getFirstInsertionPt())) {
for (BasicBlock *Pred : predecessors(UsersDominator))
UsersDominator = DT.findNearestCommonDominator(UsersDominator, Pred);
}
// We finally found a place where I can be moved while not introducing extra
// execution, and guarded by at least one condition.
if (UsersDominator != &EntryBB)
JobList.emplace_back(&I, UsersDominator);
}
//
// Perform the actual substitution.
//
if (JobList.empty())
return false;
MemorySSAUpdater MSSAU(&MSSA);
// Reverse insertion to respect relative order between instructions:
// if two instructions are moved from the same BB to the same BB, we insert
// the second one in the front, then the first on top of it.
for (auto &Job : reverse(JobList)) {
Job.first->moveBefore(&*Job.second->getFirstInsertionPt());
MSSAU.moveToPlace(MSSA.getMemoryAccess(Job.first), Job.first->getParent(),
MemorySSA::InsertionPlace::Beginning);
}
if (VerifyMemorySSA)
MSSA.verifyMemorySSA();
NumMoved += JobList.size();
return true;
}
PreservedAnalyses MoveAutoInitPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
if (!runMoveAutoInit(F, DT, MSSA))
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<MemorySSAAnalysis>();
PA.preserveSet<CFGAnalyses>();
return PA;
}

View File

@ -205,6 +205,7 @@
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
; CHECK-O23SZ-NEXT: Running pass: LICMPass

View File

@ -106,6 +106,7 @@
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass on foo
; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo
; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass on foo
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass on foo

View File

@ -146,6 +146,7 @@
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
; CHECK-O23SZ-NEXT: Running pass: LICMPass on loop

View File

@ -133,6 +133,7 @@
; CHECK-O-NEXT: Running pass: ADCEPass
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
; CHECK-O23SZ-NEXT: Running pass: LICMPass

View File

@ -140,6 +140,7 @@
; CHECK-O-NEXT: Running pass: ADCEPass
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
; CHECK-O23SZ-NEXT: Running pass: LICMPass

View File

@ -146,6 +146,7 @@
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
; CHECK-O23SZ-NEXT: Running pass: LICMPass on loop

View File

@ -171,6 +171,7 @@
; CHECK-O-NEXT: Running pass: ADCEPass
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
; CHECK-O23SZ-NEXT: Running pass: LICMPass

View File

@ -136,6 +136,7 @@
; CHECK-O-NEXT: Running pass: ADCEPass
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
; CHECK-O23SZ-NEXT: Running pass: LICMPass

View File

@ -0,0 +1,41 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s
@__const.foo.buffer = private unnamed_addr constant [8 x i32] [i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766], align 16
define void @foo(i32 %x) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BUFFER:%.*]] = alloca [8 x i32], align 16
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X:%.*]], 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[BUFFER]], ptr align 16 @__const.foo.buffer, i64 32, i1 false), !annotation !0
; CHECK-NEXT: call void @dump(ptr [[BUFFER]])
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: ret void
;
entry:
%buffer = alloca [8 x i32], align 16
call void @llvm.memcpy.p0.p0.i64(ptr align 16 %buffer, ptr align 16 @__const.foo.buffer, i64 32, i1 false), !annotation !0
%tobool = icmp ne i32 %x, 0
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
call void @dump(ptr %buffer)
br label %if.end
if.end: ; preds = %if.then, %entry
ret void
}
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
declare void @dump(ptr)
!0 = !{!"auto-init"}

View File

@ -0,0 +1,100 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; Checks that move-auto-init can move instruction passed unclobbering memory
; instructions.
; RUN: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
define i32 @foo(i32 noundef %0, i32 noundef %1, i32 noundef %2) #0 {
; CHECK-LABEL: @foo(
; CHECK-NEXT: [[TMP4:%.*]] = alloca [100 x i8], align 16
; CHECK-NEXT: [[TMP5:%.*]] = alloca [2 x i8], align 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [100 x i8], ptr [[TMP4]], i64 0, i64 0
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 100, ptr nonnull [[TMP6]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8], ptr [[TMP5]], i64 0, i64 0
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 2, ptr nonnull [[TMP7]]) #[[ATTR3]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8], ptr [[TMP5]], i64 0, i64 1
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP1:%.*]], 0
; CHECK-NEXT: br i1 [[TMP9]], label [[TMP15:%.*]], label [[TMP10:%.*]]
; CHECK: 10:
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(100) [[TMP6]], i8 -86, i64 100, i1 false), !annotation !0
; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP0:%.*]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [100 x i8], ptr [[TMP4]], i64 0, i64 [[TMP11]]
; CHECK-NEXT: store i8 12, ptr [[TMP12]], align 1
; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP6]], align 16
; CHECK-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32
; CHECK-NEXT: br label [[TMP22:%.*]]
; CHECK: 15:
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP2:%.*]], 0
; CHECK-NEXT: br i1 [[TMP16]], label [[TMP22]], label [[TMP17:%.*]]
; CHECK: 17:
; CHECK-NEXT: store i8 -86, ptr [[TMP7]], align 1, !annotation !0
; CHECK-NEXT: store i8 -86, ptr [[TMP8]], align 1, !annotation !0
; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i8], ptr [[TMP5]], i64 0, i64 [[TMP18]]
; CHECK-NEXT: store i8 12, ptr [[TMP19]], align 1
; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP7]], align 1
; CHECK-NEXT: [[TMP21:%.*]] = sext i8 [[TMP20]] to i32
; CHECK-NEXT: br label [[TMP22]]
; CHECK: 22:
; CHECK-NEXT: [[TMP23:%.*]] = phi i32 [ [[TMP14]], [[TMP10]] ], [ [[TMP21]], [[TMP17]] ], [ 0, [[TMP15]] ]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr nonnull [[TMP7]]) #[[ATTR3]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 100, ptr nonnull [[TMP6]]) #[[ATTR3]]
; CHECK-NEXT: ret i32 [[TMP23]]
;
%4 = alloca [100 x i8], align 16
%5 = alloca [2 x i8], align 1
%6 = getelementptr inbounds [100 x i8], [100 x i8]* %4, i64 0, i64 0
call void @llvm.lifetime.start.p0i8(i64 100, i8* nonnull %6) #3
; This memset must move.
call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 16 dereferenceable(100) %6, i8 -86, i64 100, i1 false), !annotation !0
%7 = getelementptr inbounds [2 x i8], [2 x i8]* %5, i64 0, i64 0
call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %7) #3
; This store must move.
store i8 -86, i8* %7, align 1, !annotation !0
%8 = getelementptr inbounds [2 x i8], [2 x i8]* %5, i64 0, i64 1
; This store must move.
store i8 -86, i8* %8, align 1, !annotation !0
%9 = icmp eq i32 %1, 0
br i1 %9, label %15, label %10
10:
%11 = sext i32 %0 to i64
%12 = getelementptr inbounds [100 x i8], [100 x i8]* %4, i64 0, i64 %11
store i8 12, i8* %12, align 1
%13 = load i8, i8* %6, align 16
%14 = sext i8 %13 to i32
br label %22
15:
%16 = icmp eq i32 %2, 0
br i1 %16, label %22, label %17
17:
%18 = sext i32 %0 to i64
%19 = getelementptr inbounds [2 x i8], [2 x i8]* %5, i64 0, i64 %18
store i8 12, i8* %19, align 1
%20 = load i8, i8* %7, align 1
%21 = sext i8 %20 to i32
br label %22
22:
%23 = phi i32 [ %14, %10 ], [ %21, %17 ], [ 0, %15 ]
call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %7) #3
call void @llvm.lifetime.end.p0i8(i64 100, i8* nonnull %6) #3
ret i32 %23
}
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
attributes #0 = { mustprogress nofree nosync nounwind readnone uwtable willreturn }
attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn }
attributes #2 = { argmemonly mustprogress nofree nounwind willreturn writeonly }
attributes #3 = { nounwind }
!0 = !{!"auto-init"}

View File

@ -0,0 +1,70 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
; In that case, the store to %val happens before the fence and cannot go past
; it.
define void @foo(i32 %x) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[VAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 -1431655766, ptr [[VAL]], align 4, !annotation !0
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X:%.*]], 0
; CHECK-NEXT: fence acquire
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: call void @dump(ptr [[VAL]])
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: ret void
;
entry:
%val = alloca i32, align 4
store i32 -1431655766, ptr %val, align 4, !annotation !0
%tobool = icmp ne i32 %x, 0
fence acquire
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
call void @dump(ptr %val)
br label %if.end
if.end: ; preds = %if.then, %entry
ret void
}
; In that case, the store to %val happens after the fence and it is moved within
; the true branch as expected.
define void @bar(i32 %x) {
; CHECK-LABEL: @bar(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[VAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X:%.*]], 0
; CHECK-NEXT: fence acquire
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: store i32 -1431655766, ptr [[VAL]], align 4, !annotation !0
; CHECK-NEXT: call void @dump(ptr [[VAL]])
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: ret void
;
entry:
%val = alloca i32, align 4
%tobool = icmp ne i32 %x, 0
fence acquire
store i32 -1431655766, ptr %val, align 4, !annotation !0
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
call void @dump(ptr %val)
br label %if.end
if.end: ; preds = %if.then, %entry
ret void
}
declare void @dump(ptr)
!0 = !{!"auto-init"}

View File

@ -0,0 +1,102 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
define void @foo(i32 %x) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BUFFER:%.*]] = alloca [80 x i32], align 16
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[BUFFER]], i8 -86, i64 320, i1 false), !annotation !0
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[DEC:%.*]], [[DO_COND:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [80 x i32], ptr [[BUFFER]], i64 0, i64 0
; CHECK-NEXT: call void @dump(ptr [[ARRAYIDX]])
; CHECK-NEXT: br label [[DO_COND]]
; CHECK: do.cond:
; CHECK-NEXT: [[DEC]] = add nsw i32 [[X_ADDR_0]], -1
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X_ADDR_0]], 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[DO_BODY]], label [[DO_END:%.*]]
; CHECK: do.end:
; CHECK-NEXT: ret void
;
entry:
%buffer = alloca [80 x i32], align 16
call void @llvm.memset.p0.i64(ptr align 16 %buffer, i8 -86, i64 320, i1 false), !annotation !0
br label %do.body
do.body: ; preds = %do.cond, %entry
%x.addr.0 = phi i32 [ %x, %entry ], [ %dec, %do.cond ]
%arrayidx = getelementptr inbounds [80 x i32], ptr %buffer, i64 0, i64 0
call void @dump(ptr %arrayidx)
br label %do.cond
do.cond: ; preds = %do.body
%dec = add nsw i32 %x.addr.0, -1
%tobool = icmp ne i32 %x.addr.0, 0
br i1 %tobool, label %do.body, label %do.end
do.end: ; preds = %do.cond
ret void
}
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
declare void @dump(ptr )
define void @bar(i32 %x, i32 %y) {
; CHECK-LABEL: @bar(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BUFFER:%.*]] = alloca [80 x i32], align 16
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[Y:%.*]], 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[BUFFER]], i8 -86, i64 320, i1 false), !annotation !0
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X:%.*]], [[Y]]
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[DEC:%.*]], [[DO_COND:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [80 x i32], ptr [[BUFFER]], i64 0, i64 0
; CHECK-NEXT: call void @dump(ptr [[ARRAYIDX]])
; CHECK-NEXT: br label [[DO_COND]]
; CHECK: do.cond:
; CHECK-NEXT: [[DEC]] = add nsw i32 [[X_ADDR_0]], -1
; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp ne i32 [[X_ADDR_0]], 0
; CHECK-NEXT: br i1 [[TOBOOL1]], label [[DO_BODY]], label [[DO_END:%.*]]
; CHECK: do.end:
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: ret void
;
entry:
%buffer = alloca [80 x i32], align 16
call void @llvm.memset.p0.i64(ptr align 16 %buffer, i8 -86, i64 320, i1 false), !annotation !0
%tobool = icmp ne i32 %y, 0
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
%add = add nsw i32 %x, %y
br label %do.body
do.body: ; preds = %do.cond, %if.then
%x.addr.0 = phi i32 [ %add, %if.then ], [ %dec, %do.cond ]
%arrayidx = getelementptr inbounds [80 x i32], ptr %buffer, i64 0, i64 0
call void @dump(ptr %arrayidx)
br label %do.cond
do.cond: ; preds = %do.body
%dec = add nsw i32 %x.addr.0, -1
%tobool1 = icmp ne i32 %x.addr.0, 0
br i1 %tobool1, label %do.body, label %do.end
do.end: ; preds = %do.cond
br label %if.end
if.end: ; preds = %do.end, %entry
ret void
}
!0 = !{!"auto-init"}

View File

@ -0,0 +1,36 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
define void @foo(i32 %x) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[VAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X:%.*]], 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: store i32 -1431655766, ptr [[VAL]], align 4, !annotation !0
; CHECK-NEXT: call void @dump(ptr [[VAL]])
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: ret void
;
entry:
%val = alloca i32, align 4
store i32 -1431655766, ptr %val, align 4, !annotation !0
%tobool = icmp ne i32 %x, 0
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
call void @dump(ptr %val)
br label %if.end
if.end: ; preds = %if.then, %entry
ret void
}
declare void @dump(ptr)
!0 = !{!"auto-init"}

View File

@ -0,0 +1,62 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s
; Checks that auto-init memory isntruction are mot moved when writing to an sret argument.
target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
%struct.S = type { i64 }
@pattern = private unnamed_addr constant %struct.S { i64 -1 }, align 4
define void @f(ptr noalias sret(%struct.S) align 4 %0, i32 noundef %1) {
; CHECK-LABEL: @f(
; CHECK-NEXT: [[TMP3:%.*]] = alloca ptr, align 4
; CHECK-NEXT: [[TMP4:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_S:%.*]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = bitcast ptr [[TMP0:%.*]] to ptr
; CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP3]], align 4
; CHECK-NEXT: store i32 [[TMP1:%.*]], ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr [[TMP0]] to ptr
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP7]], ptr align 4 @pattern, i32 8, i1 false), !annotation !0
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 42
; CHECK-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP13:%.*]]
; CHECK: 10:
; CHECK-NEXT: call void @g(ptr sret([[STRUCT_S]]) align 4 [[TMP5]])
; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr [[TMP0]] to ptr
; CHECK-NEXT: [[TMP12:%.*]] = bitcast ptr [[TMP5]] to ptr
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP12]], i32 8, i1 false)
; CHECK-NEXT: br label [[TMP13]]
; CHECK: 13:
; CHECK-NEXT: ret void
;
%3 = alloca ptr, align 4
%4 = alloca i32, align 4
%5 = alloca %struct.S, align 4
%6 = bitcast ptr %0 to ptr
store ptr %6, ptr %3, align 4
store i32 %1, ptr %4, align 4
%7 = bitcast ptr %0 to ptr
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %7, ptr align 4 @pattern, i32 8, i1 false), !annotation !0
%8 = load i32, ptr %4, align 4
%9 = icmp eq i32 %8, 42
br i1 %9, label %10, label %13
10: ; preds = %2
call void @g(ptr sret(%struct.S) align 4 %5)
%11 = bitcast ptr %0 to ptr
%12 = bitcast ptr %5 to ptr
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %11, ptr align 4 %12, i32 8, i1 false)
br label %13
13: ; preds = %10, %2
ret void
}
declare void @g(ptr sret(%struct.S) align 4, ...)
declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) #0
!0 = !{!"auto-init"}

View File

@ -0,0 +1,37 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s
; Make sure that volatile operationsa re not moved. This is overly conservative.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
define void @foo(i32 %x) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[VAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: store volatile i32 -1431655766, ptr [[VAL]], align 4, !annotation !0
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X:%.*]], 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: call void @dump(ptr [[VAL]])
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: ret void
;
entry:
%val = alloca i32, align 4
store volatile i32 -1431655766, ptr %val, align 4, !annotation !0
%tobool = icmp ne i32 %x, 0
br i1 %tobool, label %if.then, label %if.end
if.then:
call void @dump(ptr %val)
br label %if.end
if.end:
ret void
}
declare void @dump(ptr)
!0 = !{!"auto-init"}