[LoopInterchange] Move instructions from preheader to outer loop header.

Instructions defined in the original inner loop preheader may depend on
values defined in the outer loop header, but the inner loop header will
become the entry block in the loop nest. Move the instructions from the
preheader to the outer loop header, so we do not break dominance. We
also have to check for unsafe instructions in the preheader. If there
are no unsafe instructions, all instructions should be movable.

Currently we move all instructions except the terminator and rely on
LICM to hoist out invariant instructions later.

Fixes PR45743
This commit is contained in:
Florian Hahn 2020-08-10 10:52:33 +01:00
parent 1ff94ee798
commit e8f6a1389e
3 changed files with 167 additions and 3 deletions

View File

@ -625,6 +625,13 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
containsUnsafeInstructions(OuterLoopLatch))
return false;
// Also make sure the inner loop preheader does not contain any unsafe
// instructions. Note that all instructions in the preheader will be moved to
// the outer loop header when interchanging.
if (InnerLoopPreHeader != OuterLoopHeader &&
containsUnsafeInstructions(InnerLoopPreHeader))
return false;
LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
// We have a perfect loop nest.
return true;
@ -1306,6 +1313,21 @@ bool LoopInterchangeTransform::transform() {
LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
}
// Instructions in the original inner loop preheader may depend on values
// defined in the outer loop header. Move them there, because the original
// inner loop preheader will become the entry into the interchanged loop nest.
// Currently we move all instructions and rely on LICM to move invariant
// instructions outside the loop nest.
BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
// When the inner loop preheader is the outer loop header itself, the
// instructions are already in the right place and nothing needs to move.
if (InnerLoopPreHeader != OuterLoopHeader) {
  // Relocate every instruction except the block terminator (the range stops
  // at std::prev(end())). The early-increment range advances the iterator
  // before the loop body runs, so moving I out of the block is safe while
  // iterating over it.
  for (Instruction &I :
       make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
                                       std::prev(InnerLoopPreHeader->end()))))
    I.moveBefore(OuterLoopHeader->getTerminator());
}
Transformed |= adjustLoopLinks();
if (!Transformed) {
LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");

View File

@ -20,11 +20,11 @@ define void @lcssa_08(i32 %n, i32 %m) {
; CHECK-NEXT: [[CMP24:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP24]], label [[INNER_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: outer.preheader:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M:%.*]] to i64
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
; CHECK: outer.header:
; CHECK-NEXT: [[INDVARS_IV27:%.*]] = phi i64 [ 0, [[OUTER_PREHEADER:%.*]] ], [ [[INDVARS_IV_NEXT28:%.*]], [[OUTER_LATCH:%.*]] ]
; CHECK-NEXT: [[CMP222:%.*]] = icmp sgt i32 [[M]], 0
; CHECK-NEXT: [[CMP222:%.*]] = icmp sgt i32 [[M:%.*]], 0
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M]] to i64
; CHECK-NEXT: br i1 [[CMP222]], label [[INNER_FOR_BODY_SPLIT1:%.*]], label [[OUTER_CRIT_EDGE:%.*]]
; CHECK: inner.preheader:
; CHECK-NEXT: [[WIDE_TRIP_COUNT29:%.*]] = zext i32 [[N]] to i64
@ -41,8 +41,9 @@ define void @lcssa_08(i32 %n, i32 %m) {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br label [[INNER_CRIT_EDGE:%.*]]
; CHECK: inner.for.body.split:
; CHECK-NEXT: [[WIDE_TRIP_COUNT_LCSSA2:%.*]] = phi i64 [ [[WIDE_TRIP_COUNT]], [[OUTER_LATCH]] ]
; CHECK-NEXT: [[TMP1]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], [[WIDE_TRIP_COUNT_LCSSA2]]
; CHECK-NEXT: br i1 [[TMP2]], label [[INNER_FOR_BODY]], label [[OUTER_CRIT_EDGE]]
; CHECK: inner.crit_edge:
; CHECK-NEXT: br label [[OUTER_LATCH]]

View File

@ -0,0 +1,141 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-interchange -S %s | FileCheck %s
@global = external local_unnamed_addr global [2 x [10 x i32]], align 16
; We need to move %tmp4 from the inner loop preheader to the outer loop header
; before interchanging.
define void @test1() local_unnamed_addr #0 {
; CHECK-LABEL: @test1(
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[INNER_PH:%.*]]
; CHECK: outer.header.preheader:
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
; CHECK: outer.header:
; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 0, [[OUTER_HEADER_PREHEADER:%.*]] ]
; CHECK-NEXT: [[INNER_RED:%.*]] = phi i32 [ [[OUTER_RED:%.*]], [[OUTER_HEADER_PREHEADER]] ], [ [[RED_NEXT:%.*]], [[OUTER_LATCH]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[OUTER_IV]], 9
; CHECK-NEXT: br label [[INNER_SPLIT1:%.*]]
; CHECK: inner.ph:
; CHECK-NEXT: br label [[INNER:%.*]]
; CHECK: inner:
; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[INNER_PH]] ], [ [[TMP0:%.*]], [[INNER_SPLIT:%.*]] ]
; CHECK-NEXT: [[OUTER_RED]] = phi i32 [ [[RED_NEXT_LCSSA:%.*]], [[INNER_SPLIT]] ], [ 0, [[INNER_PH]] ]
; CHECK-NEXT: br label [[OUTER_HEADER_PREHEADER]]
; CHECK: inner.split1:
; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @global, i64 0, i64 [[INNER_IV]], i64 [[TMP4]]
; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4
; CHECK-NEXT: [[RED_NEXT]] = or i32 [[INNER_RED]], 20
; CHECK-NEXT: [[INNER_IV_NEXT:%.*]] = add nsw i64 [[INNER_IV]], 1
; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 400
; CHECK-NEXT: br label [[OUTER_LATCH]]
; CHECK: inner.split:
; CHECK-NEXT: [[RED_NEXT_LCSSA]] = phi i32 [ [[RED_NEXT]], [[OUTER_LATCH]] ]
; CHECK-NEXT: [[TMP0]] = add nsw i64 [[INNER_IV]], 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 400
; CHECK-NEXT: br i1 [[TMP1]], label [[EXIT:%.*]], label [[INNER]]
; CHECK: outer.latch:
; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], 1
; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], 400
; CHECK-NEXT: br i1 [[EC_2]], label [[INNER_SPLIT]], label [[OUTER_HEADER]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
bb:
br label %outer.header
outer.header: ; preds = %outer.latch, %bb
%outer.iv = phi i64 [ 0, %bb ], [ %outer.iv.next, %outer.latch ]
%outer.red = phi i32 [ 0, %bb ], [ %red.next.lcssa, %outer.latch ]
br label %inner.ph
inner.ph: ; preds = %outer.header
; %tmp4 uses %outer.iv, which is defined in outer.header, and is itself used
; by the getelementptr in the inner loop body.
%tmp4 = add nsw i64 %outer.iv, 9
br label %inner
inner: ; preds = %inner, %inner.ph
%inner.iv = phi i64 [ 0, %inner.ph ], [ %inner.iv.next, %inner ]
%inner.red = phi i32 [ %outer.red, %inner.ph ], [ %red.next, %inner ]
%ptr = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @global, i64 0, i64 %inner.iv, i64 %tmp4
store i32 0, i32* %ptr
%red.next = or i32 %inner.red, 20
%inner.iv.next = add nsw i64 %inner.iv, 1
%ec.1 = icmp eq i64 %inner.iv.next, 400
br i1 %ec.1, label %outer.latch, label %inner
outer.latch: ; preds = %inner
%red.next.lcssa = phi i32 [ %red.next, %inner ]
%outer.iv.next = add nsw i64 %outer.iv, 1
%ec.2 = icmp eq i64 %outer.iv.next, 400
br i1 %ec.2, label %exit, label %outer.header
exit: ; preds = %outer.latch
ret void
}
declare void @side_effect()
; Cannot interchange, as the inner loop preheader contains a call to a function
; with side effects.
define void @test2() {
; CHECK-LABEL: @test2(
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
; CHECK: outer.header:
; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ]
; CHECK-NEXT: [[OUTER_RED:%.*]] = phi i32 [ 0, [[BB]] ], [ [[RED_NEXT_LCSSA:%.*]], [[OUTER_LATCH]] ]
; CHECK-NEXT: br label [[INNER_PH:%.*]]
; CHECK: inner.ph:
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[OUTER_IV]], 9
; CHECK-NEXT: call void @side_effect()
; CHECK-NEXT: br label [[INNER:%.*]]
; CHECK: inner:
; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[INNER_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ]
; CHECK-NEXT: [[INNER_RED:%.*]] = phi i32 [ [[OUTER_RED]], [[INNER_PH]] ], [ [[RED_NEXT:%.*]], [[INNER]] ]
; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @global, i64 0, i64 [[INNER_IV]], i64 [[TMP4]]
; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4
; CHECK-NEXT: [[RED_NEXT]] = or i32 [[INNER_RED]], 20
; CHECK-NEXT: [[INNER_IV_NEXT]] = add nsw i64 [[INNER_IV]], 1
; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 400
; CHECK-NEXT: br i1 [[EC_1]], label [[OUTER_LATCH]], label [[INNER]]
; CHECK: outer.latch:
; CHECK-NEXT: [[RED_NEXT_LCSSA]] = phi i32 [ [[RED_NEXT]], [[INNER]] ]
; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], 1
; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], 400
; CHECK-NEXT: br i1 [[EC_2]], label [[EXIT:%.*]], label [[OUTER_HEADER]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
bb:
br label %outer.header
outer.header: ; preds = %outer.latch, %bb
%outer.iv = phi i64 [ 0, %bb ], [ %outer.iv.next, %outer.latch ]
%outer.red = phi i32 [ 0, %bb ], [ %red.next.lcssa, %outer.latch ]
br label %inner.ph
inner.ph: ; preds = %outer.header
; Same shape as @test1, but the inner loop preheader additionally contains a
; call, so the expected output above keeps the loop nest in its original order.
%tmp4 = add nsw i64 %outer.iv, 9
call void @side_effect()
br label %inner
inner: ; preds = %inner, %inner.ph
%inner.iv = phi i64 [ 0, %inner.ph ], [ %inner.iv.next, %inner ]
%inner.red = phi i32 [ %outer.red, %inner.ph ], [ %red.next, %inner ]
%ptr = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @global, i64 0, i64 %inner.iv, i64 %tmp4
store i32 0, i32* %ptr
%red.next = or i32 %inner.red, 20
%inner.iv.next = add nsw i64 %inner.iv, 1
%ec.1 = icmp eq i64 %inner.iv.next, 400
br i1 %ec.1, label %outer.latch, label %inner
outer.latch: ; preds = %inner
%red.next.lcssa = phi i32 [ %red.next, %inner ]
%outer.iv.next = add nsw i64 %outer.iv, 1
%ec.2 = icmp eq i64 %outer.iv.next, 400
br i1 %ec.2, label %exit, label %outer.header
exit: ; preds = %outer.latch
ret void
}