Wei Mi 3e13826e61 [LSR] Canonicalize formula and put recursive Reg related with current loop in ScaledReg.
After rL294814, an LSR formula can have multiple SCEVAddRecExprs inside its BaseRegs.
The previous canonicalization swapped the first SCEVAddRecExpr in BaseRegs with ScaledReg.
But now we want to swap the SCEVAddRecExpr Reg related to the current loop with ScaledReg.
Otherwise, we may generate code like this: RegA + lsr.iv + RegB, where the loop-invariant
parts RegA and RegB are not grouped together and cannot be promoted outside of the loop.
This patch ensures that lsr.iv is generated later in the expression:
RegA + RegB + lsr.iv, so that RegA + RegB can be promoted outside of the loop.

Differential Revision: https://reviews.llvm.org/D26781


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295884 91177308-0d34-0410-b5e6-96231b3b80d8
2017-02-22 21:47:08 +00:00

66 lines
2.9 KiB
LLVM

; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -S < %s | FileCheck %s
; Check that LSR formula canonicalization places loop-invariant registers
; before the induction variable of the current loop, so that expressions
; involving only loop-invariant registers can be promoted outside of the
; current loop.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; @foo: a two-level loop nest used as input for the loop-reduce pass.
;   %size     - zero-extended to form the inner loop's exit bound; the inner
;               loop is entered only when %size > 1 (signed).
;   %nsteps   - zero-extended to form the outer loop's exit bound; the nest is
;               entered only when %nsteps > 0 (signed).
;   %maxarray - i8 buffer that the inner loop both loads from and stores to.
;   %buffer   - not referenced anywhere in the body.
;   %init     - sign-extended and added to the outer induction variable to
;               form the inner-loop-invariant part of the load index.
define void @foo(i32 %size, i32 %nsteps, i8* nocapture %maxarray, i8* nocapture readnone %buffer, i32 %init) local_unnamed_addr #0 {
entry:
; Skip the whole loop nest unless %nsteps is strictly positive.
%cmp25 = icmp sgt i32 %nsteps, 0
br i1 %cmp25, label %for.cond1.preheader.lr.ph, label %for.end12
for.cond1.preheader.lr.ph: ; preds = %entry
; Values computed once, before the outer loop: the inner-loop guard, the
; widened %init, and the two 64-bit exit bounds.
%cmp223 = icmp sgt i32 %size, 1
%t0 = sext i32 %init to i64
%wide.trip.count = zext i32 %size to i64
%wide.trip.count31 = zext i32 %nsteps to i64
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %for.inc10, %for.cond1.preheader.lr.ph
; Outer loop header: %indvars.iv28 starts at 0 and is advanced by 1 in
; %for.inc10. The inner loop is bypassed when %cmp223 is false.
%indvars.iv28 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next29, %for.inc10 ]
br i1 %cmp223, label %for.body3.lr.ph, label %for.inc10
for.body3.lr.ph: ; preds = %for.cond1.preheader
; Inner-loop preheader. %t1 (outer IV + %t0) and %t2 (outer IV truncated to
; i8) are defined here, outside %for.body3, so they are invariant with
; respect to the inner loop.
%t1 = add nsw i64 %indvars.iv28, %t0
%t2 = trunc i64 %indvars.iv28 to i8
br label %for.body3
; Make sure loop invariant items are grouped together so that load address can
; be represented in one getelementptr.
; The directives below expect the transformed inner loop to begin with an i64
; phi whose initial value is 1, immediately followed by a getelementptr that is
; indexed by that phi, immediately followed by the i8 load from that address.
; CHECK-LABEL: for.body3:
; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ 1, %for.body3.lr.ph ], [ {{.*}}, %for.body3 ]
; CHECK-NOT: = phi i64
; CHECK-NEXT: [[LOADADDR:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSR]]
; CHECK-NEXT: = load i8, i8* [[LOADADDR]], align 1
; CHECK: br i1 %exitcond, label %for.inc10.loopexit, label %for.body3
for.body3: ; preds = %for.body3, %for.body3.lr.ph
; Inner loop body: %indvars.iv starts at 1 and steps by 1.
%indvars.iv = phi i64 [ 1, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
%t5 = trunc i64 %indvars.iv to i8
; Load index = inner-loop-invariant %t1 + inner IV. This is the address whose
; LSR formula the test is about.
%t3 = add nsw i64 %t1, %indvars.iv
%arrayidx = getelementptr inbounds i8, i8* %maxarray, i64 %t3
%t4 = load i8, i8* %arrayidx, align 1
; Stored value = loaded byte + truncated inner IV + truncated outer IV.
%add5 = add i8 %t4, %t5
%add6 = add i8 %add5, %t2
; The store is indexed by the inner IV alone.
%arrayidx9 = getelementptr inbounds i8, i8* %maxarray, i64 %indvars.iv
store i8 %add6, i8* %arrayidx9, align 1
; Leave the inner loop once the incremented IV equals the zero-extended %size.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond, label %for.inc10.loopexit, label %for.body3
for.inc10.loopexit: ; preds = %for.body3
br label %for.inc10
for.inc10: ; preds = %for.inc10.loopexit, %for.cond1.preheader
; Outer loop latch: advance the outer IV and leave the nest once it equals the
; zero-extended %nsteps.
%indvars.iv.next29 = add nuw nsw i64 %indvars.iv28, 1
%exitcond32 = icmp eq i64 %indvars.iv.next29, %wide.trip.count31
br i1 %exitcond32, label %for.end12.loopexit, label %for.cond1.preheader
for.end12.loopexit: ; preds = %for.inc10
br label %for.end12
for.end12: ; preds = %for.end12.loopexit, %entry
ret void
}