mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-10 22:46:25 +00:00
Avoid creating canonical induction variables for non-native types.
For example, on a 32-bit architecture, don't promote all uses of the IV to 64 bits just because one use is a 64-bit cast. Alternate implementation of the patch by Arnaud de Grandmaison. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@127884 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
84161c2f68
commit
5fd5b125ff
@ -28,6 +28,7 @@ class IVUsers;
|
||||
class ScalarEvolution;
|
||||
class SCEV;
|
||||
class IVUsers;
|
||||
class TargetData;
|
||||
|
||||
/// IVStrideUse - Keep track of one use of a strided induction variable.
|
||||
/// The Expr member keeps track of the expression, User is the actual user
|
||||
@ -122,6 +123,7 @@ class IVUsers : public LoopPass {
|
||||
LoopInfo *LI;
|
||||
DominatorTree *DT;
|
||||
ScalarEvolution *SE;
|
||||
TargetData *TD;
|
||||
SmallPtrSet<Instruction*,16> Processed;
|
||||
|
||||
/// IVUses - A list of all tracked IV uses of induction variable expressions
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "llvm/Analysis/Dominators.h"
|
||||
#include "llvm/Analysis/LoopPass.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
#include "llvm/Assembly/Writer.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
@ -83,7 +84,10 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
|
||||
return false; // Void and FP expressions cannot be reduced.
|
||||
|
||||
// LSR is not APInt clean; do not touch integers bigger than 64 bits.
|
||||
if (SE->getTypeSizeInBits(I->getType()) > 64)
|
||||
// Also avoid creating IVs of non-native types. For example, we don't want a
|
||||
// 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
|
||||
uint64_t Width = SE->getTypeSizeInBits(I->getType());
|
||||
if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
|
||||
return false;
|
||||
|
||||
if (!Processed.insert(I))
|
||||
@ -167,6 +171,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
|
||||
LI = &getAnalysis<LoopInfo>();
|
||||
DT = &getAnalysis<DominatorTree>();
|
||||
SE = &getAnalysis<ScalarEvolution>();
|
||||
TD = getAnalysisIfAvailable<TargetData>();
|
||||
|
||||
// Find all uses of induction variables in this loop, and categorize
|
||||
// them by stride. Start by finding all of the PHI nodes in the header for
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: opt < %s -indvars -S | not grep {sext}
|
||||
; ModuleID = '<stdin>'
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
|
||||
target triple = "x86_64-apple-darwin9.6"
|
||||
@a = external global i32* ; <i32**> [#uses=3]
|
||||
@b = external global i32* ; <i32**> [#uses=3]
|
||||
|
@ -13,7 +13,7 @@
|
||||
; d[(i+2)&15] = e[(i+2)&15]+f[(i+2)&15]+K[i+2];
|
||||
; }
|
||||
;}
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
|
||||
target triple = "x86_64-apple-darwin9.6"
|
||||
@a = external global i32* ; <i32**> [#uses=3]
|
||||
@b = external global i32* ; <i32**> [#uses=3]
|
||||
|
@ -11,7 +11,7 @@
|
||||
; count without casting.
|
||||
|
||||
; ModuleID = 'ada.bc'
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-n:8:16:32"
|
||||
target triple = "i686-pc-linux-gnu"
|
||||
|
||||
define void @kinds__sbytezero([256 x i32]* nocapture %a) nounwind {
|
||||
|
@ -9,7 +9,7 @@
|
||||
; be able to reconstruct the full getelementptr, despite it having a few
|
||||
; obstacles set in its way.
|
||||
|
||||
target datalayout = "e-p:64:64:64"
|
||||
target datalayout = "e-p:64:64:64-n:32:64"
|
||||
|
||||
define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double* nocapture %p) nounwind {
|
||||
entry:
|
||||
|
@ -4,7 +4,7 @@
|
||||
; Indvars should be able to eliminate all of the sign extensions
|
||||
; inside the loop.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
|
||||
@pow_2_tab = external constant [0 x float] ; <[0 x float]*> [#uses=1]
|
||||
@pow_2_025_tab = external constant [0 x float] ; <[0 x float]*> [#uses=1]
|
||||
@i_pow_2_tab = external constant [0 x float] ; <[0 x float]*> [#uses=1]
|
||||
|
@ -6,7 +6,7 @@
|
||||
; inner loop to i64.
|
||||
; TODO: it should promote hiPart to i64 in the outer loop too.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
|
||||
|
||||
define void @t(float* %pTmp1, float* %peakWeight, float* %nrgReducePeakrate, i32 %bandEdgeIndex, float %tmp1) nounwind {
|
||||
entry:
|
||||
|
@ -2,7 +2,7 @@
|
||||
; RUN: not grep and %t
|
||||
; RUN: not grep zext %t
|
||||
|
||||
target datalayout = "-p:64:64:64"
|
||||
target datalayout = "-p:64:64:64-n:32:64"
|
||||
|
||||
define void @foo(double* %d, i64 %n) nounwind {
|
||||
entry:
|
||||
|
@ -2,7 +2,7 @@
|
||||
; RUN: grep {icmp ugt i8\\\*} %t | count 1
|
||||
; RUN: grep {icmp sgt i8\\\*} %t | count 1
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
|
||||
|
||||
%struct.CKenCodeCodec = type <{ i8 }>
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
||||
; Indvars should be able to expand the pointer-arithmetic
|
||||
; IV into an integer IV indexing into a simple getelementptr.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-n:32:64"
|
||||
|
||||
define void @foo(i8* %A, i64 %n) nounwind {
|
||||
entry:
|
||||
|
@ -2,7 +2,7 @@
|
||||
; RUN: not grep inttoptr %t
|
||||
; RUN: not grep ptrtoint %t
|
||||
; RUN: grep scevgep %t
|
||||
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
|
||||
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n:32:64"
|
||||
|
||||
; Indvars shouldn't need inttoptr/ptrtoint to expand an address here.
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
|
||||
; FIXME: This test should pass with or without TargetData. Until opt
|
||||
; supports running tests without targetdata, just hardwire this in.
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
|
||||
|
||||
%struct.Q = type { [10 x %struct.N] }
|
||||
%struct.N = type { %struct.S }
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: opt < %s -indvars -S \
|
||||
; RUN: | grep {\[%\]p.2.ip.1 = getelementptr \\\[3 x \\\[3 x double\\\]\\\]\\* \[%\]p, i64 2, i64 \[%\]tmp, i64 1}
|
||||
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
|
||||
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n:32:64"
|
||||
|
||||
; Indvars shouldn't expand this to
|
||||
; %p.2.ip.1 = getelementptr [3 x [3 x double]]* %p, i64 0, i64 %tmp, i64 19
|
||||
|
@ -6,7 +6,7 @@
|
||||
; Indvars shouldn't leave getelementptrs expanded out as
|
||||
; inttoptr+ptrtoint in its output in common cases.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
%struct.Foo = type { i32, i32, [10 x i32], i32 }
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
; that indvars can promote the induction variable to i64
|
||||
; without needing casts.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
|
||||
|
||||
define float @t(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
|
||||
entry:
|
||||
|
@ -1,7 +1,8 @@
|
||||
; Check that the index of 'P[outer]' is pulled out of the loop.
|
||||
; RUN: opt < %s -loop-reduce -S -default-data-layout="e-p:32:32:32" | \
|
||||
; RUN: opt < %s -loop-reduce -S | \
|
||||
; RUN: not grep {getelementptr.*%outer.*%INDVAR}
|
||||
|
||||
target datalayout = "e-p:32:32:32-n:8:16:32"
|
||||
declare i1 @pred()
|
||||
|
||||
declare i32 @foo()
|
||||
|
@ -1,7 +1,8 @@
|
||||
; Check that the index of 'P[outer]' is pulled out of the loop.
|
||||
; RUN: opt < %s -loop-reduce -S -default-data-layout="e-p:32:32:32" | \
|
||||
; RUN: opt < %s -loop-reduce -S | \
|
||||
; RUN: not grep {getelementptr.*%outer.*%INDVAR}
|
||||
|
||||
target datalayout = "e-p:32:32:32-n:32"
|
||||
declare i1 @pred()
|
||||
|
||||
define void @test([10000 x i32]* %P, i32 %outer) {
|
||||
|
@ -1,7 +1,9 @@
|
||||
; Check that this test makes INDVAR and related stuff dead, because P[indvar]
|
||||
; gets reduced, making INDVAR dead.
|
||||
|
||||
; RUN: opt < %s -loop-reduce -S -default-data-layout="e-p:32:32:32" | not grep INDVAR
|
||||
; RUN: opt < %s -loop-reduce -S | not grep INDVAR
|
||||
|
||||
target datalayout = "e-p:32:32:32-n:32"
|
||||
|
||||
declare i1 @pred()
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
||||
|
||||
; mul uint %i, 3
|
||||
|
||||
target datalayout = "e-p:32:32"
|
||||
target datalayout = "e-p:32:32-n:32"
|
||||
target triple = "i686-apple-darwin8"
|
||||
@flags2 = external global [8193 x i8], align 32 ; <[8193 x i8]*> [#uses=1]
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user