diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 32d22ed5402..a58356542db 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -175,10 +175,12 @@ private:
     bool FindIVForUser(ICmpInst *Cond, IVStrideUse *&CondUse,
                        const SCEVHandle *&CondStride);
-    unsigned CheckForIVReuse(const SCEVHandle&, IVExpr&, const Type*,
+    unsigned CheckForIVReuse(bool, const SCEVHandle&,
+                             IVExpr&, const Type*,
                              const std::vector<BasedUser>& UsersToProcess);
-    bool ValidStride(int64_t, const std::vector<BasedUser>& UsersToProcess);
+    bool ValidStride(bool, int64_t,
+                     const std::vector<BasedUser>& UsersToProcess);
     void StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
                                       IVUsersOfOneStride &Uses,
@@ -937,8 +939,8 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
 /// isZero - returns true if the scalar evolution expression is zero.
 ///
-static bool isZero(SCEVHandle &V) {
-  if (SCEVConstant *SC = dyn_cast<SCEVConstant>(V))
+static bool isZero(const SCEVHandle &V) {
+  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V))
     return SC->getValue()->isZero();
   return false;
 }
@@ -946,7 +948,8 @@ static bool isZero(SCEVHandle &V) {
 /// ValidStride - Check whether the given Scale is valid for all loads and
 /// stores in UsersToProcess.
 ///
-bool LoopStrengthReduce::ValidStride(int64_t Scale,
+bool LoopStrengthReduce::ValidStride(bool HasBaseReg,
+                                     int64_t Scale,
                                const std::vector<BasedUser>& UsersToProcess) {
   for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {
     // If this is a load or other access, pass the type of the access in.
@@ -959,6 +962,7 @@ bool LoopStrengthReduce::ValidStride(int64_t Scale,
     TargetLowering::AddrMode AM;
     if (SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
       AM.BaseOffs = SC->getValue()->getSExtValue();
+    AM.HasBaseReg = HasBaseReg || !isZero(UsersToProcess[i].Base);
     AM.Scale = Scale;

     // If load[imm+r*scale] is illegal, bail out.
@@ -970,9 +974,11 @@ bool LoopStrengthReduce::ValidStride(int64_t Scale,
 /// CheckForIVReuse - Returns the multiple if the stride is the multiple
 /// of a previous stride and it is a legal value for the target addressing
-/// mode scale component. This allows the users of this stride to be rewritten
-/// as prev iv * factor. It returns 0 if no reuse is possible.
-unsigned LoopStrengthReduce::CheckForIVReuse(const SCEVHandle &Stride,
+/// mode scale component and optional base reg. This allows the users of
+/// this stride to be rewritten as prev iv * factor. It returns 0 if no
+/// reuse is possible.
+unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
+                                             const SCEVHandle &Stride,
                                              IVExpr &IV, const Type *Ty,
                                 const std::vector<BasedUser>& UsersToProcess) {
   if (!TLI) return 0;
@@ -992,7 +998,7 @@ unsigned LoopStrengthReduce::CheckForIVReuse(const SCEVHandle &Stride,
       // stores; if it can be used for some and not others, we might as well use
       // the original stride everywhere, since we have to create the IV for it
       // anyway.
-      if (ValidStride(Scale, UsersToProcess))
+      if (ValidStride(HasBaseReg, Scale, UsersToProcess))
         for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
                IE = SI->second.IVs.end(); II != IE; ++II)
           // FIXME: Only handle base == 0 for now.
@@ -1061,7 +1067,18 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
   // UsersToProcess base values.
   SCEVHandle CommonExprs =
     RemoveCommonExpressionsFromUseBases(UsersToProcess, SE);
+
+  // If we managed to find some expressions in common, we'll need to carry
+  // their value in a register and add it in for each use. This will take up
+  // a register operand, which potentially restricts what stride values are
+  // valid.
+  bool HaveCommonExprs = !isZero(CommonExprs);
+
+  // Keep track if every use in UsersToProcess is an address. If they all are,
+  // we may be able to rewrite the entire collection of them in terms of a
+  // smaller-stride IV.
+  bool AllUsesAreAddresses = true;
+
   // Next, figure out what we can represent in the immediate fields of
   // instructions. If we can represent anything there, move it to the imm
   // fields of the BasedUsers. We do this so that it increases the commonality
@@ -1085,29 +1102,53 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
         isAddress = true;
       } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UsersToProcess[i].Inst)) {
-        // Addressing modes can also be folded into prefetches.
-        if (II->getIntrinsicID() == Intrinsic::prefetch &&
-            II->getOperand(1) == UsersToProcess[i].OperandValToReplace)
-          isAddress = true;
+        // Addressing modes can also be folded into prefetches and a variety
+        // of intrinsics.
+        switch (II->getIntrinsicID()) {
+        default: break;
+        case Intrinsic::prefetch:
+        case Intrinsic::x86_sse2_loadu_dq:
+        case Intrinsic::x86_sse2_loadu_pd:
+        case Intrinsic::x86_sse_loadu_ps:
+        case Intrinsic::x86_sse_storeu_ps:
+        case Intrinsic::x86_sse2_storeu_pd:
+        case Intrinsic::x86_sse2_storeu_dq:
+        case Intrinsic::x86_sse2_storel_dq:
+          if (II->getOperand(1) == UsersToProcess[i].OperandValToReplace)
+            isAddress = true;
+          break;
+        case Intrinsic::x86_sse2_loadh_pd:
+        case Intrinsic::x86_sse2_loadl_pd:
+          if (II->getOperand(2) == UsersToProcess[i].OperandValToReplace)
+            isAddress = true;
+          break;
+        }
       }
+
+      // If this use isn't an address, then not all uses are addresses.
+      if (!isAddress)
+        AllUsesAreAddresses = false;

       MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
                           UsersToProcess[i].Imm, isAddress, L, SE);
     }
   }

-  // Check if it is possible to reuse a IV with stride that is factor of this
-  // stride. And the multiple is a number that can be encoded in the scale
-  // field of the target addressing mode. And we will have a valid
-  // instruction after this substition, including the immediate field, if any.
+  // If all uses are addresses, check if it is possible to reuse an IV with a
+  // stride that is a factor of this stride. And that the multiple is a number
+  // that can be encoded in the scale field of the target addressing mode. And
+  // that we will have a valid instruction after this substitution, including
+  // the immediate field, if any.
   PHINode *NewPHI = NULL;
   Value *IncV = NULL;
   IVExpr ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty),
                  SE->getIntegerSCEV(0, Type::Int32Ty), 0, 0);
-  unsigned RewriteFactor = CheckForIVReuse(Stride, ReuseIV,
-                                           CommonExprs->getType(),
-                                           UsersToProcess);
+  unsigned RewriteFactor = 0;
+  if (AllUsesAreAddresses)
+    RewriteFactor = CheckForIVReuse(HaveCommonExprs, Stride, ReuseIV,
+                                    CommonExprs->getType(),
+                                    UsersToProcess);
   if (RewriteFactor != 0) {
     DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
          << " and BASE " << *ReuseIV.Base << " :\n";
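To make the role of the new HasBaseReg flag concrete, here is a minimal standalone sketch. It is not the actual TargetLowering interface and every name in it is illustrative; it only models the x86 rule that scales 2, 4, and 8 are directly encodable, while 3, 5, and 9 are reachable only as index + index*{2,4,8} via lea, which itself consumes the base-register slot:

    // Minimal sketch, assuming an x86-like target; not LLVM's API.
    #include <cstdint>
    #include <iostream>

    struct AddrMode {
      int64_t BaseOffs = 0;    // immediate displacement, if any
      bool HasBaseReg = false; // true if something already occupies the base reg
      int64_t Scale = 0;       // multiplier on the index register
    };

    // An x86-flavored answer to "is base + scale*index + offset legal?".
    static bool isLegalX86AddrMode(const AddrMode &AM) {
      switch (AM.Scale) {
      case 0: case 1: case 2: case 4: case 8:
        return true;           // directly encodable in a SIB byte
      case 3: case 5: case 9:
        // Only reachable as lea r, [index + index*{2,4,8}], which uses
        // the base-register slot -- so that slot must still be free.
        return !AM.HasBaseReg;
      default:
        return false;
      }
    }

    int main() {
      AddrMode AM;
      AM.Scale = 9;
      std::cout << isLegalX86AddrMode(AM) << "\n"; // 1: lea can fake scale 9
      AM.HasBaseReg = true; // e.g. a common base expression needs a register
      std::cout << isLegalX86AddrMode(AM) << "\n"; // 0: scale 9 no longer fits
    }

This is why ValidStride now threads HasBaseReg through to the addressing-mode query: once a common expression occupies the base register, a stride-9 reuse stops being legal even though stride 9 alone would have been fine.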
diff --git a/test/CodeGen/X86/2007-08-10-LEA16Use32.ll b/test/CodeGen/X86/2007-08-10-LEA16Use32.ll
index f6a848255fd..1a0bcf94e5b 100644
--- a/test/CodeGen/X86/2007-08-10-LEA16Use32.ll
+++ b/test/CodeGen/X86/2007-08-10-LEA16Use32.ll
@@ -1,4 +1,8 @@
 ; RUN: llvm-as < %s | llc -march=x86 | grep {leal}
+; XFAIL: *
+; This test is XFAIL'd because strength-reduction was improved to
+; avoid emitting the lea, so it no longer tests whether the 16-bit
+; lea is avoided.

 @X = global i16 0		; <i16*> [#uses=1]
 @Y = global i16 0		; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/stride-nine-with-base-reg.ll b/test/CodeGen/X86/stride-nine-with-base-reg.ll
new file mode 100644
index 00000000000..f443c76015d
--- /dev/null
+++ b/test/CodeGen/X86/stride-nine-with-base-reg.ll
@@ -0,0 +1,34 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep lea | count 1
+; RUN: llvm-as < %s | llc -march=x86-64 | not grep lea
+
+; For x86 there's an lea above the loop. In both cases, there shouldn't
+; be any lea instructions inside the loop.
+
+@B = external global [1000 x i8], align 32
+@A = external global [1000 x i8], align 32
+@P = external global [1000 x i8], align 32
+
+define void @foo(i32 %m, i32 %p) {
+entry:
+	%tmp1 = icmp sgt i32 %m, 0
+	br i1 %tmp1, label %bb, label %return
+
+bb:
+	%i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ]
+	%tmp2 = getelementptr [1000 x i8]* @B, i32 0, i32 %i.019.0
+	%tmp3 = load i8* %tmp2, align 4
+	%tmp4 = mul i8 %tmp3, 2
+	%tmp5 = getelementptr [1000 x i8]* @A, i32 0, i32 %i.019.0
+	store i8 %tmp4, i8* %tmp5, align 4
+	%tmp8 = mul i32 %i.019.0, 9
+	%tmp0 = add i32 %tmp8, %p
+	%tmp10 = getelementptr [1000 x i8]* @P, i32 0, i32 %tmp0
+	store i8 17, i8* %tmp10, align 4
+	%indvar.next = add i32 %i.019.0, 1
+	%exitcond = icmp eq i32 %indvar.next, %m
+	br i1 %exitcond, label %return, label %bb
+
+return:
+	ret void
+}
+
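As a reading aid, the loop in the new test corresponds roughly to the following C++; this is hypothetical source, not part of the commit. The stride-9 store needs %p in a register, so the base-register slot is occupied, the lea trick for scale 9 is unavailable, and LSR should keep a separate IV stepping by 9 rather than scaling the stride-1 IV:

    // Rough C++ equivalent of stride-nine-with-base-reg.ll (illustrative only).
    extern char A[1000], B[1000], P[1000];

    void foo(int m, int p) {
      for (int i = 0; i < m; ++i) {
        A[i] = B[i] * 2;    // stride-1 address uses of i
        P[9 * i + p] = 17;  // stride-9 address use; p occupies the base register
      }
    }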
diff --git a/test/CodeGen/X86/stride-reuse.ll b/test/CodeGen/X86/stride-reuse.ll
new file mode 100644
index 00000000000..97f33d8adbc
--- /dev/null
+++ b/test/CodeGen/X86/stride-reuse.ll
@@ -0,0 +1,30 @@
+; RUN: llvm-as < %s | llc -march=x86 | not grep lea
+; RUN: llvm-as < %s | llc -march=x86-64 | not grep lea
+
+@B = external global [1000 x float], align 32
+@A = external global [1000 x float], align 32
+@P = external global [1000 x i32], align 32
+
+define void @foo(i32 %m) {
+entry:
+	%tmp1 = icmp sgt i32 %m, 0
+	br i1 %tmp1, label %bb, label %return
+
+bb:
+	%i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ]
+	%tmp2 = getelementptr [1000 x float]* @B, i32 0, i32 %i.019.0
+	%tmp3 = load float* %tmp2, align 4
+	%tmp4 = mul float %tmp3, 2.000000e+00
+	%tmp5 = getelementptr [1000 x float]* @A, i32 0, i32 %i.019.0
+	store float %tmp4, float* %tmp5, align 4
+	%tmp8 = shl i32 %i.019.0, 1
+	%tmp9 = add i32 %tmp8, 64
+	%tmp10 = getelementptr [1000 x i32]* @P, i32 0, i32 %i.019.0
+	store i32 %tmp9, i32* %tmp10, align 4
+	%indvar.next = add i32 %i.019.0, 1
+	%exitcond = icmp eq i32 %indvar.next, %m
+	br i1 %exitcond, label %return, label %bb
+
+return:
+	ret void
+}
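The stride-reuse test corresponds roughly to this C++, again hypothetical. Here the doubled value stored into P is a plain value, not an address, so AllUsesAreAddresses is false for the stride-2 users; with this patch LSR no longer rewrites them as 2 * (the stride-1 IV), which previously materialized an lea in the loop body, and a separate IV stepping by 2 is used instead:

    // Rough C++ equivalent of stride-reuse.ll (illustrative only).
    extern float A[1000], B[1000];
    extern int P[1000];

    void foo(int m) {
      for (int i = 0; i < m; ++i) {
        A[i] = B[i] * 2.0f;  // stride-1 address uses of i
        P[i] = 2 * i + 64;   // stride-2 use that is a stored value, not an address
      }
    }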