mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-16 08:08:01 +00:00
Fix LSR to tolerate cases where ScalarEvolution initially misses an opportunity to fold add operands, but folds them after LSR has separated them out.
This fixes rdar://7886751.
llvm-svn: 102157
This commit is contained in:
parent
4196c77b3b
commit
38949c2f1f
@ -16,3 +16,15 @@ In addition to being much more complicated, it involves i65 arithmetic,
|
||||
which is very inefficient when expanded into code.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
In test/CodeGen/X86/lsr-delayed-fold.ll,
|
||||
|
||||
ScalarEvolution is forming this expression:
|
||||
|
||||
((trunc i64 (-1 * %arg5) to i32) + (trunc i64 %arg5 to i32) + (-1 * (trunc i64 undef to i32)))
|
||||
|
||||
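Because (trunc i64 (-1 * %arg5) to i32) and (trunc i64 %arg5 to i32) sum to zero in i32 (modulo 2^32) arithmetic, this could be folded to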
|
||||
|
||||
(-1 * (trunc i64 undef to i32))
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
@ -2060,8 +2060,11 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
|
||||
LU.Kind, LU.AccessTy, TLI, SE))
|
||||
continue;
|
||||
|
||||
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
|
||||
if (InnerSum->isZero())
|
||||
continue;
|
||||
Formula F = Base;
|
||||
F.BaseRegs[i] = SE.getAddExpr(InnerAddOps);
|
||||
F.BaseRegs[i] = InnerSum;
|
||||
F.BaseRegs.push_back(*J);
|
||||
if (InsertFormula(LU, LUIdx, F))
|
||||
// If that formula hadn't been seen before, recurse to find more like
|
||||
|
28
test/CodeGen/X86/lsr-delayed-fold.ll
Normal file
28
test/CodeGen/X86/lsr-delayed-fold.ll
Normal file
@ -0,0 +1,28 @@
|
||||
; RUN: llc -march=x86-64 < %s > /dev/null
|
||||
; rdar://7886751
|
||||
|
||||
; ScalarEvolution misses an opportunity to fold ((trunc x) + (trunc -x) + y),
|
||||
; but LSR should tolerate this.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-apple-darwin11.0"
|
||||
|
||||
; Reproducer for rdar://7886751. The loop in bb15 compares
;   trunc(undef + %t14 * %t16)   against   trunc(%arg5)
; where %t14 is the low 32 bits of (0 - %arg5) and %t16 counts up from 0 by 1.
; The RUN line discards llc's output, so the test only requires that llc
; completes successfully on this input.
define fastcc void @formatValue(i64 %arg5) nounwind {
|
||||
bb12: ; entry block, no predecessors
|
||||
%t = trunc i64 %arg5 to i32 ; <i32> [#uses=1]
|
||||
%t13 = sub i64 0, %arg5 ; <i64> [#uses=1]
|
||||
%t14 = and i64 %t13, 4294967295 ; <i64> [#uses=1] keeps the low 32 bits of -%arg5
|
||||
br label %bb15
|
||||
|
||||
bb15: ; preds = %bb15, %bb12
|
||||
%t16 = phi i64 [ 0, %bb12 ], [ %t23, %bb15 ] ; <i64> [#uses=2] loop counter: 0, 1, 2, ...
|
||||
%t17 = mul i64 %t14, %t16 ; <i64> [#uses=1]
|
||||
%t18 = add i64 undef, %t17 ; <i64> [#uses=1]
|
||||
%t19 = trunc i64 %t18 to i32 ; <i32> [#uses=1]
|
||||
%t22 = icmp eq i32 %t19, %t ; <i1> [#uses=1] loop exit condition
|
||||
%t23 = add i64 %t16, 1 ; <i64> [#uses=1]
|
||||
br i1 %t22, label %bb24, label %bb15
|
||||
|
||||
bb24: ; preds = %bb15
|
||||
unreachable
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user