Prevent the scalarizer from caching incorrect entries

The scalarizer can cache incorrect entries when walking up a chain of
insertelement instructions. This occurs when the chain contains more than one
insertelement writing to the same index that it is not actively searching for:
because it unconditionally caches every element it finds, an insert further up
the chain overwrites the entry cached from a nearer (and correct) one. The fix
is to only cache the first element found for each index that it isn't
searching for, so we don't overwrite correct entries.

Reviewers: hfinkel

Differential Revision: http://reviews.llvm.org/D11559


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244448 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Fraser Cormack 2015-08-10 14:48:47 +00:00
parent 2295449608
commit 779046e433
2 changed files with 38 additions and 2 deletions

View File

@ -227,10 +227,16 @@ Value *Scatterer::operator[](unsigned I) {
if (!Idx)
break;
unsigned J = Idx->getZExtValue();
CV[J] = Insert->getOperand(1);
V = Insert->getOperand(0);
if (I == J)
if (I == J) {
CV[J] = Insert->getOperand(1);
return CV[J];
} else if (!CV[J]) {
// Only cache the first entry we find for each index we're not actively
// searching for. This prevents us from going too far up the chain and
// caching incorrect entries.
CV[J] = Insert->getOperand(1);
}
}
CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
V->getName() + ".i" + Twine(I));

View File

@ -0,0 +1,30 @@
; RUN: opt -scalarizer -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Check that vector element 1 is scalarized correctly from a chain of
; insertelement instructions
;
; The entry block writes element 1 twice: first with %x, then with %inc.
; The second write is the one that reaches the loop, so the scalarized phi
; for element 1 must take %inc from %entry. Seeing %x there instead would
; mean the older, overwritten insert was picked up while walking the chain.
define void @func(i32 %x) {
; The negative pattern rejects a phi fed by the stale value %x; the positive
; pattern requires a phi fed by %inc, the last value written to element 1.
; CHECK-LABEL: @func(
; CHECK-NOT: phi i32 [ %x, %entry ], [ %inc.pos.y, %loop ]
; CHECK: phi i32 [ %inc, %entry ], [ %inc.pos.y, %loop ]
; CHECK: ret void
entry:
; First write to element 1: the constant <0, 0> vector gets %x in lane 1.
%vecinit = insertelement <2 x i32> <i32 0, i32 0>, i32 %x, i32 1
%inc = add i32 %x, 1
; Second write to the same lane: %inc replaces %x in element 1.
%0 = insertelement <2 x i32> %vecinit, i32 %inc, i32 1
br label %loop
loop:
; Vector phi whose %entry operand is the end of the insertelement chain.
%pos = phi <2 x i32> [ %0, %entry ], [ %new.pos.y, %loop ]
%i = phi i32 [ 0, %entry ], [ %new.i, %loop ]
; Read element 1, increment it, and write it back into the same lane.
%pos.y = extractelement <2 x i32> %pos, i32 1
%inc.pos.y = add i32 %pos.y, 1
%new.pos.y = insertelement <2 x i32> %pos, i32 %inc.pos.y, i32 1
; %new.i is 1 after the first iteration, so the comparison below is false
; and control leaves the loop after a single pass.
%new.i = add i32 %i, 1
%cmp2 = icmp slt i32 %new.i, 1
br i1 %cmp2, label %loop, label %exit
exit:
ret void
}