mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-06 12:04:48 +00:00
LoopVectorize: Fix a bug in the scalarization of instructions.
Before if-conversion, loops had a single basic block, so we could tell whether a value was loop-invariant by checking whether it was defined in the same basic block as the instruction being scalarized. Now that loops can have multiple blocks, this check is incorrect. This fixes External/SPEC/CINT95/099_go/099_go. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170756 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
139e407d52
commit
8386acd734
@ -284,7 +284,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
|
|||||||
|
|
||||||
// If the src is an instruction that appeared earlier in the basic block
|
// If the src is an instruction that appeared earlier in the basic block
|
||||||
// then it should already be vectorized.
|
// then it should already be vectorized.
|
||||||
if (SrcInst && SrcInst->getParent() == Instr->getParent()) {
|
if (SrcInst && OrigLoop->contains(SrcInst)) {
|
||||||
assert(WidenMap.count(SrcInst) && "Source operand is unavailable");
|
assert(WidenMap.count(SrcInst) && "Source operand is unavailable");
|
||||||
// The parameter is a vector value from earlier.
|
// The parameter is a vector value from earlier.
|
||||||
Params.push_back(WidenMap[SrcInst]);
|
Params.push_back(WidenMap[SrcInst]);
|
||||||
|
48
test/Transforms/LoopVectorize/scalar-store.ll
Normal file
48
test/Transforms/LoopVectorize/scalar-store.ll
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S -enable-if-conversion | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||||
|
target triple = "x86_64-apple-macosx10.9.0"
|
||||||
|
|
||||||
|
; When we scalarize a store, make sure that the addresses are extracted
|
||||||
|
; from a vector. We had a bug where the addresses were the old scalar
|
||||||
|
; addresses.
|
||||||
|
|
||||||
|
; CHECK: @foo
|
||||||
|
; CHECK: select
|
||||||
|
; CHECK: extractelement
|
||||||
|
; CHECK-NEXT: store
|
||||||
|
; CHECK: extractelement
|
||||||
|
; CHECK-NEXT: store
|
||||||
|
; CHECK: extractelement
|
||||||
|
; CHECK-NEXT: store
|
||||||
|
; CHECK: extractelement
|
||||||
|
; CHECK-NEXT: store
|
||||||
|
; CHECK: ret
|
||||||
|
define i32 @foo(i32* nocapture %a) nounwind uwtable ssp {
|
||||||
|
br label %1
|
||||||
|
|
||||||
|
; <label>:1 ; preds = %7, %0
|
||||||
|
%indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %7 ]
|
||||||
|
%2 = mul nsw i64 %indvars.iv, 7
|
||||||
|
%3 = getelementptr inbounds i32* %a, i64 %2
|
||||||
|
%4 = load i32* %3, align 4
|
||||||
|
%5 = icmp sgt i32 %4, 3
|
||||||
|
br i1 %5, label %6, label %7
|
||||||
|
|
||||||
|
; <label>:6 ; preds = %1
|
||||||
|
%tmp = add i32 %4, 4
|
||||||
|
%tmp1 = mul i32 %tmp, %4
|
||||||
|
br label %7
|
||||||
|
|
||||||
|
; <label>:7 ; preds = %6, %1
|
||||||
|
%x.0 = phi i32 [ %tmp1, %6 ], [ %4, %1 ]
|
||||||
|
%8 = add nsw i32 %x.0, 3
|
||||||
|
store i32 %8, i32* %3, align 4
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||||
|
%exitcond = icmp eq i32 %lftr.wideiv, 256
|
||||||
|
br i1 %exitcond, label %9, label %1
|
||||||
|
|
||||||
|
; <label>:9 ; preds = %7
|
||||||
|
ret i32 0
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user