mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-16 23:19:37 +00:00
LoopVectorizer: Fix PR15830. When scalarizing and unrolling stores, make sure that the order in which the elements are scalarized is the same as the original order.
This fixes a miscompilation in FreeBSD's regex library.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180121 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3d7b39e7d4
commit
a7d9a6ee63
@ -1085,10 +1085,10 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
|
|||||||
// Create a new entry in the WidenMap and initialize it to Undef or Null.
|
// Create a new entry in the WidenMap and initialize it to Undef or Null.
|
||||||
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
|
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
|
||||||
|
|
||||||
// For each scalar that we create:
|
// For each vector unroll 'part':
|
||||||
for (unsigned Width = 0; Width < VF; ++Width) {
|
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||||
// For each vector unroll 'part':
|
// For each scalar that we create:
|
||||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
for (unsigned Width = 0; Width < VF; ++Width) {
|
||||||
Instruction *Cloned = Instr->clone();
|
Instruction *Cloned = Instr->clone();
|
||||||
if (!IsVoidRetTy)
|
if (!IsVoidRetTy)
|
||||||
Cloned->setName(Instr->getName() + ".cloned");
|
Cloned->setName(Instr->getName() + ".cloned");
|
||||||
|
36
test/Transforms/LoopVectorize/bsd_regex.ll
Normal file
36
test/Transforms/LoopVectorize/bsd_regex.ll
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=2 < %s | FileCheck %s
|
||||||
|
|
||||||
|
;PR 15830.
|
||||||
|
|
||||||
|
;CHECK: foo
|
||||||
|
; When scalarizing stores we need to preserve the original order.
|
||||||
|
; Make sure that we are extracting in the correct order (0101, and not 0011).
|
||||||
|
;CHECK: extractelement <2 x i64> {{.*}}, i32 0
|
||||||
|
;CHECK: extractelement <2 x i64> {{.*}}, i32 1
|
||||||
|
;CHECK: extractelement <2 x i64> {{.*}}, i32 0
|
||||||
|
;CHECK: extractelement <2 x i64> {{.*}}, i32 1
|
||||||
|
;CHECK: store
|
||||||
|
;CHECK: store
|
||||||
|
;CHECK: store
|
||||||
|
;CHECK: store
|
||||||
|
;CHECK: ret
|
||||||
|
|
||||||
|
define i32 @foo(i32* nocapture %A) {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%0 = shl nsw i64 %indvars.iv, 2
|
||||||
|
%arrayidx = getelementptr inbounds i32* %A, i64 %0
|
||||||
|
store i32 4, i32* %arrayidx, align 4
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||||
|
%exitcond = icmp eq i32 %lftr.wideiv, 10000
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret i32 undef
|
||||||
|
}
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user