From a7d9a6ee63bec70fecea79b85a30108ed3e8fabd Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Tue, 23 Apr 2013 17:12:42 +0000 Subject: [PATCH] LoopVectorizer: Fix 15830. When scalarizing and unrolling stores make sure that the order in which the elements are scalarized is the same as the original order. This fixes a miscompilation in FreeBSD's regex library. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180121 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 8 ++--- test/Transforms/LoopVectorize/bsd_regex.ll | 36 ++++++++++++++++++++++ 2 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 test/Transforms/LoopVectorize/bsd_regex.ll diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 162587c5659..0988a4032fd 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1085,10 +1085,10 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) { // Create a new entry in the WidenMap and initialize it to Undef or Null. VectorParts &VecResults = WidenMap.splat(Instr, UndefVec); - // For each scalar that we create: - for (unsigned Width = 0; Width < VF; ++Width) { - // For each vector unroll 'part': - for (unsigned Part = 0; Part < UF; ++Part) { + // For each vector unroll 'part': + for (unsigned Part = 0; Part < UF; ++Part) { + // For each scalar that we create: + for (unsigned Width = 0; Width < VF; ++Width) { Instruction *Cloned = Instr->clone(); if (!IsVoidRetTy) Cloned->setName(Instr->getName() + ".cloned"); diff --git a/test/Transforms/LoopVectorize/bsd_regex.ll b/test/Transforms/LoopVectorize/bsd_regex.ll new file mode 100644 index 00000000000..a2aef1c368f --- /dev/null +++ b/test/Transforms/LoopVectorize/bsd_regex.ll @@ -0,0 +1,36 @@ +; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=2 < %s | FileCheck %s + +;PR 15830. + +;CHECK: foo +; When scalarizing stores we need to preserve the original order. +; Make sure that we are extracting in the correct order (0101, and not 0011). +;CHECK: extractelement <2 x i64> {{.*}}, i32 0 +;CHECK: extractelement <2 x i64> {{.*}}, i32 1 +;CHECK: extractelement <2 x i64> {{.*}}, i32 0 +;CHECK: extractelement <2 x i64> {{.*}}, i32 1 +;CHECK: store +;CHECK: store +;CHECK: store +;CHECK: store +;CHECK: ret + +define i32 @foo(i32* nocapture %A) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %0 = shl nsw i64 %indvars.iv, 2 + %arrayidx = getelementptr inbounds i32* %A, i64 %0 + store i32 4, i32* %arrayidx, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 10000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 undef +} + +