Fix PR19657 (scalar loads not combined into vector load)

If we have common uses on separate paths in the tree, process the one with the greater common depth first.
This makes sure that we do not assume we need to extract a load when it is actually going to be part of a vectorized tree.
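
For illustration, the source-level shape of the PR19657 pattern is roughly the following (an assumed reconstruction, not taken from the bug report verbatim; the regression test added below is essentially the unoptimized IR for such a function, and the repeated reads of x[i] are the scalar loads that were not being combined):

    void foo(double *x) {
      /* At -O0 each x[i] is reloaded through an alloca'd copy of the pointer;
         after this change the SLP vectorizer should combine the adjacent
         indices into <2 x double> loads, fadds, and stores. */
      x[0] = x[0] + x[0] + x[0];
      x[1] = x[1] + x[1] + x[1];
      x[2] = x[2] + x[2] + x[2];
      x[3] = x[3] + x[3] + x[3];
    }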

Review: http://reviews.llvm.org/D3800


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210310 91177308-0d34-0410-b5e6-96231b3b80d8
Karthik Bhat 2014-06-06 06:20:08 +00:00
parent dc4c7fb098
commit b73a9b384e
2 changed files with 87 additions and 2 deletions


@@ -914,8 +914,20 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
   if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
     ValueList Left, Right;
     reorderInputsAccordingToOpcode(VL, Left, Right);
-    buildTree_rec(Left, Depth + 1);
-    buildTree_rec(Right, Depth + 1);
+    BasicBlock *LeftBB = getSameBlock(Left);
+    BasicBlock *RightBB = getSameBlock(Right);
+    // If we have common uses on separate paths in the tree, make sure we
+    // process the one with greater common depth first.
+    // We can use block numbering to determine the subtree traversal as the
+    // earlier user has to come in between the common use and the later user.
+    if (LeftBB && RightBB && LeftBB == RightBB &&
+        getLastIndex(Right) > getLastIndex(Left)) {
+      buildTree_rec(Right, Depth + 1);
+      buildTree_rec(Left, Depth + 1);
+    } else {
+      buildTree_rec(Left, Depth + 1);
+      buildTree_rec(Right, Depth + 1);
+    }
     return;
   }
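
As a rough standalone model of the decision above (an illustrative assumption only, not the pass's real data structures; the actual code relies on BoUpSLP's getSameBlock() and getLastIndex() helpers shown in the diff): when both operand bundles live in the same basic block, the bundle whose last value is defined later in that block is recursed into first, so a load shared by both bundles is claimed by the subtree it actually belongs to rather than being treated as a value that must be extracted.

    #include <algorithm>
    #include <vector>

    // Hypothetical stand-in for an instruction: the basic block it lives in
    // and its position within that block's instruction numbering.
    struct Inst {
      int BlockID;
      int IndexInBlock;
    };
    using Bundle = std::vector<Inst>;

    // True if every value in the bundle is in one block (cf. getSameBlock).
    static bool sameBlock(const Bundle &B, int &BlockID) {
      if (B.empty())
        return false;
      BlockID = B.front().BlockID;
      for (const Inst &I : B)
        if (I.BlockID != BlockID)
          return false;
      return true;
    }

    // Position of the latest definition in the bundle (cf. getLastIndex).
    static int lastIndex(const Bundle &B) {
      int Last = 0;
      for (const Inst &I : B)
        Last = std::max(Last, I.IndexInBlock);
      return Last;
    }

    // Traversal rule from the patch: build the Right subtree first only when
    // both bundles share a block and Right's values end later than Left's.
    static bool visitRightFirst(const Bundle &Left, const Bundle &Right) {
      int LeftBB, RightBB;
      return sameBlock(Left, LeftBB) && sameBlock(Right, RightBB) &&
             LeftBB == RightBB && lastIndex(Right) > lastIndex(Left);
    }

    int main() {
      // Left's values end at position 5, Right's at position 9, same block:
      // the later (deeper) Right bundle is processed first.
      Bundle Left = {{0, 3}, {0, 5}};
      Bundle Right = {{0, 7}, {0, 9}};
      return visitRightFirst(Left, Right) ? 0 : 1;
    }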


@@ -0,0 +1,73 @@
; RUN: opt < %s -O1 -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
;CHECK: load <2 x double>*
;CHECK: fadd <2 x double>
;CHECK: store <2 x double>
; Function Attrs: nounwind uwtable
define void @foo(double* %x) #0 {
%1 = alloca double*, align 8
store double* %x, double** %1, align 8
%2 = load double** %1, align 8
%3 = getelementptr inbounds double* %2, i64 0
%4 = load double* %3, align 8
%5 = load double** %1, align 8
%6 = getelementptr inbounds double* %5, i64 0
%7 = load double* %6, align 8
%8 = fadd double %4, %7
%9 = load double** %1, align 8
%10 = getelementptr inbounds double* %9, i64 0
%11 = load double* %10, align 8
%12 = fadd double %8, %11
%13 = load double** %1, align 8
%14 = getelementptr inbounds double* %13, i64 0
store double %12, double* %14, align 8
%15 = load double** %1, align 8
%16 = getelementptr inbounds double* %15, i64 1
%17 = load double* %16, align 8
%18 = load double** %1, align 8
%19 = getelementptr inbounds double* %18, i64 1
%20 = load double* %19, align 8
%21 = fadd double %17, %20
%22 = load double** %1, align 8
%23 = getelementptr inbounds double* %22, i64 1
%24 = load double* %23, align 8
%25 = fadd double %21, %24
%26 = load double** %1, align 8
%27 = getelementptr inbounds double* %26, i64 1
store double %25, double* %27, align 8
%28 = load double** %1, align 8
%29 = getelementptr inbounds double* %28, i64 2
%30 = load double* %29, align 8
%31 = load double** %1, align 8
%32 = getelementptr inbounds double* %31, i64 2
%33 = load double* %32, align 8
%34 = fadd double %30, %33
%35 = load double** %1, align 8
%36 = getelementptr inbounds double* %35, i64 2
%37 = load double* %36, align 8
%38 = fadd double %34, %37
%39 = load double** %1, align 8
%40 = getelementptr inbounds double* %39, i64 2
store double %38, double* %40, align 8
%41 = load double** %1, align 8
%42 = getelementptr inbounds double* %41, i64 3
%43 = load double* %42, align 8
%44 = load double** %1, align 8
%45 = getelementptr inbounds double* %44, i64 3
%46 = load double* %45, align 8
%47 = fadd double %43, %46
%48 = load double** %1, align 8
%49 = getelementptr inbounds double* %48, i64 3
%50 = load double* %49, align 8
%51 = fadd double %47, %50
%52 = load double** %1, align 8
%53 = getelementptr inbounds double* %52, i64 3
store double %51, double* %53, align 8
ret void
}
attributes #0 = { nounwind }
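
The consecutive stores to offsets 0 through 3 give the SLP vectorizer its seed; with the traversal-order change above, the scalar loads feeding each fadd chain are recognized as part of the vectorizable tree rather than as values to extract, so opt's output contains the <2 x double> load, fadd, and store that the CHECK lines require.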