mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-28 22:20:37 +00:00
Fix PR19657 (scalar loads not combined into vector load)
If we have common uses on separate paths in the tree, process the one with the greater common depth first. This makes sure that we do not assume we need to extract a load when it is actually going to be part of a vectorized tree. Review: http://reviews.llvm.org/D3800 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210310 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
dc4c7fb098
commit
b73a9b384e
@ -914,8 +914,20 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
||||
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
|
||||
ValueList Left, Right;
|
||||
reorderInputsAccordingToOpcode(VL, Left, Right);
|
||||
BasicBlock *LeftBB = getSameBlock(Left);
|
||||
BasicBlock *RightBB = getSameBlock(Right);
|
||||
// If we have common uses on separate paths in the tree make sure we
|
||||
// process the one with greater common depth first.
|
||||
// We can use block numbering to determine the subtree traversal as
|
||||
// earlier user has to come in between the common use and the later user.
|
||||
if (LeftBB && RightBB && LeftBB == RightBB &&
|
||||
getLastIndex(Right) > getLastIndex(Left)) {
|
||||
buildTree_rec(Right, Depth + 1);
|
||||
buildTree_rec(Left, Depth + 1);
|
||||
} else {
|
||||
buildTree_rec(Left, Depth + 1);
|
||||
buildTree_rec(Right, Depth + 1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
73
test/Transforms/SLPVectorizer/X86/pr19657.ll
Normal file
73
test/Transforms/SLPVectorizer/X86/pr19657.ll
Normal file
@ -0,0 +1,73 @@
|
||||
; RUN: opt < %s -O1 -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
;CHECK: load <2 x double>*
|
||||
;CHECK: fadd <2 x double>
|
||||
;CHECK: store <2 x double>
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
define void @foo(double* %x) #0 {
|
||||
%1 = alloca double*, align 8
|
||||
store double* %x, double** %1, align 8
|
||||
%2 = load double** %1, align 8
|
||||
%3 = getelementptr inbounds double* %2, i64 0
|
||||
%4 = load double* %3, align 8
|
||||
%5 = load double** %1, align 8
|
||||
%6 = getelementptr inbounds double* %5, i64 0
|
||||
%7 = load double* %6, align 8
|
||||
%8 = fadd double %4, %7
|
||||
%9 = load double** %1, align 8
|
||||
%10 = getelementptr inbounds double* %9, i64 0
|
||||
%11 = load double* %10, align 8
|
||||
%12 = fadd double %8, %11
|
||||
%13 = load double** %1, align 8
|
||||
%14 = getelementptr inbounds double* %13, i64 0
|
||||
store double %12, double* %14, align 8
|
||||
%15 = load double** %1, align 8
|
||||
%16 = getelementptr inbounds double* %15, i64 1
|
||||
%17 = load double* %16, align 8
|
||||
%18 = load double** %1, align 8
|
||||
%19 = getelementptr inbounds double* %18, i64 1
|
||||
%20 = load double* %19, align 8
|
||||
%21 = fadd double %17, %20
|
||||
%22 = load double** %1, align 8
|
||||
%23 = getelementptr inbounds double* %22, i64 1
|
||||
%24 = load double* %23, align 8
|
||||
%25 = fadd double %21, %24
|
||||
%26 = load double** %1, align 8
|
||||
%27 = getelementptr inbounds double* %26, i64 1
|
||||
store double %25, double* %27, align 8
|
||||
%28 = load double** %1, align 8
|
||||
%29 = getelementptr inbounds double* %28, i64 2
|
||||
%30 = load double* %29, align 8
|
||||
%31 = load double** %1, align 8
|
||||
%32 = getelementptr inbounds double* %31, i64 2
|
||||
%33 = load double* %32, align 8
|
||||
%34 = fadd double %30, %33
|
||||
%35 = load double** %1, align 8
|
||||
%36 = getelementptr inbounds double* %35, i64 2
|
||||
%37 = load double* %36, align 8
|
||||
%38 = fadd double %34, %37
|
||||
%39 = load double** %1, align 8
|
||||
%40 = getelementptr inbounds double* %39, i64 2
|
||||
store double %38, double* %40, align 8
|
||||
%41 = load double** %1, align 8
|
||||
%42 = getelementptr inbounds double* %41, i64 3
|
||||
%43 = load double* %42, align 8
|
||||
%44 = load double** %1, align 8
|
||||
%45 = getelementptr inbounds double* %44, i64 3
|
||||
%46 = load double* %45, align 8
|
||||
%47 = fadd double %43, %46
|
||||
%48 = load double** %1, align 8
|
||||
%49 = getelementptr inbounds double* %48, i64 3
|
||||
%50 = load double* %49, align 8
|
||||
%51 = fadd double %47, %50
|
||||
%52 = load double** %1, align 8
|
||||
%53 = getelementptr inbounds double* %52, i64 3
|
||||
store double %51, double* %53, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
Loading…
Reference in New Issue
Block a user