mirror of
https://github.com/RPCS3/llvm.git
synced 2025-03-03 16:18:37 +00:00
[SLP vectorizer] Allow phi node reordering in tryToVectorizeList.
In tryToVectorizeList, under a very limited circumstance (when entered from tryToVectorizePair), the values may be reordered (swapped) and the SLP tree is built with the new order. This extends that to the case when starting from phis in vectorizeChainsInBlock when there are exactly two phis. The textual order of phi nodes shouldn't really matter. Without this change, the loop body in the accompanying test case is fully vectorized when we swap the order of the phis but not with this order. While this doesn't solve the phi-ordering problem in a general way (for more than 2 phis), this is a simple fix that piggybacks on an existing mechanism and is useful in cases like multiplying two complex numbers. Differential revision: https://reviews.llvm.org/D32065 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300574 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
248df79ddd
commit
6e087c5152
@ -4146,8 +4146,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
|
||||
if (AllowReorder && R.shouldReorder()) {
|
||||
// Conceptually, there is nothing actually preventing us from trying to
|
||||
// reorder a larger list. In fact, we do exactly this when vectorizing
|
||||
// reductions. However, at this point, we only expect to get here from
|
||||
// tryToVectorizePair().
|
||||
// reductions. However, at this point, we only expect to get here when
|
||||
// there are exactly two operations.
|
||||
assert(Ops.size() == 2);
|
||||
assert(BuildVectorSlice.empty());
|
||||
Value *ReorderedOps[] = {Ops[1], Ops[0]};
|
||||
@ -4904,7 +4904,13 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
|
||||
// Try to vectorize them.
|
||||
unsigned NumElts = (SameTypeIt - IncIt);
|
||||
DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n");
|
||||
if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R)) {
|
||||
// The order in which the phi nodes appear in the program does not matter.
|
||||
// So allow tryToVectorizeList to reorder them if it is beneficial. This
|
||||
// is done when there are exactly two elements since tryToVectorizeList
|
||||
// asserts that there are only two values when AllowReorder is true.
|
||||
bool AllowReorder = NumElts == 2;
|
||||
if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
|
||||
None, AllowReorder)) {
|
||||
// Success start over because instructions might have been changed.
|
||||
HaveVectorizedPhiNodes = true;
|
||||
Changed = true;
|
||||
|
54
test/Transforms/SLPVectorizer/X86/reorder_phi.ll
Normal file
54
test/Transforms/SLPVectorizer/X86/reorder_phi.ll
Normal file
@ -0,0 +1,54 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
%struct.complex = type { float, float }
|
||||
|
||||
; CHECK-LABEL: void @foo
|
||||
define void @foo (%struct.complex* %A, %struct.complex* %B, %struct.complex* %Result) {
|
||||
|
||||
entry:
|
||||
%0 = add i64 256, 0
|
||||
br label %loop
|
||||
|
||||
; CHECK-LABEL: loop
|
||||
; CHECK: [[REG0:%[0-9]+]] = phi <2 x float> {{.*}}[ [[REG1:%[0-9]+]], %loop ]
|
||||
; CHECK: [[REG2:%[0-9]+]] = load <2 x float>, <2 x float>*
|
||||
; CHECK: [[REG3:%[0-9]+]] = fmul <2 x float> [[REG2]]
|
||||
; CHECK: [[REG4:%[0-9]+]] = fmul <2 x float>
|
||||
; CHECK: fsub <2 x float> [[REG3]], [[REG4]]
|
||||
; CHECK: fadd <2 x float> [[REG3]], [[REG4]]
|
||||
; CHECK: shufflevector <2 x float>
|
||||
; CHECK: [[REG1]] = fadd <2 x float>{{.*}}[[REG0]]
|
||||
loop:
|
||||
|
||||
%1 = phi i64 [ 0, %entry ], [ %20, %loop ]
|
||||
%2 = phi float [ 0.000000e+00, %entry ], [ %19, %loop ]
|
||||
%3 = phi float [ 0.000000e+00, %entry ], [ %18, %loop ]
|
||||
%4 = getelementptr inbounds %"struct.complex", %"struct.complex"* %A, i64 %1, i32 0
|
||||
%5 = load float, float* %4, align 4
|
||||
%6 = getelementptr inbounds %"struct.complex", %"struct.complex"* %A, i64 %1, i32 1
|
||||
%7 = load float, float* %6, align 4
|
||||
%8 = getelementptr inbounds %"struct.complex", %"struct.complex"* %B, i64 %1, i32 0
|
||||
%9 = load float, float* %8, align 4
|
||||
%10 = getelementptr inbounds %"struct.complex", %"struct.complex"* %B, i64 %1, i32 1
|
||||
%11 = load float, float* %10, align 4
|
||||
%12 = fmul float %5, %9
|
||||
%13 = fmul float %7, %11
|
||||
%14 = fsub float %12, %13
|
||||
%15 = fmul float %7, %9
|
||||
%16 = fmul float %5, %11
|
||||
%17 = fadd float %15, %16
|
||||
%18 = fadd float %3, %14
|
||||
%19 = fadd float %2, %17
|
||||
%20 = add nuw nsw i64 %1, 1
|
||||
%21 = icmp eq i64 %20, %0
|
||||
br i1 %21, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
%22 = getelementptr inbounds %"struct.complex", %"struct.complex"* %Result, i32 0, i32 0
|
||||
store float %18, float* %22, align 4
|
||||
%23 = getelementptr inbounds %"struct.complex", %"struct.complex"* %Result, i32 0, i32 1
|
||||
store float %19, float* %23, align 4
|
||||
|
||||
ret void
|
||||
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user