From a16ecd430057db4513e3a891f4d639ea08555a9b Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Thu, 14 Apr 2016 17:42:47 +0000 Subject: [PATCH] [DivergenceAnalysis] Treat PHI with incoming undef as constant Summary: If a PHI has an incoming undef, we can pretend that it is equal to one non-undef, non-self incoming value. This is particularly relevant in combination with the StructurizeCFG pass, which introduces PHI nodes with undefs. Previously, this lead to branch conditions that were uniform before StructurizeCFG to become non-uniform afterwards, which confused the SIAnnotateControlFlow pass. This fixes a crash when Mesa radeonsi compiles a shader from dEQP-GLES3.functional.shaders.switch.switch_in_for_loop_dynamic_vertex Reviewers: arsenm, tstellarAMD, jingyue Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D19013 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@266347 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/Instructions.h | 5 +++ lib/Analysis/DivergenceAnalysis.cpp | 2 +- lib/IR/Instructions.cpp | 18 ++++++++ .../DivergenceAnalysis/AMDGPU/phi-undef.ll | 28 +++++++++++++ test/CodeGen/AMDGPU/branch-uniformity.ll | 41 +++++++++++++++++++ 5 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll create mode 100644 test/CodeGen/AMDGPU/branch-uniformity.ll diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index df3bb7158b8..7ff7900ad51 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -2693,6 +2693,11 @@ public: /// same value, return the value, otherwise return null. Value *hasConstantValue() const; + /// hasConstantOrUndefValue - Whether the specified PHI node always merges + /// together the same value, assuming undefs are equal to a unique + /// non-undef value. + bool hasConstantOrUndefValue() const; + /// Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == Instruction::PHI; diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp index 0e1cfcfe259..fd32b57c89e 100644 --- a/lib/Analysis/DivergenceAnalysis.cpp +++ b/lib/Analysis/DivergenceAnalysis.cpp @@ -146,7 +146,7 @@ void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) { for (auto I = IPostDom->begin(); isa(I); ++I) { // A PHINode is uniform if it returns the same value no matter which path is // taken. - if (!cast(I)->hasConstantValue() && DV.insert(&*I).second) + if (!cast(I)->hasConstantOrUndefValue() && DV.insert(&*I).second) Worklist.push_back(&*I); } diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index fcc9cc4ab2e..ac43eef78ef 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -154,6 +154,24 @@ Value *PHINode::hasConstantValue() const { return ConstantValue; } +/// hasConstantOrUndefValue - Whether the specified PHI node always merges +/// together the same value, assuming that undefs result in the same value as +/// non-undefs. +/// Unlike \ref hasConstantValue, this does not return a value because the +/// unique non-undef incoming value need not dominate the PHI node. +bool PHINode::hasConstantOrUndefValue() const { + Value *ConstantValue = nullptr; + for (unsigned i = 0, e = getNumIncomingValues(); i != e; ++i) { + Value *Incoming = getIncomingValue(i); + if (Incoming != this && !isa(Incoming)) { + if (ConstantValue && ConstantValue != Incoming) + return false; + ConstantValue = Incoming; + } + } + return true; +} + //===----------------------------------------------------------------------===// // LandingPadInst Implementation //===----------------------------------------------------------------------===// diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll new file mode 100644 index 00000000000..da9c3a9391b --- /dev/null +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll @@ -0,0 +1,28 @@ +; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s + +; CHECK-LABEL: 'test1': +; CHECK-NEXT: DIVERGENT: i32 %bound +; CHECK-NEXT: DIVERGENT: %break = icmp sge i32 %counter, %bound +; CHECK-NEXT: DIVERGENT: br i1 %break, label %footer, label %body +; CHECK-NEXT: DIVERGENT: br i1 %break, label %end, label %header +; Note: %counter is not divergent! +define amdgpu_ps void @test1(i32 %bound) { +entry: + br label %header + +header: + %counter = phi i32 [ 0, %entry ], [ %counter.footer, %footer ] + %break = icmp sge i32 %counter, %bound + br i1 %break, label %footer, label %body + +body: + %counter.next = add i32 %counter, 1 + br label %footer + +footer: + %counter.footer = phi i32 [ %counter.next, %body ], [ undef, %header ] + br i1 %break, label %end, label %header + +end: + ret void +} diff --git a/test/CodeGen/AMDGPU/branch-uniformity.ll b/test/CodeGen/AMDGPU/branch-uniformity.ll new file mode 100644 index 00000000000..d1a1f93f021 --- /dev/null +++ b/test/CodeGen/AMDGPU/branch-uniformity.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple=amdgcn-- < %s | FileCheck %s + +; The branch instruction in LOOP49 has a uniform condition, but PHI instructions +; introduced by the structurizecfg pass previously caused a false divergence +; which ended up in an assertion (or incorrect code) because +; SIAnnotateControlFlow and structurizecfg had different ideas about which +; branches are uniform. +; +; CHECK-LABEL: {{^}}main: +; CHECK: ; %LOOP49 +; CHECK: v_cmp_ne_i32_e32 vcc, +; CHECK: s_cbranch_vccnz +; CHECK: ; %ENDIF53 +define amdgpu_vs float @main(i32 %in) { +main_body: + %cmp = mul i32 %in, 2 + br label %LOOP + +LOOP: ; preds = %ENDLOOP48, %main_body + %counter = phi i32 [ 0, %main_body ], [ %counter.next, %ENDLOOP48 ] + %v.LOOP = phi i32 [ 0, %main_body ], [ %v.ENDLOOP48, %ENDLOOP48 ] + %tmp7 = icmp slt i32 %cmp, %counter + br i1 %tmp7, label %IF, label %LOOP49 + +IF: ; preds = %LOOP + %r = bitcast i32 %v.LOOP to float + ret float %r + +LOOP49: ; preds = %LOOP + %tmp8 = icmp ne i32 %counter, 0 + br i1 %tmp8, label %ENDLOOP48, label %ENDIF53 + +ENDLOOP48: ; preds = %ENDIF53, %LOOP49 + %v.ENDLOOP48 = phi i32 [ %v.LOOP, %LOOP49 ], [ %v.ENDIF53, %ENDIF53 ] + %counter.next = add i32 %counter, 1 + br label %LOOP + +ENDIF53: ; preds = %LOOP49 + %v.ENDIF53 = add i32 %v.LOOP, %counter + br label %ENDLOOP48 +}