mirror of
https://github.com/RPCS3/llvm.git
synced 2024-11-27 21:50:29 +00:00
815b02e9ce
two fixes with one about error verify-regalloc reported, and another about live range update of phi after rematerialization. r265547: Replace analyzeSiblingValues with new algorithm to fix its compile time issue. The patch is to solve PR17409 and its duplicates. analyzeSiblingValues is an N x N complexity algorithm where N is the number of siblings generated by reg splitting. Although it causes a significant compile time issue when N is large, it is also important for performance since it removes redundant spills and enables rematerialization. To solve the compile time issue, the patch removes analyzeSiblingValues and replaces it with lower cost alternatives containing two parts. The first part creates a new spill hoisting method in postOptimization of register allocation. It does spill hoisting at once after all the spills are generated instead of inside every instance of selectOrSplit. The second part queries the define expr of the original register for rematerialization and keeps it always available during register allocation even if it is already dead. It deletes those dead instructions only in postOptimization. With the two parts in the patch, it can remove analyzeSiblingValues without sacrificing performance. Patches on top of r265547: r265610 "Fix the compare-clang diff error introduced by r265547." r265639 "Fix the sanitizer bootstrap error in r265547." r265657 "InlineSpiller.cpp: Escap \@ in r265547. [-Wdocumentation]" Differential Revision: http://reviews.llvm.org/D15302 Differential Revision: http://reviews.llvm.org/D18934 Differential Revision: http://reviews.llvm.org/D18935 Differential Revision: http://reviews.llvm.org/D18936 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@266162 91177308-0d34-0410-b5e6-96231b3b80d8
71 lines
3.0 KiB
LLVM
71 lines
3.0 KiB
LLVM
; RUN: llc -verify-regalloc < %s | FileCheck %s
|
|
; Check all spills are rematerialized.
|
|
; CHECK-NOT: Spill
|
|
|
|
; Target configuration: the module is compiled for x86-64 Linux.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
; @b is the double that every loop iteration in @uniform_testdata loads and
; passes as the first argument to @pow.
@b = common global double 0.000000e+00, align 8
|
|
; @a is the i32 that @uniform_testdata loads and returns on exit.
@a = common global i32 0, align 4
|
|
|
|
; i32 @uniform_testdata(i32 %p1)
;
; Register-allocation regression input. For %p1 > 0 it runs a remainder
; ("prol") loop of (%p1 & 7) iterations and then, unless (%p1 - 1) is below 7,
; a main loop whose index advances by 8 per iteration. Every iteration loads
; @b and calls @pow(@b, 0.25); the results of the calls are unused. The
; function always returns the value loaded from @a.
;
; NOTE(review): the shape looks like a reduced loop unrolled by 8 with a
; remainder prologue (names such as %xtraiter / %i.04.unr suggest this) --
; confirm against the original reproducer if it matters.
; NOTE(review): the printer annotation below says "nounwind uwtable", but the
; define carries no attributes or attribute-group reference.
; Function Attrs: nounwind uwtable
|
|
define i32 @uniform_testdata(i32 %p1) {
|
|
entry:
|
|
; Skip all loops when %p1 is not positive (signed compare).
%cmp3 = icmp sgt i32 %p1, 0
|
|
br i1 %cmp3, label %for.body.preheader, label %for.end
|
|
|
|
for.body.preheader: ; preds = %entry
|
|
; %tmp = %p1 - 1, used later to decide whether the main loop runs at all.
%tmp = add i32 %p1, -1
|
|
; %xtraiter = %p1 & 7: trip count of the remainder loop.
%xtraiter = and i32 %p1, 7
|
|
%lcmp.mod = icmp eq i32 %xtraiter, 0
|
|
; No remainder iterations -> go straight to the main-loop guard.
br i1 %lcmp.mod, label %for.body.preheader.split, label %for.body.prol.preheader
|
|
|
|
for.body.prol.preheader: ; preds = %for.body.preheader
|
|
br label %for.body.prol
|
|
|
|
; Remainder loop: %i.04.prol counts up from 0, %prol.iter counts down from
; %xtraiter; the loop exits when %prol.iter.sub reaches 0.
for.body.prol: ; preds = %for.body.prol, %for.body.prol.preheader
|
|
%i.04.prol = phi i32 [ %inc.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
|
|
%prol.iter = phi i32 [ %prol.iter.sub, %for.body.prol ], [ %xtraiter, %for.body.prol.preheader ]
|
|
%tmp1 = load double, double* @b, align 8
|
|
; Result is intentionally unused; only the call itself matters here.
%call.prol = tail call double @pow(double %tmp1, double 2.500000e-01)
|
|
%inc.prol = add nuw nsw i32 %i.04.prol, 1
|
|
%prol.iter.sub = add i32 %prol.iter, -1
|
|
%prol.iter.cmp = icmp eq i32 %prol.iter.sub, 0
|
|
br i1 %prol.iter.cmp, label %for.body.preheader.split.loopexit, label %for.body.prol
|
|
|
|
for.body.preheader.split.loopexit: ; preds = %for.body.prol
|
|
; Single-entry phi carrying the final remainder-loop index out of the loop.
%inc.prol.lcssa = phi i32 [ %inc.prol, %for.body.prol ]
|
|
br label %for.body.preheader.split
|
|
|
|
for.body.preheader.split: ; preds = %for.body.preheader.split.loopexit, %for.body.preheader
|
|
; Starting index for the main loop: 0 if the remainder loop was skipped,
; otherwise the count it reached.
%i.04.unr = phi i32 [ 0, %for.body.preheader ], [ %inc.prol.lcssa, %for.body.preheader.split.loopexit ]
|
|
; Unsigned compare: bypass the main loop when (%p1 - 1) is below 7.
%tmp2 = icmp ult i32 %tmp, 7
|
|
br i1 %tmp2, label %for.end.loopexit, label %for.body.preheader.split.split
|
|
|
|
for.body.preheader.split.split: ; preds = %for.body.preheader.split
|
|
br label %for.body
|
|
|
|
; Main loop: two load-@b / call-@pow pairs per iteration, index stepped by 8,
; exit when the stepped index equals %p1.
for.body: ; preds = %for.body, %for.body.preheader.split.split
|
|
%i.04 = phi i32 [ %i.04.unr, %for.body.preheader.split.split ], [ %inc.7, %for.body ]
|
|
%tmp3 = load double, double* @b, align 8
|
|
%call = tail call double @pow(double %tmp3, double 2.500000e-01)
|
|
%tmp4 = load double, double* @b, align 8
|
|
%call.1 = tail call double @pow(double %tmp4, double 2.500000e-01)
|
|
%inc.7 = add nsw i32 %i.04, 8
|
|
%exitcond.7 = icmp eq i32 %inc.7, %p1
|
|
br i1 %exitcond.7, label %for.end.loopexit.unr-lcssa, label %for.body
|
|
|
|
for.end.loopexit.unr-lcssa: ; preds = %for.body
|
|
br label %for.end.loopexit
|
|
|
|
for.end.loopexit: ; preds = %for.end.loopexit.unr-lcssa, %for.body.preheader.split
|
|
br label %for.end
|
|
|
|
; Common exit: the return value does not depend on the loops above.
for.end: ; preds = %for.end.loopexit, %entry
|
|
%tmp5 = load i32, i32* @a, align 4
|
|
ret i32 %tmp5
|
|
}
|
|
|
|
; Function Attrs: nounwind
; External function taking two doubles and returning a double; declared here
; without a body. @uniform_testdata calls it with @b and the constant 0.25.
; NOTE(review): presumably the C math library pow -- the IR itself only fixes
; the name and signature.
|
|
declare double @pow(double, double)
|