Make physreg coalescing independent on the number of uses of the virtual register.

The damage done by physreg coalescing only depends on the number of instructions
the extended physreg live range covers. This fixes PR9438.

The heuristic is still luck-based, and physreg coalescing really should be
disabled completely. We need a register allocator with better hinting support
before that is possible.

Convert a test to FileCheck and force spilling by inserting an extra call. The
previous spilling behavior was dependent on misguided physreg coalescing
decisions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@127351 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jakob Stoklund Olesen 2011-03-09 19:27:06 +00:00
parent dda386c44d
commit 5d96e5a1cc
3 changed files with 37 additions and 6 deletions

View File

@ -1038,9 +1038,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg());
unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
if (Length > Threshold &&
std::distance(mri_->use_nodbg_begin(CP.getSrcReg()),
mri_->use_nodbg_end()) * Threshold < Length) {
if (Length > Threshold) {
// Before giving up coalescing, if definition of source is defined by
// trivial computation, try rematerializing it.
if (!CP.isFlipped() &&

View File

@ -0,0 +1,22 @@
; RUN: llc -mcpu=yonah < %s
; PR9438
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i386-unknown-freebsd9.0"
; The 'call fastcc' ties down %ebx, %ecx, and %edx.
; A MUL8r ties down %al, leaving no GR32_ABCD registers available.
; The coalescer can easily overallocate physical registers,
; and register allocation fails.
declare fastcc i8* @save_string(i8* %d, i8* nocapture %s) nounwind
define i32 @cvtchar(i8* nocapture %sp) nounwind {
%temp.i = alloca [2 x i8], align 1
%tmp1 = load i8* %sp, align 1
%div = udiv i8 %tmp1, 10
%rem = urem i8 %div, 10
%arrayidx.i = getelementptr inbounds [2 x i8]* %temp.i, i32 0, i32 0
store i8 %rem, i8* %arrayidx.i, align 1
%call.i = call fastcc i8* @save_string(i8* %sp, i8* %arrayidx.i) nounwind
ret i32 undef
}

View File

@ -1,10 +1,20 @@
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | not grep pcmpeqd
; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
; This testcase should need to spill the -1 value on x86-32,
; This testcase should need to spill the -1 value on both x86-32 and x86-64,
; so it shouldn't use pcmpeqd to materialize an all-ones vector; it
; should use a constant-pool load instead.
; Constant pool all-ones vector:
; CHECK: .long 4294967295
; CHECK-NEXT: .long 4294967295
; CHECK-NEXT: .long 4294967295
; CHECK-NEXT: .long 4294967295
; No pcmpeqd instructions, everybody uses the constant pool.
; CHECK: program_1:
; CHECK-NOT: pcmpeqd
%struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }>
%struct._cl_image_format_t = type <{ i32, i32, i32 }>
%struct._image2d_t = type <{ i8*, %struct._cl_image_format_t, i32, i32, i32, i32, i32, i32 }>
@ -57,6 +67,7 @@ forbody: ; preds = %forcond
%bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32> ; <<4 x i32>> [#uses=1]
%not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
%andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7 ; <<4 x i32>> [#uses=1]
call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
%orps.i9 = or <4 x i32> %andnps.i8, %andps.i5 ; <<4 x i32>> [#uses=1]
%bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float> ; <<4 x float>> [#uses=1]
%tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]