mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-26 20:57:15 +00:00
Don't fold indexed loads into TCRETURNmi64.
We don't have enough GR64_TC registers when calling a varargs function with 6 arguments. Since %al holds the number of vector registers used, only %r11 is available as a scratch register. This means that addressing modes using both base and index registers can't be folded into TCRETURNmi64. <rdar://problem/12282281> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163761 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
253353c9cf
commit
aa0cfea9a4
@ -204,6 +204,9 @@ namespace {
|
||||
bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
|
||||
SDValue &Scale, SDValue &Index, SDValue &Disp,
|
||||
SDValue &Segment);
|
||||
bool SelectSingleRegAddr(SDNode *Parent, SDValue N, SDValue &Base,
|
||||
SDValue &Scale, SDValue &Index, SDValue &Disp,
|
||||
SDValue &Segment);
|
||||
bool SelectLEAAddr(SDValue N, SDValue &Base,
|
||||
SDValue &Scale, SDValue &Index, SDValue &Disp,
|
||||
SDValue &Segment);
|
||||
@ -1319,6 +1322,31 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
|
||||
return true;
|
||||
}
|
||||
|
||||
/// SelectSingleRegAddr - Like SelectAddr, but reject any address that would
|
||||
/// require more than one allocatable register.
|
||||
///
|
||||
/// This is used for a TCRETURNmi64 instruction when used to tail call a
|
||||
/// variadic function with 6 arguments: Only %r11 is available from GR64_TC.
|
||||
/// The other scratch register, %rax, is needed to pass in the number of vector
|
||||
/// registers used in the variadic arguments.
|
||||
///
|
||||
bool X86DAGToDAGISel::SelectSingleRegAddr(SDNode *Parent, SDValue N,
|
||||
SDValue &Base,
|
||||
SDValue &Scale, SDValue &Index,
|
||||
SDValue &Disp, SDValue &Segment) {
|
||||
// Defer to SelectAddr for the actual match; if it rejects N, so do we.
if (!SelectAddr(Parent, N, Base, Scale, Index, Disp, Segment))
|
||||
return false;
|
||||
// A %RIP-relative address is accepted right away, without inspecting Index.
|
||||
if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Base))
|
||||
if (Reg->getReg() == X86::RIP)
|
||||
return true;
|
||||
// Otherwise accept only when Index is a register node with number 0 (presumably "no index register" -- confirm).
|
||||
if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Index))
|
||||
if (Reg->getReg() == 0)
|
||||
return true;
|
||||
// Any other index operand is rejected, so the caller will not fold this address.
return false;
|
||||
}
|
||||
|
||||
/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
|
||||
/// match a load whose top elements are either undef or zeros. The load flavor
|
||||
/// is derived from the type of N, which is either v4f32 or v2f64.
|
||||
|
@ -1041,7 +1041,13 @@ def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
|
||||
(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
|
||||
Requires<[In64BitMode]>;
|
||||
|
||||
def : Pat<(X86tcret (load addr:$dst), imm:$off),
|
||||
// When calling a variadic function with 6 arguments, 7 scratch registers are
|
||||
// needed since %al holds the number of vector registers used. That leaves %r11
|
||||
// as the only remaining GR64_TC register for the addressing mode.
|
||||
//
|
||||
// The single_reg_addr pattern rejects any addressing modes that would need
|
||||
// more than one register.
|
||||
def : Pat<(X86tcret (load single_reg_addr:$dst), imm:$off),
|
||||
(TCRETURNmi64 addr:$dst, imm:$off)>,
|
||||
Requires<[In64BitMode]>;
|
||||
|
||||
|
@ -543,6 +543,10 @@ def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
|
||||
def tls64baseaddr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
|
||||
[tglobaltlsaddr], []>;
|
||||
|
||||
// Same as addr, but reject addressing modes requiring more than one register.
|
||||
def single_reg_addr : ComplexPattern<iPTR, 5, "SelectSingleRegAddr", [],
|
||||
[SDNPWantParent]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 Instruction Predicate Definitions.
|
||||
def HasCMov : Predicate<"Subtarget->hasCMov()">;
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s | FileCheck %s
|
||||
; RUN: llc < %s -verify-machineinstrs | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-apple-darwin11.4.0"
|
||||
|
||||
@ -93,4 +93,38 @@ define { i64, i64 } @crash(i8* %this) {
|
||||
ret { i64, i64 } %mrv7
|
||||
}
|
||||
|
||||
; <rdar://problem/12282281> Do not fold an indexed load into the tail call instruction.
|
||||
; Calling a varargs function with 6 arguments requires 7 registers (%al is the
|
||||
; vector count for varargs functions). This leaves %r11 as the only available
|
||||
; scratch register.
|
||||
;
|
||||
; It is not possible to fold an indexed load into TCRETURNmi64 in that case.
|
||||
;
|
||||
; typedef int (*funcptr)(void*, ...);
|
||||
; extern const funcptr funcs[];
|
||||
; int f(int n) {
|
||||
; return funcs[n](0, 0, 0, 0, 0, 0);
|
||||
; }
|
||||
;
|
||||
; CHECK: rdar12282281
|
||||
; CHECK: jmpq *%r11 # TAILCALL
|
||||
@funcs = external constant [0 x i32 (i8*, ...)*]
|
||||
|
||||
define i32 @rdar12282281(i32 %n) nounwind uwtable ssp {
|
||||
entry:
|
||||
%idxprom = sext i32 %n to i64
|
||||
%arrayidx = getelementptr inbounds [0 x i32 (i8*, ...)*]* @funcs, i64 0, i64 %idxprom
|
||||
%0 = load i32 (i8*, ...)** %arrayidx, align 8
|
||||
%call = tail call i32 (i8*, ...)* %0(i8* null, i32 0, i32 0, i32 0, i32 0, i32 0) nounwind
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; Same thing, using a fixed offset. The load should fold.
|
||||
; CHECK: rdar12282281fixed
|
||||
; CHECK: jmpq *8(%r11) # TAILCALL
|
||||
define i32 @rdar12282281fixed() nounwind uwtable ssp {
|
||||
entry:
|
||||
%0 = load i32 (i8*, ...)** getelementptr inbounds ([0 x i32 (i8*, ...)*]* @funcs, i64 0, i64 1), align 8
|
||||
%call.i = tail call i32 (i8*, ...)* %0(i8* null, i32 0, i32 0, i32 0, i32 0, i32 0) nounwind
|
||||
ret i32 %call.i
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user