[X86] Allow load folding into PUSH instructions

Adds pushes to the folding tables.
This also required a fix to the TD definition, since the memory forms of 
the push instructions did not have the right mayLoad/mayStore flags.

Differential Revision: http://reviews.llvm.org/D11340

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243010 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Michael Kuperstein 2015-07-23 12:23:45 +00:00
parent 7463ffc5f1
commit e31d7f83c5
4 changed files with 67 additions and 9 deletions

View File

@ -181,6 +181,11 @@ def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
// TODO: This feature ought to be renamed.
// What it really refers to are CPUs where instruction that cause MSROM
// lookups are expensive, so alternative sequences should be preferred.
// The best examples of this are the memory forms of CALL and PUSH
// instructions, which should be avoided in favor of a MOV + register CALL/PUSH.
def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
"CallRegIndirect", "true",
"Call register indirect">;

View File

@ -332,6 +332,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD },
{ X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE },
{ X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE },
{ X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD },
{ X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD },
{ X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD },
{ X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE },
{ X86::SETAr, X86::SETAm, TB_FOLDED_STORE },
{ X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE },
@ -4878,10 +4881,14 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
bool isCallRegIndirect = Subtarget.callRegIndirect();
bool isTwoAddrFold = false;
// For CPUs that favor the register form of a call,
// do not fold loads into calls.
if (isCallRegIndirect &&
(MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r))
// For CPUs that favor the register form of a call or push,
// do not fold loads into calls or pushes, unless optimizing for size
// aggressively.
if (isCallRegIndirect &&
!MF.getFunction()->hasFnAttribute(Attribute::MinSize) &&
(MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r ||
MI->getOpcode() == X86::PUSH16r || MI->getOpcode() == X86::PUSH32r ||
MI->getOpcode() == X86::PUSH64r))
return nullptr;
unsigned NumOps = MI->getDesc().getNumOperands();

View File

@ -1022,12 +1022,8 @@ def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>;
def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
IIC_PUSH_REG>, OpSize16;
def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[],
IIC_PUSH_MEM>, OpSize16;
def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>;
def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>;
def PUSH16i8 : Ii8<0x6a, RawFrm, (outs), (ins i16i8imm:$imm),
"push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16;
@ -1041,6 +1037,14 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
"push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32,
Requires<[Not64BitMode]>;
} // mayStore, SchedRW
let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[],
IIC_PUSH_MEM>, OpSize16;
def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>;
} // mayLoad, mayStore, SchedRW
}
let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, hasSideEffects=0,
@ -1073,9 +1077,11 @@ def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>;
def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>;
} // mayStore, SchedRW
let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [],
IIC_PUSH_MEM>, OpSize32, Requires<[In64BitMode]>;
} // mayStore, SchedRW
} // mayLoad, mayStore, SchedRW
}
let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1,

View File

@ -0,0 +1,40 @@
; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=CHECK -check-prefix=NORMAL
; RUN: llc < %s -mtriple=i686-windows -mattr=call-reg-indirect | FileCheck %s -check-prefix=CHECK -check-prefix=SLM
declare void @foo(i32 %r)
define void @test(i32 %a, i32 %b) optsize {
; CHECK-LABEL: test:
; CHECK: movl [[EAX:%e..]], (%esp)
; CHECK-NEXT: pushl [[EAX]]
; CHECK-NEXT: calll
; CHECK-NEXT: addl $4, %esp
; CHECK: nop
; NORMAL: pushl (%esp)
; SLM: movl (%esp), [[RELOAD:%e..]]
; SLM-NEXT: pushl [[RELOAD]]
; CHECK: calll
; CHECK-NEXT: addl $4, %esp
%c = add i32 %a, %b
call void @foo(i32 %c)
call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
call void @foo(i32 %c)
ret void
}
define void @test_min(i32 %a, i32 %b) minsize {
; CHECK-LABEL: test_min:
; CHECK: movl [[EAX:%e..]], (%esp)
; CHECK-NEXT: pushl [[EAX]]
; CHECK-NEXT: calll
; CHECK-NEXT: addl $4, %esp
; CHECK: nop
; CHECK: pushl (%esp)
; CHECK: calll
; CHECK-NEXT: addl $4, %esp
%c = add i32 %a, %b
call void @foo(i32 %c)
call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
call void @foo(i32 %c)
ret void
}