[X86] Optimization for replacing LEA with MOV at frame index elimination time

Summary:
Replace a LEA instruction of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'

MOV is preferable over LEA because usually there are more issue-slots available to execute MOVs than LEAs. Latest processors also support zero-latency MOVs.

Fixes pr29022.

Reviewers: hfinkel, delena, igorb, myatsina, mkuper

Differential Revision: https://reviews.llvm.org/D24705

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282385 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Zvi Rackover 2016-09-26 06:42:07 +00:00
parent 56b0418212
commit 9209299b97
18 changed files with 83 additions and 38 deletions

View File

@ -595,6 +595,35 @@ bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
llvm_unreachable("Unused function on X86. Otherwise need a test case.");
}
// tryOptimizeLEAtoMOV - Attempt to rewrite a LEA that merely copies its base
// register, i.e. 'lea (%esp), %ebx', into the equivalent 'mov %esp, %ebx'.
//
// Returns true when the LEA at II has been replaced by a register copy and
// erased from its block; returns false and leaves the instruction untouched
// when it does not have the required shape.
//
// TODO: The instruction is a plain copy, so ideally we would first try to
// eliminate it entirely rather than just turning it into a MOV.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &Lea = *II;
  const unsigned Opcode = Lea.getOpcode();

  // Only the register-destination LEA opcodes are candidates.
  switch (Opcode) {
  case X86::LEA32r:
  case X86::LEA64r:
  case X86::LEA64_32r:
    break;
  default:
    return false;
  }

  // The address must be exactly '(base)'. Operands 2-5 are presumably the
  // scale, index, displacement and segment of the X86 memory reference; they
  // must be 1, no register, 0 and no register respectively.
  if (Lea.getOperand(2).getImm() != 1)
    return false;
  if (Lea.getOperand(3).getReg() != X86::NoRegister)
    return false;
  if (Lea.getOperand(4).getImm() != 0)
    return false;
  if (Lea.getOperand(5).getReg() != X86::NoRegister)
    return false;

  MachineBasicBlock &MBB = *Lea.getParent();
  const bool KillSrc = Lea.getOperand(1).isKill();

  // In X32 mode, narrow the base pointer to its 32-bit sub-register so the
  // LEA is replaced by a 32-bit MOV, which zero-extends the upper 32 bits of
  // the super register.
  unsigned SrcReg = Lea.getOperand(1).getReg();
  if (Opcode == X86::LEA64_32r)
    SrcReg = getX86SubSuperRegister(SrcReg, 32);

  const unsigned DstReg = Lea.getOperand(0).getReg();
  const X86InstrInfo *TII =
      MBB.getParent()->getSubtarget<X86Subtarget>().getInstrInfo();

  // Emit the copy in front of the LEA, then drop the LEA itself.
  TII->copyPhysReg(MBB, II, Lea.getDebugLoc(), DstReg, SrcReg, KillSrc);
  Lea.eraseFromParent();
  return true;
}
void
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
@ -669,7 +698,8 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int Offset = FIOffset + Imm;
assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
"Requesting 64-bit offset in 32-bit immediate!");
MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
} else {
// Offset is symbolic. This is extremely rare.
uint64_t Offset = FIOffset +

View File

@ -25,7 +25,7 @@ declare i32 @func_int(i32, i32)
; X64-LABEL: testf16_inp
; X64: vaddps {{.*}}, {{%ymm[0-1]}}
; X64: vaddps {{.*}}, {{%ymm[0-1]}}
; X64: leaq {{.*}}(%rsp), %rdi
; X64: movq %rsp, %rdi
; X64: call
; X64: ret

View File

@ -22,7 +22,7 @@ declare i32 @func_int(i32, i32)
; X64-LABEL: testf16_inp
; X64: vaddps {{.*}}, {{%zmm[0-1]}}
; X64: leaq {{.*}}(%rsp), %rdi
; X64: movq %rsp, %rdi
; X64: call
; X64: ret

View File

@ -6,7 +6,7 @@
;
; CHECK: callq _Z3fooPcjPKc
; CHECK: callq _Z3fooPcjPKc
; CHECK: leaq (%rsp), %rdi
; CHECK: movq %rsp, %rdi
; CHECK: movl $4, %esi
; CHECK: testl {{%[a-z]+}}, {{%[a-z]+}}
; CHECK: je .LBB0_4

View File

@ -38,7 +38,7 @@ entry:
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: leaq {{[0-9]*}}(%rsp), %rdi
; CHECK: leaq {{[0-9]*}}(%rsp), %rsi
; CHECK: movq %rsp, %rsi
; CHECK: callq _t2_helper
;
; CHECK: movq %rbp, %rsp
@ -89,7 +89,7 @@ entry:
; CHECK: movq %rsp, %rbx
;
; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
; CHECK: leaq {{[0-9]*}}(%rbx), %rdx
; CHECK: movq %rbx, %rdx
; CHECK: callq _t4_helper
;
; CHECK: leaq -{{[0-9]+}}(%rbp), %rsp

View File

@ -414,7 +414,7 @@ define i8 @extractelement_v32i8_var(<32 x i8> %a, i256 %i) nounwind {
; SSE-NEXT: subq $64, %rsp
; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm0, (%rsp)
; SSE-NEXT: leaq (%rsp), %rax
; SSE-NEXT: movq %rsp, %rax
; SSE-NEXT: movb (%rdi,%rax), %al
; SSE-NEXT: movq %rbp, %rsp
; SSE-NEXT: popq %rbp
@ -427,7 +427,7 @@ define i8 @extractelement_v32i8_var(<32 x i8> %a, i256 %i) nounwind {
; AVX-NEXT: andq $-32, %rsp
; AVX-NEXT: subq $64, %rsp
; AVX-NEXT: vmovaps %ymm0, (%rsp)
; AVX-NEXT: leaq (%rsp), %rax
; AVX-NEXT: movq %rsp, %rax
; AVX-NEXT: movb (%rdi,%rax), %al
; AVX-NEXT: movq %rbp, %rsp
; AVX-NEXT: popq %rbp

View File

@ -254,7 +254,7 @@ entry:
call void @test20sret(%struct.a* sret %tmp)
ret void
; CHECK-LABEL: test20:
; CHECK: leaq (%rsp), %rdi
; CHECK: movq %rsp, %rdi
; CHECK: callq _test20sret
}
declare void @test20sret(%struct.a* sret)

View File

@ -83,7 +83,7 @@ entry:
ret void
; CHECK-LABEL: test4:
; CHECK: subl $28
; CHECK: leal (%esp), %ecx
; CHECK: movl %esp, %ecx
; CHECK: calll _test4fastccsret
; CHECK: addl $28
}

View File

@ -19,7 +19,7 @@ entry:
; CHECK-W64-LABEL: test1
; CHECK-W64: push
; CHECK-W64-NEXT: movq %rsp, %rbp
; CHECK-W64-NEXT: leaq (%rbp), %rax
; CHECK-W64-NEXT: movq %rbp, %rax
; CHECK-W64-NEXT: pop
; CHECK-W64-NEXT: ret
; CHECK-64-LABEL: test1
@ -54,7 +54,7 @@ entry:
; CHECK-W64-LABEL: test2
; CHECK-W64: push
; CHECK-W64-NEXT: movq %rsp, %rbp
; CHECK-W64-NEXT: leaq (%rbp), %rax
; CHECK-W64-NEXT: movq %rbp, %rax
; CHECK-W64-NEXT: pop
; CHECK-W64-NEXT: ret
; CHECK-64-LABEL: test2

View File

@ -21,7 +21,7 @@ define void @test1(i8* nocapture readonly %src, i32 %len) #0 {
%call1 = tail call <4 x float> @_mm_castsi128_ps(<2 x i64> %tmp0)
ret void
; CHECK-LABEL: test1:
; CHECK: leal{{.*}}
; CHECK: movl %esp,
; CHECK: calll _memcpy
; CHECK: movaps __xmm@{{[0-9a-f]+}}, %xmm1
; CHECK: calll __mm_xor_si128

View File

@ -67,7 +67,7 @@
; X64: callq check_a
; X64: callq bar1
; X64: callq bar1
; X64: leaq (%rsp), %rdi
; X64: movq %rsp, %rdi
; X64: callq check_f
; X64: callq bar1
; X64: callq bar3

View File

@ -0,0 +1,15 @@
; RUN: llc < %s -mcpu=skx -mtriple x86_64-unknown-linux-gnu -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mcpu=skx -mtriple=x86_64-linux-gnux32 -verify-machineinstrs | FileCheck %s --check-prefix=X32
define i32 @A() {
; CHECK: movq %rsp, %rdi
; CHECK-NEXT: call
; X32: movl %esp, %edi
; X32-NEXT: call
%alloc = alloca i32, align 8
%call = call i32 @foo(i32* %alloc)
ret i32 %call
}
declare i32 @foo(i32*)

View File

@ -27,7 +27,7 @@ declare <16 x float> @func_float16(<16 x float>, <16 x float>)
; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
; NOT_WIN: leaq {{.*}}(%rsp), %rdi
; NOT_WIN: movq %rsp, %rdi
; NOT_WIN: call
; NOT_WIN: ret

View File

@ -813,7 +813,7 @@ define i32 @test_MM_GET_EXCEPTION_MASK() nounwind {
; X32-LABEL: test_MM_GET_EXCEPTION_MASK:
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: leal (%esp), %eax
; X32-NEXT: movl %esp, %eax
; X32-NEXT: stmxcsr (%eax)
; X32-NEXT: movl (%esp), %eax
; X32-NEXT: andl $8064, %eax # imm = 0x1F80
@ -840,7 +840,7 @@ define i32 @test_MM_GET_EXCEPTION_STATE() nounwind {
; X32-LABEL: test_MM_GET_EXCEPTION_STATE:
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: leal (%esp), %eax
; X32-NEXT: movl %esp, %eax
; X32-NEXT: stmxcsr (%eax)
; X32-NEXT: movl (%esp), %eax
; X32-NEXT: andl $63, %eax
@ -866,7 +866,7 @@ define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind {
; X32-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: leal (%esp), %eax
; X32-NEXT: movl %esp, %eax
; X32-NEXT: stmxcsr (%eax)
; X32-NEXT: movl (%esp), %eax
; X32-NEXT: andl $32768, %eax # imm = 0x8000
@ -892,7 +892,7 @@ define i32 @test_MM_GET_ROUNDING_MODE() nounwind {
; X32-LABEL: test_MM_GET_ROUNDING_MODE:
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: leal (%esp), %eax
; X32-NEXT: movl %esp, %eax
; X32-NEXT: stmxcsr (%eax)
; X32-NEXT: movl (%esp), %eax
; X32-NEXT: andl $24576, %eax # imm = 0x6000
@ -918,7 +918,7 @@ define i32 @test_mm_getcsr() nounwind {
; X32-LABEL: test_mm_getcsr:
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: leal (%esp), %eax
; X32-NEXT: movl %esp, %eax
; X32-NEXT: stmxcsr (%eax)
; X32-NEXT: movl (%esp), %eax
; X32-NEXT: popl %ecx
@ -1427,7 +1427,7 @@ define void @test_MM_SET_EXCEPTION_MASK(i32 %a0) nounwind {
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: leal (%esp), %ecx
; X32-NEXT: movl %esp, %ecx
; X32-NEXT: stmxcsr (%ecx)
; X32-NEXT: movl (%esp), %edx
; X32-NEXT: andl $-8065, %edx # imm = 0xE07F
@ -1464,7 +1464,7 @@ define void @test_MM_SET_EXCEPTION_STATE(i32 %a0) nounwind {
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: leal (%esp), %ecx
; X32-NEXT: movl %esp, %ecx
; X32-NEXT: stmxcsr (%ecx)
; X32-NEXT: movl (%esp), %edx
; X32-NEXT: andl $-64, %edx
@ -1500,7 +1500,7 @@ define void @test_MM_SET_FLUSH_ZERO_MODE(i32 %a0) nounwind {
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: leal (%esp), %ecx
; X32-NEXT: movl %esp, %ecx
; X32-NEXT: stmxcsr (%ecx)
; X32-NEXT: movl (%esp), %edx
; X32-NEXT: andl $-32769, %edx # imm = 0xFFFF7FFF
@ -1580,7 +1580,7 @@ define void @test_MM_SET_ROUNDING_MODE(i32 %a0) nounwind {
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: leal (%esp), %ecx
; X32-NEXT: movl %esp, %ecx
; X32-NEXT: stmxcsr (%ecx)
; X32-NEXT: movl (%esp), %edx
; X32-NEXT: andl $-24577, %edx # imm = 0x9FFF
@ -1655,7 +1655,7 @@ define void @test_mm_setcsr(i32 %a0) nounwind {
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: leal (%esp), %ecx
; X32-NEXT: movl %esp, %ecx
; X32-NEXT: movl %eax, (%esp)
; X32-NEXT: ldmxcsr (%ecx)
; X32-NEXT: popl %eax

View File

@ -38,7 +38,7 @@ declare swiftcc { i16, i8 } @gen(i32)
; in memory. The caller provides space for the return value and passes
; the address in %rax. The first input argument will be in %rdi.
; CHECK-LABEL: test2:
; CHECK: leaq (%rsp), %rax
; CHECK: movq %rsp, %rax
; CHECK: callq gen2
; CHECK: movl (%rsp)
; CHECK-DAG: addl 4(%rsp)
@ -46,7 +46,7 @@ declare swiftcc { i16, i8 } @gen(i32)
; CHECK-DAG: addl 12(%rsp)
; CHECK-DAG: addl 16(%rsp)
; CHECK-O0-LABEL: test2:
; CHECK-O0-DAG: leaq (%rsp), %rax
; CHECK-O0-DAG: movq %rsp, %rax
; CHECK-O0: callq gen2
; CHECK-O0-DAG: movl (%rsp)
; CHECK-O0-DAG: movl 4(%rsp)

View File

@ -138,7 +138,7 @@ entry:
; The this pointer goes to ECX.
; (through %ecx in the -O0 build).
; WIN32: leal {{[0-9]*}}(%esp), %e{{[a-d]}}x
; WIN32: leal {{[0-9]*}}(%esp), %ecx
; WIN32: {{leal [1-9]+\(%esp\)|movl %esp}}, %ecx
; WIN32: {{pushl %e[a-d]x|movl %e[a-d]x, \(%esp\)}}
; WIN32-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ"
; WIN32: retl
@ -158,16 +158,16 @@ define void @test6_f(%struct.test6* %x) nounwind {
; The sret pointer is (%esp)
; WIN32: leal {{4?}}(%esp), %eax
; WIN32: {{leal 4\(%esp\)|movl %esp}}, %eax
; WIN32-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
; The sret pointer is %ecx
; The %x argument is moved to (%esp). It will be the this pointer.
; MINGW_X86: leal {{4?}}(%esp), %ecx
; MINGW_X86: {{leal 4\(%esp\)|movl %esp}}, %ecx
; MINGW_X86-NEXT: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}}
; MINGW_X86-NEXT: calll _test6_g
; CYGWIN: leal {{4?}}(%esp), %ecx
; CYGWIN: {{leal 4\(%esp\)|movl %esp}}, %ecx
; CYGWIN-NEXT: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}}
; CYGWIN-NEXT: calll _test6_g
@ -191,11 +191,11 @@ define void @test7_f(%struct.test7* %x) nounwind {
; CYGWIN: movl {{16|20}}(%esp), %ecx
; The sret pointer is (%esp)
; WIN32: leal {{4?}}(%esp), %eax
; WIN32: {{leal 4\(%esp\)|movl %esp}}, %eax
; WIN32-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
; MINGW_X86: leal {{4?}}(%esp), %eax
; MINGW_X86: {{leal 4\(%esp\)|movl %esp}}, %eax
; MINGW_X86-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
; CYGWIN: leal {{4?}}(%esp), %eax
; CYGWIN: {{leal 4\(%esp\)|movl %esp}}, %eax
; CYGWIN-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
%tmp = alloca %struct.test7, align 4

View File

@ -110,7 +110,7 @@ define i32 @f8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"="
%gep = getelementptr [300 x i8], [300 x i8]* %alloca, i32 0, i32 0
call void @external(i8* %gep)
; CHECK: subq $32, %rsp
; CHECK: leaq (%rbx), %rcx
; CHECK: movq %rbx, %rcx
; CHECK: callq external
; CHECK: addq $32, %rsp

View File

@ -170,7 +170,7 @@
; OBJ: PtrParent: 0x0
; OBJ: PtrEnd: 0x0
; OBJ: PtrNext: 0x0
; OBJ: CodeSize: 0x3D
; OBJ: CodeSize: 0x3C
; OBJ: DbgStart: 0x0
; OBJ: DbgEnd: 0x0
; OBJ: FunctionType: baz (0x1004)
@ -189,7 +189,7 @@
; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x8, LineOffset: 1}
; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x7, LineOffset: 1}
; OBJ-NEXT: ChangeLineOffset: 1
; OBJ-NEXT: ChangeCodeOffset: 0x1E
; OBJ-NEXT: ChangeCodeOffset: 0x1D
; OBJ-NEXT: ChangeCodeLength: 0x7
; OBJ: ]
; OBJ: }
@ -199,7 +199,7 @@
; OBJ: Inlinee: foo (0x1003)
; OBJ: BinaryAnnotations [
; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0xF, LineOffset: 1}
; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0xA, LineOffset: 1}
; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x9, LineOffset: 1}
; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x6, LineOffset: 1}
; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x7, LineOffset: 1}
; OBJ-NEXT: ChangeCodeLength: 0x7