This is a follow-up on r174446, now taking Atom processors into account.
Atoms use LEA for updating SP in prologs/epilogs, and the exact LEA
opcode depends on the data model.

Also reapplying the test case which was added and then reverted (because
of Atom failures), this time explicitly specifying the CPU in addition
to the triple. The test case now checks all variations (data model,
CPU: Atom vs. Core).


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174542 91177308-0d34-0410-b5e6-96231b3b80d8
Eli Bendersky, 2013-02-06 20:43:57 +00:00
commit 16221a60a0 (parent 4cc74fcba0)
2 changed files with 37 additions and 6 deletions
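
The commit message describes a 2x2 matrix: which instruction updates SP
is chosen per CPU (Atom prefers LEA, other cores use SUB/ADD), and the
opcode's bitness follows the data model (LP64 vs. x32/ILP32). A
standalone sketch of that selection, condensed from the emitSPUpdate
logic in the diff below (chooseSPUpdateOpcode and the plain enum are
invented for illustration; the real code returns X86:: opcode numbers):

    // Standalone sketch (not LLVM source) of the SP-update opcode choice.
    #include <cstdint>

    enum Opcode { LEA32r, LEA64r, SUB32ri8, SUB32ri, SUB64ri8, SUB64ri32,
                  ADD32ri8, ADD32ri, ADD64ri8, ADD64ri32 };

    static bool isInt8(int64_t Imm) { return Imm >= -128 && Imm <= 127; }

    // Atom prefers LEA for SP updates (LEA also leaves EFLAGS untouched);
    // other CPUs pick SUB/ADD, with the 8-bit-immediate form when the
    // offset fits. IsLP64 (not Is64Bit) selects the bitness, so x32 gets
    // the 32-bit opcodes even though the triple is 64-bit.
    static Opcode chooseSPUpdateOpcode(bool UseLEA, bool IsLP64, bool IsSub,
                                       int64_t Offset) {
      if (UseLEA)
        return IsLP64 ? LEA64r : LEA32r;
      if (IsSub)
        return IsLP64 ? (isInt8(Offset) ? SUB64ri8 : SUB64ri32)
                      : (isInt8(Offset) ? SUB32ri8 : SUB32ri);
      return IsLP64 ? (isInt8(Offset) ? ADD64ri8 : ADD64ri32)
                    : (isInt8(Offset) ? ADD32ri8 : ADD32ri);
    }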

lib/Target/X86/X86FrameLowering.cpp

@@ -67,8 +67,8 @@ static unsigned getSUBriOpcode(unsigned isLP64, int64_t Imm) {
   }
 }
 
-static unsigned getADDriOpcode(unsigned isLP64, int64_t Imm) {
-  if (isLP64) {
+static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
+  if (IsLP64) {
     if (isInt<8>(Imm))
       return X86::ADD64ri8;
     return X86::ADD64ri32;
@@ -79,8 +79,8 @@ static unsigned getADDriOpcode(unsigned isLP64, int64_t Imm) {
   }
 }
 
-static unsigned getLEArOpcode(unsigned is64Bit) {
-  return is64Bit ? X86::LEA64r : X86::LEA32r;
+static unsigned getLEArOpcode(unsigned IsLP64) {
+  return IsLP64 ? X86::LEA64r : X86::LEA32r;
 }
 
 /// findDeadCallerSavedReg - Return a caller-saved register that isn't live
@@ -151,7 +151,7 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
   uint64_t Offset = isSub ? -NumBytes : NumBytes;
   unsigned Opc;
   if (UseLEA)
-    Opc = getLEArOpcode(Is64Bit);
+    Opc = getLEArOpcode(IsLP64);
   else
     Opc = isSub
       ? getSUBriOpcode(IsLP64, Offset)
@@ -1083,7 +1083,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     if (RegInfo->needsStackRealignment(MF))
       MBBI = FirstCSPop;
     if (CSSize != 0) {
-      unsigned Opc = getLEArOpcode(Is64Bit);
+      unsigned Opc = getLEArOpcode(IsLP64);
       addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                    FramePtr, false, -CSSize);
     } else {
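
The epilogue hunk recomputes SP from the frame pointer in a single LEA
when callee-saved registers were pushed (CSSize != 0). Schematically,
for LP64 with an assumed CSSize of 8, the emitted instruction has the
form:

    leaq -8(%rbp), %rsp    # StackPtr = FramePtr - CSSize (LEA64r)

With the switch from Is64Bit to IsLP64, the x32/ILP32 variant of this
update now uses the 32-bit LEA (leal) instead.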


@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck -check-prefix=CORE_LP64 %s
+; RUN: llc -mtriple=x86_64-pc-linux -mcpu=atom < %s | FileCheck -check-prefix=ATOM_LP64 %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnux32 -mcpu=corei7 < %s | FileCheck -check-prefix=CORE_ILP32 %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnux32 -mcpu=atom < %s | FileCheck -check-prefix=ATOM_ILP32 %s
+
+define i32 @bar(i32 %a) nounwind {
+entry:
+  %arr = alloca [400 x i32], align 16
+; There is a 2x2 variation matrix here:
+; Atoms use LEA to update the SP. Opcode bitness depends on data model.
+; Cores use sub/add to update the SP. Opcode bitness depends on data model.
+; CORE_LP64: subq $1608
+; CORE_ILP32: subl $1608
+; ATOM_LP64: leaq -1608
+; ATOM_ILP32: leal -1608
+  %arraydecay = getelementptr inbounds [400 x i32]* %arr, i64 0, i64 0
+  %call = call i32 @foo(i32 %a, i32* %arraydecay) nounwind
+  ret i32 %call
+; CORE_LP64: addq $1608
+; CORE_ILP32: addl $1608
+; ATOM_LP64: leaq 1608
+; ATOM_ILP32: leal 1608
+}
+
+declare i32 @foo(i32, i32*)
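
Any one cell of the matrix can be reproduced by hand with the
corresponding RUN line; for example, the Atom/x32 case (<test>.ll is a
placeholder for the new test file):

    llc -mtriple=x86_64-pc-linux-gnux32 -mcpu=atom < <test>.ll | FileCheck -check-prefix=ATOM_ILP32 <test>.ll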