mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-27 08:26:23 +00:00
Implemented stack symbol table ordering/packing optimization to improve data locality and code size from SP/FP offset encoding.
Differential Revision: http://reviews.llvm.org/D15393 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260917 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ab96d57a0b
commit
5ed116b04b
@ -177,6 +177,11 @@ DisableTailCalls("disable-tail-calls",
|
||||
cl::desc("Never emit tail calls"),
|
||||
cl::init(false));
|
||||
|
||||
// -stack-symbol-ordering: boolean switch for ordering of local stack symbols.
// Initialized to true, so ordering is requested unless the flag is set to 0.
cl::opt<bool> StackSymbolOrdering("stack-symbol-ordering",
                                  cl::desc("Order local stack symbols."),
                                  cl::init(true));
|
||||
|
||||
cl::opt<unsigned>
|
||||
OverrideStackAlignment("stack-alignment",
|
||||
cl::desc("Override default stack alignment"),
|
||||
@ -284,6 +289,7 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() {
|
||||
Options.NoZerosInBSS = DontPlaceZerosInBSS;
|
||||
Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt;
|
||||
Options.StackAlignmentOverride = OverrideStackAlignment;
|
||||
Options.StackSymbolOrdering = StackSymbolOrdering;
|
||||
Options.PositionIndependentExecutable = EnablePIE;
|
||||
Options.UseInitArray = !UseCtors;
|
||||
Options.DataSections = DataSections;
|
||||
|
@ -288,6 +288,18 @@ public:
|
||||
"target!");
|
||||
}
|
||||
|
||||
|
||||
/// Order the symbols in the local stack frame.
|
||||
/// The list of objects that we want to order is in \p objectsToAllocate as
|
||||
/// indices into the MachineFrameInfo. The array can be reordered in any way
|
||||
/// upon return. The contents of the array, however, may not be modified (i.e.
|
||||
/// only their order may be changed).
|
||||
/// By default, just maintain the original order.
|
||||
virtual void
|
||||
orderFrameObjects(const MachineFunction &MF,
|
||||
SmallVectorImpl<int> &objectsToAllocate) const {
|
||||
}
|
||||
|
||||
/// Check whether or not the given \p MBB can be used as a prologue
|
||||
/// for the target.
|
||||
/// The prologue will be inserted first in this basic block.
|
||||
|
@ -97,6 +97,7 @@ namespace llvm {
|
||||
UnsafeFPMath(false), NoInfsFPMath(false), NoNaNsFPMath(false),
|
||||
HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false),
|
||||
GuaranteedTailCallOpt(false), StackAlignmentOverride(0),
|
||||
StackSymbolOrdering(true),
|
||||
EnableFastISel(false), PositionIndependentExecutable(false),
|
||||
UseInitArray(false), DisableIntegratedAS(false),
|
||||
CompressDebugSections(false), FunctionSections(false),
|
||||
@ -169,6 +170,12 @@ namespace llvm {
|
||||
/// StackAlignmentOverride - Override default stack alignment for target.
|
||||
unsigned StackAlignmentOverride;
|
||||
|
||||
/// StackSymbolOrdering - When true, this will allow CodeGen to order
|
||||
/// the local stack symbols (for code size, code locality, or any other
|
||||
/// heuristics). When false, the local symbols are left in whatever order
|
||||
/// they were generated. Default is true.
|
||||
unsigned StackSymbolOrdering : 1;
|
||||
|
||||
/// EnableFastISel - This flag enables fast-path instruction selection
|
||||
/// which trades away generated code quality in favor of reducing
|
||||
/// compile time.
|
||||
|
@ -707,8 +707,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
|
||||
Offset, MaxAlign, Skew);
|
||||
}
|
||||
|
||||
// Then assign frame offsets to stack objects that are not used to spill
|
||||
// callee saved registers.
|
||||
SmallVector<int, 8> ObjectsToAllocate;
|
||||
|
||||
// Then prepare to assign frame offsets to stack objects that are not used to
|
||||
// spill callee saved registers.
|
||||
for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
|
||||
if (MFI->isObjectPreAllocated(i) &&
|
||||
MFI->getUseLocalStackAllocationBlock())
|
||||
@ -724,8 +726,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
|
||||
if (ProtectedObjs.count(i))
|
||||
continue;
|
||||
|
||||
AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
|
||||
// Add the objects that we need to allocate to our working set.
|
||||
ObjectsToAllocate.push_back(i);
|
||||
}
|
||||
// Give the targets a chance to order the objects the way they like it.
|
||||
if (Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
|
||||
Fn.getTarget().Options.StackSymbolOrdering)
|
||||
TFI.orderFrameObjects(Fn, ObjectsToAllocate);
|
||||
|
||||
// Now walk the objects and actually assign base offsets to them.
|
||||
for (auto &Object : ObjectsToAllocate)
|
||||
AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew);
|
||||
|
||||
// Make sure the special register scavenging spill slot is closest to the
|
||||
// stack pointer.
|
||||
|
@ -2669,6 +2669,148 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
|
||||
return MBBI;
|
||||
}
|
||||
|
||||
namespace {
// Struct used by orderFrameObjects to help sort the stack objects.
// A default-constructed entry is "invalid" (IsValid == false), meaning the
// corresponding frame index is not one of the objects being ordered.
struct X86FrameSortingObject {
  bool IsValid = false;         // true if we care about this Object.
  unsigned ObjectIndex = 0;     // Index of Object into MFI list.
  unsigned ObjectSize = 0;      // Size of Object in bytes.
  unsigned ObjectAlignment = 1; // Alignment of Object in bytes.
  unsigned ObjectNumUses = 0;   // Object static number of uses.
};

// The comparison function we use for std::stable_sort to order our local
// stack symbols. The current algorithm is to use an estimated
// "density". This takes into consideration the size and number of
// uses each object has in order to roughly minimize code size.
// So, for example, an object of size 16B that is referenced 5 times
// will get higher priority than 4 4B objects referenced 1 time each.
// It's not perfect and we may be able to squeeze a few more bytes out of
// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
// fringe end can have special consideration, given their size is less
// important, etc.), but the algorithmic complexity grows too much to be
// worth the extra gains we get. This gets us pretty close.
// The final order leaves us with objects with highest priority going
// at the end of our list (followed only by the invalid entries).
//
// The comparator is stateless, so the call operator is const; this lets it
// be invoked through a const comparator object as well.
struct X86FrameSortingComparator {
  /// Returns true when \p A must be ordered before \p B.
  ///
  /// Ordering rules, in priority order:
  ///   1. Valid objects come before invalid ones; two invalid objects are
  ///      equivalent.
  ///   2. Lower density (ObjectNumUses / ObjectSize) comes first.
  ///   3. On equal density, lower alignment comes first.
  bool operator()(const X86FrameSortingObject &A,
                  const X86FrameSortingObject &B) const {
    // For consistency in our comparison, all invalid objects are placed
    // at the end. This also allows us to stop walking when we hit the
    // first invalid item after it's all sorted.
    if (!A.IsValid)
      return false;
    if (!B.IsValid)
      return true;

    // The density is calculated by doing :
    //     (double)DensityA = A.ObjectNumUses / A.ObjectSize
    //     (double)DensityB = B.ObjectNumUses / B.ObjectSize
    // Since this approach may cause inconsistencies in
    // the floating point <, >, == comparisons, depending on the floating
    // point model with which the compiler was built, we're going
    // to scale both sides by multiplying with
    // A.ObjectSize * B.ObjectSize. This ends up factoring away
    // the division and, with it, the need for any floating point
    // arithmetic. The operands are widened to 64 bits before multiplying
    // so the product of two 32-bit values cannot wrap.
    const uint64_t DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
                                    static_cast<uint64_t>(B.ObjectSize);
    const uint64_t DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
                                    static_cast<uint64_t>(A.ObjectSize);

    // If the two densities are equal, prioritize highest alignment
    // objects. This allows for similar alignment objects
    // to be packed together (given the same density).
    // There's room for improvement here, also, since we can pack
    // similar alignment (different density) objects next to each
    // other to save padding. This will also require further
    // complexity/iterations, and the overall gain isn't worth it,
    // in general. Something to keep in mind, though.
    if (DensityAScaled == DensityBScaled)
      return A.ObjectAlignment < B.ObjectAlignment;

    return DensityAScaled < DensityBScaled;
  }
};
} // namespace
|
||||
|
||||
// Order the symbols in the local stack.
|
||||
// We want to place the local stack objects in some sort of sensible order.
|
||||
// The heuristic we use is to try and pack them according to static number
|
||||
// of uses and size of object in order to minimize code size.
|
||||
void X86FrameLowering::orderFrameObjects(
|
||||
const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
|
||||
// Don't waste time if there's nothing to do.
|
||||
if (ObjectsToAllocate.empty())
|
||||
return;
|
||||
|
||||
// Create an array of all MFI objects. We won't need all of these
|
||||
// objects, but we're going to create a full array of them to make
|
||||
// it easier to index into when we're counting "uses" down below.
|
||||
// We want to be able to easily/cheaply access an object by simply
|
||||
// indexing into it, instead of having to search for it every time.
|
||||
std::vector<X86FrameSortingObject> SortingObjects(MFI->getObjectIndexEnd());
|
||||
|
||||
// Walk the objects we care about and mark them as such in our working
|
||||
// struct.
|
||||
for (auto &Obj : ObjectsToAllocate) {
|
||||
SortingObjects[Obj].IsValid = true;
|
||||
SortingObjects[Obj].ObjectIndex = Obj;
|
||||
SortingObjects[Obj].ObjectAlignment = MFI->getObjectAlignment(Obj);
|
||||
// Set the size.
|
||||
int ObjectSize = MFI->getObjectSize(Obj);
|
||||
if (ObjectSize == 0)
|
||||
// Variable size. Just use 4.
|
||||
SortingObjects[Obj].ObjectSize = 4;
|
||||
else
|
||||
SortingObjects[Obj].ObjectSize = ObjectSize;
|
||||
}
|
||||
|
||||
// Count the number of uses for each object.
|
||||
for (auto &MBB : MF) {
|
||||
for (auto &MI : MBB) {
|
||||
for (const MachineOperand &MO : MI.operands()) {
|
||||
// Check to see if it's a local stack symbol.
|
||||
if (!MO.isFI())
|
||||
continue;
|
||||
int Index = MO.getIndex();
|
||||
// Check to see if it falls within our range, and is tagged
|
||||
// to require ordering.
|
||||
if (Index >= 0 && Index < MFI->getObjectIndexEnd() &&
|
||||
SortingObjects[Index].IsValid)
|
||||
SortingObjects[Index].ObjectNumUses++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort the objects using X86FrameSortingAlgorithm (see its comment for
|
||||
// info).
|
||||
std::stable_sort(SortingObjects.begin(), SortingObjects.end(),
|
||||
X86FrameSortingComparator());
|
||||
|
||||
// Now modify the original list to represent the final order that
|
||||
// we want. The order will depend on whether we're going to access them
|
||||
// from the stack pointer or the frame pointer. For SP, the list should
|
||||
// end up with the END containing objects that we want with smaller offsets.
|
||||
// For FP, it should be flipped.
|
||||
int i = 0;
|
||||
for (auto &Obj : SortingObjects) {
|
||||
// All invalid items are sorted at the end, so it's safe to stop.
|
||||
if (!Obj.IsValid)
|
||||
break;
|
||||
ObjectsToAllocate[i++] = Obj.ObjectIndex;
|
||||
}
|
||||
|
||||
// Flip it if we're accessing off of the FP.
|
||||
if (!TRI->needsStackRealignment(MF) && hasFP(MF))
|
||||
std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
|
||||
}
|
||||
|
||||
|
||||
unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
|
||||
// RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
|
||||
unsigned Offset = 16;
|
||||
|
@ -137,6 +137,13 @@ public:
|
||||
/// Returns true if the target will correctly handle shrink wrapping.
|
||||
bool enableShrinkWrapping(const MachineFunction &MF) const override;
|
||||
|
||||
/// Order the symbols in the local stack.
|
||||
/// We want to place the local stack objects in some sort of sensible order.
|
||||
/// The heuristic we use is to try and pack them according to static number
|
||||
/// of uses and size in order to minimize code size.
|
||||
void orderFrameObjects(const MachineFunction &MF,
|
||||
SmallVectorImpl<int> &ObjectsToAllocate) const override;
|
||||
|
||||
/// convertArgMovsToPushes - This method tries to convert a call sequence
|
||||
/// that uses sub and mov instructions to put the argument onto the stack
|
||||
/// into a series of pushes.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc < %s
|
||||
; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc < %s -stack-symbol-ordering=0 -march=x86-64 -verify-machineinstrs | FileCheck %s
|
||||
; PR3538
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
||||
target triple = "i386-apple-darwin9"
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -stack-symbol-ordering=0 | FileCheck %s -check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -stack-symbol-ordering=0 | FileCheck %s -check-prefix=X32
|
||||
|
||||
%struct.Baz = type { [17 x i8] }
|
||||
%struct.__va_list_tag = type { i32, i32, i8*, i8* }
|
||||
|
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
|
||||
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
|
||||
; RUN: llc < %s -stack-symbol-ordering=0 -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
|
||||
; RUN: llc < %s -stack-symbol-ordering=0 -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
|
||||
|
||||
define i16 @mask16(i16 %x) {
|
||||
; CHECK-LABEL: mask16:
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -mtriple=i686-pc-windows-msvc < %s | FileCheck --check-prefix=X86 %s
|
||||
; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck --check-prefix=X64 %s
|
||||
; RUN: llc -mtriple=i686-pc-windows-msvc -stack-symbol-ordering=0 < %s | FileCheck --check-prefix=X86 %s
|
||||
; RUN: llc -mtriple=x86_64-pc-windows-msvc -stack-symbol-ordering=0 < %s | FileCheck --check-prefix=X64 %s
|
||||
|
||||
declare i32 @__CxxFrameHandler3(...)
|
||||
declare void @Dtor(i64* %o)
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc < %s -mcpu=generic -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
|
||||
; RUN: llc < %s -mcpu=generic -stackrealign -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s -check-prefix=FORCE-ALIGN
|
||||
; RUN: llc < %s -stack-symbol-ordering=0 -mcpu=generic -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
|
||||
; RUN: llc < %s -stack-symbol-ordering=0 -mcpu=generic -stackrealign -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s -check-prefix=FORCE-ALIGN
|
||||
; rdar://11496434
|
||||
|
||||
; no VLAs or dynamic alignment
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -tailcallopt -code-model=medium -stack-alignment=4 -mtriple=i686-linux-gnu -mcpu=pentium | FileCheck %s
|
||||
; RUN: llc < %s -stack-symbol-ordering=0 -tailcallopt -code-model=medium -stack-alignment=4 -mtriple=i686-linux-gnu -mcpu=pentium | FileCheck %s
|
||||
|
||||
; Check the HiPE calling convention works (x86-32)
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -tailcallopt -code-model=medium -stack-alignment=8 -mtriple=x86_64-linux-gnu -mcpu=opteron | FileCheck %s
|
||||
; RUN: llc < %s -stack-symbol-ordering=0 -tailcallopt -code-model=medium -stack-alignment=8 -mtriple=x86_64-linux-gnu -mcpu=opteron | FileCheck %s
|
||||
|
||||
; Check the HiPE calling convention works (x86-64)
|
||||
|
||||
|
184
test/CodeGen/X86/local_stack_symbol_ordering.ll
Normal file
184
test/CodeGen/X86/local_stack_symbol_ordering.ll
Normal file
@ -0,0 +1,184 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s -check-prefix=X32
|
||||
|
||||
; CHECK-LABEL: foo
|
||||
|
||||
; Check the functionality of the local stack symbol table ordering
|
||||
; heuristics.
|
||||
; The test has a bunch of locals of various sizes that are referenced a
|
||||
; different number of times.
|
||||
;
|
||||
; a : 120B, 9 uses, density = 0.075
|
||||
; aa : 4000B, 1 use, density = 0.00025
|
||||
; b : 4B, 1 use, density = 0.25
|
||||
; cc : 4000B, 2 uses density = 0.0005
|
||||
; d : 4B, 2 uses density = 0.5
|
||||
; e : 4B, 3 uses density = 0.75
|
||||
; f : 4B, 4 uses density = 1
|
||||
;
|
||||
; Given the size, number of uses and calculated density (uses / size), we're
|
||||
; going to hope that f gets allocated closest to the stack pointer,
|
||||
; followed by e, d, b, then a (to check for just a few).
|
||||
; We use gnu-inline asm between calls to prevent registerization of addresses
|
||||
; so that we get exact counts.
|
||||
;
|
||||
; The test is taken from something like this:
|
||||
; void foo()
|
||||
; {
|
||||
; int f; // 4 uses. 4 / 4 = 1
|
||||
; int a[30]; // 9 uses. 9 / 120 = 0.075
|
||||
; int aa[1000]; // 1 use. 1 / 4000 = 0.00025
|
||||
; int e; // 3 uses. 3 / 4 = 0.75
|
||||
; int cc[1000]; // 2 uses. 2 / 4000 = 0.0005
|
||||
; int b; // 1 use. 1 / 4 = 0.25
|
||||
; int d; // 2 uses. 2 / 4 = 0.5
|
||||
; int aaa[1000]; // 2 uses. 2 / 4000 = 0.0005
|
||||
;
|
||||
;
|
||||
; check_a(&a);
|
||||
; bar1(&aaa);
|
||||
; asm ("":::"esi","edi","ebp","ebx","rbx","r12","r13","r14","r15","rbp");
|
||||
; bar1(&a);
|
||||
; check_f(&f);
|
||||
; asm ("":::"esi","edi","ebp","ebx","rbx","r12","r13","r14","r15","rbp");
|
||||
; bar1(&a);
|
||||
; bar3(&aa, &aaa, &cc);
|
||||
; asm ("":::"esi","edi","ebp","ebx","rbx","r12","r13","r14","r15","rbp");
|
||||
; bar2(&a,&cc);
|
||||
; check_b(&b);
|
||||
; asm ("":::"esi","edi","ebp","ebx","rbx","r12","r13","r14","r15","rbp");
|
||||
; bar1(&a);
|
||||
; asm ("":::"esi","edi","ebp","ebx","rbx","r12","r13","r14","r15","rbp");
|
||||
; bar2(&a, &f);
|
||||
; check_e(&e);
|
||||
; asm ("":::"esi","edi","ebp","ebx","rbx","r12","r13","r14","r15","rbp");
|
||||
; bar1(&a);
|
||||
; asm ("":::"esi","edi","ebp","ebx","rbx","r12","r13","r14","r15","rbp");
|
||||
; bar2(&e, &f);
|
||||
; check_d(&d);
|
||||
; bar1(&a);
|
||||
; asm ("":::"esi","edi","ebp","ebx","rbx","r12","r13","r14","r15","rbp");
|
||||
; bar3(&d, &e, &f);
|
||||
; asm ("":::"esi","edi","ebp","ebx","rbx","r12","r13","r14","r15","rbp");
|
||||
; bar1(&a);
|
||||
; }
|
||||
;
|
||||
; X64: leaq 16(%rsp), %rdi
|
||||
; X64: callq check_a
|
||||
; X64: callq bar1
|
||||
; X64: callq bar1
|
||||
; X64: leaq (%rsp), %rdi
|
||||
; X64: callq check_f
|
||||
; X64: callq bar1
|
||||
; X64: callq bar3
|
||||
; X64: callq bar2
|
||||
; X64: leaq 12(%rsp), %rdi
|
||||
; X64: callq check_b
|
||||
; X64: callq bar1
|
||||
; X64: callq bar2
|
||||
; X64: leaq 4(%rsp), %rdi
|
||||
; X64: callq check_e
|
||||
; X64: callq bar1
|
||||
; X64: callq bar2
|
||||
; X64: leaq 8(%rsp), %rdi
|
||||
; X64: callq check_d
|
||||
|
||||
; X32: leal 32(%esp)
|
||||
; X32: calll check_a
|
||||
; X32: calll bar1
|
||||
; X32: calll bar1
|
||||
; X32: leal 16(%esp)
|
||||
; X32: calll check_f
|
||||
; X32: calll bar1
|
||||
; X32: calll bar3
|
||||
; X32: calll bar2
|
||||
; X32: leal 28(%esp)
|
||||
; X32: calll check_b
|
||||
; X32: calll bar1
|
||||
; X32: calll bar2
|
||||
; X32: leal 20(%esp)
|
||||
; X32: calll check_e
|
||||
; X32: calll bar1
|
||||
; X32: calll bar2
|
||||
; X32: leal 24(%esp)
|
||||
; X32: calll check_d
|
||||
|
||||
|
||||
define void @foo() nounwind uwtable {
|
||||
entry:
|
||||
%f = alloca i32, align 4
|
||||
%a = alloca [30 x i32], align 16
|
||||
%aa = alloca [1000 x i32], align 16
|
||||
%e = alloca i32, align 4
|
||||
%cc = alloca [1000 x i32], align 16
|
||||
%b = alloca i32, align 4
|
||||
%d = alloca i32, align 4
|
||||
%aaa = alloca [1000 x i32], align 16
|
||||
%0 = bitcast i32* %f to i8*
|
||||
call void @llvm.lifetime.start(i64 4, i8* %0) #1
|
||||
%1 = bitcast [30 x i32]* %a to i8*
|
||||
call void @llvm.lifetime.start(i64 120, i8* %1) #1
|
||||
%2 = bitcast [1000 x i32]* %aa to i8*
|
||||
call void @llvm.lifetime.start(i64 4000, i8* %2) #1
|
||||
%3 = bitcast i32* %e to i8*
|
||||
call void @llvm.lifetime.start(i64 4, i8* %3) #1
|
||||
%4 = bitcast [1000 x i32]* %cc to i8*
|
||||
call void @llvm.lifetime.start(i64 4000, i8* %4) #1
|
||||
%5 = bitcast i32* %b to i8*
|
||||
call void @llvm.lifetime.start(i64 4, i8* %5) #1
|
||||
%6 = bitcast i32* %d to i8*
|
||||
call void @llvm.lifetime.start(i64 4, i8* %6) #1
|
||||
%7 = bitcast [1000 x i32]* %aaa to i8*
|
||||
call void @llvm.lifetime.start(i64 4000, i8* %7) #1
|
||||
%call = call i32 ([30 x i32]*, ...) bitcast (i32 (...)* @check_a to i32 ([30 x i32]*, ...)*)([30 x i32]* %a)
|
||||
%call1 = call i32 ([1000 x i32]*, ...) bitcast (i32 (...)* @bar1 to i32 ([1000 x i32]*, ...)*)([1000 x i32]* %aaa)
|
||||
call void asm sideeffect "", "~{esi},~{edi},~{ebp},~{ebx},~{rbx},~{r12},~{r13},~{r14},~{r15},~{rbp},~{dirflag},~{fpsr},~{flags}"() #1
|
||||
%call2 = call i32 ([30 x i32]*, ...) bitcast (i32 (...)* @bar1 to i32 ([30 x i32]*, ...)*)([30 x i32]* %a)
|
||||
%call3 = call i32 (i32*, ...) bitcast (i32 (...)* @check_f to i32 (i32*, ...)*)(i32* %f)
|
||||
call void asm sideeffect "", "~{esi},~{edi},~{ebp},~{ebx},~{rbx},~{r12},~{r13},~{r14},~{r15},~{rbp},~{dirflag},~{fpsr},~{flags}"() #1
|
||||
%call4 = call i32 ([30 x i32]*, ...) bitcast (i32 (...)* @bar1 to i32 ([30 x i32]*, ...)*)([30 x i32]* %a)
|
||||
%call5 = call i32 ([1000 x i32]*, [1000 x i32]*, [1000 x i32]*, ...) bitcast (i32 (...)* @bar3 to i32 ([1000 x i32]*, [1000 x i32]*, [1000 x i32]*, ...)*)([1000 x i32]* %aa, [1000 x i32]* %aaa, [1000 x i32]* %cc)
|
||||
call void asm sideeffect "", "~{esi},~{edi},~{ebp},~{ebx},~{rbx},~{r12},~{r13},~{r14},~{r15},~{rbp},~{dirflag},~{fpsr},~{flags}"() #1
|
||||
%call6 = call i32 ([30 x i32]*, [1000 x i32]*, ...) bitcast (i32 (...)* @bar2 to i32 ([30 x i32]*, [1000 x i32]*, ...)*)([30 x i32]* %a, [1000 x i32]* %cc)
|
||||
%call7 = call i32 (i32*, ...) bitcast (i32 (...)* @check_b to i32 (i32*, ...)*)(i32* %b)
|
||||
call void asm sideeffect "", "~{esi},~{edi},~{ebp},~{ebx},~{rbx},~{r12},~{r13},~{r14},~{r15},~{rbp},~{dirflag},~{fpsr},~{flags}"() #1
|
||||
%call8 = call i32 ([30 x i32]*, ...) bitcast (i32 (...)* @bar1 to i32 ([30 x i32]*, ...)*)([30 x i32]* %a)
|
||||
call void asm sideeffect "", "~{esi},~{edi},~{ebp},~{ebx},~{rbx},~{r12},~{r13},~{r14},~{r15},~{rbp},~{dirflag},~{fpsr},~{flags}"() #1
|
||||
%call9 = call i32 ([30 x i32]*, i32*, ...) bitcast (i32 (...)* @bar2 to i32 ([30 x i32]*, i32*, ...)*)([30 x i32]* %a, i32* %f)
|
||||
%call10 = call i32 (i32*, ...) bitcast (i32 (...)* @check_e to i32 (i32*, ...)*)(i32* %e)
|
||||
call void asm sideeffect "", "~{esi},~{edi},~{ebp},~{ebx},~{rbx},~{r12},~{r13},~{r14},~{r15},~{rbp},~{dirflag},~{fpsr},~{flags}"() #1
|
||||
%call11 = call i32 ([30 x i32]*, ...) bitcast (i32 (...)* @bar1 to i32 ([30 x i32]*, ...)*)([30 x i32]* %a)
|
||||
call void asm sideeffect "", "~{esi},~{edi},~{ebp},~{ebx},~{rbx},~{r12},~{r13},~{r14},~{r15},~{rbp},~{dirflag},~{fpsr},~{flags}"() #1
|
||||
%call12 = call i32 (i32*, i32*, ...) bitcast (i32 (...)* @bar2 to i32 (i32*, i32*, ...)*)(i32* %e, i32* %f)
|
||||
%call13 = call i32 (i32*, ...) bitcast (i32 (...)* @check_d to i32 (i32*, ...)*)(i32* %d)
|
||||
%call14 = call i32 ([30 x i32]*, ...) bitcast (i32 (...)* @bar1 to i32 ([30 x i32]*, ...)*)([30 x i32]* %a)
|
||||
call void asm sideeffect "", "~{esi},~{edi},~{ebp},~{ebx},~{rbx},~{r12},~{r13},~{r14},~{r15},~{rbp},~{dirflag},~{fpsr},~{flags}"() #1
|
||||
%call15 = call i32 (i32*, i32*, i32*, ...) bitcast (i32 (...)* @bar3 to i32 (i32*, i32*, i32*, ...)*)(i32* %d, i32* %e, i32* %f)
|
||||
call void asm sideeffect "", "~{esi},~{edi},~{ebp},~{ebx},~{rbx},~{r12},~{r13},~{r14},~{r15},~{rbp},~{dirflag},~{fpsr},~{flags}"() #1
|
||||
%call16 = call i32 ([30 x i32]*, ...) bitcast (i32 (...)* @bar1 to i32 ([30 x i32]*, ...)*)([30 x i32]* %a)
|
||||
call void @llvm.lifetime.end(i64 4000, i8* %7) #1
|
||||
call void @llvm.lifetime.end(i64 4, i8* %6) #1
|
||||
call void @llvm.lifetime.end(i64 4, i8* %5) #1
|
||||
call void @llvm.lifetime.end(i64 4000, i8* %4) #1
|
||||
call void @llvm.lifetime.end(i64 4, i8* %3) #1
|
||||
call void @llvm.lifetime.end(i64 4000, i8* %2) #1
|
||||
call void @llvm.lifetime.end(i64 120, i8* %1) #1
|
||||
call void @llvm.lifetime.end(i64 4, i8* %0) #1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.lifetime.start(i64, i8* nocapture) #1
|
||||
|
||||
declare i32 @check_a(...) #2
|
||||
declare i32 @bar1(...) #2
|
||||
declare i32 @check_f(...) #2
|
||||
declare i32 @bar3(...) #2
|
||||
declare i32 @bar2(...) #2
|
||||
declare i32 @check_b(...) #2
|
||||
declare i32 @check_e(...) #2
|
||||
declare i32 @check_d(...) #2
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.lifetime.end(i64, i8* nocapture) #1
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast -optimize-regalloc=0 | FileCheck %s
|
||||
; RUN: llc -O0 < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast | FileCheck %s
|
||||
; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=atom -regalloc=fast -optimize-regalloc=0 | FileCheck -check-prefix=ATOM %s
|
||||
; RUN: llc < %s -stack-symbol-ordering=0 -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast -optimize-regalloc=0 | FileCheck %s
|
||||
; RUN: llc -O0 < %s -stack-symbol-ordering=0 -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast | FileCheck %s
|
||||
; RUN: llc < %s -stack-symbol-ordering=0 -march=x86 -mtriple=i386-apple-darwin9 -mcpu=atom -regalloc=fast -optimize-regalloc=0 | FileCheck -check-prefix=ATOM %s
|
||||
; CHECKed instructions should be the same with or without -O0 except on Intel Atom due to instruction scheduling.
|
||||
|
||||
@.str = private constant [12 x i8] c"x + y = %i\0A\00", align 1 ; <[12 x i8]*> [#uses=1]
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=i686-windows-msvc < %s | FileCheck %s
|
||||
; RUN: llc -stack-symbol-ordering=0 -mtriple=i686-windows-msvc < %s | FileCheck %s
|
||||
|
||||
; 32-bit catch-all has to use a filter function because that's how it saves the
|
||||
; exception code.
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=i686-windows-msvc < %s | FileCheck %s
|
||||
; RUN: llc -stack-symbol-ordering=0 -mtriple=i686-windows-msvc < %s | FileCheck %s
|
||||
|
||||
; 32-bit catch-all has to use a filter function because that's how it saves the
|
||||
; exception code.
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -disable-fp-elim -mtriple=x86_64-pc-linux-gnu -mcpu=corei7 -o - | FileCheck %s
|
||||
; RUN: llc < %s -stack-symbol-ordering=0 -disable-fp-elim -mtriple=x86_64-pc-linux-gnu -mcpu=corei7 -o - | FileCheck %s
|
||||
; This test is fairly fragile. The goal is to ensure that "large" stack
|
||||
; objects are allocated closest to the stack protector (i.e., farthest away
|
||||
; from the Stack Pointer.) In standard SSP mode this means that large (>=
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s | FileCheck %s
|
||||
; RUN: llc -stack-symbol-ordering=0 < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-pc-linux-gnu"
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc < %s -mtriple="x86_64-pc-linux-gnu" | FileCheck %s
|
||||
; RUN: llc < %s -mtriple="x86_64-pc-unknown-elf" | FileCheck %s
|
||||
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple="x86_64-pc-linux-gnu" | FileCheck %s
|
||||
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple="x86_64-pc-unknown-elf" | FileCheck %s
|
||||
|
||||
; This test is a sanity check to ensure statepoints are generating StackMap
|
||||
; sections correctly. This is not intended to be a rigorous test of the
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mcpu=core-avx -debug-only=stackmaps < %s | FileCheck %s
|
||||
; RUN: llc -stack-symbol-ordering=0 -mcpu=core-avx -debug-only=stackmaps < %s | FileCheck %s
|
||||
; REQUIRES: asserts
|
||||
|
||||
target triple = "x86_64-pc-linux-gnu"
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
|
||||
; RUN: llc -stack-symbol-ordering=0 < %s -mtriple=x86_64-linux | FileCheck %s
|
||||
|
||||
%struct.__va_list_tag = type { i32, i32, i8*, i8* }
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc %s -o - -march=x86-64 -mattr=-avx -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=SSE
|
||||
; RUN: llc %s -o - -march=x86-64 -mattr=+avx -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=AVX
|
||||
; RUN: llc -stack-symbol-ordering=0 %s -o - -march=x86-64 -mattr=-avx -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=SSE
|
||||
; RUN: llc -stack-symbol-ordering=0 %s -o - -march=x86-64 -mattr=+avx -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=AVX
|
||||
; PR4891
|
||||
; PR5626
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc -mtriple=i686-windows-msvc < %s | FileCheck %s --check-prefix=X86
|
||||
; RUN: llc -stack-symbol-ordering=0 -mtriple=x86_64-windows-msvc < %s | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc -stack-symbol-ordering=0 -mtriple=i686-windows-msvc < %s | FileCheck %s --check-prefix=X86
|
||||
|
||||
declare void @llvm.va_start(i8*)
|
||||
declare void @llvm.va_end(i8*)
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -verify-machineinstrs -mtriple=i686-pc-windows-msvc < %s | FileCheck --check-prefix=X86 %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=x86_64-pc-windows-msvc < %s | FileCheck --check-prefix=X64 %s
|
||||
; RUN: llc -stack-symbol-ordering=0 -verify-machineinstrs -mtriple=i686-pc-windows-msvc < %s | FileCheck --check-prefix=X86 %s
|
||||
; RUN: llc -stack-symbol-ordering=0 -verify-machineinstrs -mtriple=x86_64-pc-windows-msvc < %s | FileCheck --check-prefix=X64 %s
|
||||
|
||||
; Loosely based on IR for this C++ source code:
|
||||
; void f(int p);
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s | FileCheck %s
|
||||
; RUN: llc -stack-symbol-ordering=0 < %s | FileCheck %s
|
||||
|
||||
; The aligned alloca means that we have to realign the stack, which forces the
|
||||
; use of ESI to address local variables.
|
||||
|
Loading…
x
Reference in New Issue
Block a user