[SystemZ] Enable the use of MVC for frame-to-frame spills

...now that the problem that prompted the restriction has been fixed.

The original spill-02.py was a compromise because at the time I couldn't
find an example that actually failed without the two scavenging slots.
The version included here did.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185701 91177308-0d34-0410-b5e6-96231b3b80d8
Richard Sandiford 2013-07-05 14:02:01 +00:00
parent 457571ed69
commit cf1b5bd60a
2 changed files with 59 additions and 40 deletions


@@ -363,18 +363,11 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   // not valid in cases where the two memories partially overlap; however,
   // that is not a problem here, because we know that one of the memories
   // is a full frame index.
-  //
-  // For now we punt if the load or store is also to a frame index.
-  // In that case we might end up eliminating both of them to out-of-range
-  // offsets, which might then force the register scavenger to spill two
-  // other registers. The backend can only handle one such scavenger spill
-  // at a time.
   if (OpNum == 0 && MI->hasOneMemOperand()) {
     MachineMemOperand *MMO = *MI->memoperands_begin();
     if (MMO->getSize() == Size && !MMO->isVolatile()) {
       // Handle conversion of loads.
-      if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad) &&
-          !MI->getOperand(1).isFI()) {
+      if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad)) {
         uint64_t Offset = 0;
         MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset,
                                                   MachineMemOperand::MOStore);
@@ -384,8 +377,7 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
           .addMemOperand(FrameMMO).addMemOperand(MMO);
       }
       // Handle conversion of stores.
-      if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore) &&
-          !MI->getOperand(1).isFI()) {
+      if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore)) {
         uint64_t Offset = 0;
         MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset,
                                                   MachineMemOperand::MOLoad);
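
For readers less familiar with the target, the restriction that the removed comment describes comes down to addressing limits: MVC, like the other base-plus-12-bit-displacement forms, can only encode frame offsets 0 through 4095, so a slot beyond that needs a separately materialized base register, and a copy whose source and destination are both far frame slots could previously have asked the scavenger for two such registers at once. The sketch below is illustration only, not part of the commit; the 12-bit (0-4095) displacement limit is the one architectural fact assumed, and the helper name is invented.

# Illustration only: why an MVC between two far frame slots can need two
# scavenged base registers.  BD12 addressing encodes displacements 0..4095;
# anything above that needs an extra base register (the lay instructions
# the updated test checks for).
DISP_LIMIT = 4096

def needs_extra_base(offset):
    # True if the frame offset cannot be encoded as a 12-bit displacement.
    return not (0 <= offset < DISP_LIMIT)

# Offsets taken from the test below: the spilled %foo and the alloca area.
src_offset, dst_offset = 8184, 8192
extra_bases = sum(needs_extra_base(off) for off in (src_offset, dst_offset))
print("extra base registers needed: %d" % extra_bases)   # prints 2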


@@ -1,46 +1,73 @@
 # Test cases where we spill from one frame index to another, both of which
-# would be out of range of MVC.  At present we don't use MVC in this case.
+# are out of range of MVC, and both of which need emergency spill slots.
 # RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
-# There are 8 usable call-saved GPRs. The first 160 bytes of the frame
-# are needed for the ABI call frame, and a further 8 bytes are needed
-# for the emergency spill slot. That means we will have at least one
-# out-of-range slot if:
-#
-#   count == (4096 - 168) / 8 + 8 + 1 == 500
-#
-# Add in some extra just to be sure.
-#
 # CHECK: f1:
-# CHECK-NOT: mvc
+# CHECK: %fallthru
+# CHECK-DAG: stg [[REG1:%r[0-9]+]], 8168(%r15)
+# CHECK-DAG: stg [[REG2:%r[0-9]+]], 8176(%r15)
+# CHECK-DAG: lay [[REG3:%r[0-9]+]], 8192(%r15)
+# CHECK-DAG: lay [[REG4:%r[0-9]+]], 4096(%r15)
+# CHECK: mvc 0(8,[[REG3]]), 4088([[REG4]])
+# CHECK-DAG: lg [[REG1]], 8168(%r15)
+# CHECK-DAG: lg [[REG2]], 8176(%r15)
+# CHECK: %skip
 # CHECK: br %r14
-count = 510
-print 'declare void @foo(i64 *%base0, i64 *%base1)'
+# Arrange for %foo's spill slot to be at 8184(%r15) and the alloca area to be at
+# 8192(%r15). The two emergency spill slots live below that, so this requires
+# the first 8168 bytes to be used for the call. 160 of these bytes are
+# allocated for the ABI frame. There are also 5 argument registers, one of
+# which is used as a base pointer.
+args = (8168 - 160) / 8 + (5 - 1)
+print 'declare i64 *@foo(i64 *%s)' % (', i64' * args)
+print 'declare void @bar(i64 *)'
 print ''
-print 'define void @f1() {'
-for i in range(2):
-    print '  %%alloc%d = alloca [%d x i64]' % (i, count / 2)
-    print ('  %%base%d = getelementptr [%d x i64] * %%alloc%d, i64 0, i64 0'
-           % (i, count / 2, i))
-print '  call void @foo(i64 *%base0, i64 *%base1)'
+print 'define i64 @f1(i64 %foo) {'
+print 'entry:'
+# Make the allocation big, so that it goes at the top of the frame.
+print '  %array = alloca [1000 x i64]'
+print '  %area = getelementptr [1000 x i64] *%array, i64 0, i64 0'
+print '  %%base = call i64 *@foo(i64 *%%area%s)' % (', i64 0' * args)
 print ''
+# Make sure all GPRs are used. One is needed for the stack pointer and
+# another for %base, so we need 14 live values.
+count = 14
 for i in range(count):
-    print '  %%ptr%d = getelementptr i64 *%%base%d, i64 %d' % (i, i % 2, i / 2)
-    print '  %%val%d = load i64 *%%ptr%d' % (i, i)
+    print '  %%ptr%d = getelementptr i64 *%%base, i64 %d' % (i, i / 2)
+    print '  %%val%d = load volatile i64 *%%ptr%d' % (i, i)
 print ''
-print '  call void @foo(i64 *%base0, i64 *%base1)'
-print ''
+# Encourage the register allocator to give preference to these %vals
+# by using them several times.
+for j in range(4):
+    for i in range(count):
+        print '  store volatile i64 %%val%d, i64 *%%ptr%d' % (i, i)
+    print ''
-for i in range (count):
-    print '  store i64 %%val%d, i64 *%%ptr%d' % (i, i)
+# Copy the incoming argument, which we expect to be spilled, to the frame
+# index for the alloca area. Also throw in a volatile store, so that this
+# block cannot be reordered with the surrounding code.
+print '  %cond = icmp eq i64 %val0, %val1'
+print '  br i1 %cond, label %skip, label %fallthru'
+print ''
+print 'fallthru:'
+print '  store i64 %foo, i64 *%area'
+print '  store volatile i64 %val0, i64 *%ptr0'
+print '  br label %skip'
+print ''
+print 'skip:'
 print ''
-print '  call void @foo(i64 *%base0, i64 *%base1)'
-print ''
-print '  ret void'
+# Use each %val a few more times to emphasise the point, and to make sure
+# that they are live across the store of %foo.
+for j in range(4):
+    for i in range(count):
+        print '  store volatile i64 %%val%d, i64 *%%ptr%d' % (i, i)
+    print ''
+print '  call void @bar(i64 *%area)'
+print '  ret i64 0'
 print '}'
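
As a cross-check on the layout the new test's comments describe, the arithmetic can be spelled out directly. This is a standalone sketch rather than part of the commit, and it only restates numbers that already appear above; the variable names are invented.

# Recompute the frame layout that the updated spill-02.py relies on.
abi_frame = 160                  # fixed ABI call-frame area
call_bytes = 8168                # bytes the outgoing call must occupy
reg_args = 5                     # integer argument registers (per the comment above)

# One register argument carries the i64* area pointer, so only reg_args - 1
# i64s travel in registers; the rest go on the stack at 8 bytes each.
args = (call_bytes - abi_frame) // 8 + (reg_args - 1)
print("i64 arguments passed to @foo: %d" % args)           # 1005

# Above the call area, as the CHECK lines expect:
#   8168, 8176  - the two emergency spill slots (stg/lg [[REG1]]/[[REG2]])
#   8184        - the spill slot for the incoming %foo
#   8192        - the alloca area (%area)
# The mvc copies 8 bytes from 4096 + 4088 = 8184 (the spilled %foo) to
# 8192 + 0 = 8192 (%area): a frame-to-frame copy done with MVC.
print("mvc source offset: %d" % (4096 + 4088))             # 8184
print("mvc destination offset: %d" % (8192 + 0))           # 8192

# The removed comment's out-of-range threshold for the old version of the test:
print("old threshold: %d" % ((4096 - 168) // 8 + 8 + 1))   # 500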