[SystemZ] Enable the use of MVC for frame-to-frame spills

...now that the problem that prompted the restriction has been fixed.

The original spill-02.py was a compromise because at the time I couldn't
find an example that actually failed without the two scavenging slots.
The version included here did.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185701 91177308-0d34-0410-b5e6-96231b3b80d8
Richard Sandiford 2013-07-05 14:02:01 +00:00
parent 457571ed69
commit cf1b5bd60a
2 changed files with 59 additions and 40 deletions


@@ -363,18 +363,11 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   // not valid in cases where the two memories partially overlap; however,
   // that is not a problem here, because we know that one of the memories
   // is a full frame index.
-  //
-  // For now we punt if the load or store is also to a frame index.
-  // In that case we might end up eliminating both of them to out-of-range
-  // offsets, which might then force the register scavenger to spill two
-  // other registers. The backend can only handle one such scavenger spill
-  // at a time.
   if (OpNum == 0 && MI->hasOneMemOperand()) {
     MachineMemOperand *MMO = *MI->memoperands_begin();
     if (MMO->getSize() == Size && !MMO->isVolatile()) {
       // Handle conversion of loads.
-      if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad) &&
-          !MI->getOperand(1).isFI()) {
+      if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad)) {
         uint64_t Offset = 0;
         MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset,
                                                   MachineMemOperand::MOStore);
@@ -384,8 +377,7 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
           .addMemOperand(FrameMMO).addMemOperand(MMO);
       }
       // Handle conversion of stores.
-      if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore) &&
-          !MI->getOperand(1).isFI()) {
+      if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore)) {
         uint64_t Offset = 0;
         MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset,
                                                   MachineMemOperand::MOLoad);
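
For readers less familiar with the target, the restriction that the removed comment describes comes down to addressing limits: MVC, like the other base-plus-12-bit-displacement forms, can only encode frame offsets 0 through 4095, so a slot beyond that needs a separately materialized base register, and a copy whose source and destination are both far frame slots could previously have asked the scavenger for two such registers at once. The sketch below is illustration only, not part of the commit; the 12-bit (0-4095) displacement limit is the one architectural fact assumed, and the helper name is invented.

# Illustration only: why an MVC between two far frame slots can need two
# scavenged base registers.  BD12 addressing encodes displacements 0..4095;
# anything above that needs an extra base register (the lay instructions
# the updated test checks for).
DISP_LIMIT = 4096

def needs_extra_base(offset):
    # True if the frame offset cannot be encoded as a 12-bit displacement.
    return not (0 <= offset < DISP_LIMIT)

# Offsets taken from the test below: the spilled %foo and the alloca area.
src_offset, dst_offset = 8184, 8192
extra_bases = sum(needs_extra_base(off) for off in (src_offset, dst_offset))
print("extra base registers needed: %d" % extra_bases)   # prints 2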


@@ -1,46 +1,73 @@
 # Test cases where we spill from one frame index to another, both of which
-# would be out of range of MVC.  At present we don't use MVC in this case.
+# are out of range of MVC, and both of which need emergency spill slots.
 # RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
-# There are 8 usable call-saved GPRs. The first 160 bytes of the frame
-# are needed for the ABI call frame, and a further 8 bytes are needed
-# for the emergency spill slot. That means we will have at least one
-# out-of-range slot if:
-#
-#   count == (4096 - 168) / 8 + 8 + 1 == 500
-#
-# Add in some extra just to be sure.
-#
 # CHECK: f1:
-# CHECK-NOT: mvc
+# CHECK: %fallthru
+# CHECK-DAG: stg [[REG1:%r[0-9]+]], 8168(%r15)
+# CHECK-DAG: stg [[REG2:%r[0-9]+]], 8176(%r15)
+# CHECK-DAG: lay [[REG3:%r[0-9]+]], 8192(%r15)
+# CHECK-DAG: lay [[REG4:%r[0-9]+]], 4096(%r15)
+# CHECK: mvc 0(8,[[REG3]]), 4088([[REG4]])
+# CHECK-DAG: lg [[REG1]], 8168(%r15)
+# CHECK-DAG: lg [[REG2]], 8176(%r15)
+# CHECK: %skip
 # CHECK: br %r14
-count = 510
-print 'declare void @foo(i64 *%base0, i64 *%base1)'
+# Arrange for %foo's spill slot to be at 8184(%r15) and the alloca area to be at
+# 8192(%r15). The two emergency spill slots live below that, so this requires
+# the first 8168 bytes to be used for the call. 160 of these bytes are
+# allocated for the ABI frame. There are also 5 argument registers, one of
+# which is used as a base pointer.
+args = (8168 - 160) / 8 + (5 - 1)
+print 'declare i64 *@foo(i64 *%s)' % (', i64' * args)
+print 'declare void @bar(i64 *)'
 print ''
-print 'define void @f1() {'
-for i in range(2):
-    print '  %%alloc%d = alloca [%d x i64]' % (i, count / 2)
-    print ('  %%base%d = getelementptr [%d x i64] * %%alloc%d, i64 0, i64 0'
-           % (i, count / 2, i))
-print '  call void @foo(i64 *%base0, i64 *%base1)'
+print 'define i64 @f1(i64 %foo) {'
+print 'entry:'
+# Make the allocation big, so that it goes at the top of the frame.
+print '  %array = alloca [1000 x i64]'
+print '  %area = getelementptr [1000 x i64] *%array, i64 0, i64 0'
+print '  %%base = call i64 *@foo(i64 *%%area%s)' % (', i64 0' * args)
 print ''
+# Make sure all GPRs are used. One is needed for the stack pointer and
+# another for %base, so we need 14 live values.
+count = 14
 for i in range(count):
-    print '  %%ptr%d = getelementptr i64 *%%base%d, i64 %d' % (i, i % 2, i / 2)
-    print '  %%val%d = load i64 *%%ptr%d' % (i, i)
+    print '  %%ptr%d = getelementptr i64 *%%base, i64 %d' % (i, i / 2)
+    print '  %%val%d = load volatile i64 *%%ptr%d' % (i, i)
 print ''
-print '  call void @foo(i64 *%base0, i64 *%base1)'
-print ''
+# Encourage the register allocator to give preference to these %vals
+# by using them several times.
+for j in range(4):
+    for i in range(count):
+        print '  store volatile i64 %%val%d, i64 *%%ptr%d' % (i, i)
+    print ''
-for i in range (count):
-    print '  store i64 %%val%d, i64 *%%ptr%d' % (i, i)
+# Copy the incoming argument, which we expect to be spilled, to the frame
+# index for the alloca area. Also throw in a volatile store, so that this
+# block cannot be reordered with the surrounding code.
+print '  %cond = icmp eq i64 %val0, %val1'
+print '  br i1 %cond, label %skip, label %fallthru'
+print ''
+print 'fallthru:'
+print '  store i64 %foo, i64 *%area'
+print '  store volatile i64 %val0, i64 *%ptr0'
+print '  br label %skip'
+print ''
+print 'skip:'
 print ''
-print '  call void @foo(i64 *%base0, i64 *%base1)'
-print ''
-print '  ret void'
+# Use each %val a few more times to emphasise the point, and to make sure
+# that they are live across the store of %foo.
+for j in range(4):
+    for i in range(count):
+        print '  store volatile i64 %%val%d, i64 *%%ptr%d' % (i, i)
+    print ''
+print '  call void @bar(i64 *%area)'
+print '  ret i64 0'
 print '}'
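
As a cross-check on the layout the new test's comments describe, the arithmetic can be spelled out directly. This is a standalone sketch rather than part of the commit, and it only restates numbers that already appear above; the variable names are invented.

# Recompute the frame layout that the updated spill-02.py relies on.
abi_frame = 160                  # fixed ABI call-frame area
call_bytes = 8168                # bytes the outgoing call must occupy
reg_args = 5                     # integer argument registers (per the comment above)

# One register argument carries the i64* area pointer, so only reg_args - 1
# i64s travel in registers; the rest go on the stack at 8 bytes each.
args = (call_bytes - abi_frame) // 8 + (reg_args - 1)
print("i64 arguments passed to @foo: %d" % args)           # 1005

# Above the call area, as the CHECK lines expect:
#   8168, 8176  - the two emergency spill slots (stg/lg [[REG1]]/[[REG2]])
#   8184        - the spill slot for the incoming %foo
#   8192        - the alloca area (%area)
# The mvc copies 8 bytes from 4096 + 4088 = 8184 (the spilled %foo) to
# 8192 + 0 = 8192 (%area): a frame-to-frame copy done with MVC.
print("mvc source offset: %d" % (4096 + 4088))             # 8184
print("mvc destination offset: %d" % (8192 + 0))           # 8192

# The removed comment's out-of-range threshold for the old version of the test:
print("old threshold: %d" % ((4096 - 168) // 8 + 8 + 1))   # 500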