[Sparc] Generate correct code for leaf functions with stack objects

llvm-svn: 183067
2025-01-27 06:54:30 +00:00 · 2013-06-01 04:51:18 +00:00 · 2013-06-01 04:51:18 +00:00 · 1eaf496598
commit 1eaf496598
parent 0863d85015
5 changed files with 102 additions and 30 deletions
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@ -35,8 +35,6 @@ DisableLeafProc("disable-sparc-leaf-proc",

 void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
  SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
-  if (FuncInfo->isLeafProc())
-    return;

  MachineBasicBlock &MBB = MF.front();
  MachineFrameInfo *MFI = MF.getFrameInfo();
@ -48,31 +46,18 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
  // Get the number of bytes to allocate from the FrameInfo
  int NumBytes = (int) MFI->getStackSize();

-  if (SubTarget.is64Bit()) {
-    // All 64-bit stack frames must be 16-byte aligned, and must reserve space
-    // for spilling the 16 window registers at %sp+BIAS..%sp+BIAS+128.
-    NumBytes += 128;
-    // Frames with calls must also reserve space for 6 outgoing arguments
-    // whether they are used or not. LowerCall_64 takes care of that.
-    assert(NumBytes % 16 == 0 && "Stack size not 16-byte aligned");
-  } else {
-    // Emit the correct save instruction based on the number of bytes in
-    // the frame. Minimum stack frame size according to V8 ABI is:
-    //   16 words for register window spill
-    //    1 word for address of returned aggregate-value
-    // +  6 words for passing parameters on the stack
-    // ----------
-    //   23 words * 4 bytes per word = 92 bytes
-    NumBytes += 92;
-
-    // Round up to next doubleword boundary -- a double-word boundary
-    // is required by the ABI.
-    NumBytes = RoundUpToAlignment(NumBytes, 8);
+  unsigned SAVEri = SP::SAVEri;
+  unsigned SAVErr = SP::SAVErr;
+  if (FuncInfo->isLeafProc()) {
+    if (NumBytes == 0)
+      return;
+    SAVEri = SP::ADDri;
+    SAVErr = SP::ADDrr;
  }
-  NumBytes = -NumBytes;
+  NumBytes = - SubTarget.getAdjustedFrameSize(NumBytes);

  if (NumBytes >= -4096) {
-    BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6)
+    BuildMI(MBB, MBBI, dl, TII.get(SAVEri), SP::O6)
      .addReg(SP::O6).addImm(NumBytes);
  } else {
    // Emit this the hard way.  This clobbers G1 which we always know is
@ -82,7 +67,7 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
    // Emit G1 = G1 + I6
    BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
      .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
-    BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6)
+    BuildMI(MBB, MBBI, dl, TII.get(SAVErr), SP::O6)
      .addReg(SP::O6).addReg(SP::G1);
  }
 }
@ -109,16 +94,39 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
 void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
                                  MachineBasicBlock &MBB) const {
  SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
-  if (FuncInfo->isLeafProc())
-    return;
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  const SparcInstrInfo &TII =
    *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
  DebugLoc dl = MBBI->getDebugLoc();
  assert(MBBI->getOpcode() == SP::RETL &&
         "Can only put epilog before 'retl' instruction!");
-  BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
-    .addReg(SP::G0);
+  if (!FuncInfo->isLeafProc()) {
+    BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
+      .addReg(SP::G0);
+    return;
+  }
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  int NumBytes = (int) MFI->getStackSize();
+  if (NumBytes == 0)
+    return;
+
+  NumBytes = SubTarget.getAdjustedFrameSize(NumBytes);
+
+  if (NumBytes < 4096) {
+    BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), SP::O6)
+      .addReg(SP::O6).addImm(NumBytes);
+  } else {
+    // Emit this the hard way.  This clobbers G1 which we always know is
+    // available here.
+    unsigned OffHi = (unsigned)NumBytes >> 10U;
+    BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+    // Emit G1 = G1 | (low 10 bits of NumBytes)
+    BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
+      .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
+    BuildMI(MBB, MBBI, dl, TII.get(SP::ADDrr), SP::O6)
+      .addReg(SP::O6).addReg(SP::G1);
+  }
 }

 bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@ -91,7 +91,14 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                   MI.getOperand(FIOperandNum + 1).getImm() +
                   Subtarget.getStackPointerBias();
  SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
-  unsigned FramePtr = (FuncInfo->isLeafProc()) ? SP::O6 : SP::I6;
+  unsigned FramePtr = SP::I6;
+  if (FuncInfo->isLeafProc()) {
+    // Use %sp and add the adjusted frame size to the offset if needed.
+    FramePtr = SP::O6;
+    int stackSize = MF.getFrameInfo()->getStackSize();
+    Offset += (stackSize) ? Subtarget.getAdjustedFrameSize(stackSize) : 0 ;
+  }
+
  // Replace frame index with a frame pointer reference.
  if (Offset >= -4096 && Offset <= 4095) {
    // If the offset is small enough to fit in the immediate field, directly
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@ -13,6 +13,7 @@

 #include "SparcSubtarget.h"
 #include "Sparc.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/TargetRegistry.h"

 #define GET_SUBTARGETINFO_TARGET_DESC
@ -44,3 +45,30 @@ SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
  // Parse features string.
  ParseSubtargetFeatures(CPUName, FS);
 }
+
+
+int SparcSubtarget::getAdjustedFrameSize(int frameSize) const {
+
+  if (is64Bit()) {
+    // All 64-bit stack frames must be 16-byte aligned, and must reserve space
+    // for spilling the 16 window registers at %sp+BIAS..%sp+BIAS+128.
+    frameSize += 128;
+    // Frames with calls must also reserve space for 6 outgoing arguments
+    // whether they are used or not. LowerCall_64 takes care of that.
+    assert(frameSize % 16 == 0 && "Stack size not 16-byte aligned");
+  } else {
+    // Reserve the minimum stack frame in addition to the bytes already in
+    // the frame. Minimum stack frame size according to V8 ABI is:
+    //   16 words for register window spill
+    //    1 word for address of returned aggregate-value
+    // +  6 words for passing parameters on the stack
+    // ----------
+    //   23 words * 4 bytes per word = 92 bytes
+    frameSize += 92;
+
+    // Round up to next doubleword boundary -- a double-word boundary
+    // is required by the ABI.
+    frameSize = RoundUpToAlignment(frameSize, 8);
+  }
+  return frameSize;
+}
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@ -58,6 +58,12 @@ public:
  int64_t getStackPointerBias() const {
    return is64Bit() ? 2047 : 0;
  }
+
+  /// Given an actual stack size as determined by FrameInfo, this function
+  /// returns the adjusted frame size, which includes space for register
+  /// window spills and arguments.
+  int getAdjustedFrameSize(int stackSize) const;
+
 };

 } // end namespace llvm
--- a/test/CodeGen/SPARC/leafproc.ll
+++ b/test/CodeGen/SPARC/leafproc.ll
@ -55,3 +55,26 @@ entry:
  %6 = add nsw i32 %5, %h
  ret i32 %6
 }
+
+; CHECK:      leaf_proc_with_local_array:
+; CHECK:      add %sp, -104, %sp
+; CHECK:      or %g0, 1, [[R1:%[go][0-7]]]
+; CHECK:      st [[R1]], [%sp+96]
+; CHECK:      or %g0, 2, [[R2:%[go][0-7]]]
+; CHECK:      st [[R2]], [%sp+100]
+; CHECK:      ld {{.+}}, %o0
+; CHECK:      jmp %o7+8
+; CHECK-NEXT: add %sp, 104, %sp
+
+define i32 @leaf_proc_with_local_array(i32 %a, i32 %b, i32 %c) {
+entry:
+  %array = alloca [2 x i32], align 4
+  %0 = sub nsw i32 %b, %c
+  %1 = getelementptr inbounds [2 x i32]* %array, i32 0, i32 0
+  store i32 1, i32* %1, align 4
+  %2 = getelementptr inbounds [2 x i32]* %array, i32 0, i32 1
+  store i32 2, i32* %2, align 4
+  %3 = getelementptr inbounds [2 x i32]* %array, i32 0, i32 %a
+  %4 = load i32* %3, align 4
+  ret i32 %4
+}