[PPC] Set SP after loading data from stack frame, if no red zone is present

Follow-up to r280705: Make sure that the SP is only restored after all data is loaded from the stack frame, if there is no red zone. This completes the fix for https://llvm.org/bugs/show_bug.cgi?id=26519.

Differential Revision: https://reviews.llvm.org/D24466

llvm-svn: 282174
2025-01-07 11:51:13 +00:00 · 2016-09-22 17:22:43 +00:00 · 2016-09-22 17:22:43 +00:00 · 344cd70c0b
commit 344cd70c0b
parent 986cc88263
5 changed files with 369 additions and 65 deletions
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@ -926,7 +926,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
  }

  // Have we generated a STUX instruction to claim stack frame? If so,
-  // the frame size will be placed in ScratchReg.
+  // the negated frame size will be placed in ScratchReg.
  bool HasSTUX = false;

  // This condition must be kept in sync with canUseAsPrologue.
@ -986,33 +986,88 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
  if (!HasRedZone) {
    assert(!isPPC64 && "A red zone is always available on PPC64");
    if (HasSTUX) {
-      // The frame size is in ScratchReg, and the SPReg has been advanced
-      // (downwards) by the frame size: SPReg = old SPReg + ScratchReg.
-      // Set ScratchReg to the original SPReg: ScratchReg = SPReg - ScratchReg.
+      // The negated frame size is in ScratchReg, and the SPReg has been
+      // decremented by the frame size: SPReg = old SPReg + ScratchReg.
+      // Since FPOffset, PBPOffset, etc. are relative to the beginning of
+      // the stack frame (i.e. the old SP), ideally, we would put the old
+      // SP into a register and use it as the base for the stores. The
+      // problem is that the only available register may be ScratchReg,
+      // which could be R0, and R0 cannot be used as a base address.
+
+      // First, set ScratchReg to the old SP. This may need to be modified
+      // later.
      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
        .addReg(ScratchReg, RegState::Kill)
        .addReg(SPReg);

-      // Now that the stack frame has been allocated, save all the necessary
-      // registers using ScratchReg as the base address.
-      if (HasFP)
-        BuildMI(MBB, MBBI, dl, StoreInst)
-          .addReg(FPReg)
-          .addImm(FPOffset)
-          .addReg(ScratchReg);
-      if (FI->usesPICBase())
-        BuildMI(MBB, MBBI, dl, StoreInst)
-          .addReg(PPC::R30)
-          .addImm(PBPOffset)
-          .addReg(ScratchReg);
-      if (HasBP) {
-        BuildMI(MBB, MBBI, dl, StoreInst)
-          .addReg(BPReg)
-          .addImm(BPOffset)
-          .addReg(ScratchReg);
-        BuildMI(MBB, MBBI, dl, OrInst, BPReg)
-          .addReg(ScratchReg, RegState::Kill)
-          .addReg(ScratchReg);
+      if (ScratchReg == PPC::R0) {
+        // R0 cannot be used as a base register, but it can be used as an
+        // index in a store-indexed.
+        int LastOffset = 0;
+        if (HasFP)  {
+          // R0 += (FPOffset-LastOffset).
+          // Need addic, since addi treats R0 as 0.
+          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
+            .addReg(ScratchReg)
+            .addImm(FPOffset-LastOffset);
+          LastOffset = FPOffset;
+          // Store FP into *R0.
+          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
+            .addReg(FPReg, RegState::Kill)  // Save FP.
+            .addReg(PPC::ZERO)
+            .addReg(ScratchReg);  // This will be the index (R0 is ok here).
+        }
+        if (FI->usesPICBase()) {
+          // R0 += (PBPOffset-LastOffset).
+          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
+            .addReg(ScratchReg)
+            .addImm(PBPOffset-LastOffset);
+          LastOffset = PBPOffset;
+          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
+            .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
+            .addReg(PPC::ZERO)
+            .addReg(ScratchReg);  // This will be the index (R0 is ok here).
+        }
+        if (HasBP) {
+          // R0 += (BPOffset-LastOffset).
+          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
+            .addReg(ScratchReg)
+            .addImm(BPOffset-LastOffset);
+          LastOffset = BPOffset;
+          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
+            .addReg(BPReg, RegState::Kill)  // Save BP.
+            .addReg(PPC::ZERO)
+            .addReg(ScratchReg);  // This will be the index (R0 is ok here).
+          // BP = R0-LastOffset
+          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
+            .addReg(ScratchReg, RegState::Kill)
+            .addImm(-LastOffset);
+        }
+      } else {
+        // ScratchReg is not R0, so use it as the base register. It is
+        // already set to the old SP, so we can use the offsets directly.
+
+        // Now that the stack frame has been allocated, save all the necessary
+        // registers using ScratchReg as the base address.
+        if (HasFP)
+          BuildMI(MBB, MBBI, dl, StoreInst)
+            .addReg(FPReg)
+            .addImm(FPOffset)
+            .addReg(ScratchReg);
+        if (FI->usesPICBase())
+          BuildMI(MBB, MBBI, dl, StoreInst)
+            .addReg(PPC::R30)
+            .addImm(PBPOffset)
+            .addReg(ScratchReg);
+        if (HasBP) {
+          BuildMI(MBB, MBBI, dl, StoreInst)
+            .addReg(BPReg)
+            .addImm(BPOffset)
+            .addReg(ScratchReg);
+          BuildMI(MBB, MBBI, dl, OrInst, BPReg)
+            .addReg(ScratchReg, RegState::Kill)
+            .addReg(ScratchReg);
+        }
      }
    } else {
      // The frame size is a known 16-bit constant (fitting in the immediate
@ -1190,6 +1245,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
+  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();

  unsigned SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
  unsigned BPReg      = RegInfo->getBaseRegister(MF);
@ -1202,6 +1258,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
                                                 : PPC::LWZ );
  const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
                                                           : PPC::LIS );
+  const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
+                                              : PPC::OR );
  const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
                                                  : PPC::ORI );
  const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
@ -1223,7 +1281,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,

  if (HasFP) {
    if (isSVR4ABI) {
-      MachineFrameInfo &MFI = MF.getFrameInfo();
      int FPIndex = FI->getFramePointerSaveIndex();
      assert(FPIndex && "No Frame Pointer Save Slot!");
      FPOffset = MFI.getObjectOffset(FPIndex);
@ -1235,7 +1292,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
  int BPOffset = 0;
  if (HasBP) {
    if (isSVR4ABI) {
-      MachineFrameInfo &MFI = MF.getFrameInfo();
      int BPIndex = FI->getBasePointerSaveIndex();
      assert(BPIndex && "No Base Pointer Save Slot!");
      BPOffset = MFI.getObjectOffset(BPIndex);
@ -1246,7 +1302,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,

  int PBPOffset = 0;
  if (FI->usesPICBase()) {
-    MachineFrameInfo &MFI = MF.getFrameInfo();
    int PBPIndex = FI->getPICBasePointerSaveIndex();
    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
    PBPOffset = MFI.getObjectOffset(PBPIndex);
@ -1282,9 +1337,25 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
  // indexed into with a simple LD/LWZ immediate offset operand.
  bool isLargeFrame = !isInt<16>(FrameSize);

+  // On targets without red zone, the SP needs to be restored last, so that
+  // all live contents of the stack frame are upwards of the SP. This means
+  // that we cannot restore SP just now, since there may be more registers
+  // to restore from the stack frame (e.g. R31). If the frame size is not
+  // a simple immediate value, we will need a spare register to hold the
+  // restored SP. If the frame size is known and small, we can simply adjust
+  // the offsets of the registers to be restored, and still use SP to restore
+  // them. In such a case, the final update of SP will be to add the frame
+  // size to it.
+  // To simplify the code, set RBReg to the base register used to restore
+  // values from the stack, and set SPAdd to the value that needs to be added
+  // to the SP at the end. The default values are as if red zone was present.
+  unsigned RBReg = SPReg;
+  unsigned SPAdd = 0;
+
  if (FrameSize) {
-    // In the prologue, the loaded (or persistent) stack pointer value is offset
-    // by the STDU/STDUX/STWU/STWUX instruction.  Add this offset back now.
+    // In the prologue, the loaded (or persistent) stack pointer value is
+    // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with a
+    // red zone, add this offset back now.

    // If this function contained a fastcc call and GuaranteedTailCallOpt is
    // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
@ -1292,8 +1363,10 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
    // value of R31 in this case.
    if (FI->hasFastCall()) {
      assert(HasFP && "Expecting a valid frame pointer.");
+      if (!HasRedZone)
+        RBReg = FPReg;
      if (!isLargeFrame) {
-        BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+        BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
          .addReg(FPReg).addImm(FrameSize);
      } else {
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
@ -1302,27 +1375,55 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
          .addReg(ScratchReg, RegState::Kill)
          .addImm(FrameSize & 0xFFFF);
        BuildMI(MBB, MBBI, dl, AddInst)
-          .addReg(SPReg)
+          .addReg(RBReg)
          .addReg(FPReg)
          .addReg(ScratchReg);
      }
    } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
-      BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
-        .addReg(SPReg)
-        .addImm(FrameSize);
+      if (HasRedZone) {
+        BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+          .addReg(SPReg)
+          .addImm(FrameSize);
+      } else {
+        // Make sure that adding FrameSize will not overflow the max offset
+        // size.
+        assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
+               "Local offsets should be negative");
+        SPAdd = FrameSize;
+        FPOffset += FrameSize;
+        BPOffset += FrameSize;
+        PBPOffset += FrameSize;
+      }
    } else {
-      BuildMI(MBB, MBBI, dl, LoadInst, SPReg)
+      // We don't want to use ScratchReg as a base register, because it
+      // could happen to be R0. Use FP instead, but make sure to preserve it.
+      if (!HasRedZone) {
+        // If FP is not saved, copy it to ScratchReg.
+        if (!HasFP)
+          BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
+            .addReg(FPReg)
+            .addReg(FPReg);
+        RBReg = FPReg;
+      }
+      BuildMI(MBB, MBBI, dl, LoadInst, RBReg)
        .addImm(0)
        .addReg(SPReg);
    }
  }
+  assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
+  // If there is no red zone, ScratchReg may be needed for holding a useful
+  // value (although not the base register). Make sure it is not overwritten
+  // too early.

  assert((isPPC64 || !MustSaveCR) &&
         "Epilogue CR restoring supported only in 64-bit mode");

-  // If we need to save both the LR and the CR and we only have one available
-  // scratch register, we must do them one at a time.
+  // If we need to restore both the LR and the CR and we only have one
+  // available scratch register, we must do them one at a time.
  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
+    // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
+    // is live here.
+    assert(HasRedZone && "Expecting red zone");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
      .addImm(8)
      .addReg(SPReg);
@ -1331,33 +1432,77 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
        .addReg(TempReg, getKillRegState(i == e-1));
  }

-  if (MustSaveLR)
+  // Delay restoring of the LR if ScratchReg is needed. This is ok, since
+  // LR is stored in the caller's stack frame. ScratchReg will be needed
+  // if RBReg is anything other than SP. We shouldn't use ScratchReg as
+  // a base register anyway, because it may happen to be R0.
+  bool LoadedLR = false;
+  if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
    BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
-      .addImm(LROffset)
-      .addReg(SPReg);
+      .addImm(LROffset+SPAdd)
+      .addReg(RBReg);
+    LoadedLR = true;
+  }

-  if (MustSaveCR &&
-      !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
+  if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
+    // This will only occur for PPC64.
+    assert(isPPC64 && "Expecting 64-bit mode");
+    assert(RBReg == SPReg && "Should be using SP as a base register");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
      .addImm(8)
-      .addReg(SPReg);
+      .addReg(RBReg);
+  }

-  if (HasFP)
-    BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
-      .addImm(FPOffset)
-      .addReg(SPReg);
+  if (HasFP) {
+    // If SP is still the base register (always so with a red zone), restore
+    // FP directly. Otherwise, restore the value of FP into ScratchReg.
+    if (HasRedZone || RBReg == SPReg)
+      BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
+        .addImm(FPOffset)
+        .addReg(SPReg);
+    else
+      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
+        .addImm(FPOffset)
+        .addReg(RBReg);
+  }

  if (FI->usesPICBase())
-    // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
    BuildMI(MBB, MBBI, dl, LoadInst)
      .addReg(PPC::R30)
      .addImm(PBPOffset)
-      .addReg(SPReg);
+      .addReg(RBReg);

  if (HasBP)
    BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
      .addImm(BPOffset)
-      .addReg(SPReg);
+      .addReg(RBReg);
+
+  // There is nothing more to be loaded from the stack, so now we can
+  // restore SP: SP = RBReg + SPAdd.
+  if (RBReg != SPReg || SPAdd != 0) {
+    assert(!HasRedZone && "This should not happen with red zone");
+    // If SPAdd is 0, generate a copy.
+    if (SPAdd == 0)
+      BuildMI(MBB, MBBI, dl, OrInst, SPReg)
+        .addReg(RBReg)
+        .addReg(RBReg);
+    else
+      BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+        .addReg(RBReg)
+        .addImm(SPAdd);
+
+    assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
+    if (RBReg == FPReg)
+      BuildMI(MBB, MBBI, dl, OrInst, FPReg)
+        .addReg(ScratchReg)
+        .addReg(ScratchReg);
+
+    // Now load the LR from the caller's stack frame.
+    if (MustSaveLR && !LoadedLR)
+      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
+        .addImm(LROffset)
+        .addReg(SPReg);
+  }

  if (MustSaveCR &&
      !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
--- a/test/CodeGen/PowerPC/ppc32-pic-large.ll
+++ b/test/CodeGen/PowerPC/ppc32-pic-large.ll
@ -25,7 +25,7 @@ entry:
 ; LARGE-BSS-DAG:     lwz [[VREG:[0-9]+]], [[VREF:\.LC[0-9]+]]-.LTOC(30)
 ; LARGE-BSS-DAG:     lwz {{[0-9]+}}, 0([[VREG]])
 ; LARGE-BSS-DAG:     stw {{[0-9]+}}, 8(1)
-; LARGE-BSS:         lwz 30, -8(1)
+; LARGE-BSS:         lwz 30, 24(1)
 ; LARGE-BSS:       [[VREF]]:
 ; LARGE-BSS-NEXT:     .p2align 2
 ; LARGE-BSS-NEXT:    .long bar
--- a/test/CodeGen/PowerPC/ppc32-pic.ll
+++ b/test/CodeGen/PowerPC/ppc32-pic.ll
@ -21,4 +21,4 @@ entry:
 ; SMALL-BSS-DAG:     lwz [[VREG:[0-9]+]], bar@GOT(30)
 ; SMALL-BSS-DAG:     lwz {{[0-9]+}}, 0([[VREG]])
 ; SMALL-BSS:         bl call_foo@PLT
-; SMALL-BSS:         lwz 30, -8(1)
+; SMALL-BSS:         lwz 30, 24(1)
--- a/test/CodeGen/PowerPC/stack-no-redzone.ll
+++ b/test/CodeGen/PowerPC/stack-no-redzone.ll
@ -0,0 +1,146 @@
+; Test that accesses of the stack remain within the range defined by R1,
+; i.e. that loads and stores only access the allocated stack. This does not
+; have to be the case when a red zone is present.
+
+; Make sure that there is no red zone, i.e. ppc32 and SVR4 ABI.
+; RUN: llc -mtriple=powerpc--freebsd-elf < %s | FileCheck %s
+
+; There are two ways that the stack pointer can be adjusted in the prologue:
+; - by adding an immediate value:
+;     stwu r1, -imm(r1)
+; - by adding another register:
+;     stwux r1, rx, r1
+;
+; The restoring of the stack pointer can be done:
+; - by adding an immediate value to it:
+;     addi r1, r1, imm
+; - by copying the value from another register:
+;     mr r1, rx
+
+
+; Nothing (no special features).
+;
+; CHECK-LABEL: test_n:
+; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
+; CHECK: stwu 1, -[[SIZE:[0-9]+]](1)
+; CHECK: addi 1, 1, [[SIZE]]
+; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
+define i32 @test_n() local_unnamed_addr #0 {
+entry:
+  %t0 = tail call i32 bitcast (i32 (...)* @bar0 to i32 ()*)() #0
+  ret i32 %t0
+}
+
+; Aligned object on the stack.
+;
+; CHECK-LABEL: test_a:
+; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
+; CHECK: stwux 1, 1, {{[0-9]+}}
+; CHECK: mr 1, {{[0-9]+}}
+; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
+
+define i32 @test_a() local_unnamed_addr #0 {
+entry:
+  %t0 = alloca i32, align 128
+  %t1 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0
+  ret i32 %t1
+}
+
+; Dynamic allocation on the stack.
+;
+; CHECK-LABEL: test_d:
+; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
+; CHECK: stwu 1, -[[SIZE:[0-9]+]](1)
+; CHECK: mr 1, {{[0-9]+}}
+; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
+define i32 @test_d(i32 %p0) local_unnamed_addr #0 {
+  %t0 = alloca i32, i32 %p0, align 4
+  %t1 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0
+  ret i32 %t1
+}
+
+; Large stack (exceeds size of D-field).
+; CHECK-LABEL: test_s:
+; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
+; CHECK: stwux 1, 1, {{[0-9]+}}
+; CHECK: mr 1, {{[0-9]+}}
+; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
+define i32 @test_s(i32 %p0) local_unnamed_addr #0 {
+entry:
+  %t0 = alloca [16384 x i32]
+  %t1 = getelementptr [16384 x i32], [16384 x i32]* %t0, i32 0, i32 0
+  %t2 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t1) #0
+  ret i32 %t2
+}
+
+; Combinations.
+
+; CHECK-LABEL: test_ad:
+; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
+; CHECK: stwux 1, 1, {{[0-9]+}}
+; CHECK: mr 1, {{[0-9]+}}
+; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
+define i32 @test_ad(i32 %p0) local_unnamed_addr #0 {
+  %t0 = alloca i32, align 128
+  %t1 = alloca i32, i32 %p0, align 4
+  %t2 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0
+  %t3 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t1) #0
+  %t4 = add i32 %t2, %t3
+  ret i32 %t4
+}
+
+; CHECK-LABEL: test_as:
+; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
+; CHECK: stwux 1, 1, {{[0-9]+}}
+; CHECK: mr 1, {{[0-9]+}}
+; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
+define i32 @test_as() local_unnamed_addr #0 {
+  %t0 = alloca i32, align 128
+  %t1 = alloca [16384 x i32]
+  %t2 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0
+  %t3 = getelementptr [16384 x i32], [16384 x i32]* %t1, i32 0, i32 0
+  %t4 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t3) #0
+  %t5 = add i32 %t2, %t4
+  ret i32 %t5
+}
+
+; CHECK-LABEL: test_ds:
+; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
+; CHECK: stwux 1, 1, {{[0-9]+}}
+; CHECK: mr 1, {{[0-9]+}}
+; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
+define i32 @test_ds(i32 %p0) local_unnamed_addr #0 {
+  %t0 = alloca i32, i32 %p0, align 4
+  %t1 = alloca [16384 x i32]
+  %t2 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0
+  %t3 = getelementptr [16384 x i32], [16384 x i32]* %t1, i32 0, i32 0
+  %t4 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t3) #0
+  %t5 = add i32 %t2, %t4
+  ret i32 %t5
+}
+
+; CHECK-LABEL: test_ads:
+; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
+; CHECK: stwux 1, 1, {{[0-9]+}}
+; CHECK: mr 1, {{[0-9]+}}
+; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
+define i32 @test_ads(i32 %p0) local_unnamed_addr #0 {
+  %t0 = alloca i32, align 128
+  %t1 = alloca i32, i32 %p0, align 4
+  %t2 = alloca [16384 x i32]
+
+  %t3 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0
+  %t4 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t1) #0
+  %t5 = add i32 %t3, %t4
+
+  %t6 = getelementptr [16384 x i32], [16384 x i32]* %t2, i32 0, i32 0
+  %t7 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t6) #0
+  %t8 = add i32 %t5, %t7
+  ret i32 %t8  ; return the sum of all three calls, like the other combination tests
+}
+
+
+declare i32 @bar0(...) local_unnamed_addr #0
+declare i32 @bar1(...) local_unnamed_addr #0
+
+attributes #0 = { nounwind }
--- a/test/CodeGen/PowerPC/stack-realign.ll
+++ b/test/CodeGen/PowerPC/stack-realign.ll
@ -83,18 +83,26 @@ entry:
 ; CHECK-32-DAG: subfic 0, [[REG]], -64
 ; CHECK-32: stwux 1, 1, 0
 ; CHECK-32: subf 0, 0, 1
-; CHECK-32: stw 30, -8(0)
-; CHECK-32: mr 30, 0
+; CHECK-32: addic 0, 0, -4
+; CHECK-32: stwx 31, 0, 0
+; CHECK-32: addic 0, 0, -4
+; CHECK-32: stwx 30, 0, 0
+; CHECK-32: addic 30, 0, 8

 ; CHECK-32-PIC-LABEL: @goo
 ; CHECK-32-PIC-DAG: mflr [[LR:[0-9]+]]
 ; CHECK-32-PIC-DAG: clrlwi [[REG:[0-9]+]], 1, 27
 ; CHECK-32-PIC-DAG: stw [[LR]], 4(1)
 ; CHECK-32-PIC-DAG: subfic 0, [[REG]], -64
-; CHECK-32-PIC: stwux 1, 1, 0
-; CHECK-32-PIC: subf 0, 0, 1
-; CHECK-32-PIC: stw 29, -12(0)
-; CHECK-32-PIC-DAG: mr 29, 0
+; CHECK-32-PIC:     stwux 1, 1, 0
+; CHECK-32-PIC:     subf 0, 0, 1
+; CHECK-32-PIC:     addic 0, 0, -4
+; CHECK-32-PIC:     stwx 31, 0, 0
+; CHECK-32-PIC:     addic 0, 0, -4
+; CHECK-32-PIC:     stwx 30, 0, 0
+; CHECK-32-PIC:     addic 0, 0, -4
+; CHECK-32-PIC:     stwx 29, 0, 0
+; CHECK-32-PIC:     addic 29, 0, 12

 ; The large-frame-size case.
 define void @hoo(%struct.s* byval nocapture readonly %a) {
@ -138,9 +146,11 @@ entry:
 ; CHECK-32-DAG: subfc 0, [[REG3]], [[REG2]]
 ; CHECK-32:     stwux 1, 1, 0
 ; CHECK-32:     subf 0, 0, 1
-; CHECK-32-DAG: stw 31, -4(0)
-; CHECK-32-DAG: stw 30, -8(0)
-; CHECK-32: mr 30, 0
+; CHECK-32:     addic 0, 0, -4
+; CHECK-32:     stwx 31, 0, 0
+; CHECK-32:     addic 0, 0, -4
+; CHECK-32:     stwx 30, 0, 0
+; CHECK-32:     addic 30, 0, 8

 ; CHECK-32: blr

@ -152,10 +162,13 @@ entry:
 ; CHECK-32-PIC-DAG: ori [[REG2:[0-9]+]], [[REG1]], 51904
 ; CHECK-32-PIC-DAG: stw 0, 4(1)
 ; CHECK-32-PIC-DAG: subfc 0, [[REG3]], [[REG2]]
-; CHECK-32-PIC: stwux 1, 1, 0
-; CHECK-32-PIC: stw 29, -12(0)
-; CHECK-32-PIC: subf 0, 0, 1
-; CHECK-32-PIC: mr 29, 0
+; CHECK-32-PIC:     stwux 1, 1, 0
+; CHECK-32-PIC:     subf 0, 0, 1
+; CHECK-32-PIC:     addic 0, 0, -4
+; CHECK-32-PIC:     stwx 31, 0, 0
+; CHECK-32-PIC:     addic 0, 0, -8
+; CHECK-32-PIC:     stwx 29, 0, 0
+; CHECK-32-PIC:     addic 29, 0, 12

 ; CHECK-32: blr