[WebAssembly] Stackify function prologs and epilogs

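The emitted prolog/epilog instructions are the same, but their temporary virtual registers (the __stack_pointer address, the loaded SP value, and the frame-size constant) are now marked as stackified, so they are printed as $push/$pop value-stack operands and fewer locals are used. Differential Revision: http://reviews.llvm.org/D17428 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@261452 91177308-0d34-0410-b5e6-96231b3b80d8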
2024-12-15 16:09:02 +00:00 · 2016-02-20 21:46:50 +00:00 · 2016-02-20 21:46:50 +00:00 · 37d3f7f35c
commit 37d3f7f35c
parent 51fba47bbf
4 changed files with 111 additions and 102 deletions
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@ -36,8 +36,6 @@ using namespace llvm;

 // TODO: Implement a red zone?
 // TODO: wasm64
-// TODO: Prolog/epilog should be stackified too. This pass runs after register
-//       stackification, so we'll have to do it manually.
 // TODO: Emit TargetOpcode::CFI_INSTRUCTION instructions

 /// Return true if the specified function should have a dedicated frame pointer
@ -76,6 +74,7 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
  auto *MFI = MF.getFrameInfo();
  assert(MFI->getCalleeSavedInfo().empty() &&
         "WebAssembly should not have callee-saved registers");
+  auto *WFI = MF.getInfo<WebAssemblyFunctionInfo>();

  uint64_t StackSize = MFI->getStackSize();
  if (!StackSize && !MFI->adjustsStack() && !hasFP(MF)) return;
@ -86,9 +85,10 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
  auto InsertPt = MBB.begin();
  DebugLoc DL;

+  unsigned SPAddr = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  unsigned SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
-  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPReg)
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPAddr)
      .addExternalSymbol(SPSymbol);
  // This MachinePointerInfo should reference __stack_pointer as well but
  // doesn't because MachinePointerInfo() takes a GV which we don't have for
@ -99,21 +99,23 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
  // Load the SP value.
  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32),
          StackSize ? SPReg : (unsigned)WebAssembly::SP32)
-      .addImm(0)      // offset
-      .addReg(SPReg)  // addr
-      .addImm(2)      // p2align
+      .addImm(0)       // offset
+      .addReg(SPAddr)  // addr
+      .addImm(2)       // p2align
      .addMemOperand(LoadMMO);
+  WFI->stackifyVReg(SPAddr);

-  unsigned OffsetReg = 0;
  if (StackSize) {
    // Subtract the frame size
-    OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+    unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
        .addImm(StackSize);
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32),
            WebAssembly::SP32)
        .addReg(SPReg)
        .addReg(OffsetReg);
+    WFI->stackifyVReg(OffsetReg);
+    WFI->stackifyVReg(SPReg);
  }
  if (hasFP(MF)) {
    // Unlike most conventional targets (where FP points to the saved FP),
@ -124,19 +126,20 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
        .addReg(WebAssembly::SP32);
  }
  if (StackSize) {
-    assert(OffsetReg);
+    SPAddr = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    // The SP32 register now has the new stacktop. Also write it back to memory.
-    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPAddr)
        .addExternalSymbol(SPSymbol);
    auto *MMO = new MachineMemOperand(MachinePointerInfo(),
                                      MachineMemOperand::MOStore, 4, 4);
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32),
            WebAssembly::SP32)
        .addImm(0)
-        .addReg(OffsetReg)
+        .addReg(SPAddr)
        .addImm(2)  // p2align
        .addReg(WebAssembly::SP32)
        .addMemOperand(MMO);
+    WFI->stackifyVReg(SPAddr);
  }
 }

@ -145,9 +148,9 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
  auto *MFI = MF.getFrameInfo();
  uint64_t StackSize = MFI->getStackSize();
  if (!StackSize && !MFI->adjustsStack() && !hasFP(MF)) return;
+  auto *WFI = MF.getInfo<WebAssemblyFunctionInfo>();
  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  auto &MRI = MF.getRegInfo();
-  unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  auto InsertPt = MBB.getFirstTerminator();
  DebugLoc DL;

@ -158,25 +161,28 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
  // Restore the stack pointer. If we had fixed-size locals, add the offset
  // subtracted in the prolog.
  if (StackSize) {
+    unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
        .addImm(StackSize);
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32),
            WebAssembly::SP32)
        .addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32)
        .addReg(OffsetReg);
+    WFI->stackifyVReg(OffsetReg);
  }

  auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
-  // Re-use OffsetReg to hold the address of the stacktop
-  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+  unsigned SPAddr = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPAddr)
      .addExternalSymbol(SPSymbol);
  auto *MMO = new MachineMemOperand(MachinePointerInfo(),
                                    MachineMemOperand::MOStore, 4, 4);
  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32),
          WebAssembly::SP32)
      .addImm(0)
-      .addReg(OffsetReg)
+      .addReg(SPAddr)
      .addImm(2)  // p2align
      .addReg((!StackSize && hasFP(MF)) ? WebAssembly::FP32 : WebAssembly::SP32)
      .addMemOperand(MMO);
+  WFI->stackifyVReg(SPAddr);
 }
--- a/test/CodeGen/WebAssembly/byval.ll
+++ b/test/CodeGen/WebAssembly/byval.ll
@ -24,13 +24,13 @@ declare void @ext_byval_func_empty(%EmptyStruct* byval)
 define void @byval_arg(%SmallStruct* %ptr) {
 ; CHECK: .param i32
 ; Subtract 16 from SP (SP is 16-byte aligned)
- ; CHECK: i32.const [[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load [[L1]]=, 0([[L1]])
- ; CHECK-NEXT: i32.const [[L2:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, [[L1]], [[L2]]
+ ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
+ ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16
+ ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
 ; Ensure SP is stored back before the call
- ; CHECK-NEXT: i32.const [[L3:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.store {{.*}}=, 0([[L3]]), [[SP]]
+ ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L4]]), [[SP]]
 ; Copy the SmallStruct argument to the stack (SP+12, original SP-4)
 ; CHECK-NEXT: i32.load $push[[L4:.+]]=, 0($0)
 ; CHECK-NEXT: i32.store {{.*}}=, 12([[SP]]), $pop[[L4]]
@ -40,10 +40,10 @@ define void @byval_arg(%SmallStruct* %ptr) {
 ; CHECK-NEXT: call ext_byval_func@FUNCTION, [[L5]]
 call void @ext_byval_func(%SmallStruct* byval %ptr)
 ; Restore the stack
- ; CHECK-NEXT: i32.const [[L6:.+]]=, 16
- ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], [[L6]]
- ; CHECK-NEXT: i32.const [[L7:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.store {{.*}}=, 0([[L7]]), [[SP]]
+ ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 16
+ ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L6]]
+ ; CHECK-NEXT: i32.const $push[[L7:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L7]]), [[SP]]
 ; CHECK-NEXT: return
 ret void
 }
@ -52,8 +52,8 @@ define void @byval_arg(%SmallStruct* %ptr) {
 define void @byval_arg_align8(%SmallStruct* %ptr) {
 ; CHECK: .param i32
 ; Don't check the entire SP sequence, just enough to get the alignment.
- ; CHECK: i32.const [[L2:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, [[L2]]
+ ; CHECK: i32.const $push[[L1:.+]]=, 16
+ ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]]
 ; Copy the SmallStruct argument to the stack (SP+8, original SP-8)
 ; CHECK: i32.load $push[[L4:.+]]=, 0($0):p2align=3
 ; CHECK-NEXT: i32.store {{.*}}=, 8([[SP]]):p2align=3, $pop[[L4]]
@ -69,8 +69,8 @@ define void @byval_arg_align8(%SmallStruct* %ptr) {
 define void @byval_arg_double(%AlignedStruct* %ptr) {
 ; CHECK: .param i32
 ; Subtract 16 from SP (SP is 16-byte aligned)
- ; CHECK: i32.const [[L2:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, [[L2]]
+ ; CHECK: i32.const $push[[L1:.+]]=, 16
+ ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]]
 ; Copy the AlignedStruct argument to the stack (SP+0, original SP-16)
 ; Just check the last load/store pair of the memcpy
 ; CHECK: i64.load $push[[L4:.+]]=, 0($0)
@ -107,10 +107,11 @@ define void @byval_empty_callee(%EmptyStruct* byval %ptr) {
 }

 ; Call memcpy for "big" byvals.
-; TODO: When the prolog/epilog sequences are optimized, refine these checks to
-; be more specific.
-
 ; CHECK-LABEL: big_byval:
+; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
+; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
+; CHECK-NEXT: i32.const $push[[L3:.+]]=, 131072
+; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
 ; CHECK:      i32.call       ${{[^,]+}}=, memcpy@FUNCTION,
 ; CHECK-NEXT: call           big_byval_callee@FUNCTION,
 %big = type [131072 x i8]
--- a/test/CodeGen/WebAssembly/mem-intrinsics.ll
+++ b/test/CodeGen/WebAssembly/mem-intrinsics.ll
@ -61,8 +61,8 @@ define void @set_no(i8* %dst, i8 %src, i32 %len) {


 ; CHECK-LABEL: frame_index:
-; CHECK: i32.call $discard=, memset@FUNCTION, $3, $pop1, $pop0{{$}}
-; CHECK: i32.call $discard=, memset@FUNCTION, $4, $pop3, $pop2{{$}}
+; CHECK: i32.call $discard=, memset@FUNCTION, $0, $pop1, $pop0{{$}}
+; CHECK: i32.call $discard=, memset@FUNCTION, $1, $pop3, $pop2{{$}}
 ; CHECK: return{{$}}
 define void @frame_index() {
 entry:
--- a/test/CodeGen/WebAssembly/userstack.ll
+++ b/test/CodeGen/WebAssembly/userstack.ll
@ -7,54 +7,58 @@ target triple = "wasm32-unknown-unknown"

 ; CHECK-LABEL: alloca32:
 ; Check that there is an extra local for the stack pointer.
-; CHECK: .local i32, i32, i32, i32{{$}}
+; CHECK: .local i32{{$}}
 define void @alloca32() {
- ; CHECK: i32.const [[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load [[L1]]=, 0([[L1]])
- ; CHECK-NEXT: i32.const [[L2:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, [[L1]], [[L2]]
+ ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer{{$}}
+ ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
+ ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16
+ ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
+ ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer{{$}}
+ ; CHECK-NEXT: i32.store [[SP]]=, 0($pop[[L4]]), [[SP]]
 %retval = alloca i32
- ; CHECK: i32.const $push[[L3:.+]]=, 0
- ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]]
+ ; CHECK: i32.const $push[[L0:.+]]=, 0
+ ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L0]]
 store i32 0, i32* %retval
- ; CHECK: i32.const [[L4:.+]]=, 16
- ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], [[L4]]
- ; CHECK-NEXT: i32.const [[L5:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.store [[SP]]=, 0([[L5]]), [[SP]]
+ ; CHECK: i32.const $push[[L5:.+]]=, 16
+ ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L5]]
+ ; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.store [[SP]]=, 0($pop[[L6]]), [[SP]]
 ret void
 }

 ; CHECK-LABEL: alloca3264:
-; CHECK: .local i32, i32, i32, i32{{$}}
+; CHECK: .local i32{{$}}
 define void @alloca3264() {
- ; CHECK: i32.const [[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load [[L1]]=, 0([[L1]])
- ; CHECK-NEXT: i32.const [[L2:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, [[L1]], [[L2]]
+ ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
+ ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16
+ ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
+ ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer{{$}}
+ ; CHECK-NEXT: i32.store [[SP]]=, 0($pop[[L4]]), [[SP]]
 %r1 = alloca i32
 %r2 = alloca double
 ; CHECK: i32.const $push[[L3:.+]]=, 0
 ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]]
 store i32 0, i32* %r1
- ; CHECK: i64.const $push[[L4:.+]]=, 0
- ; CHECK: i64.store {{.*}}=, 0([[SP]]), $pop[[L4]]
+ ; CHECK: i64.const $push[[L0:.+]]=, 0
+ ; CHECK: i64.store {{.*}}=, 0([[SP]]), $pop[[L0]]
 store double 0.0, double* %r2
- ; CHECK: i32.const [[L4:.+]]=, 16
- ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], [[L4]]
- ; CHECK-NEXT: i32.const [[L5:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.store [[SP]]=, 0([[L5]]), [[SP]]
+ ; CHECK: i32.const $push[[L5:.+]]=, 16
+ ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L5]]
+ ; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.store [[SP]]=, 0($pop[[L6]]), [[SP]]
 ret void
 }

 ; CHECK-LABEL: allocarray:
-; CHECK: .local i32, i32, i32, i32, i32{{$}}
+; CHECK: .local i32, i32{{$}}
 define void @allocarray() {
- ; CHECK-NEXT: i32.const [[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load [[L1]]=, 0([[L1]])
- ; CHECK-NEXT: i32.const [[L2:.+]]=, 32{{$}}
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, [[L1]], [[L2]]
- ; CHECK-NEXT: i32.const [[L2]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.store [[SP]]=, 0([[L2]]), [[SP]]
+ ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
+ ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 32{{$}}
+ ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
+ ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer{{$}}
+ ; CHECK-NEXT: i32.store [[SP]]=, 0($pop[[L4]]), [[SP]]
 %r = alloca [5 x i32]

 ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 12
@ -69,18 +73,18 @@ define void @allocarray() {
 %p2 = getelementptr [5 x i32], [5 x i32]* %r, i32 0, i32 3
 store i32 1, i32* %p2

- ; CHECK-NEXT: i32.const [[L7:.+]]=, 32
- ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], [[L7]]
- ; CHECK-NEXT: i32.const [[L8:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.store [[SP]]=, 0([[L8]]), [[SP]]
+ ; CHECK: i32.const $push[[L11:.+]]=, 32
+ ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L11]]
+ ; CHECK-NEXT: i32.const $push[[L12:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.store [[SP]]=, 0($pop[[L12]]), [[SP]]
 ret void
 }

 declare void @ext_func(i64* %ptr)
 ; CHECK-LABEL: non_mem_use
 define void @non_mem_use(i8** %addr) {
- ; CHECK: i32.const [[L2:.+]]=, 48
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, [[L2]]
+ ; CHECK: i32.const $push[[L1:.+]]=, 48
+ ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]]
 %buf = alloca [27 x i8], align 16
 %r = alloca i64
 %r2 = alloca i64
@ -103,12 +107,12 @@ define void @non_mem_use(i8** %addr) {
 }

 ; CHECK-LABEL: allocarray_inbounds:
-; CHECK: .local i32, i32, i32, i32{{$}}
+; CHECK: .local i32{{$}}
 define void @allocarray_inbounds() {
- ; CHECK: i32.const [[L1:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load [[L1]]=, 0([[L1]])
- ; CHECK-NEXT: i32.const [[L2:.+]]=, 32
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, [[L1]], [[L2]]
+ ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
+ ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 32{{$}}
+ ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
 %r = alloca [5 x i32]
 ; CHECK: i32.const $push[[L3:.+]]=, 1
 ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]]
@ -118,45 +122,45 @@ define void @allocarray_inbounds() {
 ; CHECK-NEXT: i32.store {{.*}}=, 24([[SP]]), $pop
 %p2 = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 3
 store i32 1, i32* %p2
- ; CHECK: i32.const [[L7:.+]]=, 32
- ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], [[L7]]
- ; CHECK-NEXT: i32.const [[L8:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.store [[SP]]=, 0([[L7]]), [[SP]]
+ ; CHECK: i32.const $push[[L5:.+]]=, 32
+ ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L5]]
+ ; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.store [[SP]]=, 0($pop[[L6]]), [[SP]]
 ret void
 }

 ; CHECK-LABEL: dynamic_alloca:
 define void @dynamic_alloca(i32 %alloc) {
- ; CHECK: i32.const [[L0:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load [[SP:.+]]=, 0([[L0]])
+ ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load [[SP:.+]]=, 0($pop[[L1]])
 ; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]]
 ; Target independent codegen bumps the stack pointer
 ; FIXME: we need to write the value back to memory
 %r = alloca i32, i32 %alloc
 ; Target-independent codegen also calculates the store addr
 store i32 0, i32* %r
- ; CHECK: i32.const [[L3:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.store [[SP]]=, 0([[L3]]), [[FP]]
+ ; CHECK: i32.const $push[[L3:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.store [[SP]]=, 0($pop[[L3]]), [[FP]]
 ret void
 }


 ; CHECK-LABEL: dynamic_static_alloca:
 define void @dynamic_static_alloca(i32 %alloc) {
- ; CHECK: i32.const [[L0:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.load [[L0]]=, 0([[L0]])
- ; CHECK-NEXT: i32.const [[L2:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, [[L0]], [[L2]]
+ ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]])
+ ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16
+ ; CHECK-NEXT: i32.sub [[SP:.+]]=, $pop[[L2]], $pop[[L3]]
 ; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]]
- ; CHECK-NEXT: i32.const [[L3:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.store {{.*}}=, 0([[L3]]), [[SP]]
+ ; CHECK-NEXT: i32.const $push[[L4:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L4]]), [[SP]]
 %r1 = alloca i32
 %r = alloca i32, i32 %alloc
 store i32 0, i32* %r
- ; CHECK: i32.const [[L3:.+]]=, 16
- ; CHECK: i32.add [[SP]]=, [[FP]], [[L3]]
- ; CHECK: i32.const [[L4:.+]]=, __stack_pointer
- ; CHECK-NEXT: i32.store [[SP]]=, 0([[L4]]), [[SP]]
+ ; CHECK: i32.const $push[[L5:.+]]=, 16
+ ; CHECK-NEXT: i32.add [[SP]]=, [[FP]], $pop[[L5]]
+ ; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.store [[SP]]=, 0($pop[[L6]]), [[SP]]
 ret void
 }

@ -165,8 +169,8 @@ define void @dynamic_static_alloca(i32 %alloc) {
 ; CHECK-LABEL: copytoreg_fi:
 define void @copytoreg_fi(i1 %cond, i32* %b) {
 entry:
- ; CHECK: i32.const [[L2:.+]]=, 16
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, [[L2]]
+ ; CHECK: i32.const $push[[L1:.+]]=, 16
+ ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]]
 %addr = alloca i32
 ; CHECK: i32.const [[OFF:.+]]=, 12
 ; CHECK-NEXT: i32.add [[ADDR:.+]]=, [[SP]], [[OFF]]
@ -185,15 +189,13 @@ declare void @use_i8_star(i8*)
 declare i8* @llvm.frameaddress(i32)

 ; Test __builtin_frame_address(0).
-; TODO: When the prolog/epilog sequences are optimized, refine these checks to
-; be more specific.
-
 ; CHECK-LABEL: frameaddress_0:
-; CHECK: __stack_pointer
-; CHECK: load
-; CHECK: call use_i8_star
-; CHECK: __stack_pointer
-; CHECK: store
+; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer
+; CHECK-NEXT: i32.load [[SP:.+]]=, 0($pop[[L1]])
+; CHECK-NEXT: copy_local [[FP:.+]]=, [[SP]]
+; CHECK-NEXT: call use_i8_star@FUNCTION, [[FP]]
+; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer
+; CHECK-NEXT: i32.store [[SP]]=, 0($pop[[L6]]), [[FP]]
 define void @frameaddress_0() {
  %t = call i8* @llvm.frameaddress(i32 0)
  call void @use_i8_star(i8* %t)