diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index bd843fbe7d6..4a0359a37c9 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -163,10 +163,12 @@ static void Query(const MachineInstr *MI, AliasAnalysis &AA,
       // of memoperands as having a potential unknown memory reference.
       break;
     default:
-      // Record potential stores, unless it's a call, as calls are handled
+      // Record volatile accesses, unless it's a call, as calls are handled
       // specially below.
-      if (!MI->isCall())
+      if (!MI->isCall()) {
         Write = true;
+        Effects = true;
+      }
       break;
     }
   }
@@ -197,24 +199,15 @@ static void Query(const MachineInstr *MI, AliasAnalysis &AA,
   if (MI->isCall()) {
     switch (MI->getOpcode()) {
     case WebAssembly::CALL_VOID:
+    case WebAssembly::CALL_INDIRECT_VOID:
       QueryCallee(MI, 0, Read, Write, Effects, StackPointer);
       break;
-    case WebAssembly::CALL_I32:
-    case WebAssembly::CALL_I64:
-    case WebAssembly::CALL_F32:
-    case WebAssembly::CALL_F64:
+    case WebAssembly::CALL_I32: case WebAssembly::CALL_I64:
+    case WebAssembly::CALL_F32: case WebAssembly::CALL_F64:
+    case WebAssembly::CALL_INDIRECT_I32: case WebAssembly::CALL_INDIRECT_I64:
+    case WebAssembly::CALL_INDIRECT_F32: case WebAssembly::CALL_INDIRECT_F64:
       QueryCallee(MI, 1, Read, Write, Effects, StackPointer);
       break;
-    case WebAssembly::CALL_INDIRECT_VOID:
-    case WebAssembly::CALL_INDIRECT_I32:
-    case WebAssembly::CALL_INDIRECT_I64:
-    case WebAssembly::CALL_INDIRECT_F32:
-    case WebAssembly::CALL_INDIRECT_F64:
-      Read = true;
-      Write = true;
-      Effects = true;
-      StackPointer = true;
-      break;
     default:
       llvm_unreachable("unexpected call opcode");
     }
@@ -360,7 +353,8 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
                                      const MachineBasicBlock &MBB,
                                      const MachineRegisterInfo &MRI,
                                      const MachineDominatorTree &MDT,
-                                     LiveIntervals &LIS) {
+                                     LiveIntervals &LIS,
+                                     WebAssemblyFunctionInfo &MFI) {
   const LiveInterval &LI = LIS.getInterval(Reg);
 
   const MachineInstr *OneUseInst = OneUse.getParent();
@@ -384,8 +378,31 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
         return false;
     } else {
       // Test that the use is dominated by the one selected use.
-      if (!MDT.dominates(OneUseInst, UseInst))
-        return false;
+      while (!MDT.dominates(OneUseInst, UseInst)) {
+        // Actually, dominating is over-conservative. Test that the use would
+        // happen after the one selected use in the stack evaluation order.
+        //
+        // This is needed as a consequence of using implicit get_locals for
+        // uses and implicit set_locals for defs.
+        if (UseInst->getDesc().getNumDefs() == 0)
+          return false;
+        const MachineOperand &MO = UseInst->getOperand(0);
+        if (!MO.isReg())
+          return false;
+        unsigned DefReg = MO.getReg();
+        if (!TargetRegisterInfo::isVirtualRegister(DefReg) ||
+            !MFI.isVRegStackified(DefReg))
+          return false;
+        assert(MRI.hasOneUse(DefReg));
+        const MachineOperand &NewUse = *MRI.use_begin(DefReg);
+        const MachineInstr *NewUseInst = NewUse.getParent();
+        if (NewUseInst == OneUseInst) {
+          if (&OneUse > &NewUse)
+            return false;
+          break;
+        }
+        UseInst = NewUseInst;
+      }
     }
   }
   return true;
@@ -619,6 +636,9 @@ public:
   /// Test whether the given register is present on the stack, indicating an
   /// operand in the tree that we haven't visited yet. Moving a definition of
   /// Reg to a point in the tree after that would change its value.
+  ///
+  /// This is needed as a consequence of using implicit get_locals for
+  /// uses and implicit set_locals for defs.
   bool IsOnStack(unsigned Reg) const {
     for (const RangeTy &Range : Worklist)
       for (const MachineOperand &MO : Range)
@@ -763,7 +783,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
           Insert = RematerializeCheapDef(Reg, Op, Def, MBB, Insert, LIS, MFI,
                                          MRI, TII, TRI);
         } else if (CanMove &&
-                   OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS)) {
+                   OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
           Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
                                          MRI, TII);
         } else {
diff --git a/test/CodeGen/WebAssembly/reg-stackify.ll b/test/CodeGen/WebAssembly/reg-stackify.ll
index 0287efe40d4..c65910c3274 100644
--- a/test/CodeGen/WebAssembly/reg-stackify.ll
+++ b/test/CodeGen/WebAssembly/reg-stackify.ll
@@ -281,15 +281,36 @@ define i32 @commute() {
 ; an implicit get_local for the register.
 
 ; CHECK-LABEL: no_stackify_past_use:
-; CHECK: i32.call $1=, callee@FUNCTION, $0
+; CHECK: i32.call $1=, callee@FUNCTION, $0
+; CHECK-NEXT: i32.const $push0=, 1
+; CHECK-NEXT: i32.add $push1=, $0, $pop0
+; CHECK-NEXT: i32.call $push2=, callee@FUNCTION, $pop1
+; CHECK-NEXT: i32.sub $push3=, $pop2, $1
+; CHECK-NEXT: i32.div_s $push4=, $pop3, $1
+; CHECK-NEXT: return $pop4
+declare i32 @callee(i32)
+define i32 @no_stackify_past_use(i32 %arg) {
+ %tmp1 = call i32 @callee(i32 %arg)
+ %tmp2 = add i32 %arg, 1
+ %tmp3 = call i32 @callee(i32 %tmp2)
+ %tmp5 = sub i32 %tmp3, %tmp1
+ %tmp6 = sdiv i32 %tmp5, %tmp1
+ ret i32 %tmp6
+}
+
+; This is the same as no_stackify_past_use, except using a commutative operator,
+; so we can reorder the operands and stackify.
+
+; CHECK-LABEL: commute_to_fix_ordering:
+; CHECK: i32.call $push[[L0:.+]]=, callee@FUNCTION, $0
+; CHECK: tee_local $push[[L1:.+]]=, $1=, $pop[[L0]]
 ; CHECK: i32.const $push0=, 1
 ; CHECK: i32.add $push1=, $0, $pop0
 ; CHECK: i32.call $push2=, callee@FUNCTION, $pop1
 ; CHECK: i32.add $push3=, $1, $pop2
-; CHECK: i32.mul $push4=, $1, $pop3
+; CHECK: i32.mul $push4=, $pop[[L1]], $pop3
 ; CHECK: return $pop4
-declare i32 @callee(i32)
-define i32 @no_stackify_past_use(i32 %arg) {
+define i32 @commute_to_fix_ordering(i32 %arg) {
  %tmp1 = call i32 @callee(i32 %arg)
  %tmp2 = add i32 %arg, 1
  %tmp3 = call i32 @callee(i32 %tmp2)
diff --git a/test/CodeGen/WebAssembly/userstack.ll b/test/CodeGen/WebAssembly/userstack.ll
index 15dea2b9633..914bb072589 100644
--- a/test/CodeGen/WebAssembly/userstack.ll
+++ b/test/CodeGen/WebAssembly/userstack.ll
@@ -56,14 +56,14 @@ define void @allocarray() {
  ; CHECK-NEXT: i32.load $push[[L5:.+]]=, 0($pop[[L4]])
  ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 144{{$}}
  ; CHECK-NEXT: i32.sub $push[[L11:.+]]=, $pop[[L5]], $pop[[L6]]
- ; CHECK-NEXT: i32.store $[[SP:.+]]=, 0($pop[[L7]]), $pop[[L11]]
+ ; CHECK-NEXT: i32.store ${{.+}}=, 0($pop[[L7]]), $pop[[L11]]
  %r = alloca [33 x i32]
 
- ; CHECK-NEXT: i32.const $push[[L2:.+]]=, 24
- ; CHECK-NEXT: i32.add $push[[L3:.+]]=, $[[SP]], $pop[[L2]]
+ ; CHECK: i32.const $push{{.+}}=, 24
+ ; CHECK-NEXT: i32.add $push[[L3:.+]]=, $[[SP]], $pop{{.+}}
  ; CHECK-NEXT: i32.const $push[[L1:.+]]=, 1{{$}}
  ; CHECK-NEXT: i32.store $push[[L0:.+]]=, 0($pop[[L3]]), $pop[[L1]]{{$}}
- ; CHECK-NEXT: i32.store $discard=, 12($[[SP]]), $pop[[L0]]{{$}}
+ ; CHECK-NEXT: i32.store $discard=, 12(${{.+}}), $pop[[L0]]{{$}}
  %p = getelementptr [33 x i32], [33 x i32]* %r, i32 0, i32 0
  store i32 1, i32* %p
  %p2 = getelementptr [33 x i32], [33 x i32]* %r, i32 0, i32 3
@@ -111,20 +111,20 @@ define void @allocarray_inbounds() {
  ; CHECK-NEXT: i32.load $push[[L4:.+]]=, 0($pop[[L3]])
  ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 32{{$}}
  ; CHECK-NEXT: i32.sub $push[[L10:.+]]=, $pop[[L4]], $pop[[L5]]
- ; CHECK-NEXT: i32.store $[[SP:.+]]=, 0($pop[[L6]]), $pop[[L10]]{{$}}
+ ; CHECK-NEXT: i32.store ${{.+}}=, 0($pop[[L6]]), $pop[[L10]]{{$}}
  %r = alloca [5 x i32]
  ; CHECK: i32.const $push[[L3:.+]]=, 1
- ; CHECK: i32.store {{.*}}=, 12($[[SP]]), $pop[[L3]]
+ ; CHECK: i32.store {{.*}}=, 12(${{.+}}), $pop[[L3]]
  %p = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 0
  store i32 1, i32* %p
  ; This store should have both the GEP and the FI folded into it.
- ; CHECK-NEXT: i32.store {{.*}}=, 24($[[SP]]), $pop
+ ; CHECK-NEXT: i32.store {{.*}}=, 24(${{.+}}), $pop
  %p2 = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 3
  store i32 1, i32* %p2
  call void @ext_func(i64* null);
  ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer
  ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 32
- ; CHECK-NEXT: i32.add $push[[L7:.+]]=, $[[SP]], $pop[[L5]]
+ ; CHECK-NEXT: i32.add $push[[L7:.+]]=, ${{.+}}, $pop[[L5]]
  ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]]
  ret void
 }
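
Illustrative note, not part of the patch: with the CALL_INDIRECT cases folded into the QueryCallee path above, what the stackifier assumes about a call is driven by the callee operand. For a direct call, memory attributes on the called function can relax those assumptions; for an indirect call the callee operand is just a register, so QueryCallee still falls back to the conservative answer. A minimal IR sketch of the direct-call distinction, with hypothetical names @pure, @opaque, and @example:

; A callee marked readnone/nounwind is reported by QueryCallee as having no
; memory reads, writes, or other side effects (only possible stack-pointer
; use), so a load feeding a later use can, in principle, be stackified past
; a call to it.
declare i32 @pure(i32) readnone nounwind

; An attribute-free callee is assumed to read and write memory and to have
; side effects, which blocks that kind of reordering.
declare i32 @opaque(i32)

define i32 @example(i32* %p, i32 %x) {
  %v = load i32, i32* %p
  %a = call i32 @pure(i32 %x)
  ; %v may become a stack operand of the add because @pure cannot write *%p;
  ; with @opaque in place of @pure, %v would have to stay in a local.
  %s = add i32 %v, %a
  ret i32 %s
}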