diff --git a/js/public/ProfilingFrameIterator.h b/js/public/ProfilingFrameIterator.h index afae8a1da185..886ed806e9bd 100644 --- a/js/public/ProfilingFrameIterator.h +++ b/js/public/ProfilingFrameIterator.h @@ -96,11 +96,30 @@ class MOZ_NON_PARAM JS_PUBLIC_API ProfilingFrameIterator { public: struct RegisterState { - RegisterState() : pc(nullptr), sp(nullptr), fp(nullptr), lr(nullptr) {} + RegisterState() + : pc(nullptr), + sp(nullptr), + fp(nullptr), + unused1(nullptr), + unused2(nullptr) {} void* pc; void* sp; void* fp; - void* lr; + union { + // Value of the LR register on ARM platforms. + void* lr; + // The return address during a tail call operation. + // Note that for ARM is still the value of LR register. + void* tempRA; + // Undefined on non-ARM plaforms outside tail calls operations. + void* unused1; + }; + union { + // The FP reference during a tail call operation. + void* tempFP; + // Undefined outside tail calls operations. + void* unused2; + }; }; ProfilingFrameIterator( diff --git a/js/src/jit-test/tests/wasm/tail-calls/return-call-profiling.js b/js/src/jit-test/tests/wasm/tail-calls/return-call-profiling.js new file mode 100644 index 000000000000..2946a16d715e --- /dev/null +++ b/js/src/jit-test/tests/wasm/tail-calls/return-call-profiling.js @@ -0,0 +1,68 @@ +// Tests if the profiler (frame iterator) can unwind in the middle +// of collapse frame instructions. + +enableGeckoProfiling(); +try { + enableSingleStepProfiling(); +} catch (e) { + // continue anyway if single step profiling is not supported +} + +var ins = wasmEvalText(` +(module + (func $f (param i64 i64 i64 i64 i64 i64 i64 i64 i64) + local.get 0 + i64.eqz + br_if 0 + local.get 0 + return_call $g + ) + (func $g (param i64) + local.get 0 + i64.const 1 + i64.sub + i64.const 2 + i64.const 6 + i64.const 3 + i64.const 4 + i64.const 1 + i64.const 2 + i64.const 6 + i64.const 3 + return_call $f + ) + (func (export "run") (param i64) + local.get 0 + call $g + ) +)`); + +for (var i = 0; i < 10; i++) { + ins.exports.run(100n); +} + +// Also when trampoline is used. 
+var ins0 = wasmEvalText(`(module (func (export "t")))`); +var ins = wasmEvalText(` +(module + (import "" "t" (func $g)) + (func $f (return_call_indirect $t (i32.const 0))) + (table $t 1 1 funcref) + + (func (export "run") (param i64) + loop + local.get 0 + i64.eqz + br_if 1 + call $f + local.get 0 + i64.const 1 + i64.sub + local.set 0 + br 0 + end + ) + (elem (i32.const 0) $g) +)`, {"": {t: ins0.exports.t},}); + +ins.exports.run(10n); diff --git a/js/src/jit/MacroAssembler.cpp b/js/src/jit/MacroAssembler.cpp index 18ec32d09d5a..44adf600c62e 100644 --- a/js/src/jit/MacroAssembler.cpp +++ b/js/src/jit/MacroAssembler.cpp @@ -4748,13 +4748,16 @@ static ReturnCallTrampolineData MakeReturnCallTrampoline(MacroAssembler& masm) { masm.moveToStackPtr(FramePointer); # ifdef JS_CODEGEN_ARM64 masm.pop(FramePointer, lr); + masm.append(wasm::CodeRangeUnwindInfo::UseFpLr, masm.currentOffset()); masm.Mov(PseudoStackPointer64, vixl::sp); masm.abiret(); # else masm.pop(FramePointer); + masm.append(wasm::CodeRangeUnwindInfo::UseFp, masm.currentOffset()); masm.ret(); # endif + masm.append(wasm::CodeRangeUnwindInfo::Normal, masm.currentOffset()); masm.setFramePushed(savedPushed); return data; } @@ -4829,6 +4832,7 @@ static void CollapseWasmFrameFast(MacroAssembler& masm, masm.loadPtr(Address(FramePointer, wasm::Frame::callerFPOffset()), tempForFP); masm.loadPtr(Address(FramePointer, wasm::Frame::returnAddressOffset()), tempForRA); + masm.append(wasm::CodeRangeUnwindInfo::RestoreFpRa, masm.currentOffset()); bool copyCallerSlot = oldSlotsAndStackArgBytes != newSlotsAndStackArgBytes; if (copyCallerSlot) { masm.loadPtr( @@ -4868,12 +4872,21 @@ static void CollapseWasmFrameFast(MacroAssembler& masm, masm.storePtr(tempForRA, Address(FramePointer, newFrameOffset + wasm::Frame::returnAddressOffset())); - masm.pop(tempForRA); + // Restore tempForRA, but keep RA on top of the stack. + // There is no non-locking exchange instruction between register and memory. + // Using tempForCaller as scratch register. + masm.loadPtr(Address(masm.getStackPointer(), 0), tempForCaller); + masm.storePtr(tempForRA, Address(masm.getStackPointer(), 0)); + masm.mov(tempForCaller, tempForRA); + masm.append(wasm::CodeRangeUnwindInfo::RestoreFp, masm.currentOffset()); masm.addToStackPtr(Imm32(framePushedAtStart + newFrameOffset + - wasm::Frame::returnAddressOffset())); + wasm::Frame::returnAddressOffset() + sizeof(void*))); # endif masm.movePtr(tempForFP, FramePointer); + // Setting framePushed to pre-collapse state, to properly set that in the + // following code. + masm.setFramePushed(framePushedAtStart); } static void CollapseWasmFrameSlow(MacroAssembler& masm, @@ -4938,6 +4951,7 @@ static void CollapseWasmFrameSlow(MacroAssembler& masm, masm.loadPtr(Address(FramePointer, wasm::Frame::callerFPOffset()), tempForFP); masm.loadPtr(Address(FramePointer, wasm::Frame::returnAddressOffset()), tempForRA); + masm.append(wasm::CodeRangeUnwindInfo::RestoreFpRa, masm.currentOffset()); masm.loadPtr( Address(FramePointer, newArgSrc + WasmCallerInstanceOffsetBeforeCall), tempForCaller); @@ -4999,15 +5013,24 @@ static void CollapseWasmFrameSlow(MacroAssembler& masm, masm.storePtr(tempForRA, Address(FramePointer, newFrameOffset + wasm::Frame::returnAddressOffset())); - masm.pop(tempForRA); - masm.freeStack(reserved); + // Restore tempForRA, but keep RA on top of the stack. + // There is no non-locking exchange instruction between register and memory. + // Using tempForCaller as scratch register. 
+ masm.loadPtr(Address(masm.getStackPointer(), 0), tempForCaller); + masm.storePtr(tempForRA, Address(masm.getStackPointer(), 0)); + masm.mov(tempForCaller, tempForRA); + masm.append(wasm::CodeRangeUnwindInfo::RestoreFp, masm.currentOffset()); masm.addToStackPtr(Imm32(framePushedAtStart + newFrameOffset + - wasm::Frame::returnAddressOffset())); + wasm::Frame::returnAddressOffset() + reserved + + sizeof(void*))); # endif // Point FramePointer to hidden frame. masm.computeEffectiveAddress(Address(FramePointer, newFPOffset), FramePointer); + // Setting framePushed to pre-collapse state, to properly set that in the + // following code. + masm.setFramePushed(framePushedAtStart); } void MacroAssembler::wasmCollapseFrameFast( @@ -5028,6 +5051,7 @@ void MacroAssembler::wasmCollapseFrameSlow( wasmCheckSlowCallsite(temp1, &slow, temp1, temp2); CollapseWasmFrameFast(*this, retCallInfo); jump(&done); + append(wasm::CodeRangeUnwindInfo::Normal, currentOffset()); ReturnCallTrampolineData data = MakeReturnCallTrampoline(*this); @@ -5125,6 +5149,7 @@ CodeOffset MacroAssembler::wasmReturnCallImport( wasm::CallSiteDesc::ReturnStub); wasmCollapseFrameSlow(retCallInfo, stubDesc); jump(ABINonArgReg0); + append(wasm::CodeRangeUnwindInfo::Normal, currentOffset()); return CodeOffset(currentOffset()); } @@ -5134,6 +5159,7 @@ CodeOffset MacroAssembler::wasmReturnCall( wasmCollapseFrameFast(retCallInfo); CodeOffset offset = farJumpWithPatch(); append(desc, offset, funcDefIndex); + append(wasm::CodeRangeUnwindInfo::Normal, currentOffset()); return offset; } #endif // ENABLE_WASM_TAIL_CALLS @@ -5476,6 +5502,7 @@ void MacroAssembler::wasmReturnCallIndirect( wasmCollapseFrameSlow(retCallInfo, stubDesc); jump(calleeScratch); *slowCallOffset = CodeOffset(currentOffset()); + append(wasm::CodeRangeUnwindInfo::Normal, currentOffset()); // Fast path: just load the code pointer and go. @@ -5487,6 +5514,7 @@ void MacroAssembler::wasmReturnCallIndirect( wasmCollapseFrameFast(retCallInfo); jump(calleeScratch); *fastCallOffset = CodeOffset(currentOffset()); + append(wasm::CodeRangeUnwindInfo::Normal, currentOffset()); } #endif // ENABLE_WASM_TAIL_CALLS @@ -5600,6 +5628,7 @@ void MacroAssembler::wasmReturnCallRef( wasm::CallSiteDesc::ReturnStub); wasmCollapseFrameSlow(retCallInfo, stubDesc); jump(calleeScratch); + append(wasm::CodeRangeUnwindInfo::Normal, currentOffset()); // Fast path: just load WASM_FUNC_UNCHECKED_ENTRY_SLOT value and go. // The instance and pinned registers are the same as in the caller. 
@@ -5610,6 +5639,7 @@ void MacroAssembler::wasmReturnCallRef( wasmCollapseFrameFast(retCallInfo); jump(calleeScratch); + append(wasm::CodeRangeUnwindInfo::Normal, currentOffset()); } #endif diff --git a/js/src/jit/shared/Assembler-shared.h b/js/src/jit/shared/Assembler-shared.h index 2684c3c588e4..00eccc7ae9e9 100644 --- a/js/src/jit/shared/Assembler-shared.h +++ b/js/src/jit/shared/Assembler-shared.h @@ -617,6 +617,8 @@ class AssemblerShared { wasm::TrapSiteVectorArray trapSites_; wasm::SymbolicAccessVector symbolicAccesses_; wasm::TryNoteVector tryNotes_; + wasm::CodeRangeUnwindInfoVector codeRangesUnwind_; + #ifdef DEBUG // To facilitate figuring out which part of SM created each instruction as // shown by IONFLAGS=codegen, this maintains a stack of (notionally) @@ -694,11 +696,19 @@ class AssemblerShared { return true; } + void append(wasm::CodeRangeUnwindInfo::UnwindHow unwindHow, + uint32_t pcOffset) { + enoughMemory_ &= codeRangesUnwind_.emplaceBack(pcOffset, unwindHow); + } + wasm::CallSiteVector& callSites() { return callSites_; } wasm::CallSiteTargetVector& callSiteTargets() { return callSiteTargets_; } wasm::TrapSiteVectorArray& trapSites() { return trapSites_; } wasm::SymbolicAccessVector& symbolicAccesses() { return symbolicAccesses_; } wasm::TryNoteVector& tryNotes() { return tryNotes_; } + wasm::CodeRangeUnwindInfoVector& codeRangeUnwindInfos() { + return codeRangesUnwind_; + } }; // AutoCreatedBy pushes and later pops a who-created-these-insns? tag into the diff --git a/js/src/shell/js.cpp b/js/src/shell/js.cpp index 06ba0b1cbdcf..33965d23f575 100644 --- a/js/src/shell/js.cpp +++ b/js/src/shell/js.cpp @@ -7238,6 +7238,7 @@ static void SingleStepCallback(void* arg, jit::Simulator* sim, void* pc) { state.sp = (void*)sim->get_register(jit::Simulator::sp); state.lr = (void*)sim->get_register(jit::Simulator::lr); state.fp = (void*)sim->get_register(jit::Simulator::fp); + state.tempFP = (void*)sim->get_register(jit::Simulator::r7); # elif defined(JS_SIMULATOR_MIPS64) || defined(JS_SIMULATOR_MIPS32) state.sp = (void*)sim->getRegister(jit::Simulator::sp); state.lr = (void*)sim->getRegister(jit::Simulator::ra); diff --git a/js/src/wasm/WasmBaselineCompile.cpp b/js/src/wasm/WasmBaselineCompile.cpp index 49ed1582c2b8..ab557ee9a13b 100644 --- a/js/src/wasm/WasmBaselineCompile.cpp +++ b/js/src/wasm/WasmBaselineCompile.cpp @@ -11531,6 +11531,8 @@ bool js::wasm::BaselineCompileFunctions(const ModuleEnvironment& moduleEnv, return false; } + size_t unwindInfoBefore = masm.codeRangeUnwindInfos().length(); + // One-pass baseline compilation. 
BaseCompiler f(moduleEnv, compilerEnv, func, locals, trapExitLayout, @@ -11542,8 +11544,11 @@ bool js::wasm::BaselineCompileFunctions(const ModuleEnvironment& moduleEnv, if (!f.emitFunction()) { return false; } - if (!code->codeRanges.emplaceBack(func.index, func.lineOrBytecode, - f.finish())) { + FuncOffsets offsets(f.finish()); + bool hasUnwindInfo = + unwindInfoBefore != masm.codeRangeUnwindInfos().length(); + if (!code->codeRanges.emplaceBack(func.index, func.lineOrBytecode, offsets, + hasUnwindInfo)) { return false; } } diff --git a/js/src/wasm/WasmBuiltins.cpp b/js/src/wasm/WasmBuiltins.cpp index f1ae5c468968..acde49be1c22 100644 --- a/js/src/wasm/WasmBuiltins.cpp +++ b/js/src/wasm/WasmBuiltins.cpp @@ -1925,6 +1925,7 @@ bool wasm::EnsureBuiltinThunksInitialized() { MOZ_ASSERT(masm.callSiteTargets().empty()); MOZ_ASSERT(masm.trapSites().empty()); MOZ_ASSERT(masm.tryNotes().empty()); + MOZ_ASSERT(masm.codeRangeUnwindInfos().empty()); if (!ExecutableAllocator::makeExecutableAndFlushICache(thunks->codeBase, thunks->codeSize)) { diff --git a/js/src/wasm/WasmCode.cpp b/js/src/wasm/WasmCode.cpp index 87dc83d7bf35..b7aaa1869cbd 100644 --- a/js/src/wasm/WasmCode.cpp +++ b/js/src/wasm/WasmCode.cpp @@ -373,6 +373,7 @@ size_t MetadataTier::sizeOfExcludingThis(MallocSizeOf mallocSizeOf) const { codeRanges.sizeOfExcludingThis(mallocSizeOf) + callSites.sizeOfExcludingThis(mallocSizeOf) + tryNotes.sizeOfExcludingThis(mallocSizeOf) + + codeRangeUnwindInfos.sizeOfExcludingThis(mallocSizeOf) + trapSites.sizeOfExcludingThis(mallocSizeOf) + stackMaps.sizeOfExcludingThis(mallocSizeOf) + funcImports.sizeOfExcludingThis(mallocSizeOf) + @@ -534,6 +535,7 @@ bool LazyStubTier::createManyEntryStubs(const Uint32Vector& funcExportIndices, MOZ_ASSERT(masm.callSiteTargets().empty()); MOZ_ASSERT(masm.trapSites().empty()); MOZ_ASSERT(masm.tryNotes().empty()); + MOZ_ASSERT(masm.codeRangeUnwindInfos().empty()); if (masm.oom()) { return false; @@ -1083,6 +1085,39 @@ bool Code::lookupTrap(void* pc, Trap* trapOut, BytecodeOffset* bytecode) const { return false; } +struct UnwindInfoPCOffset { + const CodeRangeUnwindInfoVector& info; + explicit UnwindInfoPCOffset(const CodeRangeUnwindInfoVector& info) + : info(info) {} + uint32_t operator[](size_t index) const { return info[index].offset(); } +}; + +const CodeRangeUnwindInfo* Code::lookupUnwindInfo(void* pc) const { + for (Tier t : tiers()) { + uint32_t target = ((uint8_t*)pc) - segment(t).base(); + const CodeRangeUnwindInfoVector& unwindInfoArray = + metadata(t).codeRangeUnwindInfos; + size_t match; + const CodeRangeUnwindInfo* info = nullptr; + if (BinarySearch(UnwindInfoPCOffset(unwindInfoArray), 0, + unwindInfoArray.length(), target, &match)) { + info = &unwindInfoArray[match]; + } else { + // Exact match is not found, using insertion point to get the previous + // info entry; skip if info is outside of codeRangeUnwindInfos. + if (match == 0) continue; + if (match == unwindInfoArray.length()) { + MOZ_ASSERT(unwindInfoArray[unwindInfoArray.length() - 1].unwindHow() == + CodeRangeUnwindInfo::Normal); + continue; + } + info = &unwindInfoArray[match - 1]; + } + return info->unwindHow() == CodeRangeUnwindInfo::Normal ? nullptr : info; + } + return nullptr; +} + // When enabled, generate profiling labels for every name in funcNames_ that is // the name of some Function CodeRange. 
This involves malloc() so do it now // since, once we start sampling, we'll be in a signal-handing context where we diff --git a/js/src/wasm/WasmCode.h b/js/src/wasm/WasmCode.h index 3cfc84b35c1c..f5c7b9e078da 100644 --- a/js/src/wasm/WasmCode.h +++ b/js/src/wasm/WasmCode.h @@ -485,6 +485,7 @@ struct MetadataTier { FuncExportVector funcExports; StackMaps stackMaps; TryNoteVector tryNotes; + CodeRangeUnwindInfoVector codeRangeUnwindInfos; // Debug information, not serialized. uint32_t debugTrapOffset; @@ -847,6 +848,7 @@ class Code : public ShareableBase { const TryNote* lookupTryNote(void* pc, Tier* tier) const; bool containsCodePC(const void* pc) const; bool lookupTrap(void* pc, Trap* trap, BytecodeOffset* bytecode) const; + const CodeRangeUnwindInfo* lookupUnwindInfo(void* pc) const; // To save memory, profilingLabels_ are generated lazily when profiling mode // is enabled. diff --git a/js/src/wasm/WasmCodegenTypes.cpp b/js/src/wasm/WasmCodegenTypes.cpp index efd2e1851606..5558899721f4 100644 --- a/js/src/wasm/WasmCodegenTypes.cpp +++ b/js/src/wasm/WasmCodegenTypes.cpp @@ -102,6 +102,7 @@ CodeRange::CodeRange(Kind kind, uint32_t funcIndex, Offsets offsets) u.func.lineOrBytecode_ = 0; u.func.beginToUncheckedCallEntry_ = 0; u.func.beginToTierEntry_ = 0; + u.func.hasUnwindInfo_ = false; MOZ_ASSERT(isEntry()); MOZ_ASSERT(begin_ <= end_); } @@ -131,10 +132,11 @@ CodeRange::CodeRange(Kind kind, uint32_t funcIndex, CallableOffsets offsets) u.func.lineOrBytecode_ = 0; u.func.beginToUncheckedCallEntry_ = 0; u.func.beginToTierEntry_ = 0; + u.func.hasUnwindInfo_ = false; } CodeRange::CodeRange(uint32_t funcIndex, uint32_t funcLineOrBytecode, - FuncOffsets offsets) + FuncOffsets offsets, bool hasUnwindInfo) : begin_(offsets.begin), ret_(offsets.ret), end_(offsets.end), @@ -147,6 +149,7 @@ CodeRange::CodeRange(uint32_t funcIndex, uint32_t funcLineOrBytecode, u.func.lineOrBytecode_ = funcLineOrBytecode; u.func.beginToUncheckedCallEntry_ = offsets.uncheckedCallEntry - begin_; u.func.beginToTierEntry_ = offsets.tierEntry - begin_; + u.func.hasUnwindInfo_ = hasUnwindInfo; } const CodeRange* wasm::LookupInSorted(const CodeRangeVector& codeRanges, diff --git a/js/src/wasm/WasmCodegenTypes.h b/js/src/wasm/WasmCodegenTypes.h index 2aeaf83b0485..fe012bfeb5e0 100644 --- a/js/src/wasm/WasmCodegenTypes.h +++ b/js/src/wasm/WasmCodegenTypes.h @@ -272,6 +272,7 @@ class CodeRange { uint32_t lineOrBytecode_; uint16_t beginToUncheckedCallEntry_; uint16_t beginToTierEntry_; + bool hasUnwindInfo_; } func; }; }; @@ -282,7 +283,8 @@ class CodeRange { WASM_CHECK_CACHEABLE_POD(begin_, ret_, end_, u.funcIndex_, u.func.lineOrBytecode_, u.func.beginToUncheckedCallEntry_, - u.func.beginToTierEntry_, u.trap_, kind_); + u.func.beginToTierEntry_, u.func.hasUnwindInfo_, + u.trap_, kind_); public: CodeRange() = default; @@ -290,7 +292,8 @@ class CodeRange { CodeRange(Kind kind, uint32_t funcIndex, Offsets offsets); CodeRange(Kind kind, CallableOffsets offsets); CodeRange(Kind kind, uint32_t funcIndex, CallableOffsets); - CodeRange(uint32_t funcIndex, uint32_t lineOrBytecode, FuncOffsets offsets); + CodeRange(uint32_t funcIndex, uint32_t lineOrBytecode, FuncOffsets offsets, + bool hasUnwindInfo); void offsetBy(uint32_t offset) { begin_ += offset; @@ -376,6 +379,10 @@ class CodeRange { MOZ_ASSERT(isFunction()); return u.func.lineOrBytecode_; } + bool funcHasUnwindInfo() const { + MOZ_ASSERT(isFunction()); + return u.func.hasUnwindInfo_; + } // A sorted array of CodeRanges can be looked up via BinarySearch and // OffsetInCode. 
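For reference, the lookup rule implemented by Code::lookupUnwindInfo above reduces to "find the entry at or immediately before the pc offset, and treat Normal as no special unwinding". A minimal standalone sketch of that rule, assuming entries sorted by code offset (simplified names, not the actual SpiderMonkey types):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Simplified stand-ins for wasm::CodeRangeUnwindInfo entries.
enum class UnwindHow { Normal, RestoreFpRa, RestoreFp, UseFpLr, UseFp };
struct UnwindEntry {
  uint32_t offset;  // code offset at which this unwind mode takes effect
  UnwindHow how;
};

// Returns the entry governing `target`, or nullptr if unwinding is normal.
// Mirrors the "exact match, else previous entry" logic of lookupUnwindInfo.
const UnwindEntry* Lookup(const std::vector<UnwindEntry>& sorted,
                          uint32_t target) {
  // First entry strictly past `target`; the governing entry precedes it.
  auto it = std::upper_bound(
      sorted.begin(), sorted.end(), target,
      [](uint32_t t, const UnwindEntry& e) { return t < e.offset; });
  if (it == sorted.begin()) {
    return nullptr;  // target precedes all recorded entries
  }
  const UnwindEntry& prev = *(it - 1);
  return prev.how == UnwindHow::Normal ? nullptr : &prev;
}
```

The Normal entries emitted after each collapse sequence act as terminators, so a pc past the last special region falls back to ordinary frame-pointer unwinding.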
@@ -626,6 +633,36 @@ struct TryNote { WASM_DECLARE_CACHEABLE_POD(TryNote); WASM_DECLARE_POD_VECTOR(TryNote, TryNoteVector) +class CodeRangeUnwindInfo { + public: + enum UnwindHow { + Normal, + RestoreFpRa, + RestoreFp, + UseFpLr, + UseFp, + }; + + private: + uint32_t offset_; + UnwindHow unwindHow_; + + WASM_CHECK_CACHEABLE_POD(offset_, unwindHow_); + + public: + CodeRangeUnwindInfo(uint32_t offset, UnwindHow unwindHow) + : offset_(offset), unwindHow_(unwindHow) {} + + uint32_t offset() const { return offset_; } + UnwindHow unwindHow() const { return unwindHow_; } + + // Adjust all code offsets in this info by a delta. + void offsetBy(uint32_t offset) { offset_ += offset; } +}; + +WASM_DECLARE_CACHEABLE_POD(CodeRangeUnwindInfo); +WASM_DECLARE_POD_VECTOR(CodeRangeUnwindInfo, CodeRangeUnwindInfoVector) + enum class CallIndirectIdKind { // Generate a no-op signature check prologue, asm.js function tables are // homogenous. diff --git a/js/src/wasm/WasmFrameIter.cpp b/js/src/wasm/WasmFrameIter.cpp index 24d13fc2516c..03c73215e570 100644 --- a/js/src/wasm/WasmFrameIter.cpp +++ b/js/src/wasm/WasmFrameIter.cpp @@ -1101,6 +1101,21 @@ static bool CanUnwindSignatureCheck(uint8_t* fp) { return code && !codeRange->isEntry(); } +static bool GetUnwindInfo(const CodeSegment* codeSegment, + const CodeRange* codeRange, uint8_t* pc, + const CodeRangeUnwindInfo** info) { + if (!codeSegment->isModule()) { + return false; + } + if (!codeRange->isFunction() || !codeRange->funcHasUnwindInfo()) { + return false; + } + + const ModuleSegment* segment = codeSegment->asModule(); + *info = segment->code().lookupUnwindInfo(pc); + return *info; +} + const Instance* js::wasm::GetNearestEffectiveInstance(const Frame* fp) { while (true) { uint8_t* returnAddress = fp->returnAddress(); @@ -1360,6 +1375,33 @@ bool js::wasm::StartUnwinding(const RegisterState& registers, break; } + const CodeRangeUnwindInfo* unwindInfo; + if (codeSegment && + GetUnwindInfo(codeSegment, codeRange, pc, &unwindInfo)) { + switch (unwindInfo->unwindHow()) { + case CodeRangeUnwindInfo::RestoreFpRa: + fixedPC = (uint8_t*)registers.tempRA; + fixedFP = (uint8_t*)registers.tempFP; + break; + case CodeRangeUnwindInfo::RestoreFp: + fixedPC = sp[0]; + fixedFP = (uint8_t*)registers.tempFP; + break; + case CodeRangeUnwindInfo::UseFpLr: + fixedPC = (uint8_t*)registers.lr; + fixedFP = fp; + break; + case CodeRangeUnwindInfo::UseFp: + fixedPC = sp[0]; + fixedFP = fp; + break; + default: + MOZ_CRASH(); + } + MOZ_ASSERT(fixedPC && fixedFP); + break; + } + // Not in the prologue/epilogue. fixedPC = pc; fixedFP = fp; diff --git a/js/src/wasm/WasmGenerator.cpp b/js/src/wasm/WasmGenerator.cpp index cfb549576ea1..ff6cc2ac57c0 100644 --- a/js/src/wasm/WasmGenerator.cpp +++ b/js/src/wasm/WasmGenerator.cpp @@ -56,6 +56,7 @@ bool CompiledCode::swap(MacroAssembler& masm) { trapSites.swap(masm.trapSites()); symbolicAccesses.swap(masm.symbolicAccesses()); tryNotes.swap(masm.tryNotes()); + codeRangeUnwindInfos.swap(masm.codeRangeUnwindInfos()); codeLabels.swap(masm.codeLabels()); return true; } @@ -696,6 +697,14 @@ bool ModuleGenerator::linkCompiledCode(CompiledCode& code) { } } + auto unwindInfoOp = [=](uint32_t, CodeRangeUnwindInfo* i) { + i->offsetBy(offsetInModule); + }; + if (!AppendForEach(&metadataTier_->codeRangeUnwindInfos, + code.codeRangeUnwindInfos, unwindInfoOp)) { + return false; + } + auto tryNoteFilter = [](const TryNote* tn) { // Filter out all try notes that were never given a try body. This may // happen due to dead code elimination. 
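The switch added to StartUnwinding above encodes a small decision table: depending on how far the collapse sequence has progressed, the caller's return address and frame pointer are found either in the sampled temp registers or on top of the stack. A hedged, self-contained sketch of that decision (hypothetical helper, not the real StartUnwinding signature):

```cpp
#include <cstdint>

enum class UnwindHow { Normal, RestoreFpRa, RestoreFp, UseFpLr, UseFp };

struct SampledRegs {
  uint8_t* fp;      // frame pointer at the sampled pc
  uint8_t** sp;     // stack pointer at the sampled pc
  uint8_t* tempRA;  // return address held in a temp register (lr on ARM)
  uint8_t* tempFP;  // caller frame pointer held in a temp register
};

struct UnwoundCaller {
  uint8_t* pc;  // caller return address
  uint8_t* fp;  // caller frame pointer
};

// Mirrors the unwindHow() switch: pick where the caller's pc/fp currently live.
UnwoundCaller UnwindDuringCollapse(UnwindHow how, const SampledRegs& regs) {
  switch (how) {
    case UnwindHow::RestoreFpRa:  // both values still live in temp registers
      return {regs.tempRA, regs.tempFP};
    case UnwindHow::RestoreFp:    // RA already stored to the stack, FP in a temp
      return {*regs.sp, regs.tempFP};
    case UnwindHow::UseFpLr:      // trampoline popped FP and lr; RA is in lr
      return {regs.tempRA, regs.fp};
    case UnwindHow::UseFp:        // trampoline popped FP; RA is on top of stack
      return {*regs.sp, regs.fp};
    case UnwindHow::Normal:
    default:
      return {nullptr, nullptr};  // no special handling required
  }
}
```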
@@ -975,6 +984,12 @@ bool ModuleGenerator::finishMetadataTier() { } } + last = 0; + for (const CodeRangeUnwindInfo& info : metadataTier_->codeRangeUnwindInfos) { + MOZ_ASSERT(info.offset() >= last); + last = info.offset(); + } + // Try notes should be sorted so that the end of ranges are in rising order // so that the innermost catch handler is chosen. last = 0; diff --git a/js/src/wasm/WasmGenerator.h b/js/src/wasm/WasmGenerator.h index 032158b7a75e..656a86be41e5 100644 --- a/js/src/wasm/WasmGenerator.h +++ b/js/src/wasm/WasmGenerator.h @@ -73,6 +73,7 @@ struct CompiledCode { jit::CodeLabelVector codeLabels; StackMaps stackMaps; TryNoteVector tryNotes; + CodeRangeUnwindInfoVector codeRangeUnwindInfos; [[nodiscard]] bool swap(jit::MacroAssembler& masm); @@ -86,6 +87,7 @@ struct CompiledCode { codeLabels.clear(); stackMaps.clear(); tryNotes.clear(); + codeRangeUnwindInfos.clear(); MOZ_ASSERT(empty()); } @@ -93,7 +95,7 @@ struct CompiledCode { return bytes.empty() && codeRanges.empty() && callSites.empty() && callSiteTargets.empty() && trapSites.empty() && symbolicAccesses.empty() && codeLabels.empty() && tryNotes.empty() && - stackMaps.empty(); + stackMaps.empty() && codeRangeUnwindInfos.empty(); } size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const; diff --git a/js/src/wasm/WasmIonCompile.cpp b/js/src/wasm/WasmIonCompile.cpp index 9887be759928..094e50d52558 100644 --- a/js/src/wasm/WasmIonCompile.cpp +++ b/js/src/wasm/WasmIonCompile.cpp @@ -9121,6 +9121,8 @@ bool wasm::IonCompileFunctions(const ModuleEnvironment& moduleEnv, return false; } + size_t unwindInfoBefore = masm.codeRangeUnwindInfos().length(); + CodeGenerator codegen(&mir, lir, &masm); BytecodeOffset prologueTrapOffset(func.lineOrBytecode); @@ -9133,8 +9135,10 @@ bool wasm::IonCompileFunctions(const ModuleEnvironment& moduleEnv, return false; } + bool hasUnwindInfo = + unwindInfoBefore != masm.codeRangeUnwindInfos().length(); if (!code->codeRanges.emplaceBack(func.index, func.lineOrBytecode, - offsets)) { + offsets, hasUnwindInfo)) { return false; } } diff --git a/js/src/wasm/WasmSerialize.cpp b/js/src/wasm/WasmSerialize.cpp index 9c136349f652..584539a7b5a1 100644 --- a/js/src/wasm/WasmSerialize.cpp +++ b/js/src/wasm/WasmSerialize.cpp @@ -1026,7 +1026,7 @@ template CoderResult CodeMetadataTier(Coder& coder, CoderArg item, const uint8_t* codeStart) { - WASM_VERIFY_SERIALIZATION_FOR_SIZE(wasm::MetadataTier, 856); + WASM_VERIFY_SERIALIZATION_FOR_SIZE(wasm::MetadataTier, 896); MOZ_TRY(Magic(coder, Marker::MetadataTier)); MOZ_TRY(CodePodVector(coder, &item->funcToCodeRange)); MOZ_TRY(CodePodVector(coder, &item->codeRanges)); diff --git a/js/src/wasm/WasmStubs.cpp b/js/src/wasm/WasmStubs.cpp index decf41d97d6f..a12a5f8dc0d7 100644 --- a/js/src/wasm/WasmStubs.cpp +++ b/js/src/wasm/WasmStubs.cpp @@ -1932,7 +1932,7 @@ bool wasm::GenerateImportFunctions(const ModuleEnvironment& env, return false; } if (!code->codeRanges.emplaceBack(funcIndex, /* bytecodeOffset = */ 0, - offsets)) { + offsets, /* hasUnwindInfo = */ false)) { return false; } } diff --git a/js/xpconnect/tests/unit/test_wasm_tailcalls_profiler.js b/js/xpconnect/tests/unit/test_wasm_tailcalls_profiler.js new file mode 100644 index 000000000000..64f29b9510e0 --- /dev/null +++ b/js/xpconnect/tests/unit/test_wasm_tailcalls_profiler.js @@ -0,0 +1,122 @@ +Services.prefs.setBoolPref("javascript.options.wasm_tail_calls", true); +registerCleanupFunction(() => { + Services.prefs.clearUserPref("javascript.options.wasm_tail_calls"); +}); + +// The tests runs code in 
tight loop with the profiler enabled. It is testing +// behavoir of MacroAssembler::wasmCollapseFrameXXXX methods. +// It is not guarantee 100% hit since the profiler probes stacks every 1ms, +// but it will happen often enough. +add_task(async () => { + await Services.profiler.StartProfiler(10, 1, ["js"], ["GeckoMain"]); + Assert.ok(Services.profiler.IsActive()); + +/* Wasm module that is tested: +(module + (func $f (param i64 i64 i64 i64 i64 i64 i64 i64 i64) + local.get 0 + i64.eqz + br_if 0 + local.get 0 + return_call $g + ) + (func $g (param i64) + local.get 0 + i64.const 1 + i64.sub + i64.const 2 + i64.const 6 + i64.const 3 + i64.const 4 + i64.const 1 + i64.const 2 + i64.const 6 + i64.const 3 + return_call $f + ) + (func (export "run") + i64.const 0x100000 + call $g + ) +) +*/ + + const b = new Uint8Array([ + 0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x14, 0x03, 0x60, + 0x09, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x00, 0x60, + 0x01, 0x7e, 0x00, 0x60, 0x00, 0x00, 0x03, 0x04, 0x03, 0x00, 0x01, 0x02, + 0x07, 0x07, 0x01, 0x03, 0x72, 0x75, 0x6e, 0x00, 0x02, 0x0a, 0x31, 0x03, + 0x0b, 0x00, 0x20, 0x00, 0x50, 0x0d, 0x00, 0x20, 0x00, 0x12, 0x01, 0x0b, + 0x19, 0x00, 0x20, 0x00, 0x42, 0x01, 0x7d, 0x42, 0x02, 0x42, 0x06, 0x42, + 0x03, 0x42, 0x04, 0x42, 0x01, 0x42, 0x02, 0x42, 0x06, 0x42, 0x03, 0x12, + 0x00, 0x0b, 0x09, 0x00, 0x42, 0x80, 0x80, 0xc0, 0x00, 0x10, 0x01, 0x0b + ]); + const ins = new WebAssembly.Instance(new WebAssembly.Module(b)); + for (var i = 0; i < 100; i++) { + ins.exports.run(); + } + + Assert.ok(true, "Done"); + await Services.profiler.StopProfiler(); +}); + +add_task(async () => { + await Services.profiler.StartProfiler(10, 1, ["js"], ["GeckoMain"]); + Assert.ok(Services.profiler.IsActive()); + +/* Wasm modules that are tested: +(module (func (export "t"))) + +(module + (import "" "t" (func $g)) + (table $t 1 1 funcref) + + (func $f (return_call_indirect $t (i32.const 0))) + (func (export "run") (param i64) + loop + local.get 0 + i64.eqz + br_if 1 + call $f + local.get 0 + i64.const 1 + i64.sub + local.set 0 + br 0 + end + ) + (elem (i32.const 0) $g) +) +*/ + const b0 = new Uint8Array([ + 0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x04, 0x01, 0x60, + 0x00, 0x00, 0x03, 0x02, 0x01, 0x00, 0x07, 0x05, 0x01, 0x01, 0x74, 0x00, + 0x00, 0x0a, 0x04, 0x01, 0x02, 0x00, 0x0b + ]); + const ins0 = new WebAssembly.Instance(new WebAssembly.Module(b0)); + const b = new Uint8Array([ + 0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x08, 0x02, 0x60, + 0x00, 0x00, 0x60, 0x01, 0x7e, 0x00, 0x02, 0x06, 0x01, 0x00, 0x01, 0x74, + 0x00, 0x00, 0x03, 0x03, 0x02, 0x00, 0x01, 0x04, 0x05, 0x01, 0x70, 0x01, + 0x01, 0x01, 0x07, 0x07, 0x01, 0x03, 0x72, 0x75, 0x6e, 0x00, 0x02, 0x09, + 0x07, 0x01, 0x00, 0x41, 0x00, 0x0b, 0x01, 0x00, 0x0a, 0x1f, 0x02, 0x07, + 0x00, 0x41, 0x00, 0x13, 0x00, 0x00, 0x0b, 0x15, 0x00, 0x03, 0x40, 0x20, + 0x00, 0x50, 0x0d, 0x01, 0x10, 0x01, 0x20, 0x00, 0x42, 0x01, 0x7d, 0x21, + 0x00, 0x0c, 0x00, 0x0b, 0x0b + ]); + const ins = new WebAssembly.Instance(new WebAssembly.Module(b), {"": {t: ins0.exports.t,},}); + for (var i = 0; i < 100; i++) { + ins.exports.run(0x100000n); + } + + Assert.ok(true, "Done"); + await Services.profiler.StopProfiler(); +}); + +/** + * All the tests are implemented with add_task, this starts them automatically. 
+ */ +function run_test() { + do_get_profile(); + run_next_test(); +} diff --git a/js/xpconnect/tests/unit/xpcshell.ini b/js/xpconnect/tests/unit/xpcshell.ini index 616fe6330b83..f1b0c4c38ac4 100644 --- a/js/xpconnect/tests/unit/xpcshell.ini +++ b/js/xpconnect/tests/unit/xpcshell.ini @@ -220,3 +220,6 @@ skip-if = [test_envChain_subscript.js] [test_envChain_subscript_in_JSM.js] [test_import_syntax_error.js] +[test_wasm_tailcalls_profiler.js] +skip-if = tsan + !nightly_build diff --git a/mozglue/baseprofiler/core/platform-linux-android.cpp b/mozglue/baseprofiler/core/platform-linux-android.cpp index 7921d9f4fd0f..db53de321c4f 100644 --- a/mozglue/baseprofiler/core/platform-linux-android.cpp +++ b/mozglue/baseprofiler/core/platform-linux-android.cpp @@ -91,32 +91,38 @@ static void PopulateRegsFromContext(Registers& aRegs, ucontext_t* aContext) { aRegs.mPC = reinterpret_cast
<Address>(mcontext.gregs[REG_EIP]); aRegs.mSP = reinterpret_cast<Address>
(mcontext.gregs[REG_ESP]); aRegs.mFP = reinterpret_cast<Address>
(mcontext.gregs[REG_EBP]); - aRegs.mLR = 0; + aRegs.mEcx = reinterpret_cast<Address>
(mcontext.gregs[REG_ECX]); + aRegs.mEdx = reinterpret_cast<Address>
(mcontext.gregs[REG_EDX]); #elif defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) aRegs.mPC = reinterpret_cast<Address>
(mcontext.gregs[REG_RIP]); aRegs.mSP = reinterpret_cast<Address>
(mcontext.gregs[REG_RSP]); aRegs.mFP = reinterpret_cast<Address>
(mcontext.gregs[REG_RBP]); - aRegs.mLR = 0; + aRegs.mR10 = reinterpret_cast<Address>
(mcontext.gregs[REG_R10]); + aRegs.mR12 = reinterpret_cast<Address>
(mcontext.gregs[REG_R12]); #elif defined(GP_PLAT_amd64_freebsd) aRegs.mPC = reinterpret_cast<Address>
(mcontext.mc_rip); aRegs.mSP = reinterpret_cast<Address>
(mcontext.mc_rsp); aRegs.mFP = reinterpret_cast<Address>
(mcontext.mc_rbp); - aRegs.mLR = 0; + aRegs.mR10 = reinterpret_cast<Address>
(mcontext.mc_r10); + aRegs.mR12 = reinterpret_cast<Address>
(mcontext.mc_r12); #elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) aRegs.mPC = reinterpret_cast<Address>
(mcontext.arm_pc); aRegs.mSP = reinterpret_cast<Address>
(mcontext.arm_sp); aRegs.mFP = reinterpret_cast<Address>
(mcontext.arm_fp); aRegs.mLR = reinterpret_cast<Address>
(mcontext.arm_lr); + aRegs.mR7 = reinterpret_cast<Address>
(mcontext.arm_r7); #elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) aRegs.mPC = reinterpret_cast<Address>
(mcontext.pc); aRegs.mSP = reinterpret_cast<Address>
(mcontext.sp); aRegs.mFP = reinterpret_cast<Address>
(mcontext.regs[29]); aRegs.mLR = reinterpret_cast<Address>
(mcontext.regs[30]); + aRegs.mR11 = reinterpret_cast<Address>
(mcontext.regs[11]); #elif defined(GP_PLAT_arm64_freebsd) aRegs.mPC = reinterpret_cast<Address>
(mcontext.mc_gpregs.gp_elr); aRegs.mSP = reinterpret_cast<Address>
(mcontext.mc_gpregs.gp_sp); aRegs.mFP = reinterpret_cast<Address>
(mcontext.mc_gpregs.gp_x[29]); aRegs.mLR = reinterpret_cast<Address>
(mcontext.mc_gpregs.gp_lr); + aRegs.mR11 = reinterpret_cast<Address>
(mcontext.mc_gpregs.gp_x[11]); #elif defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_mips64_android) aRegs.mPC = reinterpret_cast<Address>
(mcontext.pc); aRegs.mSP = reinterpret_cast<Address>
(mcontext.gregs[29]); diff --git a/mozglue/baseprofiler/core/platform-macos.cpp b/mozglue/baseprofiler/core/platform-macos.cpp index 6e6e801f1f9b..87ce3eedeba5 100644 --- a/mozglue/baseprofiler/core/platform-macos.cpp +++ b/mozglue/baseprofiler/core/platform-macos.cpp @@ -131,12 +131,14 @@ void Sampler::SuspendAndSampleAndResumeThread( regs.mPC = reinterpret_cast
<Address>(state.REGISTER_FIELD(ip)); regs.mSP = reinterpret_cast<Address>
(state.REGISTER_FIELD(sp)); regs.mFP = reinterpret_cast<Address>
(state.REGISTER_FIELD(bp)); - regs.mLR = 0; + regs.mR10 = reinterpret_cast<Address>
(state.REGISTER_FIELD(10)); + regs.mR12 = reinterpret_cast<Address>
(state.REGISTER_FIELD(12)); #elif defined(__aarch64__) regs.mPC = reinterpret_cast<Address>
(state.REGISTER_FIELD(pc)); regs.mSP = reinterpret_cast<Address>
(state.REGISTER_FIELD(sp)); regs.mFP = reinterpret_cast<Address>
(state.REGISTER_FIELD(fp)); regs.mLR = reinterpret_cast<Address>
(state.REGISTER_FIELD(lr)); + regs.mR11 = reinterpret_cast<Address>
(state.REGISTER_FIELD(x[11])); #else # error "unknown architecture" #endif @@ -211,8 +213,7 @@ static void PlatformInit(PSLockRef aLock) {} regs.mFP = reinterpret_cast<Address>
(__builtin_frame_address(1)); \ _Pragma("GCC diagnostic pop") \ regs.mPC = reinterpret_cast<Address>
( \ - __builtin_extract_return_addr(__builtin_return_address(0))); \ - regs.mLR = 0; + __builtin_extract_return_addr(__builtin_return_address(0))); #endif // clang-format on diff --git a/mozglue/baseprofiler/core/platform-win32.cpp b/mozglue/baseprofiler/core/platform-win32.cpp index e3f1e54fe6b1..d2ddf1a5904b 100644 --- a/mozglue/baseprofiler/core/platform-win32.cpp +++ b/mozglue/baseprofiler/core/platform-win32.cpp @@ -60,17 +60,20 @@ static void PopulateRegsFromContext(Registers& aRegs, CONTEXT* aContext) { aRegs.mPC = reinterpret_cast
<Address>(aContext->Rip); aRegs.mSP = reinterpret_cast<Address>
(aContext->Rsp); aRegs.mFP = reinterpret_cast<Address>
(aContext->Rbp); - aRegs.mLR = 0; + aRegs.mR10 = reinterpret_cast<Address>
(aContext->R10); + aRegs.mR12 = reinterpret_cast<Address>
(aContext->R12); #elif defined(GP_ARCH_x86) aRegs.mPC = reinterpret_cast<Address>
(aContext->Eip); aRegs.mSP = reinterpret_cast<Address>
(aContext->Esp); aRegs.mFP = reinterpret_cast<Address>
(aContext->Ebp); - aRegs.mLR = 0; + aRegs.mEcx = reinterpret_cast<Address>
(aContext->Ecx); + aRegs.mEdx = reinterpret_cast<Address>
(aContext->Edx); #elif defined(GP_ARCH_arm64) aRegs.mPC = reinterpret_cast<Address>
(aContext->Pc); aRegs.mSP = reinterpret_cast<Address>
(aContext->Sp); aRegs.mFP = reinterpret_cast<Address>
(aContext->Fp); aRegs.mLR = reinterpret_cast<Address>
(aContext->Lr); + aRegs.mR11 = reinterpret_cast<Address>
(aContext->X11); #else # error "bad arch" #endif @@ -160,7 +163,7 @@ void Sampler::SuspendAndSampleAndResumeThread( #if defined(GP_ARCH_amd64) context.ContextFlags = CONTEXT_FULL; #else - context.ContextFlags = CONTEXT_CONTROL; + context.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; #endif if (!GetThreadContext(profiled_thread, &context)) { ResumeThread(profiled_thread); diff --git a/mozglue/baseprofiler/core/platform.cpp b/mozglue/baseprofiler/core/platform.cpp index 71d714f93875..035f45a07ecb 100644 --- a/mozglue/baseprofiler/core/platform.cpp +++ b/mozglue/baseprofiler/core/platform.cpp @@ -1196,11 +1196,49 @@ static const char* const kMainThreadName = "GeckoMain"; //////////////////////////////////////////////////////////////////////// // BEGIN sampling/unwinding code +// Additional registers that have to be saved when thread is paused. +#if defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) || \ + defined(GP_ARCH_x86) +# define UNWINDING_REGS_HAVE_ECX_EDX +#elif defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \ + defined(GP_PLAT_amd64_freebsd) || defined(GP_ARCH_amd64) || \ + defined(__x86_64__) +# define UNWINDING_REGS_HAVE_R10_R12 +#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) +# define UNWINDING_REGS_HAVE_LR_R7 +#elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \ + defined(GP_PLAT_arm64_freebsd) || defined(GP_ARCH_arm64) || \ + defined(__aarch64__) +# define UNWINDING_REGS_HAVE_LR_R11 +#endif + // The registers used for stack unwinding and a few other sampling purposes. // The ctor does nothing; users are responsible for filling in the fields. class Registers { public: - Registers() : mPC{nullptr}, mSP{nullptr}, mFP{nullptr}, mLR{nullptr} {} + Registers() + : mPC{nullptr}, + mSP{nullptr}, + mFP{nullptr} +#if defined(UNWINDING_REGS_HAVE_ECX_EDX) + , + mEcx{nullptr}, + mEdx{nullptr} +#elif defined(UNWINDING_REGS_HAVE_R10_R12) + , + mR10{nullptr}, + mR12{nullptr} +#elif defined(UNWINDING_REGS_HAVE_LR_R7) + , + mLR{nullptr}, + mR7{nullptr} +#elif defined(UNWINDING_REGS_HAVE_LR_R11) + , + mLR{nullptr}, + mR11{nullptr} +#endif + { + } void Clear() { memset(this, 0, sizeof(*this)); } @@ -1210,7 +1248,20 @@ class Registers { Address mPC; // Instruction pointer. Address mSP; // Stack pointer. Address mFP; // Frame pointer. - Address mLR; // ARM link register. +#if defined(UNWINDING_REGS_HAVE_ECX_EDX) + Address mEcx; // Temp for return address. + Address mEdx; // Temp for frame pointer. +#elif defined(UNWINDING_REGS_HAVE_R10_R12) + Address mR10; // Temp for return address. + Address mR12; // Temp for frame pointer. +#elif defined(UNWINDING_REGS_HAVE_LR_R7) + Address mLR; // ARM link register, or temp for return address. + Address mR7; // Temp for frame pointer. +#elif defined(UNWINDING_REGS_HAVE_LR_R11) + Address mLR; // ARM link register, or temp for return address. + Address mR11; // Temp for frame pointer. +#endif + #if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) // This contains all the registers, which means it duplicates the four fields // above. This is ok. 
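The temp registers captured above are not arbitrary: they correspond to the scratch registers the wasm frame-collapse code keeps the caller's return address and frame pointer in (ecx/edx on x86, r10/r12 on x64, lr/r7 on ARM, lr/r11 on ARM64), so a sample taken mid-collapse still sees both values. On Windows this is also why the sampler now requests CONTEXT_INTEGER: CONTEXT_CONTROL alone does not populate the general-purpose registers. A hedged Win32 sketch of that capture (x64 branch only, hypothetical helper; the real code is in platform-win32.cpp above):

```cpp
#include <windows.h>

// Captures pc/sp/fp plus the two tail-call temp registers of a paused thread.
bool CaptureUnwindRegs(HANDLE thread, void** pc, void** sp, void** fp,
                       void** tempRA, void** tempFP) {
  CONTEXT context;
  // CONTEXT_INTEGER is required so that R10/R12 are actually filled in.
  context.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
  if (!GetThreadContext(thread, &context)) {
    return false;
  }
#if defined(_M_X64)
  *pc = reinterpret_cast<void*>(context.Rip);
  *sp = reinterpret_cast<void*>(context.Rsp);
  *fp = reinterpret_cast<void*>(context.Rbp);
  *tempRA = reinterpret_cast<void*>(context.R10);  // return-address temp
  *tempFP = reinterpret_cast<void*>(context.R12);  // frame-pointer temp
  return true;
#else
  return false;  // other architectures omitted in this sketch
#endif
}
```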
@@ -1696,6 +1747,11 @@ static void DoPeriodicSample(PSLockRef aLock, aSamplePos, aBufferRangeStart, aBuffer); } +#undef UNWINDING_REGS_HAVE_ECX_EDX +#undef UNWINDING_REGS_HAVE_R10_R12 +#undef UNWINDING_REGS_HAVE_LR_R7 +#undef UNWINDING_REGS_HAVE_LR_R11 + // END sampling/unwinding code //////////////////////////////////////////////////////////////////////// diff --git a/tools/profiler/core/platform-linux-android.cpp b/tools/profiler/core/platform-linux-android.cpp index 6bcb9cf38ba2..1961aa94357d 100644 --- a/tools/profiler/core/platform-linux-android.cpp +++ b/tools/profiler/core/platform-linux-android.cpp @@ -83,32 +83,38 @@ static void PopulateRegsFromContext(Registers& aRegs, ucontext_t* aContext) { aRegs.mPC = reinterpret_cast
<Address>(mcontext.gregs[REG_EIP]); aRegs.mSP = reinterpret_cast<Address>
(mcontext.gregs[REG_ESP]); aRegs.mFP = reinterpret_cast<Address>
(mcontext.gregs[REG_EBP]); - aRegs.mLR = 0; + aRegs.mEcx = reinterpret_cast<Address>
(mcontext.gregs[REG_ECX]); + aRegs.mEdx = reinterpret_cast<Address>
(mcontext.gregs[REG_EDX]); #elif defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) aRegs.mPC = reinterpret_cast<Address>
(mcontext.gregs[REG_RIP]); aRegs.mSP = reinterpret_cast<Address>
(mcontext.gregs[REG_RSP]); aRegs.mFP = reinterpret_cast<Address>
(mcontext.gregs[REG_RBP]); - aRegs.mLR = 0; + aRegs.mR10 = reinterpret_cast<Address>
(mcontext.gregs[REG_R10]); + aRegs.mR12 = reinterpret_cast<Address>
(mcontext.gregs[REG_R12]); #elif defined(GP_PLAT_amd64_freebsd) aRegs.mPC = reinterpret_cast<Address>
(mcontext.mc_rip); aRegs.mSP = reinterpret_cast<Address>
(mcontext.mc_rsp); aRegs.mFP = reinterpret_cast<Address>
(mcontext.mc_rbp); - aRegs.mLR = 0; + aRegs.mR10 = reinterpret_cast<Address>
(mcontext.mc_r10); + aRegs.mR12 = reinterpret_cast<Address>
(mcontext.mc_r12); #elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) aRegs.mPC = reinterpret_cast<Address>
(mcontext.arm_pc); aRegs.mSP = reinterpret_cast<Address>
(mcontext.arm_sp); aRegs.mFP = reinterpret_cast<Address>
(mcontext.arm_fp); aRegs.mLR = reinterpret_cast<Address>
(mcontext.arm_lr); + aRegs.mR7 = reinterpret_cast<Address>
(mcontext.arm_r7); #elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) aRegs.mPC = reinterpret_cast<Address>
(mcontext.pc); aRegs.mSP = reinterpret_cast<Address>
(mcontext.sp); aRegs.mFP = reinterpret_cast<Address>
(mcontext.regs[29]); aRegs.mLR = reinterpret_cast<Address>
(mcontext.regs[30]); + aRegs.mR11 = reinterpret_cast<Address>
(mcontext.regs[11]); #elif defined(GP_PLAT_arm64_freebsd) aRegs.mPC = reinterpret_cast<Address>
(mcontext.mc_gpregs.gp_elr); aRegs.mSP = reinterpret_cast<Address>
(mcontext.mc_gpregs.gp_sp); aRegs.mFP = reinterpret_cast<Address>
(mcontext.mc_gpregs.gp_x[29]); aRegs.mLR = reinterpret_cast<Address>
(mcontext.mc_gpregs.gp_lr); + aRegs.mR11 = reinterpret_cast<Address>
(mcontext.mc_gpregs.gp_x[11]); #elif defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_mips64_android) aRegs.mPC = reinterpret_cast<Address>
(mcontext.pc); aRegs.mSP = reinterpret_cast<Address>
(mcontext.gregs[29]); diff --git a/tools/profiler/core/platform-macos.cpp b/tools/profiler/core/platform-macos.cpp index b69a346d64c4..356d9f803e72 100644 --- a/tools/profiler/core/platform-macos.cpp +++ b/tools/profiler/core/platform-macos.cpp @@ -205,12 +205,14 @@ void Sampler::SuspendAndSampleAndResumeThread( regs.mPC = reinterpret_cast
<Address>(state.REGISTER_FIELD(ip)); regs.mSP = reinterpret_cast<Address>
(state.REGISTER_FIELD(sp)); regs.mFP = reinterpret_cast<Address>
(state.REGISTER_FIELD(bp)); - regs.mLR = 0; + regs.mR10 = reinterpret_cast<Address>
(state.REGISTER_FIELD(10)); + regs.mR12 = reinterpret_cast<Address>
(state.REGISTER_FIELD(12)); #elif defined(__aarch64__) regs.mPC = reinterpret_cast<Address>
(state.REGISTER_FIELD(pc)); regs.mSP = reinterpret_cast<Address>
(state.REGISTER_FIELD(sp)); regs.mFP = reinterpret_cast<Address>
(state.REGISTER_FIELD(fp)); regs.mLR = reinterpret_cast<Address>
(state.REGISTER_FIELD(lr)); + regs.mR11 = reinterpret_cast<Address>
(state.REGISTER_FIELD(x[11])); #else # error "unknown architecture" #endif @@ -291,7 +293,6 @@ static void PlatformInit(PSLockRef aLock) {} regs.mFP = reinterpret_cast<Address>
(__builtin_frame_address(1)); \ _Pragma("GCC diagnostic pop") \ regs.mPC = reinterpret_cast<Address>
( \ - __builtin_extract_return_addr(__builtin_return_address(0))); \ - regs.mLR = 0; + __builtin_extract_return_addr(__builtin_return_address(0))); #endif // clang-format on diff --git a/tools/profiler/core/platform-win32.cpp b/tools/profiler/core/platform-win32.cpp index f57aad4b4f5b..0e5c1c9dbb77 100644 --- a/tools/profiler/core/platform-win32.cpp +++ b/tools/profiler/core/platform-win32.cpp @@ -41,17 +41,20 @@ static void PopulateRegsFromContext(Registers& aRegs, CONTEXT* aContext) { aRegs.mPC = reinterpret_cast
<Address>(aContext->Rip); aRegs.mSP = reinterpret_cast<Address>
(aContext->Rsp); aRegs.mFP = reinterpret_cast<Address>
(aContext->Rbp); - aRegs.mLR = 0; + aRegs.mR10 = reinterpret_cast<Address>
(aContext->R10); + aRegs.mR12 = reinterpret_cast<Address>
(aContext->R12); #elif defined(GP_ARCH_x86) aRegs.mPC = reinterpret_cast<Address>
(aContext->Eip); aRegs.mSP = reinterpret_cast<Address>
(aContext->Esp); aRegs.mFP = reinterpret_cast<Address>
(aContext->Ebp); - aRegs.mLR = 0; + aRegs.mEcx = reinterpret_cast<Address>
(aContext->Ecx); + aRegs.mEdx = reinterpret_cast<Address>
(aContext->Edx); #elif defined(GP_ARCH_arm64) aRegs.mPC = reinterpret_cast<Address>
(aContext->Pc); aRegs.mSP = reinterpret_cast<Address>
(aContext->Sp); aRegs.mFP = reinterpret_cast<Address>
(aContext->Fp); aRegs.mLR = reinterpret_cast<Address>
(aContext->Lr); + aRegs.mR11 = reinterpret_cast<Address>
(aContext->X11); #else # error "bad arch" #endif @@ -262,7 +265,7 @@ void Sampler::SuspendAndSampleAndResumeThread( #if defined(GP_ARCH_amd64) context.ContextFlags = CONTEXT_FULL; #else - context.ContextFlags = CONTEXT_CONTROL; + context.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; #endif if (!GetThreadContext(profiled_thread, &context)) { ResumeThread(profiled_thread); diff --git a/tools/profiler/core/platform.cpp b/tools/profiler/core/platform.cpp index b37c9f9ea69f..45bac4838a8a 100644 --- a/tools/profiler/core/platform.cpp +++ b/tools/profiler/core/platform.cpp @@ -1603,11 +1603,49 @@ static const char* const kMainThreadName = "GeckoMain"; //////////////////////////////////////////////////////////////////////// // BEGIN sampling/unwinding code +// Additional registers that have to be saved when thread is paused. +#if defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) || \ + defined(GP_ARCH_x86) +# define UNWINDING_REGS_HAVE_ECX_EDX +#elif defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \ + defined(GP_PLAT_amd64_freebsd) || defined(GP_ARCH_amd64) || \ + defined(__x86_64__) +# define UNWINDING_REGS_HAVE_R10_R12 +#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) +# define UNWINDING_REGS_HAVE_LR_R7 +#elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \ + defined(GP_PLAT_arm64_freebsd) || defined(GP_ARCH_arm64) || \ + defined(__aarch64__) +# define UNWINDING_REGS_HAVE_LR_R11 +#endif + // The registers used for stack unwinding and a few other sampling purposes. // The ctor does nothing; users are responsible for filling in the fields. class Registers { public: - Registers() : mPC{nullptr}, mSP{nullptr}, mFP{nullptr}, mLR{nullptr} {} + Registers() + : mPC{nullptr}, + mSP{nullptr}, + mFP{nullptr} +#if defined(UNWINDING_REGS_HAVE_ECX_EDX) + , + mEcx{nullptr}, + mEdx{nullptr} +#elif defined(UNWINDING_REGS_HAVE_R10_R12) + , + mR10{nullptr}, + mR12{nullptr} +#elif defined(UNWINDING_REGS_HAVE_LR_R7) + , + mLR{nullptr}, + mR7{nullptr} +#elif defined(UNWINDING_REGS_HAVE_LR_R11) + , + mLR{nullptr}, + mR11{nullptr} +#endif + { + } void Clear() { memset(this, 0, sizeof(*this)); } @@ -1617,7 +1655,20 @@ class Registers { Address mPC; // Instruction pointer. Address mSP; // Stack pointer. Address mFP; // Frame pointer. - Address mLR; // ARM link register. +#if defined(UNWINDING_REGS_HAVE_ECX_EDX) + Address mEcx; // Temp for return address. + Address mEdx; // Temp for frame pointer. +#elif defined(UNWINDING_REGS_HAVE_R10_R12) + Address mR10; // Temp for return address. + Address mR12; // Temp for frame pointer. +#elif defined(UNWINDING_REGS_HAVE_LR_R7) + Address mLR; // ARM link register, or temp for return address. + Address mR7; // Temp for frame pointer. +#elif defined(UNWINDING_REGS_HAVE_LR_R11) + Address mLR; // ARM link register, or temp for return address. + Address mR11; // Temp for frame pointer. +#endif + #if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) // This contains all the registers, which means it duplicates the four fields // above. This is ok. 
@@ -1759,8 +1810,20 @@ static uint32_t ExtractJsFrames( JS::ProfilingFrameIterator::RegisterState registerState; registerState.pc = aRegs.mPC; registerState.sp = aRegs.mSP; - registerState.lr = aRegs.mLR; registerState.fp = aRegs.mFP; +#if defined(UNWINDING_REGS_HAVE_ECX_EDX) + registerState.tempRA = aRegs.mEcx; + registerState.tempFP = aRegs.mEdx; +#elif defined(UNWINDING_REGS_HAVE_R10_R12) + registerState.tempRA = aRegs.mR10; + registerState.tempFP = aRegs.mR12; +#elif defined(UNWINDING_REGS_HAVE_LR_R7) + registerState.lr = aRegs.mLR; + registerState.tempFP = aRegs.mR7; +#elif defined(UNWINDING_REGS_HAVE_LR_R11) + registerState.lr = aRegs.mLR; + registerState.tempFP = aRegs.mR11; +#endif // Non-periodic sampling passes Nothing() as the buffer write position to // ProfilingFrameIterator to avoid incorrectly resetting the buffer @@ -2546,6 +2609,11 @@ static inline void DoPeriodicSample( aBuffer); } +#undef UNWINDING_REGS_HAVE_ECX_EDX +#undef UNWINDING_REGS_HAVE_R10_R12 +#undef UNWINDING_REGS_HAVE_LR_R7 +#undef UNWINDING_REGS_HAVE_LR_R11 + // END sampling/unwinding code ////////////////////////////////////////////////////////////////////////
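Taken together, the RegisterState changes in ProfilingFrameIterator.h and the ExtractJsFrames mapping above mean a sampler forwards two extra values per sample. A minimal sketch of that hand-off, assuming the captured register values are already available (hypothetical helper; the real mapping is ExtractJsFrames above):

```cpp
#include "js/ProfilingFrameIterator.h"

// Builds the extended RegisterState for a paused thread. On ARM/ARM64 the
// lr field doubles as tempRA through the union; on x86/x64 tempRA/tempFP are
// the two scratch GPRs listed in the platform.cpp mapping above.
JS::ProfilingFrameIterator::RegisterState MakeRegisterState(
    void* pc, void* sp, void* fp, void* returnAddressTemp,
    void* framePointerTemp) {
  JS::ProfilingFrameIterator::RegisterState state;
  state.pc = pc;
  state.sp = sp;
  state.fp = fp;
  state.tempRA = returnAddressTemp;  // same storage as state.lr on ARM targets
  state.tempFP = framePointerTemp;
  return state;
}
```

Outside a tail-call collapse sequence the two temp fields are ignored by the unwinder, so it is safe to pass whatever the registers happened to contain at the sampled pc.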