mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-01 22:55:23 +00:00
393 lines
11 KiB
C++
393 lines
11 KiB
C++
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#include "mozilla/HangMonitor.h"
|
|
#include "mozilla/BackgroundHangMonitor.h"
|
|
#include "mozilla/Monitor.h"
|
|
#include "mozilla/Preferences.h"
|
|
#include "mozilla/Telemetry.h"
|
|
#include "mozilla/ProcessedStack.h"
|
|
#include "mozilla/Atomics.h"
|
|
#include "nsXULAppAPI.h"
|
|
#include "nsThreadUtils.h"
|
|
#include "nsStackWalk.h"
|
|
|
|
#ifdef MOZ_CRASHREPORTER
|
|
#include "nsExceptionHandler.h"
|
|
#endif
|
|
|
|
#ifdef XP_WIN
|
|
#include <windows.h>
|
|
#endif
|
|
|
|
#if defined(MOZ_ENABLE_PROFILER_SPS) && defined(MOZ_PROFILING) && defined(XP_WIN)
|
|
#define REPORT_CHROME_HANGS
|
|
#endif
|
|
|
|
namespace mozilla { namespace HangMonitor {
|
|
|
|
/**
|
|
* A flag which may be set from within a debugger to disable the hang
|
|
* monitor.
|
|
*/
|
|
volatile bool gDebugDisableHangMonitor = false;
|
|
|
|
const char kHangMonitorPrefName[] = "hangmonitor.timeout";
|
|
|
|
#ifdef MOZ_TELEMETRY_ON_BY_DEFAULT
|
|
const char kTelemetryPrefName[] = "toolkit.telemetry.enabledPreRelease";
|
|
#else
|
|
const char kTelemetryPrefName[] = "toolkit.telemetry.enabled";
|
|
#endif
|
|
|
|
// Monitor protects gShutdown and gTimeout, but not gTimestamp which rely on
|
|
// being atomically set by the processor; synchronization doesn't really matter
|
|
// in this use case.
|
|
Monitor* gMonitor;
|
|
|
|
// The timeout preference, in seconds.
|
|
int32_t gTimeout;
|
|
|
|
PRThread* gThread;
|
|
|
|
// Set when shutdown begins to signal the thread to exit immediately.
|
|
bool gShutdown;
|
|
|
|
// The timestamp of the last event notification, or PR_INTERVAL_NO_WAIT if
|
|
// we're currently not processing events.
|
|
Atomic<PRIntervalTime> gTimestamp(PR_INTERVAL_NO_WAIT);
|
|
|
|
#ifdef REPORT_CHROME_HANGS
|
|
// Main thread ID used in reporting chrome hangs under Windows
|
|
static HANDLE winMainThreadHandle = nullptr;
|
|
|
|
// Default timeout for reporting chrome hangs to Telemetry (5 seconds)
|
|
static const int32_t DEFAULT_CHROME_HANG_INTERVAL = 5;
|
|
|
|
// Maximum number of PCs to gather from the stack
|
|
static const int32_t MAX_CALL_STACK_PCS = 400;
|
|
#endif
|
|
|
|
// PrefChangedFunc
|
|
void
|
|
PrefChanged(const char*, void*)
|
|
{
|
|
int32_t newval = Preferences::GetInt(kHangMonitorPrefName);
|
|
#ifdef REPORT_CHROME_HANGS
|
|
// Monitor chrome hangs on the profiling branch if Telemetry enabled
|
|
if (newval == 0) {
|
|
bool telemetryEnabled = Preferences::GetBool(kTelemetryPrefName);
|
|
if (telemetryEnabled) {
|
|
newval = DEFAULT_CHROME_HANG_INTERVAL;
|
|
}
|
|
}
|
|
#endif
|
|
MonitorAutoLock lock(*gMonitor);
|
|
if (newval != gTimeout) {
|
|
gTimeout = newval;
|
|
lock.Notify();
|
|
}
|
|
}
|
|
|
|
void
|
|
Crash()
|
|
{
|
|
if (gDebugDisableHangMonitor) {
|
|
return;
|
|
}
|
|
|
|
#ifdef XP_WIN
|
|
if (::IsDebuggerPresent()) {
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
#ifdef MOZ_CRASHREPORTER
|
|
CrashReporter::AnnotateCrashReport(NS_LITERAL_CSTRING("Hang"),
|
|
NS_LITERAL_CSTRING("1"));
|
|
#endif
|
|
|
|
NS_RUNTIMEABORT("HangMonitor triggered");
|
|
}
|
|
|
|
#ifdef REPORT_CHROME_HANGS
|
|
static void
|
|
ChromeStackWalker(void *aPC, void *aSP, void *aClosure)
|
|
{
|
|
MOZ_ASSERT(aClosure);
|
|
std::vector<uintptr_t> *stack =
|
|
static_cast<std::vector<uintptr_t>*>(aClosure);
|
|
if (stack->size() == MAX_CALL_STACK_PCS)
|
|
return;
|
|
MOZ_ASSERT(stack->size() < MAX_CALL_STACK_PCS);
|
|
stack->push_back(reinterpret_cast<uintptr_t>(aPC));
|
|
}
|
|
|
|
static void
|
|
GetChromeHangReport(Telemetry::ProcessedStack &aStack,
|
|
int32_t &aSystemUptime,
|
|
int32_t &aFirefoxUptime)
|
|
{
|
|
MOZ_ASSERT(winMainThreadHandle);
|
|
|
|
// The thread we're about to suspend might have the alloc lock
|
|
// so allocate ahead of time
|
|
std::vector<uintptr_t> rawStack;
|
|
rawStack.reserve(MAX_CALL_STACK_PCS);
|
|
DWORD ret = ::SuspendThread(winMainThreadHandle);
|
|
if (ret == -1)
|
|
return;
|
|
NS_StackWalk(ChromeStackWalker, /* skipFrames */ 0, /* maxFrames */ 0,
|
|
reinterpret_cast<void*>(&rawStack),
|
|
reinterpret_cast<uintptr_t>(winMainThreadHandle), nullptr);
|
|
ret = ::ResumeThread(winMainThreadHandle);
|
|
if (ret == -1)
|
|
return;
|
|
aStack = Telemetry::GetStackAndModules(rawStack);
|
|
|
|
// Record system uptime (in minutes) at the time of the hang
|
|
aSystemUptime = ((GetTickCount() / 1000) - (gTimeout * 2)) / 60;
|
|
|
|
// Record Firefox uptime (in minutes) at the time of the hang
|
|
bool error;
|
|
TimeStamp processCreation = TimeStamp::ProcessCreation(error);
|
|
if (!error) {
|
|
TimeDuration td = TimeStamp::Now() - processCreation;
|
|
aFirefoxUptime = (static_cast<int32_t>(td.ToSeconds()) - (gTimeout * 2)) / 60;
|
|
} else {
|
|
aFirefoxUptime = -1;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
void
|
|
ThreadMain(void*)
|
|
{
|
|
PR_SetCurrentThreadName("Hang Monitor");
|
|
|
|
MonitorAutoLock lock(*gMonitor);
|
|
|
|
// In order to avoid issues with the hang monitor incorrectly triggering
|
|
// during a general system stop such as sleeping, the monitor thread must
|
|
// run twice to trigger hang protection.
|
|
PRIntervalTime lastTimestamp = 0;
|
|
int waitCount = 0;
|
|
|
|
#ifdef REPORT_CHROME_HANGS
|
|
Telemetry::ProcessedStack stack;
|
|
int32_t systemUptime = -1;
|
|
int32_t firefoxUptime = -1;
|
|
#endif
|
|
|
|
while (true) {
|
|
if (gShutdown) {
|
|
return; // Exit the thread
|
|
}
|
|
|
|
// avoid rereading the volatile value in this loop
|
|
PRIntervalTime timestamp = gTimestamp;
|
|
|
|
PRIntervalTime now = PR_IntervalNow();
|
|
|
|
if (timestamp != PR_INTERVAL_NO_WAIT &&
|
|
now < timestamp) {
|
|
// 32-bit overflow, reset for another waiting period
|
|
timestamp = 1; // lowest legal PRInterval value
|
|
}
|
|
|
|
if (timestamp != PR_INTERVAL_NO_WAIT &&
|
|
timestamp == lastTimestamp &&
|
|
gTimeout > 0) {
|
|
++waitCount;
|
|
#ifdef REPORT_CHROME_HANGS
|
|
// Capture the chrome-hang stack + Firefox & system uptimes after
|
|
// the minimum hang duration has been reached (not when the hang ends)
|
|
if (waitCount == 2) {
|
|
GetChromeHangReport(stack, systemUptime, firefoxUptime);
|
|
}
|
|
#else
|
|
// This is the crash-on-hang feature.
|
|
// See bug 867313 for the quirk in the waitCount comparison
|
|
if (waitCount >= 2) {
|
|
int32_t delay =
|
|
int32_t(PR_IntervalToSeconds(now - timestamp));
|
|
if (delay >= gTimeout) {
|
|
MonitorAutoUnlock unlock(*gMonitor);
|
|
Crash();
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
else {
|
|
#ifdef REPORT_CHROME_HANGS
|
|
if (waitCount >= 2) {
|
|
uint32_t hangDuration = PR_IntervalToSeconds(now - lastTimestamp);
|
|
Telemetry::RecordChromeHang(hangDuration, stack,
|
|
systemUptime, firefoxUptime);
|
|
stack.Clear();
|
|
}
|
|
#endif
|
|
lastTimestamp = timestamp;
|
|
waitCount = 0;
|
|
}
|
|
|
|
PRIntervalTime timeout;
|
|
if (gTimeout <= 0) {
|
|
timeout = PR_INTERVAL_NO_TIMEOUT;
|
|
}
|
|
else {
|
|
timeout = PR_MillisecondsToInterval(gTimeout * 500);
|
|
}
|
|
lock.Wait(timeout);
|
|
}
|
|
}
|
|
|
|
void
|
|
Startup()
|
|
{
|
|
// The hang detector only runs in chrome processes. If you change this,
|
|
// you must also deal with the threadsafety of AnnotateCrashReport in
|
|
// non-chrome processes!
|
|
if (GeckoProcessType_Default != XRE_GetProcessType())
|
|
return;
|
|
|
|
MOZ_ASSERT(!gMonitor, "Hang monitor already initialized");
|
|
gMonitor = new Monitor("HangMonitor");
|
|
|
|
Preferences::RegisterCallback(PrefChanged, kHangMonitorPrefName, nullptr);
|
|
PrefChanged(nullptr, nullptr);
|
|
|
|
#ifdef REPORT_CHROME_HANGS
|
|
Preferences::RegisterCallback(PrefChanged, kTelemetryPrefName, nullptr);
|
|
winMainThreadHandle =
|
|
OpenThread(THREAD_ALL_ACCESS, FALSE, GetCurrentThreadId());
|
|
if (!winMainThreadHandle)
|
|
return;
|
|
#endif
|
|
|
|
// Don't actually start measuring hangs until we hit the main event loop.
|
|
// This potentially misses a small class of really early startup hangs,
|
|
// but avoids dealing with some xpcshell tests and other situations which
|
|
// start XPCOM but don't ever start the event loop.
|
|
Suspend();
|
|
|
|
gThread = PR_CreateThread(PR_USER_THREAD,
|
|
ThreadMain,
|
|
nullptr, PR_PRIORITY_LOW, PR_GLOBAL_THREAD,
|
|
PR_JOINABLE_THREAD, 0);
|
|
}
|
|
|
|
void
|
|
Shutdown()
|
|
{
|
|
if (GeckoProcessType_Default != XRE_GetProcessType())
|
|
return;
|
|
|
|
MOZ_ASSERT(gMonitor, "Hang monitor not started");
|
|
|
|
{ // Scope the lock we're going to delete later
|
|
MonitorAutoLock lock(*gMonitor);
|
|
gShutdown = true;
|
|
lock.Notify();
|
|
}
|
|
|
|
// thread creation could theoretically fail
|
|
if (gThread) {
|
|
PR_JoinThread(gThread);
|
|
gThread = nullptr;
|
|
}
|
|
|
|
delete gMonitor;
|
|
gMonitor = nullptr;
|
|
}
|
|
|
|
static bool
|
|
IsUIMessageWaiting()
|
|
{
|
|
#ifndef XP_WIN
|
|
return false;
|
|
#else
|
|
#define NS_WM_IMEFIRST WM_IME_SETCONTEXT
|
|
#define NS_WM_IMELAST WM_IME_KEYUP
|
|
BOOL haveUIMessageWaiting = FALSE;
|
|
MSG msg;
|
|
haveUIMessageWaiting |= ::PeekMessageW(&msg, nullptr, WM_KEYFIRST,
|
|
WM_IME_KEYLAST, PM_NOREMOVE);
|
|
haveUIMessageWaiting |= ::PeekMessageW(&msg, nullptr, NS_WM_IMEFIRST,
|
|
NS_WM_IMELAST, PM_NOREMOVE);
|
|
haveUIMessageWaiting |= ::PeekMessageW(&msg, nullptr, WM_MOUSEFIRST,
|
|
WM_MOUSELAST, PM_NOREMOVE);
|
|
return haveUIMessageWaiting;
|
|
#endif
|
|
}
|
|
|
|
void
|
|
NotifyActivity(ActivityType activityType)
|
|
{
|
|
MOZ_ASSERT(NS_IsMainThread(),
|
|
"HangMonitor::Notify called from off the main thread.");
|
|
|
|
// Determine the activity type more specifically
|
|
if (activityType == kGeneralActivity) {
|
|
activityType = IsUIMessageWaiting() ? kActivityUIAVail :
|
|
kActivityNoUIAVail;
|
|
}
|
|
|
|
// Calculate the cumulative amount of lag time since the last UI message
|
|
static uint32_t cumulativeUILagMS = 0;
|
|
switch(activityType) {
|
|
case kActivityNoUIAVail:
|
|
cumulativeUILagMS = 0;
|
|
break;
|
|
case kActivityUIAVail:
|
|
case kUIActivity:
|
|
if (gTimestamp != PR_INTERVAL_NO_WAIT) {
|
|
cumulativeUILagMS += PR_IntervalToMilliseconds(PR_IntervalNow() -
|
|
gTimestamp);
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
// This is not a locked activity because PRTimeStamp is a 32-bit quantity
|
|
// which can be read/written atomically, and we don't want to pay locking
|
|
// penalties here.
|
|
gTimestamp = PR_IntervalNow();
|
|
|
|
// If we have UI activity we should reset the timer and report it if it is
|
|
// significant enough.
|
|
if (activityType == kUIActivity) {
|
|
// The minimum amount of lag time that we should report for telemetry data.
|
|
// Mozilla's UI responsiveness goal is 50ms
|
|
static const uint32_t kUIResponsivenessThresholdMS = 50;
|
|
if (cumulativeUILagMS > kUIResponsivenessThresholdMS) {
|
|
mozilla::Telemetry::Accumulate(mozilla::Telemetry::EVENTLOOP_UI_LAG_EXP_MS,
|
|
cumulativeUILagMS);
|
|
}
|
|
cumulativeUILagMS = 0;
|
|
}
|
|
|
|
if (gThread && !gShutdown) {
|
|
mozilla::BackgroundHangMonitor().NotifyActivity();
|
|
}
|
|
}
|
|
|
|
void
|
|
Suspend()
|
|
{
|
|
MOZ_ASSERT(NS_IsMainThread(),
|
|
"HangMonitor::Suspend called from off the main thread.");
|
|
|
|
// Because gTimestamp changes this resets the wait count.
|
|
gTimestamp = PR_INTERVAL_NO_WAIT;
|
|
|
|
if (gThread && !gShutdown) {
|
|
mozilla::BackgroundHangMonitor().NotifyWait();
|
|
}
|
|
}
|
|
|
|
} } // namespace mozilla::HangMonitor
|