From 10b4b19413c7de30424a4f62039145b294eefb04 Mon Sep 17 00:00:00 2001 From: Jim Chen Date: Fri, 22 Nov 2013 14:17:29 -0500 Subject: [PATCH] Bug 909974 - Let monitor thread detect hangs in addition to permahangs; r=froydnj --- xpcom/threads/BackgroundHangMonitor.cpp | 63 +++++++++++++++++++------ 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/xpcom/threads/BackgroundHangMonitor.cpp b/xpcom/threads/BackgroundHangMonitor.cpp index f2829ed6bea2..679b4b0fca4b 100644 --- a/xpcom/threads/BackgroundHangMonitor.cpp +++ b/xpcom/threads/BackgroundHangMonitor.cpp @@ -105,6 +105,10 @@ public: const PRIntervalTime mMaxTimeout; // Time at last activity PRIntervalTime mInterval; + // Time when a hang started + PRIntervalTime mHangStart; + // Is the thread in a hang + bool mHanging; // Is the thread in a waiting state bool mWaiting; @@ -113,7 +117,7 @@ public: uint32_t aMaxTimeoutMs); ~BackgroundHangThread(); - // Report a hang; aManager->mLock is NOT locked + // Report a hang; aManager->mLock IS locked void ReportHang(PRIntervalTime aHangTime) const; // Report a permanent hang; aManager->mLock IS locked void ReportPermaHang() const; @@ -168,7 +172,7 @@ BackgroundHangManager::RunMonitorThread() PRIntervalTime systemTime = PR_IntervalNow(); // Default values for the first iteration of thread loop PRIntervalTime waitTime = PR_INTERVAL_NO_WAIT; - PRIntervalTime permaHangTimeout = PR_INTERVAL_NO_WAIT; + PRIntervalTime recheckTimeout = PR_INTERVAL_NO_WAIT; while (!mShutdown) { @@ -187,24 +191,24 @@ BackgroundHangManager::RunMonitorThread() mIntervalNow += systemInterval; } - /* If it's before the next permahang timeout, and our wait did not + /* If it's before the next recheck timeout, and our wait did not get interrupted (either through Notify or PR_Interrupt), we can keep the current waitTime and skip iterating through hang monitors. */ - if (MOZ_LIKELY(systemInterval < permaHangTimeout && + if (MOZ_LIKELY(systemInterval < recheckTimeout && systemInterval >= waitTime && rv == NS_OK)) { - permaHangTimeout -= systemInterval; + recheckTimeout -= systemInterval; continue; } /* We are in one of the following scenarios, - - Permahang timeout + - Hang or permahang recheck timeout - Thread added/removed - - Thread wait ended + - Thread wait or hang ended In all cases, we want to go through our list of hang - monitors and update waitTime and permaHangTimeout. */ + monitors and update waitTime and recheckTimeout. */ waitTime = PR_INTERVAL_NO_TIMEOUT; - permaHangTimeout = PR_INTERVAL_NO_TIMEOUT; + recheckTimeout = PR_INTERVAL_NO_TIMEOUT; // Locally hold mIntervalNow PRIntervalTime intervalNow = mIntervalNow; @@ -217,18 +221,44 @@ BackgroundHangManager::RunMonitorThread() // Thread is waiting, not hanging continue; } - PRIntervalTime hangTime = intervalNow - currentThread->mInterval; + PRIntervalTime interval = currentThread->mInterval; + PRIntervalTime hangTime = intervalNow - interval; if (MOZ_UNLIKELY(hangTime >= currentThread->mMaxTimeout)) { + // A permahang started // Skip subsequent iterations and tolerate a race on mWaiting here currentThread->mWaiting = true; currentThread->ReportPermaHang(); continue; } + + if (MOZ_LIKELY(!currentThread->mHanging)) { + if (MOZ_UNLIKELY(hangTime >= currentThread->mTimeout)) { + // A hang started + currentThread->mHangStart = interval; + currentThread->mHanging = true; + } + } else { + if (MOZ_LIKELY(interval != currentThread->mHangStart)) { + // A hang ended + currentThread->ReportHang(intervalNow - currentThread->mHangStart); + currentThread->mHanging = false; + } + } + + /* If we are hanging, the next time we check for hang status is when + the hang turns into a permahang. If we're not hanging, the next + recheck timeout is when we may be entering a hang. */ + PRIntervalTime nextRecheck; + if (currentThread->mHanging) { + nextRecheck = currentThread->mMaxTimeout; + } else { + nextRecheck = currentThread->mTimeout; + } + recheckTimeout = std::min(recheckTimeout, nextRecheck - hangTime); + /* We wait for a quarter of the shortest timeout value to give mIntervalNow enough granularity. */ waitTime = std::min(waitTime, currentThread->mTimeout / 4); - permaHangTimeout = std::min( - permaHangTimeout, currentThread->mMaxTimeout - hangTime); } } @@ -249,6 +279,8 @@ BackgroundHangThread::BackgroundHangThread(const char* aName, , mTimeout(PR_MillisecondsToInterval(aTimeoutMs)) , mMaxTimeout(PR_MillisecondsToInterval(aMaxTimeoutMs)) , mInterval(mManager->mIntervalNow) + , mHangStart(mInterval) + , mHanging(false) , mWaiting(true) { if (sTlsKey.initialized()) { @@ -280,8 +312,8 @@ BackgroundHangThread::~BackgroundHangThread() void BackgroundHangThread::ReportHang(PRIntervalTime aHangTime) const { - // Recovered from a hang; called on the hanged thread - // mManager->mLock is NOT locked + // Recovered from a hang; called on the monitor thread + // mManager->mLock IS locked // TODO: Add telemetry reporting for hangs } @@ -308,7 +340,8 @@ BackgroundHangThread::NotifyActivity() } else { PRIntervalTime duration = intervalNow - mInterval; if (MOZ_UNLIKELY(duration >= mTimeout)) { - ReportHang(duration); + /* Wake up the manager thread to tell it that a hang ended */ + mManager->Wakeup(); } mInterval = intervalNow; }