mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-12-03 02:25:34 +00:00
Bug 1442178
- Repair broken socket polling wakeup mechanism after a network change to prevent long load hangs, r=dragana
This commit is contained in:
parent
fbc0928677
commit
dab22ca3b9
@ -1770,6 +1770,7 @@ pref("network.http.focused_window_transaction_ratio", "0.9");
|
||||
// Whether or not we give more priority to active tab.
|
||||
// Note that this requires restart for changes to take effect.
|
||||
pref("network.http.active_tab_priority", true);
|
||||
// </http>
|
||||
|
||||
// default values for FTP
|
||||
// in a DSCP environment this should be 40 (0x28, or AF11), per RFC-4594,
|
||||
@ -1785,7 +1786,12 @@ pref("network.sts.max_time_for_events_between_two_polls", 100);
|
||||
// During shutdown we limit PR_Close calls. If time exceeds this pref (in ms)
|
||||
// let sockets just leak.
|
||||
pref("network.sts.max_time_for_pr_close_during_shutdown", 5000);
|
||||
// </http>
|
||||
|
||||
// When the polling socket pair we use to wake poll() up on demand doesn't
|
||||
// get signalled (is not readable) within this timeout, we try to repair it.
|
||||
// This timeout can be disabled by setting this pref to 0.
|
||||
// The value is expected in seconds.
|
||||
pref("network.sts.pollable_event_timeout", 6);
|
||||
|
||||
// 2147483647 == PR_INT32_MAX == ~2 GB
|
||||
pref("network.websocket.max-message-size", 2147483647);
|
||||
|
@ -140,6 +140,8 @@ PollableEvent::PollableEvent()
|
||||
: mWriteFD(nullptr)
|
||||
, mReadFD(nullptr)
|
||||
, mSignaled(false)
|
||||
, mWriteFailed(false)
|
||||
, mSignalTimestampAdjusted(false)
|
||||
{
|
||||
MOZ_COUNT_CTOR(PollableEvent);
|
||||
MOZ_ASSERT(OnSocketThread(), "not on socket thread");
|
||||
@ -218,6 +220,7 @@ PollableEvent::PollableEvent()
|
||||
// prime the system to deal with races invovled in [dc]tor cycle
|
||||
SOCKET_LOG(("PollableEvent() ctor ok\n"));
|
||||
mSignaled = true;
|
||||
MarkFirstSignalTimestamp();
|
||||
PR_Write(mWriteFD, "I", 1);
|
||||
}
|
||||
}
|
||||
@ -274,13 +277,20 @@ PollableEvent::Signal()
|
||||
}
|
||||
#endif
|
||||
|
||||
mSignaled = true;
|
||||
if (!mSignaled) {
|
||||
mSignaled = true;
|
||||
MarkFirstSignalTimestamp();
|
||||
}
|
||||
|
||||
int32_t status = PR_Write(mWriteFD, "M", 1);
|
||||
SOCKET_LOG(("PollableEvent::Signal PR_Write %d\n", status));
|
||||
if (status != 1) {
|
||||
NS_WARNING("PollableEvent::Signal Failed\n");
|
||||
SOCKET_LOG(("PollableEvent::Signal Failed\n"));
|
||||
mSignaled = false;
|
||||
mWriteFailed = true;
|
||||
} else {
|
||||
mWriteFailed = false;
|
||||
}
|
||||
return (status == 1);
|
||||
}
|
||||
@ -292,11 +302,21 @@ PollableEvent::Clear()
|
||||
MOZ_ASSERT(OnSocketThread(), "not on socket thread");
|
||||
|
||||
SOCKET_LOG(("PollableEvent::Clear\n"));
|
||||
|
||||
if (!mFirstSignalAfterClear.IsNull()) {
|
||||
SOCKET_LOG(("PollableEvent::Clear time to signal %ums",
|
||||
(uint32_t)(TimeStamp::NowLoRes() - mFirstSignalAfterClear).ToMilliseconds()));
|
||||
}
|
||||
|
||||
mFirstSignalAfterClear = TimeStamp();
|
||||
mSignalTimestampAdjusted = false;
|
||||
mSignaled = false;
|
||||
|
||||
if (!mReadFD) {
|
||||
SOCKET_LOG(("PollableEvent::Clear mReadFD is null\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
char buf[2048];
|
||||
int32_t status;
|
||||
#ifdef XP_WIN
|
||||
@ -304,7 +324,7 @@ PollableEvent::Clear()
|
||||
// do not have any deadlock read from the socket as much as we can.
|
||||
while (true) {
|
||||
status = PR_Read(mReadFD, buf, 2048);
|
||||
SOCKET_LOG(("PollableEvent::Signal PR_Read %d\n", status));
|
||||
SOCKET_LOG(("PollableEvent::Clear PR_Read %d\n", status));
|
||||
if (status == 0) {
|
||||
SOCKET_LOG(("PollableEvent::Clear EOF!\n"));
|
||||
return false;
|
||||
@ -321,7 +341,7 @@ PollableEvent::Clear()
|
||||
}
|
||||
#else
|
||||
status = PR_Read(mReadFD, buf, 2048);
|
||||
SOCKET_LOG(("PollableEvent::Signal PR_Read %d\n", status));
|
||||
SOCKET_LOG(("PollableEvent::Clear PR_Read %d\n", status));
|
||||
|
||||
if (status == 1) {
|
||||
return true;
|
||||
@ -345,5 +365,47 @@ PollableEvent::Clear()
|
||||
#endif //XP_WIN
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
PollableEvent::MarkFirstSignalTimestamp()
|
||||
{
|
||||
if (mFirstSignalAfterClear.IsNull()) {
|
||||
SOCKET_LOG(("PollableEvent::MarkFirstSignalTimestamp"));
|
||||
mFirstSignalAfterClear = TimeStamp::NowLoRes();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
PollableEvent::AdjustFirstSignalTimestamp()
|
||||
{
|
||||
if (!mSignalTimestampAdjusted && !mFirstSignalAfterClear.IsNull()) {
|
||||
SOCKET_LOG(("PollableEvent::AdjustFirstSignalTimestamp"));
|
||||
mFirstSignalAfterClear = TimeStamp::NowLoRes();
|
||||
mSignalTimestampAdjusted = true;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
PollableEvent::IsSignallingAlive(TimeDuration const& timeout)
|
||||
{
|
||||
if (mWriteFailed) {
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
// The timeout would be just a disturbance in a debug build.
|
||||
return true;
|
||||
#else
|
||||
if (!mSignaled || mFirstSignalAfterClear.IsNull() || timeout == TimeDuration()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
TimeDuration delay = (TimeStamp::NowLoRes() - mFirstSignalAfterClear);
|
||||
bool timedOut = delay > timeout;
|
||||
|
||||
return !timedOut;
|
||||
#endif // DEBUG
|
||||
}
|
||||
|
||||
} // namespace net
|
||||
} // namespace mozilla
|
||||
|
@ -8,6 +8,7 @@
|
||||
#define PollableEvent_h__
|
||||
|
||||
#include "mozilla/Mutex.h"
|
||||
#include "mozilla/TimeStamp.h"
|
||||
|
||||
namespace mozilla {
|
||||
namespace net {
|
||||
@ -21,15 +22,42 @@ public:
|
||||
|
||||
// Signal/Clear return false only if they fail
|
||||
bool Signal();
|
||||
// This is called only when we get non-null out_flags for the socket pair
|
||||
bool Clear();
|
||||
bool Valid() { return mWriteFD && mReadFD; }
|
||||
|
||||
// We want to detect if writing to one of the socket pair sockets takes
|
||||
// too long to be received by the other socket from the pair.
|
||||
// Hence, we remember the timestamp of the earliest write by a call to
|
||||
// MarkFirstSignalTimestamp() from Signal(). After waking up from poll()
|
||||
// we check how long it took get the 'readable' signal on the socket pair.
|
||||
void MarkFirstSignalTimestamp();
|
||||
// Called right before we enter poll() to exclude any possible delay between
|
||||
// the earlist call to Signal() and entering poll() caused by processing
|
||||
// of events dispatched to the socket transport thread.
|
||||
void AdjustFirstSignalTimestamp();
|
||||
// This returns false on following conditions:
|
||||
// - PR_Write has failed
|
||||
// - no out_flags were signalled on the socket pair for too long after
|
||||
// the earliest Signal()
|
||||
bool IsSignallingAlive(TimeDuration const& timeout);
|
||||
|
||||
PRFileDesc *PollableFD() { return mReadFD; }
|
||||
|
||||
private:
|
||||
PRFileDesc *mWriteFD;
|
||||
PRFileDesc *mReadFD;
|
||||
bool mSignaled;
|
||||
// true when PR_Write to the socket pair has failed (status < 1)
|
||||
bool mWriteFailed;
|
||||
// Set true after AdjustFirstSignalTimestamp() was called
|
||||
// Set false after Clear() was called
|
||||
// Ensures shifting the timestamp before entering poll() only once
|
||||
// between Clear()'ings.
|
||||
bool mSignalTimestampAdjusted;
|
||||
// Timestamp of the first call to Signal() (or time we enter poll())
|
||||
// that happened after the last Clear() call
|
||||
TimeStamp mFirstSignalAfterClear;
|
||||
};
|
||||
|
||||
} // namespace net
|
||||
|
@ -53,6 +53,7 @@ static Atomic<PRThread*, Relaxed> gSocketThread;
|
||||
#define MAX_TIME_BETWEEN_TWO_POLLS "network.sts.max_time_for_events_between_two_polls"
|
||||
#define TELEMETRY_PREF "toolkit.telemetry.enabled"
|
||||
#define MAX_TIME_FOR_PR_CLOSE_DURING_SHUTDOWN "network.sts.max_time_for_pr_close_during_shutdown"
|
||||
#define POLLABLE_EVENT_TIMEOUT "network.sts.pollable_event_timeout"
|
||||
|
||||
#define REPAIR_POLLABLE_EVENT_TIME 10
|
||||
|
||||
@ -135,6 +136,7 @@ nsSocketTransportService::nsSocketTransportService()
|
||||
, mKeepaliveRetryIntervalS(1)
|
||||
, mKeepaliveProbeCount(kDefaultTCPKeepCount)
|
||||
, mKeepaliveEnabledPref(false)
|
||||
, mPollableEventTimeout(TimeDuration::FromSeconds(6))
|
||||
, mServingPendingQueue(false)
|
||||
, mMaxTimePerPollIter(100)
|
||||
, mTelemetryEnabledPref(false)
|
||||
@ -606,6 +608,7 @@ nsSocketTransportService::Init()
|
||||
tmpPrefService->AddObserver(MAX_TIME_BETWEEN_TWO_POLLS, this, false);
|
||||
tmpPrefService->AddObserver(TELEMETRY_PREF, this, false);
|
||||
tmpPrefService->AddObserver(MAX_TIME_FOR_PR_CLOSE_DURING_SHUTDOWN, this, false);
|
||||
tmpPrefService->AddObserver(POLLABLE_EVENT_TIMEOUT, this, false);
|
||||
}
|
||||
UpdatePrefs();
|
||||
|
||||
@ -1159,6 +1162,20 @@ nsSocketTransportService::DoPollIteration(TimeDuration *pollDuration)
|
||||
MoveToPollList(&mIdleList[i]);
|
||||
}
|
||||
|
||||
{
|
||||
MutexAutoLock lock(mLock);
|
||||
if (mPollableEvent) {
|
||||
// we want to make sure the timeout is measured from the time
|
||||
// we enter poll(). This method resets the timestamp to 'now',
|
||||
// if we were first signalled between leaving poll() and here.
|
||||
// If we didn't do this and processing events took longer than
|
||||
// the allowed signal timeout, we would detect it as a
|
||||
// false-positive. AdjustFirstSignalTimestamp is then a no-op
|
||||
// until mPollableEvent->Clear() is called.
|
||||
mPollableEvent->AdjustFirstSignalTimestamp();
|
||||
}
|
||||
}
|
||||
|
||||
SOCKET_LOG((" calling PR_Poll [active=%u idle=%u]\n", mActiveCount, mIdleCount));
|
||||
|
||||
#if defined(XP_WIN)
|
||||
@ -1230,29 +1247,26 @@ nsSocketTransportService::DoPollIteration(TimeDuration *pollDuration)
|
||||
DetachSocket(mActiveList, &mActiveList[i]);
|
||||
}
|
||||
|
||||
if (n != 0 && (mPollList[0].out_flags & (PR_POLL_READ | PR_POLL_EXCEPT))) {
|
||||
{
|
||||
MutexAutoLock lock(mLock);
|
||||
|
||||
// acknowledge pollable event (should not block)
|
||||
if (mPollableEvent &&
|
||||
((mPollList[0].out_flags & PR_POLL_EXCEPT) ||
|
||||
!mPollableEvent->Clear())) {
|
||||
if (n != 0 &&
|
||||
(mPollList[0].out_flags & (PR_POLL_READ | PR_POLL_EXCEPT)) &&
|
||||
mPollableEvent &&
|
||||
((mPollList[0].out_flags & PR_POLL_EXCEPT) || !mPollableEvent->Clear())) {
|
||||
// On Windows, the TCP loopback connection in the
|
||||
// pollable event may become broken when a laptop
|
||||
// switches between wired and wireless networks or
|
||||
// wakes up from hibernation. We try to create a
|
||||
// new pollable event. If that fails, we fall back
|
||||
// on "busy wait".
|
||||
NS_WARNING("Trying to repair mPollableEvent");
|
||||
mPollableEvent.reset(new PollableEvent());
|
||||
if (!mPollableEvent->Valid()) {
|
||||
mPollableEvent = nullptr;
|
||||
}
|
||||
SOCKET_LOG(("running socket transport thread without "
|
||||
"a pollable event now valid=%d", !!mPollableEvent));
|
||||
mPollList[0].fd = mPollableEvent ? mPollableEvent->PollableFD() : nullptr;
|
||||
mPollList[0].in_flags = PR_POLL_READ | PR_POLL_EXCEPT;
|
||||
mPollList[0].out_flags = 0;
|
||||
TryRepairPollableEvent();
|
||||
}
|
||||
|
||||
if (mPollableEvent &&
|
||||
!mPollableEvent->IsSignallingAlive(mPollableEventTimeout)) {
|
||||
SOCKET_LOG(("Pollable event signalling failed/timed out"));
|
||||
TryRepairPollableEvent();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1335,6 +1349,14 @@ nsSocketTransportService::UpdatePrefs()
|
||||
if (NS_SUCCEEDED(rv) && maxTimeForPrClosePref >=0) {
|
||||
mMaxTimeForPrClosePref = PR_MillisecondsToInterval(maxTimeForPrClosePref);
|
||||
}
|
||||
|
||||
int32_t pollableEventTimeout;
|
||||
rv = tmpPrefService->GetIntPref(POLLABLE_EVENT_TIMEOUT,
|
||||
&pollableEventTimeout);
|
||||
if (NS_SUCCEEDED(rv) && pollableEventTimeout >= 0) {
|
||||
MutexAutoLock lock(mLock);
|
||||
mPollableEventTimeout = TimeDuration::FromSeconds(pollableEventTimeout);
|
||||
}
|
||||
}
|
||||
|
||||
return NS_OK;
|
||||
@ -1695,7 +1717,24 @@ nsSocketTransportService::EndPolling()
|
||||
mPollRepairTimer->Cancel();
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void nsSocketTransportService::TryRepairPollableEvent()
|
||||
{
|
||||
mLock.AssertCurrentThreadOwns();
|
||||
|
||||
NS_WARNING("Trying to repair mPollableEvent");
|
||||
mPollableEvent.reset(new PollableEvent());
|
||||
if (!mPollableEvent->Valid()) {
|
||||
mPollableEvent = nullptr;
|
||||
}
|
||||
SOCKET_LOG(("running socket transport thread without "
|
||||
"a pollable event now valid=%d", !!mPollableEvent));
|
||||
mPollList[0].fd = mPollableEvent ? mPollableEvent->PollableFD() : nullptr;
|
||||
mPollList[0].in_flags = PR_POLL_READ | PR_POLL_EXCEPT;
|
||||
mPollList[0].out_flags = 0;
|
||||
}
|
||||
|
||||
} // namespace net
|
||||
} // namespace mozilla
|
||||
|
@ -247,6 +247,8 @@ private:
|
||||
int32_t mKeepaliveProbeCount;
|
||||
// True if TCP keepalive is enabled globally.
|
||||
bool mKeepaliveEnabledPref;
|
||||
// Timeout of pollable event signalling.
|
||||
TimeDuration mPollableEventTimeout;
|
||||
|
||||
Atomic<bool> mServingPendingQueue;
|
||||
Atomic<int32_t, Relaxed> mMaxTimePerPollIter;
|
||||
@ -285,6 +287,8 @@ private:
|
||||
void StartPolling();
|
||||
void EndPolling();
|
||||
#endif
|
||||
|
||||
void TryRepairPollableEvent();
|
||||
};
|
||||
|
||||
extern nsSocketTransportService *gSocketTransportService;
|
||||
|
Loading…
Reference in New Issue
Block a user