Bug 1325336 - Generate statistics for cache performance on the given HW, r=valentin, r=honzab

CachePerfStats gathers performance data for individual open, read, and write operations as well as for opening a whole cache entry. It maintains a long-term and a short-term average. The long-term average filters out excessive values and represents the average time of a given operation when the cache is not busy. The short-term average represents the current cache speed. By comparing these two statistics we can quickly detect that the cache is getting slower, in which case we race the cache against the network immediately, without any delay. Otherwise the delay is based on the average cache entry open time.
Michal Novotny 2017-06-22 15:44:00 +02:00
parent 66ad9aca23
commit 7f22d3a3e9
6 changed files with 311 additions and 14 deletions
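
As an editorial aside on the approach described in the commit message: the core idea is to keep a long-term average of operation times with outliers excluded alongside a short-term average of recent samples, and to treat the cache as slow when the short-term average rises well above the long-term one. The sketch below illustrates that idea in standalone C++; WindowedStats and IsSlow are hypothetical names, and it uses simple windowed averages where the patch below uses a modified moving average.

// Editorial sketch only -- not part of this commit.
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <numeric>

class WindowedStats {              // hypothetical helper, not in the tree
public:
  explicit WindowedStats(size_t aWindow) : mWindow(aWindow) {}

  void AddValue(uint32_t aMicroseconds) {
    mSamples.push_back(aMicroseconds);
    if (mSamples.size() > mWindow) {
      mSamples.pop_front();
    }
  }

  double Average() const {
    if (mSamples.empty()) {
      return 0.0;
    }
    return std::accumulate(mSamples.begin(), mSamples.end(), 0.0) /
           mSamples.size();
  }

  double StdDev() const {
    if (mSamples.empty()) {
      return 0.0;
    }
    double avg = Average();
    double sumSq = 0.0;
    for (uint32_t v : mSamples) {
      sumSq += (v - avg) * (v - avg);
    }
    return std::sqrt(sumSq / mSamples.size());
  }

private:
  std::deque<uint32_t> mSamples;
  size_t mWindow;
};

// The cache counts as slow when the short-term average exceeds the long-term
// average by more than three long-term standard deviations.
bool IsSlow(const WindowedStats& aLongTerm, const WindowedStats& aShortTerm) {
  double avgLong = aLongTerm.Average();
  if (avgLong == 0.0) {
    return false;  // no data yet
  }
  return aShortTerm.Average() > avgLong + 3 * aLongTerm.StdDev();
}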


@@ -594,15 +594,60 @@ protected:
bool mNotified;
};
class OpenFileEvent : public Runnable {
// Class responsible for reporting IO performance stats
class IOPerfReportEvent {
public:
IOPerfReportEvent(CacheFileUtils::CachePerfStats::EDataType aType)
: mType(aType)
, mEventCounter(0)
{
}
void Start(CacheIOThread *aIOThread)
{
mStartTime = TimeStamp::Now();
mEventCounter = aIOThread->EventCounter();
}
void Report(CacheIOThread *aIOThread)
{
if (mStartTime.IsNull()) {
return;
}
// Single IO operations can take less than 1ms. So we use microseconds to
// keep a good resolution of data.
uint32_t duration = (TimeStamp::Now() - mStartTime).ToMicroseconds();
// This is a simple prefiltering of values that might differ a lot from the
// average value. Do not add the value to the filtered stats when the event
// had to wait in a long queue.
uint32_t eventCounter = aIOThread->EventCounter();
bool shortOnly = eventCounter - mEventCounter < 5 ? false : true;
CacheFileUtils::CachePerfStats::AddValue(mType, duration, shortOnly);
}
protected:
CacheFileUtils::CachePerfStats::EDataType mType;
TimeStamp mStartTime;
uint32_t mEventCounter;
};
class OpenFileEvent : public Runnable
, public IOPerfReportEvent {
public:
OpenFileEvent(const nsACString &aKey, uint32_t aFlags,
CacheFileIOListener *aCallback)
: mFlags(aFlags)
: IOPerfReportEvent(CacheFileUtils::CachePerfStats::IO_OPEN)
, mFlags(aFlags)
, mCallback(aCallback)
, mKey(aKey)
{
mIOMan = CacheFileIOManager::gInstance;
if (!(mFlags & CacheFileIOManager::SPECIAL_FILE)) {
Start(mIOMan->mIOThread);
}
}
protected:
@@ -630,6 +675,9 @@ public:
} else {
rv = mIOMan->OpenFileInternal(&mHash, mKey, mFlags,
getter_AddRefs(mHandle));
if (NS_SUCCEEDED(rv)) {
Report(mIOMan->mIOThread);
}
}
mIOMan = nullptr;
if (mHandle) {
@@ -652,16 +700,21 @@ protected:
nsCString mKey;
};
class ReadEvent : public Runnable {
class ReadEvent : public Runnable
, public IOPerfReportEvent {
public:
ReadEvent(CacheFileHandle *aHandle, int64_t aOffset, char *aBuf,
int32_t aCount, CacheFileIOListener *aCallback)
: mHandle(aHandle)
: IOPerfReportEvent(CacheFileUtils::CachePerfStats::IO_READ)
, mHandle(aHandle)
, mOffset(aOffset)
, mBuf(aBuf)
, mCount(aCount)
, mCallback(aCallback)
{
if (!mHandle->IsSpecialFile()) {
Start(CacheFileIOManager::gInstance->mIOThread);
}
}
protected:
@@ -679,6 +732,9 @@ public:
} else {
rv = CacheFileIOManager::gInstance->ReadInternal(
mHandle, mOffset, mBuf, mCount);
if (NS_SUCCEEDED(rv)) {
Report(CacheFileIOManager::gInstance->mIOThread);
}
}
mCallback->OnDataRead(mHandle, mBuf, rv);
@@ -693,12 +749,14 @@ protected:
nsCOMPtr<CacheFileIOListener> mCallback;
};
class WriteEvent : public Runnable {
class WriteEvent : public Runnable
, public IOPerfReportEvent {
public:
WriteEvent(CacheFileHandle *aHandle, int64_t aOffset, const char *aBuf,
int32_t aCount, bool aValidate, bool aTruncate,
CacheFileIOListener *aCallback)
: mHandle(aHandle)
: IOPerfReportEvent(CacheFileUtils::CachePerfStats::IO_WRITE)
, mHandle(aHandle)
, mOffset(aOffset)
, mBuf(aBuf)
, mCount(aCount)
@@ -706,6 +764,9 @@ public:
, mTruncate(aTruncate)
, mCallback(aCallback)
{
if (!mHandle->IsSpecialFile()) {
Start(CacheFileIOManager::gInstance->mIOThread);
}
}
protected:
@@ -732,6 +793,9 @@ public:
} else {
rv = CacheFileIOManager::gInstance->WriteInternal(
mHandle, mOffset, mBuf, mCount, mValidate, mTruncate);
if (NS_SUCCEEDED(rv)) {
Report(CacheFileIOManager::gInstance->mIOThread);
}
if (NS_FAILED(rv) && !mCallback) {
// No listener is going to handle the error, doom the file
CacheFileIOManager::gInstance->DoomFileInternal(mHandle);
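
A note on the prefiltering performed in Report() above: the difference between the IO thread's event counter at Start() and at Report() approximates how many other events this runnable waited behind, so samples that spent a long time queued go only into the short-term statistics and stay out of the filtered long-term average. A standalone editorial sketch of that decision follows (PerfSample and MakeSample are hypothetical names; the threshold of 5 events is taken from the code above).

// Editorial sketch only -- not part of this commit. Every sample feeds the
// short-term stats, but a sample that waited behind several queued events is
// excluded from the filtered long-term stats because it mostly measures
// queueing, not disk speed.
#include <cstdint>

struct PerfSample {          // hypothetical type
  uint32_t durationUs;       // time from Start() to Report(), in microseconds
  bool shortOnly;            // true -> keep out of the filtered average
};

PerfSample MakeSample(uint32_t aDurationUs,
                      uint32_t aCounterAtStart,
                      uint32_t aCounterAtReport) {
  // Unsigned subtraction keeps the delta correct even if the counter wraps.
  bool waitedInQueue = (aCounterAtReport - aCounterAtStart) >= 5;
  return PerfSample{aDurationUs, waitedInQueue};
}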


@@ -511,6 +511,147 @@ DetailedCacheHitTelemetry::AddRecord(ERecType aType, TimeStamp aLoadStart)
}
}
StaticMutex CachePerfStats::sLock;
CachePerfStats::PerfData CachePerfStats::sData[CachePerfStats::LAST];
CachePerfStats::MMA::MMA(uint32_t aTotalWeight, bool aFilter)
: mSum(0)
, mSumSq(0)
, mCnt(0)
, mWeight(aTotalWeight)
, mFilter(aFilter)
{
}
void
CachePerfStats::MMA::AddValue(uint32_t aValue)
{
if (mFilter) {
// Filter high spikes
uint32_t avg = GetAverage();
uint32_t stddev = GetStdDev();
if (avg && aValue > 3 * stddev + avg) {
return;
}
}
if (mCnt < mWeight) {
// Compute arithmetic average until we have at least mWeight values
CheckedInt<uint64_t> newSumSq = static_cast<uint64_t>(aValue) * aValue;
newSumSq += mSumSq;
if (!newSumSq.isValid()) {
return; // ignore this value
}
mSumSq = newSumSq.value();
mSum += aValue;
++mCnt;
} else {
CheckedInt<uint64_t> newSumSq = mSumSq - mSumSq / mCnt;
newSumSq += static_cast<uint64_t>(aValue) * aValue;
if (!newSumSq.isValid()) {
return; // ignore this value
}
mSumSq = newSumSq.value();
// Compute modified moving average for more values:
// newAvg = ((weight - 1) * oldAvg + newValue) / weight
mSum -= GetAverage();
mSum += aValue;
}
}
uint32_t
CachePerfStats::MMA::GetAverage()
{
if (mCnt == 0) {
return 0;
}
return mSum / mCnt;
}
uint32_t
CachePerfStats::MMA::GetStdDev()
{
if (mCnt == 0) {
return 0;
}
uint32_t avg = GetAverage();
uint64_t avgSq = static_cast<uint64_t>(avg) * avg;
uint64_t variance = mSumSq / mCnt;
MOZ_RELEASE_ASSERT(variance >= avgSq);
variance -= avgSq;
return sqrt(variance);
}
CachePerfStats::PerfData::PerfData()
: mFilteredAvg(50, true)
, mShortAvg(3, false)
{
}
void
CachePerfStats::PerfData::AddValue(uint32_t aValue, bool aShortOnly)
{
if (!aShortOnly) {
mFilteredAvg.AddValue(aValue);
}
mShortAvg.AddValue(aValue);
}
uint32_t
CachePerfStats::PerfData::GetAverage(bool aFiltered)
{
return aFiltered ? mFilteredAvg.GetAverage() : mShortAvg.GetAverage();
}
uint32_t
CachePerfStats::PerfData::GetStdDev(bool aFiltered)
{
return aFiltered ? mFilteredAvg.GetStdDev() : mShortAvg.GetStdDev();
}
// static
void
CachePerfStats::AddValue(EDataType aType, uint32_t aValue, bool aShortOnly)
{
StaticMutexAutoLock lock(sLock);
sData[aType].AddValue(aValue, aShortOnly);
}
// static
uint32_t
CachePerfStats::GetAverage(EDataType aType, bool aFiltered)
{
StaticMutexAutoLock lock(sLock);
return sData[aType].GetAverage(aFiltered);
}
//static
bool
CachePerfStats::IsCacheSlow()
{
// Compare mShortAvg with mFilteredAvg to find out whether cache is getting
// slower. Use only data about single IO operations because ENTRY_OPEN can be
// affected by more factors than a slow disk.
for (uint32_t i = 0; i < ENTRY_OPEN; ++i) {
uint32_t avgLong = sData[i].GetAverage(true);
if (avgLong == 0) {
// We have no perf data yet, skip this data type.
continue;
}
uint32_t avgShort = sData[i].GetAverage(false);
uint32_t stddevLong = sData[i].GetStdDev(true);
if (avgShort > 3 * stddevLong + avgLong) {
return true;
}
}
return false;
}
void
FreeBuffer(void *aBuf) {
#ifndef NS_FREE_PERMANENT_DATA
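
A worked example of the two 3-sigma checks above (all numbers hypothetical): if the filtered long-term average for IO_READ is 800 us with a standard deviation of 150 us, then MMA::AddValue drops any new sample above 800 + 3 * 150 = 1250 us from the filtered statistics, and IsCacheSlow() reports the cache as slow only once the short-term average itself exceeds that same 1250 us bound.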


@@ -148,6 +148,71 @@ private:
static HitRate sHRStats[kNumOfRanges];
};
class CachePerfStats {
public:
enum EDataType {
IO_OPEN = 0,
IO_READ = 1,
IO_WRITE = 2,
ENTRY_OPEN = 3,
LAST = 4
};
static void AddValue(EDataType aType, uint32_t aValue, bool aShortOnly);
static uint32_t GetAverage(EDataType aType, bool aFiltered);
static bool IsCacheSlow();
private:
// This class computes average and standard deviation, it returns an
// arithmetic avg and stddev until total number of values reaches mWeight.
// Then it returns modified moving average computed as follows:
//
// avg = (1-a)*avg + a*value
// avgsq = (1-a)*avgsq + a*value^2
// stddev = sqrt(avgsq - avg^2)
//
// where
// avgsq is an average of the square of the values
// a = 1 / weight
class MMA {
public:
MMA(uint32_t aTotalWeight, bool aFilter);
void AddValue(uint32_t aValue);
uint32_t GetAverage();
uint32_t GetStdDev();
private:
uint64_t mSum;
uint64_t mSumSq;
uint32_t mCnt;
uint32_t mWeight;
bool mFilter;
};
class PerfData {
public:
PerfData();
void AddValue(uint32_t aValue, bool aShortOnly);
uint32_t GetAverage(bool aFiltered);
uint32_t GetStdDev(bool aFiltered);
private:
// Contains filtered data (i.e. times when we think the cache and disk was
// not busy) for a longer time.
MMA mFilteredAvg;
// Contains unfiltered average of few recent values.
MMA mShortAvg;
};
static StaticMutex sLock;
static PerfData sData[LAST];
};
void
FreeBuffer(void *aBuf);
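
The comment block in the header above documents the modified-moving-average recurrences. The standalone editorial sketch below applies the same recurrences in floating point to made-up sample values; the real MMA class keeps integer sums instead and uses a plain arithmetic average for the first mWeight samples before switching to the recurrence.

// Editorial sketch only -- not part of this commit. It demonstrates the
// recurrences documented in the CachePerfStats::MMA comment:
//   avg    = (1 - a) * avg    + a * value
//   avgsq  = (1 - a) * avgsq  + a * value^2
//   stddev = sqrt(avgsq - avg^2),  where a = 1 / weight
#include <cmath>
#include <cstdio>

int main() {
  const double weight = 50.0;  // matches the weight of 50 used by mFilteredAvg
  const double a = 1.0 / weight;
  double avg = 0.0;
  double avgsq = 0.0;

  const double samplesUs[] = {900, 950, 870, 4000, 910};  // made-up samples
  for (double v : samplesUs) {
    avg = (1.0 - a) * avg + a * v;
    avgsq = (1.0 - a) * avgsq + a * v * v;
  }

  double stddev = std::sqrt(avgsq - avg * avg);
  std::printf("avg=%.1f us  stddev=%.1f us\n", avg, stddev);
  return 0;
}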


@@ -234,6 +234,7 @@ CacheIOThread::CacheIOThread()
, mRerunCurrentEvent(false)
, mShutdown(false)
, mIOCancelableEvents(0)
, mEventCounter(0)
#ifdef DEBUG
, mInsideLoop(true)
#endif
@@ -491,6 +492,7 @@ loopStart:
nsIThread *thread = mXPCOMThread;
rv = thread->ProcessNextEvent(false, &processedEvent);
++mEventCounter;
MOZ_ASSERT(mBlockingIOWatcher);
mBlockingIOWatcher->NotifyOperationDone();
} while (NS_SUCCEEDED(rv) && processedEvent);
@@ -576,6 +578,7 @@ void CacheIOThread::LoopOneLevel(uint32_t aLevel)
break;
}
++mEventCounter;
--mQueueLength[aLevel];
// Release outside the lock.

View File

@@ -69,6 +69,8 @@ public:
uint32_t QueueSize(bool highPriority);
uint32_t EventCounter() const { return mEventCounter; }
/**
* Callable only on this thread, checks if there is an event waiting in
* the event queue with a higher execution priority. If so, the result
@@ -136,6 +138,8 @@ private:
// can be canceled when after shutdown, see the Shutdown() method
// for usage. Made a counter to allow nesting of the Cancelable class.
Atomic<uint32_t, Relaxed> mIOCancelableEvents;
// Event counter that increases with every event processed.
Atomic<uint32_t, Relaxed> mEventCounter;
#ifdef DEBUG
bool mInsideLoop;
#endif
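
The mEventCounter member added above only has to provide an approximate, monotonically increasing count so that callers can compute deltas, which is why relaxed atomic ordering is sufficient. A minimal standalone equivalent (editorial sketch using std::atomic instead of mozilla::Atomic):

// Editorial sketch only -- not part of this commit. A counter bumped once per
// processed event; readers only subtract two observed values to estimate how
// many events ran in between, so relaxed memory ordering suffices.
#include <atomic>
#include <cstdint>

std::atomic<uint32_t> gEventCounter{0};

void OnEventProcessed() {
  gEventCounter.fetch_add(1, std::memory_order_relaxed);
}

uint32_t EventsSince(uint32_t aCounterAtStart) {
  // Unsigned arithmetic keeps the delta correct across wrap-around.
  return gEventCounter.load(std::memory_order_relaxed) - aCounterAtStart;
}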


@@ -112,6 +112,7 @@
#include "nsIFileStreams.h"
#include "nsIMIMEInputStream.h"
#include "nsIMultiplexInputStream.h"
#include "../../cache2/CacheFileUtils.h"
#ifdef MOZ_TASK_TRACER
#include "GeckoTaskTracer.h"
@@ -4487,6 +4488,19 @@ nsHttpChannel::OnNormalCacheEntryAvailable(nsICacheEntry *aEntry,
mCacheEntry = aEntry;
mCacheEntryIsWriteOnly = aNew;
if (!aNew && !mAsyncOpenTime.IsNull()) {
// We use microseconds for IO operations. For consistency let's use
// microseconds here too.
uint32_t duration = (TimeStamp::Now() - mAsyncOpenTime).ToMicroseconds();
bool isSlow = false;
if ((mCacheOpenWithPriority && mCacheQueueSizeWhenOpen >= sRCWNQueueSizePriority) ||
(!mCacheOpenWithPriority && mCacheQueueSizeWhenOpen >= sRCWNQueueSizeNormal)) {
isSlow = true;
}
CacheFileUtils::CachePerfStats::AddValue(
CacheFileUtils::CachePerfStats::ENTRY_OPEN, duration, isSlow);
}
if (mLoadFlags & LOAD_INITIAL_DOCUMENT_URI) {
Telemetry::Accumulate(Telemetry::HTTP_OFFLINE_CACHE_DOCUMENT_LOAD,
false);
@@ -9192,16 +9206,22 @@ nsHttpChannel::MaybeRaceCacheWithNetwork()
return NS_OK;
}
uint32_t threshold = mCacheOpenWithPriority ? sRCWNQueueSizePriority
: sRCWNQueueSizeNormal;
// No need to trigger to trigger the racing, since most likely the cache
// will be faster.
if (mCacheQueueSizeWhenOpen < threshold) {
return NS_OK;
uint32_t delay;
if (CacheFileUtils::CachePerfStats::IsCacheSlow()) {
// If the cache is slow, trigger the network request immediately.
delay = 0;
} else {
// Give cache a headstart of 3 times the average cache entry open time.
delay = CacheFileUtils::CachePerfStats::GetAverage(
CacheFileUtils::CachePerfStats::ENTRY_OPEN, true) * 3;
// We use microseconds in CachePerfStats but we need milliseconds
// for TriggerNetwork.
delay /= 1000;
}
MOZ_ASSERT(sRCWNEnabled, "The pref must be truned on.");
LOG(("nsHttpChannel::MaybeRaceCacheWithNetwork [this=%p]\n", this));
LOG(("nsHttpChannel::MaybeRaceCacheWithNetwork [this=%p, delay=%u]\n",
this, delay));
if (!mOnCacheAvailableCalled) {
// If the network was triggered before onCacheEntryAvailable was
@@ -9209,7 +9229,7 @@ nsHttpChannel::MaybeRaceCacheWithNetwork()
mRaceCacheWithNetwork = true;
}
return TriggerNetwork(0);
return TriggerNetwork(delay);
}
NS_IMETHODIMP
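
Putting the nsHttpChannel changes together, the race delay ends up being computed roughly as in the editorial sketch below (ComputeRaceDelayMs is a hypothetical name; the CachePerfStats calls are replaced by plain parameters).

// Editorial sketch only -- not part of this commit. Summarizes the delay
// logic in MaybeRaceCacheWithNetwork(): race the network immediately when the
// cache is slow, otherwise give the cache a head start of three times the
// average entry-open time, converted from the microseconds used by the stats
// to the milliseconds expected by TriggerNetwork().
#include <cstdint>

uint32_t ComputeRaceDelayMs(bool aCacheIsSlow, uint32_t aEntryOpenAvgUs) {
  if (aCacheIsSlow) {
    return 0;  // trigger the network request without any head start
  }
  uint32_t delayUs = aEntryOpenAvgUs * 3;
  return delayUs / 1000;  // microseconds -> milliseconds
}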