From da6008021449f74d83995c3dd197961a9eae239f Mon Sep 17 00:00:00 2001
From: Nicholas Hurley
Date: Fri, 17 Jan 2014 17:45:48 -0800
Subject: [PATCH] Bug 947745 - Clean up the seer database when it gets too big. r=honzab

---
Review notes (this section is ignored by git-am):
 * Restored the one-diff-line-per-line layout. Indentation and blank context
   lines are reconstructed, so applying may need --ignore-whitespace.
 * Template arguments that had been stripped from the text are restored as
   inferred from how each variable is used.
 * CleanupOrigins/CleanupStartupPages bind the PRTime cutoff with
   BindInt64ByName instead of BindInt32ByName.
 * GetDBFileSize/GetDBFileSizeAfterVacuum now call BeginTransaction() on
   their failure paths too, via the new file-static ReadPragmaValue helper.
 * Hunk and diffstat counts for Seer.cpp are updated to match; the blob ids
   on its index line are unchanged and therefore stale.

 b2g/app/b2g.js                               |   4 +
 mobile/android/app/mobile.js                 |   4 +
 modules/libpref/src/init/all.js              |   2 +
 netwerk/base/src/Seer.cpp                    | 358 +++++++++++++++++++
 netwerk/base/src/Seer.h                      |  16 +
 toolkit/components/telemetry/Histograms.json |  24 ++
 6 files changed, 408 insertions(+)

diff --git a/b2g/app/b2g.js b/b2g/app/b2g.js
index 61f334039ea5..741744d67f84 100644
--- a/b2g/app/b2g.js
+++ b/b2g/app/b2g.js
@@ -68,6 +68,10 @@ pref("network.http.spdy.push-allowance", 32768);
 pref("network.buffer.cache.count", 24);
 pref("network.buffer.cache.size", 16384);
 
+// predictive actions
+pref("network.seer.max-db-size", 2097152); // bytes
+pref("network.seer.preserve", 50); // percentage of seer data to keep when cleaning up
+
 /* session history */
 pref("browser.sessionhistory.max_total_viewers", 1);
 pref("browser.sessionhistory.max_entries", 50);
diff --git a/mobile/android/app/mobile.js b/mobile/android/app/mobile.js
index c04391171944..1e354b4450f5 100644
--- a/mobile/android/app/mobile.js
+++ b/mobile/android/app/mobile.js
@@ -102,6 +102,10 @@ pref("network.http.spdy.push-allowance", 32768);
 pref("network.buffer.cache.count", 24);
 pref("network.buffer.cache.size", 16384);
 
+// predictive actions
+pref("network.seer.max-db-size", 2097152); // bytes
+pref("network.seer.preserve", 50); // percentage of seer data to keep when cleaning up
+
 /* history max results display */
 pref("browser.display.history.maxresults", 100);
 
diff --git a/modules/libpref/src/init/all.js b/modules/libpref/src/init/all.js
index b2f24fc73bf7..edadc5fd4fc9 100644
--- a/modules/libpref/src/init/all.js
+++ b/modules/libpref/src/init/all.js
@@ -1273,6 +1273,8 @@ pref("network.seer.preconnect-min-confidence", 90);
 pref("network.seer.preresolve-min-confidence", 60);
 pref("network.seer.redirect-likely-confidence", 75);
 pref("network.seer.max-queue-size", 50);
+pref("network.seer.max-db-size", 157286400); // bytes
+pref("network.seer.preserve", 80); // percentage of seer data to keep when cleaning up
 
 
 // The following prefs pertain to the negotiate-auth extension (see bug 17578),
diff --git a/netwerk/base/src/Seer.cpp b/netwerk/base/src/Seer.cpp
index 571522771893..68d7b6317eab 100644
--- a/netwerk/base/src/Seer.cpp
+++ b/netwerk/base/src/Seer.cpp
@@ -100,6 +100,11 @@ const int REDIRECT_LIKELY_DEFAULT = 75;
 const char SEER_MAX_QUEUE_SIZE_PREF[] = "network.seer.max-queue-size";
 const uint32_t SEER_MAX_QUEUE_SIZE_DEFAULT = 50;
 
+const char SEER_MAX_DB_SIZE_PREF[] = "network.seer.max-db-size";
+const int32_t SEER_MAX_DB_SIZE_DEFAULT_BYTES = 150 * 1024 * 1024;
+const char SEER_PRESERVE_PERCENTAGE_PREF[] = "network.seer.preserve";
+const int32_t SEER_PRESERVE_PERCENTAGE_DEFAULT = 80;
+
 // All these time values are in usec
 const long long ONE_DAY = 86400LL * 1000000LL;
 const long long ONE_WEEK = 7LL * ONE_DAY;
@@ -192,6 +197,10 @@ Seer::Seer()
   ,mStartupCount(0)
   ,mQueueSize(0)
   ,mQueueSizeLock("Seer.mQueueSizeLock")
+  ,mCleanupScheduled(false)
+  ,mMaxDBSize(SEER_MAX_DB_SIZE_DEFAULT_BYTES)
+  ,mPreservePercentage(SEER_PRESERVE_PERCENTAGE_DEFAULT)
+  ,mLastCleanupTime(0)
 {
 #if defined(PR_LOGGING)
   gSeerLog = PR_NewLogModule("NetworkSeer");
@@ -276,6 +285,12 @@ Seer::InstallObserver()
   Preferences::AddIntVarCache(&mMaxQueueSize, SEER_MAX_QUEUE_SIZE_PREF,
                               SEER_MAX_QUEUE_SIZE_DEFAULT);
 
+  Preferences::AddIntVarCache(&mMaxDBSize, SEER_MAX_DB_SIZE_PREF,
+                              SEER_MAX_DB_SIZE_DEFAULT_BYTES);
+  Preferences::AddIntVarCache(&mPreservePercentage,
+                              SEER_PRESERVE_PERCENTAGE_PREF,
+                              SEER_PRESERVE_PERCENTAGE_DEFAULT);
+
   return rv;
 }
 
@@ -320,6 +335,7 @@ class SeerNewTransactionEvent : public nsRunnable
   {
     gSeer->CommitTransaction();
     gSeer->BeginTransaction();
+    gSeer->MaybeScheduleCleanup();
     nsRefPtr<SeerCommitTimerInitEvent> event = new SeerCommitTimerInitEvent();
     NS_DispatchToMainThread(event);
     return NS_OK;
@@ -574,6 +590,11 @@ Seer::EnsureInitStorage()
                            "ON moz_hosts (origin);"));
   NS_ENSURE_SUCCESS(rv, rv);
 
+  rv = mDB->ExecuteSimpleSQL(
+      NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS host_load_index "
+                         "ON moz_hosts (last_load);"));
+  NS_ENSURE_SUCCESS(rv, rv);
+
   // And this is the table that keeps track of the hosts for subresources of a
   // pageload.
   rv = mDB->ExecuteSimpleSQL(
@@ -675,6 +696,11 @@ Seer::EnsureInitStorage()
                            "ON moz_startup_pages (uri);"));
   NS_ENSURE_SUCCESS(rv, rv);
 
+  rv = mDB->ExecuteSimpleSQL(
+      NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS startup_page_hit_index "
+                         "ON moz_startup_pages (last_hit);"));
+  NS_ENSURE_SUCCESS(rv, rv);
+
   // This table is similar to moz_hosts above, but uses full URIs instead of
   // hosts so that we can get more specific predictions for URIs that people
   // visit often (such as their email or social network home pages).
@@ -944,6 +970,8 @@ public:
     Telemetry::AccumulateTimeDelta(Telemetry::SEER_PREDICT_WORK_TIME,
                                    startTime);
 
+    gSeer->MaybeScheduleCleanup();
+
     return rv;
   }
 
@@ -1725,6 +1753,8 @@ public:
     Telemetry::AccumulateTimeDelta(Telemetry::SEER_LEARN_WORK_TIME,
                                    startTime);
 
+    gSeer->MaybeScheduleCleanup();
+
     return rv;
   }
 private:
@@ -2234,6 +2264,334 @@ Seer::Reset()
   return mIOThread->Dispatch(event, NS_DISPATCH_NORMAL);
 }
 
+// Runnable that runs Seer::Cleanup() and then clears mCleanupScheduled so
+// that another cleanup can be scheduled. Dispatched to mIOThread by
+// MaybeScheduleCleanup().
+class SeerCleanupEvent : public nsRunnable
+{
+public:
+  NS_IMETHOD Run() MOZ_OVERRIDE
+  {
+    gSeer->Cleanup();
+    gSeer->mCleanupScheduled = false;
+    return NS_OK;
+  }
+};
+
+// Steps |stmt| (a single-value PRAGMA query) once and stores column 0 in
+// |value|. Returns false if |stmt| is null, fails, or yields no row.
+static bool
+ReadPragmaValue(mozIStorageStatement *stmt, int64_t *value)
+{
+  if (!stmt) {
+    return false;
+  }
+  mozStorageStatementScoper scoper(stmt);
+
+  bool hasRows;
+  nsresult rv = stmt->ExecuteStep(&hasRows);
+  if (NS_FAILED(rv) || !hasRows) {
+    return false;
+  }
+
+  return NS_SUCCEEDED(stmt->GetInt64(0, value));
+}
+
+// Returns the current size (in bytes) of the db file on disk, or 0 if the
+// size could not be determined.
+int64_t
+Seer::GetDBFileSize()
+{
+  MOZ_ASSERT(!NS_IsMainThread(), "GetDBFileSize called on main thread!");
+
+  CommitTransaction();
+
+  nsCOMPtr<mozIStorageStatement> countStmt = mStatements.GetCachedStatement(
+      NS_LITERAL_CSTRING("PRAGMA page_count;"));
+  nsCOMPtr<mozIStorageStatement> sizeStmt = mStatements.GetCachedStatement(
+      NS_LITERAL_CSTRING("PRAGMA page_size;"));
+
+  int64_t pageCount = 0;
+  int64_t pageSize = 0;
+  bool ok = ReadPragmaValue(countStmt, &pageCount) &&
+            ReadPragmaValue(sizeStmt, &pageSize);
+
+  // Reopen the transaction on every path, including failure, so that the
+  // CommitTransaction() above is always balanced.
+  BeginTransaction();
+
+  return ok ? pageCount * pageSize : 0;
+}
+
+// Returns the size (in bytes) that the db file will consume on disk AFTER we
+// vacuum the db (pages on the freelist are not counted), or 0 if the size
+// could not be determined.
+int64_t
+Seer::GetDBFileSizeAfterVacuum()
+{
+  MOZ_ASSERT(!NS_IsMainThread(), "GetDBFileSizeAfterVacuum called on main thread!");
+
+  CommitTransaction();
+
+  nsCOMPtr<mozIStorageStatement> countStmt = mStatements.GetCachedStatement(
+      NS_LITERAL_CSTRING("PRAGMA page_count;"));
+  nsCOMPtr<mozIStorageStatement> sizeStmt = mStatements.GetCachedStatement(
+      NS_LITERAL_CSTRING("PRAGMA page_size;"));
+  nsCOMPtr<mozIStorageStatement> freeStmt = mStatements.GetCachedStatement(
+      NS_LITERAL_CSTRING("PRAGMA freelist_count;"));
+
+  int64_t pageCount = 0;
+  int64_t pageSize = 0;
+  int64_t freelistCount = 0;
+  bool ok = ReadPragmaValue(countStmt, &pageCount) &&
+            ReadPragmaValue(sizeStmt, &pageSize) &&
+            ReadPragmaValue(freeStmt, &freelistCount);
+
+  // Reopen the transaction on every path, including failure, so that the
+  // CommitTransaction() above is always balanced.
+  BeginTransaction();
+
+  return ok ? (pageCount - freelistCount) * pageSize : 0;
+}
+
+// Dispatches a SeerCleanupEvent to mIOThread if no cleanup is already pending
+// and the db file has reached mMaxDBSize bytes. Records in
+// SEER_CLEANUP_SCHEDULED whether a cleanup was actually scheduled.
+void
+Seer::MaybeScheduleCleanup()
+{
+  MOZ_ASSERT(!NS_IsMainThread(), "MaybeScheduleCleanup called on main thread!");
+
+  if (mCleanupScheduled) {
+    Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SCHEDULED, false);
+    return;
+  }
+
+  int64_t dbFileSize = GetDBFileSize();
+  if (dbFileSize < mMaxDBSize) {
+    Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SCHEDULED, false);
+    return;
+  }
+
+  mCleanupScheduled = true;
+
+  nsRefPtr<SeerCleanupEvent> event = new SeerCleanupEvent();
+  nsresult rv = mIOThread->Dispatch(event, NS_DISPATCH_NORMAL);
+  if (NS_FAILED(rv)) {
+    mCleanupScheduled = false;
+    Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SCHEDULED, false);
+  } else {
+    Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SCHEDULED, true);
+  }
+}
+
+// Age at or past which CleanupOrigins() deletes a moz_hosts row.
+#ifndef ANDROID
+static const long long CLEANUP_CUTOFF = ONE_MONTH;
+#else
+static const long long CLEANUP_CUTOFF = ONE_WEEK;
+#endif
+
+// Deletes every row of moz_hosts whose last_load is at or before
+// |now| - CLEANUP_CUTOFF.
+void
+Seer::CleanupOrigins(PRTime now)
+{
+  PRTime cutoff = now - CLEANUP_CUTOFF;
+
+  nsCOMPtr<mozIStorageStatement> deleteOrigins = mStatements.GetCachedStatement(
+      NS_LITERAL_CSTRING("DELETE FROM moz_hosts WHERE last_load <= :cutoff"));
+  if (!deleteOrigins) {
+    return;
+  }
+  mozStorageStatementScoper scopedOrigins(deleteOrigins);
+
+  // |cutoff| is a 64-bit PRTime; binding it as an int32 would truncate it.
+  nsresult rv = deleteOrigins->BindInt64ByName(NS_LITERAL_CSTRING("cutoff"),
+                                               cutoff);
+  RETURN_IF_FAILED(rv);
+
+  deleteOrigins->Execute();
+}
+
+// Deletes every row of moz_startup_pages whose last_hit is at or before
+// |now| - ONE_WEEK.
+void
+Seer::CleanupStartupPages(PRTime now)
+{
+  PRTime cutoff = now - ONE_WEEK;
+
+  nsCOMPtr<mozIStorageStatement> deletePages = mStatements.GetCachedStatement(
+      NS_LITERAL_CSTRING("DELETE FROM moz_startup_pages WHERE "
+                         "last_hit <= :cutoff"));
+  if (!deletePages) {
+    return;
+  }
+  mozStorageStatementScoper scopedPages(deletePages);
+
+  // |cutoff| is a 64-bit PRTime; binding it as an int32 would truncate it.
+  nsresult rv = deletePages->BindInt64ByName(NS_LITERAL_CSTRING("cutoff"),
+                                             cutoff);
+  RETURN_IF_FAILED(rv);
+
+  deletePages->Execute();
+}
+
+// Returns the number of rows in moz_subresources, or 0 if the count could not
+// be obtained.
+int32_t
+Seer::GetSubresourceCount()
+{
+  nsCOMPtr<mozIStorageStatement> count = mStatements.GetCachedStatement(
+      NS_LITERAL_CSTRING("SELECT COUNT(id) FROM moz_subresources"));
+  if (!count) {
+    return 0;
+  }
+  mozStorageStatementScoper scopedCount(count);
+
+  bool hasRows;
+  nsresult rv = count->ExecuteStep(&hasRows);
+  if (NS_FAILED(rv) || !hasRows) {
+    return 0;
+  }
+
+  int32_t subresourceCount = 0;
+  count->GetInt32(0, &subresourceCount);
+
+  return subresourceCount;
+}
+
+// Shrinks the db until its projected post-vacuum size is below
+// mPreservePercentage percent of mMaxDBSize: first by expiring old hosts and
+// startup pages, then by deleting the subresources with the oldest last_hit,
+// and as a last resort by calling ResetInternal().
+void
+Seer::Cleanup()
+{
+  MOZ_ASSERT(!NS_IsMainThread(), "Seer::Cleanup called on main thread!");
+
+  nsresult rv = EnsureInitStorage();
+  if (NS_FAILED(rv)) {
+    return;
+  }
+
+  int64_t dbFileSize = GetDBFileSize();
+  float preservePercentage = static_cast<float>(mPreservePercentage) / 100.0;
+  int64_t evictionCutoff = static_cast<int64_t>(mMaxDBSize) * preservePercentage;
+  if (dbFileSize < evictionCutoff) {
+    return;
+  }
+
+  CommitTransaction();
+  BeginTransaction();
+
+  PRTime now = PR_Now();
+  if (mLastCleanupTime) {
+    Telemetry::Accumulate(Telemetry::SEER_CLEANUP_DELTA,
+                          (now - mLastCleanupTime) / 1000);
+  }
+  mLastCleanupTime = now;
+
+  CleanupOrigins(now);
+  CleanupStartupPages(now);
+
+  dbFileSize = GetDBFileSizeAfterVacuum();
+  if (dbFileSize < evictionCutoff) {
+    // We've deleted enough stuff, time to free up the disk space and be on
+    // our way.
+    VacuumDatabase();
+    Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SUCCEEDED, true);
+    Telemetry::Accumulate(Telemetry::SEER_CLEANUP_TIME,
+                          (PR_Now() - mLastCleanupTime) / 1000);
+    return;
+  }
+
+  bool canDelete = true;
+  while (canDelete && (dbFileSize >= evictionCutoff)) {
+    int32_t subresourceCount = GetSubresourceCount();
+    if (!subresourceCount) {
+      canDelete = false;
+      break;
+    }
+
+    // DB size scales pretty much linearly with the number of rows in
+    // moz_subresources, so we can guess how many rows we need to delete pretty
+    // accurately.
+    float percentNeeded = static_cast<float>(dbFileSize - evictionCutoff) /
+                          static_cast<float>(dbFileSize);
+
+    int32_t subresourcesToDelete = static_cast<int32_t>(percentNeeded * subresourceCount);
+    if (!subresourcesToDelete) {
+      // We're getting pretty close to nothing here, anyway, so we may as well
+      // just trash it all. This delete cascades to moz_subresources, as well.
+      rv = mDB->ExecuteSimpleSQL(NS_LITERAL_CSTRING("DELETE FROM moz_pages;"));
+      if (NS_FAILED(rv)) {
+        canDelete = false;
+        break;
+      }
+    } else {
+      nsCOMPtr<mozIStorageStatement> deleteStatement = mStatements.GetCachedStatement(
+          NS_LITERAL_CSTRING("DELETE FROM moz_subresources WHERE id IN "
+                             "(SELECT id FROM moz_subresources ORDER BY "
+                             "last_hit ASC LIMIT :limit);"));
+      if (!deleteStatement) {
+        canDelete = false;
+        break;
+      }
+      mozStorageStatementScoper scopedDelete(deleteStatement);
+
+      rv = deleteStatement->BindInt32ByName(NS_LITERAL_CSTRING("limit"),
+                                            subresourcesToDelete);
+      if (NS_FAILED(rv)) {
+        canDelete = false;
+        break;
+      }
+
+      rv = deleteStatement->Execute();
+      if (NS_FAILED(rv)) {
+        canDelete = false;
+        break;
+      }
+
+      // Now we clean up pages that no longer reference any subresources
+      rv = mDB->ExecuteSimpleSQL(
+          NS_LITERAL_CSTRING("DELETE FROM moz_pages WHERE id NOT IN "
+                             "(SELECT DISTINCT(pid) FROM moz_subresources);"));
+      if (NS_FAILED(rv)) {
+        canDelete = false;
+        break;
+      }
+    }
+
+    if (canDelete) {
+      dbFileSize = GetDBFileSizeAfterVacuum();
+    }
+  }
+
+  if (!canDelete || (dbFileSize >= evictionCutoff)) {
+    // Last-ditch effort to free up space
+    ResetInternal();
+    Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SUCCEEDED, false);
+  } else {
+    // We do this to actually free up the space on disk
+    VacuumDatabase();
+    Telemetry::Accumulate(Telemetry::SEER_CLEANUP_SUCCEEDED, true);
+  }
+  Telemetry::Accumulate(Telemetry::SEER_CLEANUP_TIME,
+                        (PR_Now() - mLastCleanupTime) / 1000);
+}
+
+// Commits the current transaction, runs VACUUM, and begins a new transaction.
+void
+Seer::VacuumDatabase()
+{
+  MOZ_ASSERT(!NS_IsMainThread(), "VacuumDatabase called on main thread!");
+
+  CommitTransaction();
+  mDB->ExecuteSimpleSQL(NS_LITERAL_CSTRING("VACUUM;"));
+  BeginTransaction();
+}
+
 #ifdef SEER_TESTS
 class SeerPrepareForDnsTestEvent : public nsRunnable
 {
diff --git a/netwerk/base/src/Seer.h b/netwerk/base/src/Seer.h
index 179c3309c88f..be92c5b069c4 100644
--- a/netwerk/base/src/Seer.h
+++ b/netwerk/base/src/Seer.h
@@ -64,6 +64,7 @@ private:
   friend class SeerDBShutdownRunner;
   friend class SeerCommitTimerInitEvent;
   friend class SeerNewTransactionEvent;
+  friend class SeerCleanupEvent;
 
   void CheckForAndDeleteOldDBFile();
   nsresult EnsureInitStorage();
@@ -165,6 +166,16 @@ private:
     mDB->CommitTransaction();
   }
 
+  int64_t GetDBFileSize();
+  int64_t GetDBFileSizeAfterVacuum();
+  void MaybeScheduleCleanup();
+  void Cleanup();
+  void CleanupOrigins(PRTime now);
+  void CleanupStartupPages(PRTime now);
+  int32_t GetSubresourceCount();
+
+  void VacuumDatabase();
+
   // Observer-related stuff
   nsresult InstallObserver();
   void RemoveObserver();
@@ -220,6 +231,11 @@ private:
   friend class SeerPrepareForDnsTestEvent;
   void PrepareForDnsTestInternal(int64_t timestamp, const nsACString &uri);
 #endif
+
+  bool mCleanupScheduled;
+  int32_t mMaxDBSize;
+  int32_t mPreservePercentage;
+  PRTime mLastCleanupTime;
 };
 
 } // ::mozilla::net
diff --git a/toolkit/components/telemetry/Histograms.json b/toolkit/components/telemetry/Histograms.json
index d3bb2f74a8ea..06b39c94ebe9 100644
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -2231,6 +2231,30 @@
     "n_buckets": 10,
     "description": "How long it takes from the time Predict() is called to the time we figure out there's nothing to do"
   },
+  "SEER_CLEANUP_DELTA": {
+    "expires_in_version": "never",
+    "kind": "exponential",
+    "high": "60000",
+    "n_buckets": 50,
+    "description": "How long between seer db cleanups, in ms"
+  },
+  "SEER_CLEANUP_SUCCEEDED": {
+    "expires_in_version": "never",
+    "kind": "boolean",
+    "description": "Whether or not the seer cleanup succeeded"
+  },
+  "SEER_CLEANUP_TIME": {
+    "expires_in_version": "never",
+    "kind": "exponential",
+    "high": "5000",
+    "n_buckets": 10,
+    "description": "How long it takes to run the seer cleanup"
+  },
+  "SEER_CLEANUP_SCHEDULED": {
+    "expires_in_version": "never",
+    "kind": "boolean",
+    "description": "Whether or not we actually try the cleanup method when we think about it"
+  },
   "FIND_PLUGINS": {
     "expires_in_version": "never",
     "kind": "exponential",