Bug 476298 - Switch RecalculateFrecencies to just fix invalid frecencies. r=dietrich

Bug 476300 - Calculate all page frecencies when upgrading/migrating
Bug 482351 - 1 sort operation required by mDBInvalidFrecencies
Create a new method FixInvalidFrecencies that finds invalid (negative) frecencies and recalculates them. Use it for handling creating/migrating DBs as well as recalculating invalid places on daily idle (place frecencies are already estimated by decay). This obviates a few preferences, queries and methods related to recalculating on idle. The test uses mork history with a number of pages that now all get their frecencies calculated on migrate, where before this fix, the test fails with a bunch of pages still with negative frecencies.
This commit is contained in:
Edward Lee 2009-04-10 10:24:10 -05:00
parent 8bf2ecbda6
commit da1fbb8ad9
5 changed files with 140 additions and 167 deletions

View File

@ -747,18 +747,6 @@ pref("accessibility.blockautorefresh", false);
// when calculating frecency
pref("places.frecency.numVisits", 10);
// Number of records to update frecency for when idle.
pref("places.frecency.numCalcOnIdle", 50);
// Number of records to update frecency for when migrating from
// a pre-frecency build.
pref("places.frecency.numCalcOnMigrate", 50);
// Perform frecency recalculation after this amount of idle, repeating.
// A value of zero disables updating of frecency on idle.
// Default is 1 minute (60000ms).
pref("places.frecency.updateIdleTime", 60000);
// buckets (in days) for frecency calculation
pref("places.frecency.firstBucketCutoff", 4);
pref("places.frecency.secondBucketCutoff", 14);

View File

@ -135,9 +135,6 @@
#define PREF_AUTOCOMPLETE_SEARCH_TIMEOUT "urlbar.search.timeout"
#define PREF_DB_CACHE_PERCENTAGE "history_cache_percentage"
#define PREF_FRECENCY_NUM_VISITS "places.frecency.numVisits"
#define PREF_FRECENCY_CALC_ON_IDLE "places.frecency.numCalcOnIdle"
#define PREF_FRECENCY_CALC_ON_MIGRATE "places.frecency.numCalcOnMigrate"
#define PREF_FRECENCY_UPDATE_IDLE_TIME "places.frecency.updateIdleTime"
#define PREF_FRECENCY_FIRST_BUCKET_CUTOFF "places.frecency.firstBucketCutoff"
#define PREF_FRECENCY_SECOND_BUCKET_CUTOFF "places.frecency.secondBucketCutoff"
#define PREF_FRECENCY_THIRD_BUCKET_CUTOFF "places.frecency.thirdBucketCutoff"
@ -1588,9 +1585,8 @@ nsNavHistory::MigrateV7Up(mozIStorageConnection* aDBConn)
getter_AddRefs(hasFrecencyStatement));
if (NS_FAILED(rv)) {
// add frecency column to moz_places, default to -1
// so that all the frecencies are invalid and we'll
// recalculate them on idle.
// Add frecency column to moz_places, default to -1 so that all the
// frecencies are invalid
rv = aDBConn->ExecuteSimpleSQL(NS_LITERAL_CSTRING(
"ALTER TABLE moz_places ADD frecency INTEGER DEFAULT -1 NOT NULL"));
NS_ENSURE_SUCCESS(rv, rv);
@ -1602,10 +1598,6 @@ nsNavHistory::MigrateV7Up(mozIStorageConnection* aDBConn)
"ON moz_places (frecency)"));
NS_ENSURE_SUCCESS(rv, rv);
// XXX todo
// forcibly call the "on idle" timer here to do a little work
// but the rest will happen on idle.
// for place: items and unvisited livemark items, we need to set
// the frecency to 0 so that they don't show up in url bar autocomplete
rv = FixInvalidFrecenciesForExcludedPlaces();
@ -2083,12 +2075,6 @@ nsNavHistory::LoadPrefs(PRBool aInitializing)
if (prefs) {
prefs->GetIntPref(PREF_FRECENCY_NUM_VISITS,
&mNumVisitsForFrecency);
prefs->GetIntPref(PREF_FRECENCY_CALC_ON_IDLE,
&mNumCalculateFrecencyOnIdle);
prefs->GetIntPref(PREF_FRECENCY_CALC_ON_MIGRATE,
&mNumCalculateFrecencyOnMigrate);
prefs->GetIntPref(PREF_FRECENCY_UPDATE_IDLE_TIME,
&mFrecencyUpdateIdleTime);
prefs->GetIntPref(PREF_FRECENCY_FIRST_BUCKET_CUTOFF,
&mFirstBucketCutoffInDays);
prefs->GetIntPref(PREF_FRECENCY_SECOND_BUCKET_CUTOFF,
@ -4538,10 +4524,6 @@ nsNavHistory::RemovePagesInternal(const nsCString& aPlaceIdsQueryString)
rv = FixInvalidFrecenciesForExcludedPlaces();
NS_ENSURE_SUCCESS(rv, rv);
// XXX todo
// forcibly call the "on idle" timer here to do a little work
// but the rest will happen on idle.
return transaction.Commit();
}
@ -5595,9 +5577,8 @@ nsNavHistory::Observe(nsISupports *aSubject, const char *aTopic,
mExpire.OnExpirationChanged();
}
else if (strcmp(aTopic, gIdleDaily) == 0) {
// Recalculate some frecency values (zero time means don't recalculate)
if (mFrecencyUpdateIdleTime)
(void)RecalculateFrecencies(mNumCalculateFrecencyOnIdle, PR_TRUE);
// Update frecency values
(void)FixInvalidFrecencies();
if (mDBConn) {
// Globally decay places frecency rankings to estimate reduced frecency
@ -5656,10 +5637,8 @@ nsNavHistory::Observe(nsISupports *aSubject, const char *aTopic,
(void)os->RemoveObserver(this, PLACES_INIT_COMPLETE_EVENT_TOPIC);
// This code is only called if we've either imported or done a migration
// from a pre-frecency build, so we will calculate the first cutoff period's
// frecencies now.
(void)RecalculateFrecencies(mNumCalculateFrecencyOnMigrate,
PR_FALSE /* don't recalculate old */);
// from a pre-frecency build, so we will calculate all their frecencies.
(void)FixInvalidFrecencies();
}
return NS_OK;
@ -7464,9 +7443,6 @@ nsNavHistory::CalculateFrecencyInternal(PRInt64 aPlaceId,
bonus += mUnvisitedTypedBonus;
// assume "now" as our ageInDays, so use the first bucket.
// note, when we recalculate "old frecencies" (see mDBOldFrecencies)
// this frecency value could be off by an order of
// (mFirstBucketWeight / mDefaultBucketWeight)
pointsForSampledVisits = mFirstBucketWeight * (bonus / (float)100.0);
// for a unvisited bookmark, produce a non-zero frecency
@ -7508,38 +7484,39 @@ nsNavHistory::CalculateFrecency(PRInt64 aPlaceId,
}
nsresult
nsNavHistory::RecalculateFrecencies(PRInt32 aCount, PRBool aRecalcOld)
nsNavHistory::FixInvalidFrecencies()
{
mozStorageTransaction transaction(mDBConn, PR_TRUE);
nsresult rv = RecalculateFrecenciesInternal(GetDBInvalidFrecencies(), aCount);
NS_ENSURE_SUCCESS(rv, rv);
if (aRecalcOld) {
rv = RecalculateFrecenciesInternal(GetDBOldFrecencies(), aCount);
NS_ENSURE_SUCCESS(rv, rv);
}
return NS_OK;
}
nsresult
nsNavHistory::RecalculateFrecenciesInternal(mozIStorageStatement *aStatement, PRInt32 aCount)
{
mozStorageStatementScoper scoper(aStatement);
nsresult rv = aStatement->BindInt32Parameter(0, aCount);
// Find all places with invalid frecencies (frecency < 0) that occur when:
// 1) we've done "clear private data"
// 2) we've expired or deleted visits
// 3) we've migrated from an older version, before global frecency
//
// From older versions, unmigrated bookmarks might be hidden, so we can't
// exclude hidden places (by doing "WHERE hidden <> 1") from our query, as we
// want to calculate the frecency for those places and unhide them (if they
// are not livemark items and not place: queries.)
//
// Note, we are not limiting ourselves to places with visits because we may
// not have any if the place is a bookmark and we expired or deleted all the
// visits.
nsCOMPtr<mozIStorageStatement> invalidFrecencies;
nsresult rv = mDBConn->CreateStatement(NS_LITERAL_CSTRING(
"SELECT id, typed, hidden, frecency, url "
"FROM moz_places_view "
"WHERE frecency < 0"),
getter_AddRefs(invalidFrecencies));
NS_ENSURE_SUCCESS(rv, rv);
PRBool hasMore = PR_FALSE;
while (NS_SUCCEEDED(aStatement->ExecuteStep(&hasMore)) && hasMore) {
PRInt64 placeId = aStatement->AsInt64(0);
// for frecency, we don't use visit_count, aStatement->AsInt32(1)
PRInt32 hidden = aStatement->AsInt32(2);
PRInt32 typed = aStatement->AsInt32(3);
PRInt32 oldFrecency = aStatement->AsInt32(4);
while (NS_SUCCEEDED(invalidFrecencies->ExecuteStep(&hasMore)) && hasMore) {
PRInt64 placeId = invalidFrecencies->AsInt64(0);
PRInt32 typed = invalidFrecencies->AsInt32(1);
PRInt32 hidden = invalidFrecencies->AsInt32(2);
PRInt32 oldFrecency = invalidFrecencies->AsInt32(3);
nsCAutoString url;
aStatement->GetUTF8String(5, url);
invalidFrecencies->GetUTF8String(4, url);
PRInt32 newFrecency = 0;
PRInt32 visitCountForFrecency = 0;
@ -7736,85 +7713,6 @@ nsNavHistory::GetDBBookmarkToUrlResult()
return mDBBookmarkToUrlResult;
}
mozIStorageStatement *
nsNavHistory::GetDBInvalidFrecencies()
{
if (mDBInvalidFrecencies)
return mDBInvalidFrecencies;
// find places with invalid frecencies (frecency < 0)
// invalid frecencies can happen in these scenarios:
// 1) we've done "clear private data"
// 2) we've expired or deleted visits
// 3) we've migrated from an older version, before global frecency
//
// from older versions, unmigrated bookmarks might be hidden,
// so we can't exclude hidden places (by doing "WHERE hidden <> 1")
// from our query, as we want to calculate the frecency for those
// places and unhide them (if they are not livemark items and not
// place: queries.)
//
// Note, we are not limiting ourselves to places with visits
// because we may not have any if the place is a bookmark and
// we expired or deleted all the visits.
// We get two sets of places that are 1) most visited and 2) random so that
// we don't get stuck recalculating frecencies that end up being -1 every
// time
// Since we don't need real random results and ORDER BY RANDOM() is slow
// we will jump at a random rowid in the table and we will get random results
// only from moz_places since temp will be synched there sometimes.
// Notice that frecency is invalidated as frecency = -visit_count
nsresult rv = mDBConn->CreateStatement(NS_LITERAL_CSTRING(
"/* do not warn (bug 482351) */ "
"SELECT * FROM ( "
"SELECT id, visit_count, hidden, typed, frecency, url "
"FROM ( "
"SELECT * FROM moz_places_temp "
"WHERE frecency < 0 "
"UNION ALL "
"SELECT * FROM ( "
"SELECT * FROM moz_places "
"WHERE +id NOT IN (SELECT id FROM moz_places_temp) "
"AND frecency < 0 "
"ORDER BY frecency ASC LIMIT ROUND(?1 / 2) "
") "
") ORDER BY frecency ASC LIMIT ROUND(?1 / 2)) "
"UNION "
"SELECT * FROM ( "
"SELECT id, visit_count, hidden, typed, frecency, url "
"FROM moz_places "
"WHERE frecency < 0 "
"AND ROWID >= ABS(RANDOM() % (SELECT MAX(ROWID) FROM moz_places)) "
"LIMIT ROUND(?1 / 2))"),
getter_AddRefs(mDBInvalidFrecencies));
NS_ENSURE_SUCCESS(rv, nsnull);
return mDBInvalidFrecencies;
}
mozIStorageStatement *
nsNavHistory::GetDBOldFrecencies()
{
if (mDBOldFrecencies)
return mDBOldFrecencies;
// This query finds random old places to update frecency because frequently
// visited places will have their frecencies updated when visited.
// We can limit the selection to moz_places since results in temp tables
// have been most likely visited recently.
// Since we don't need real random results and ORDER BY RANDOM() is slow
// we will jump at a random rowid in the table.
nsresult rv = mDBConn->CreateStatement(NS_LITERAL_CSTRING(
"SELECT id, visit_count, hidden, typed, frecency, url "
"FROM moz_places "
"WHERE ROWID >= ABS(RANDOM() % (SELECT MAX(ROWID) FROM moz_places)) "
"LIMIT ?1"),
getter_AddRefs(mDBOldFrecencies));
NS_ENSURE_SUCCESS(rv, nsnull);
return mDBOldFrecencies;
}
nsresult
nsNavHistory::FinalizeStatements() {
mozIStorageStatement* stmts[] = {
@ -7837,8 +7735,6 @@ nsNavHistory::FinalizeStatements() {
mDBUpdateFrecencyAndHidden,
mDBGetPlaceVisitStats,
mDBFullVisitCount,
mDBInvalidFrecencies,
mDBOldFrecencies,
mDBCurrentQuery,
mDBAutoCompleteQuery,
mDBAutoCompleteTypedQuery,

View File

@ -219,6 +219,14 @@ public:
nsresult UpdateFrecency(PRInt64 aPageID, PRBool isBookmark);
/**
* Calculate frecencies for places that don't have a valid value yet
*/
nsresult FixInvalidFrecencies();
/**
* Set the frecencies of excluded places so they don't show up in queries
*/
nsresult FixInvalidFrecenciesForExcludedPlaces();
/**
@ -453,28 +461,12 @@ protected:
// nsICharsetResolver
NS_DECL_NSICHARSETRESOLVER
/**
* Recalculates aCount frecencies. If aRecalcOld, it will also calculate
* the frecency of aCount history visits that have not occurred recently.
*
* @param aCount
* The number of entries to update.
* @param aRecalcOld
* Indicates that we should update old visits as well.
*/
nsresult RecalculateFrecencies(PRInt32 aCount, PRBool aRecalcOld);
nsresult RecalculateFrecenciesInternal(mozIStorageStatement *aStatement, PRInt32 aCount);
nsresult CalculateFrecency(PRInt64 aPageID, PRInt32 aTyped, PRInt32 aVisitCount, nsCAutoString &aURL, PRInt32 *aFrecency);
nsresult CalculateFrecencyInternal(PRInt64 aPageID, PRInt32 aTyped, PRInt32 aVisitCount, PRBool aIsBookmarked, PRInt32 *aFrecency);
nsCOMPtr<mozIStorageStatement> mDBVisitsForFrecency;
nsCOMPtr<mozIStorageStatement> mDBUpdateFrecencyAndHidden;
nsCOMPtr<mozIStorageStatement> mDBGetPlaceVisitStats;
nsCOMPtr<mozIStorageStatement> mDBFullVisitCount;
mozIStorageStatement *GetDBInvalidFrecencies();
nsCOMPtr<mozIStorageStatement> mDBInvalidFrecencies;
mozIStorageStatement *GetDBOldFrecencies();
nsCOMPtr<mozIStorageStatement> mDBOldFrecencies;
/**
* Initializes the database file. If the database does not exist, was
@ -789,9 +781,6 @@ protected:
// frecency prefs
PRInt32 mNumVisitsForFrecency;
PRInt32 mNumCalculateFrecencyOnIdle;
PRInt32 mNumCalculateFrecencyOnMigrate;
PRInt32 mFrecencyUpdateIdleTime;
PRInt32 mFirstBucketCutoffInDays;
PRInt32 mSecondBucketCutoffInDays;
PRInt32 mThirdBucketCutoffInDays;

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,99 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Places Test Code.
*
* The Initial Developer of the Original Code is Mozilla Corporation.
* Portions created by the Initial Developer are Copyright (C) 2009
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Edward Lee <edilee@mozilla.com> (Original Author)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/**
* Make sure all places get their frecency calculated on migrate for bug 476300.
*/
function _(msg) {
dump(".-* DEBUG *-. " + msg + "\n");
}
function run_test() {
_("Copy the history file with plenty of data to migrate");
let dirSvc = Cc["@mozilla.org/file/directory_service;1"].
getService(Ci.nsIProperties);
let deleteMork = function() {
let mork = dirSvc.get("ProfD", Ci.nsIFile);
mork.append("history.dat");
if (mork.exists())
mork.remove(false);
};
deleteMork();
let file = do_get_file("migrateFrecency.dat");
file.copyTo(dirSvc.get("ProfD", Ci.nsIFile), "history.dat");
_("Wait until places is done initializing to check migration");
let places = null;
const NS_PLACES_INIT_COMPLETE_TOPIC = "places-init-complete";
let os = Cc["@mozilla.org/observer-service;1"].
getService(Ci.nsIObserverService);
let observer = {
observe: function (subject, topic, data) {
switch (topic) {
case NS_PLACES_INIT_COMPLETE_TOPIC:
_("Clean up after ourselves: remove observer and mork file");
os.removeObserver(observer, NS_PLACES_INIT_COMPLETE_TOPIC);
deleteMork();
_("Now that places has migrated, check that it calculated frecencies");
places.DBConnection.createStatement(
"SELECT COUNT(*) FROM moz_places_view WHERE frecency < 0").
executeAsync({
handleResult: function(results) {
_("Should always get a result from COUNT(*)");
let row = results.getNextRow();
do_check_true(!!row);
_("We should have no negative frecencies after migrate");
do_check_eq(row.getResultByIndex(0), 0);
},
handleCompletion: do_test_finished,
handleError: do_throw,
});
break;
}
},
};
os.addObserver(observer, NS_PLACES_INIT_COMPLETE_TOPIC, false);
_("Start places to make it migrate");
places = Cc["@mozilla.org/browser/nav-history-service;1"].
getService(Ci.nsPIPlacesDatabase);
do_test_pending();
}