Bug 1265525 - Part 2: Frecency calculation and top sites query updates r=sebastian

MozReview-Commit-ID: 7tqr4IT9635

--HG--
extra : rebase_source : 3f4be86948343e4d893b3d2c999897431c53460c
extra : source : 64ff3f5121d75d61e6f6a9798240f7a79332d006
This commit is contained in:
Grigory Kruglov 2016-05-02 14:10:23 -07:00
parent 65936d960e
commit 425e0888e2
4 changed files with 145 additions and 19 deletions

View File

@ -8,6 +8,8 @@ package org.mozilla.gecko.db;
import org.mozilla.gecko.AppConstants;
import android.net.Uri;
import android.support.annotation.NonNull;
import org.mozilla.gecko.annotation.RobocopTarget;
@RobocopTarget
@ -57,19 +59,75 @@ public class BrowserContract {
AGGRESSIVE
}
static public String getFrecencySortOrder(boolean includesBookmarks, boolean asc) {
final String age = "(" + Combined.DATE_LAST_VISITED + " - " + System.currentTimeMillis() + ") / 86400000";
StringBuilder order = new StringBuilder(Combined.VISITS + " * MAX(1, 100 * 225 / (" + age + "*" + age + " + 225)) ");
/**
* Produces a SQL expression used for sorting results of the "combined" view by frecency.
* Combines remote and local frecency calculations, weighting local visits much heavier.
*
* @param includesBookmarks When URL is bookmarked, should we give it bonus frecency points?
* @param ascending Indicates if sorting order ascending
* @return Combined frecency sorting expression
*/
static public String getCombinedFrecencySortOrder(boolean includesBookmarks, boolean ascending) {
final long now = System.currentTimeMillis();
StringBuilder order = new StringBuilder(getRemoteFrecencySQL(now) + " + " + getLocalFrecencySQL(now));
if (includesBookmarks) {
order.insert(0, "(CASE WHEN " + Combined.BOOKMARK_ID + " > -1 THEN 100 ELSE 0 END) + ");
}
order.append(asc ? " ASC" : " DESC");
order.append(ascending ? " ASC" : " DESC");
return order.toString();
}
/**
* See Bug 1265525 for details (explanation + graphs) on how Remote frecency compares to Local frecency for different
* combinations of visits count and age.
*
* @param now Base time in milliseconds for age calculation
* @return remote frecency SQL calculation
*/
static public String getRemoteFrecencySQL(final long now) {
return getFrecencyCalculation(now, 1, 110, Combined.REMOTE_VISITS_COUNT, Combined.REMOTE_DATE_LAST_VISITED);
}
/**
* Local frecency SQL calculation. Note higher scale factor and squared visit count which achieve
* visits generated locally being much preferred over remote visits.
* See Bug 1265525 for details (explanation + comparison graphs).
*
* @param now Base time in milliseconds for age calculation
* @return local frecency SQL calculation
*/
static public String getLocalFrecencySQL(final long now) {
String visitCountExpr = "(" + Combined.LOCAL_VISITS_COUNT + " + 2)";
visitCountExpr = visitCountExpr + " * " + visitCountExpr;
return getFrecencyCalculation(now, 2, 225, visitCountExpr, Combined.LOCAL_DATE_LAST_VISITED);
}
/**
* Our version of frecency is computed by scaling the number of visits by a multiplier
* that approximates Gaussian decay, based on how long ago the entry was last visited.
* Since we're limited by the math we can do with sqlite, we're calculating this
* approximation using the Cauchy distribution: multiplier = scale_const / (age^2 + scale_const).
* For example, with 15 as our scale parameter, we get a scale constant 15^2 = 225. Then:
* frecencyScore = numVisits * max(1, 100 * 225 / (age*age + 225)). (See bug 704977)
*
* @param now Base time in milliseconds for age calculation
* @param minFrecency Minimum allowed frecency value
* @param multiplier Scale constant
* @param visitCountExpr Expression which will produce a visit count
* @param lastVisitExpr Expression which will produce "last-visited" timestamp
* @return Frecency SQL calculation
*/
static public String getFrecencyCalculation(final long now, final int minFrecency, final int multiplier, @NonNull final String visitCountExpr, @NonNull final String lastVisitExpr) {
final long nowInMicroseconds = now * 1000;
final long microsecondsPerDay = 86400000000L;
final String ageExpr = "(" + nowInMicroseconds + " - " + lastVisitExpr + ") / " + microsecondsPerDay;
return visitCountExpr + " * MAX(" + minFrecency + ", 100 * " + multiplier + " / (" + ageExpr + " * " + ageExpr + " + " + multiplier + "))";
}
@RobocopTarget
public interface CommonColumns {
public static final String _ID = "_id";
@ -245,6 +303,12 @@ public class BrowserContract {
public static final String BOOKMARK_ID = "bookmark_id";
public static final String HISTORY_ID = "history_id";
public static final String REMOTE_VISITS_COUNT = "remoteVisitCount";
public static final String REMOTE_DATE_LAST_VISITED = "remoteDateLastVisited";
public static final String LOCAL_VISITS_COUNT = "localVisitCount";
public static final String LOCAL_DATE_LAST_VISITED = "localDateLastVisited";
}
public static final class Schema {

View File

@ -314,7 +314,7 @@ public class BrowserProvider extends SharedBrowserDatabaseProvider {
*
* Provide <code>keepAfter</code> less than or equal to zero to skip that check.
*
* Items will be removed according to an approximate frecency calculation.
* Items will be removed according to last visited date.
*/
private void expireHistory(final SQLiteDatabase db, final int retain, final long keepAfter) {
Log.d(LOGTAG, "Expiring history.");
@ -325,7 +325,6 @@ public class BrowserProvider extends SharedBrowserDatabaseProvider {
return;
}
final String sortOrder = BrowserContract.getFrecencySortOrder(false, true);
final long toRemove = rows - retain;
debug("Expiring at most " + toRemove + " rows earlier than " + keepAfter + ".");
@ -335,12 +334,12 @@ public class BrowserProvider extends SharedBrowserDatabaseProvider {
"WHERE MAX(" + History.DATE_LAST_VISITED + ", " + History.DATE_MODIFIED + ") < " + keepAfter + " " +
" AND " + History._ID + " IN ( SELECT " +
History._ID + " FROM " + TABLE_HISTORY + " " +
"ORDER BY " + sortOrder + " LIMIT " + toRemove +
"ORDER BY " + History.DATE_LAST_VISITED + " ASC LIMIT " + toRemove +
")";
} else {
sql = "DELETE FROM " + TABLE_HISTORY + " WHERE " + History._ID + " " +
"IN ( SELECT " + History._ID + " FROM " + TABLE_HISTORY + " " +
"ORDER BY " + sortOrder + " LIMIT " + toRemove + ")";
"ORDER BY " + History.DATE_LAST_VISITED + " ASC LIMIT " + toRemove + ")";
}
trace("Deleting using query: " + sql);
@ -356,7 +355,7 @@ public class BrowserProvider extends SharedBrowserDatabaseProvider {
*/
private void expireThumbnails(final SQLiteDatabase db) {
Log.d(LOGTAG, "Expiring thumbnails.");
final String sortOrder = BrowserContract.getFrecencySortOrder(true, false);
final String sortOrder = BrowserContract.getCombinedFrecencySortOrder(true, false);
final String sql = "DELETE FROM " + TABLE_THUMBNAILS +
" WHERE " + Thumbnails.URL + " NOT IN ( " +
" SELECT " + Combined.URL +
@ -902,7 +901,7 @@ public class BrowserProvider extends SharedBrowserDatabaseProvider {
TopSites.TYPE_TOP + " AS " + TopSites.TYPE +
" FROM " + Combined.VIEW_NAME +
" WHERE " + ignoreForTopSitesWhereClause +
" ORDER BY " + BrowserContract.getFrecencySortOrder(true, false) +
" ORDER BY " + BrowserContract.getCombinedFrecencySortOrder(true, false) +
" LIMIT " + totalLimit,
ignoreForTopSitesArgs);

View File

@ -569,14 +569,10 @@ public class LocalBrowserDB implements BrowserDB {
selectionArgs = DBUtils.appendSelectionArgs(selectionArgs, new String[] { urlFilter.toString() });
}
// Our version of frecency is computed by scaling the number of visits by a multiplier
// that approximates Gaussian decay, based on how long ago the entry was last visited.
// Since we're limited by the math we can do with sqlite, we're calculating this
// approximation using the Cauchy distribution: multiplier = 15^2 / (age^2 + 15^2).
// Using 15 as our scale parameter, we get a constant 15^2 = 225. Following this math,
// frecencyScore = numVisits * max(1, 100 * 225 / (age*age + 225)). (See bug 704977)
// We also give bookmarks an extra bonus boost by adding 100 points to their frecency score.
final String sortOrder = BrowserContract.getFrecencySortOrder(true, false);
// Order by combined remote+local frecency score.
// Local visits are preferred, so they will by far outweigh remote visits.
// Bookmarked history items get extra frecency points.
final String sortOrder = BrowserContract.getCombinedFrecencySortOrder(true, false);
return cr.query(combinedUriWithLimit(limit),
projection,

View File

@ -0,0 +1,67 @@
/* Any copyright is dedicated to the Public Domain.
http://creativecommons.org/publicdomain/zero/1.0/ */
package org.mozilla.gecko.db;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mozilla.gecko.background.testhelpers.TestRunner;
import static org.junit.Assert.*;
@RunWith(TestRunner.class)
public class BrowserContractTest {
@Test
/**
* Test that bookmark and sorting order clauses are set correctly
*/
public void testGetCombinedFrecencySortOrder() throws Exception {
String sqlNoBookmarksDesc = BrowserContract.getCombinedFrecencySortOrder(false, false);
String sqlNoBookmarksAsc = BrowserContract.getCombinedFrecencySortOrder(false, true);
String sqlBookmarksDesc = BrowserContract.getCombinedFrecencySortOrder(true, false);
String sqlBookmarksAsc = BrowserContract.getCombinedFrecencySortOrder(true, true);
assertTrue(sqlBookmarksAsc.endsWith(" ASC"));
assertTrue(sqlBookmarksDesc.endsWith(" DESC"));
assertTrue(sqlNoBookmarksAsc.endsWith(" ASC"));
assertTrue(sqlNoBookmarksDesc.endsWith(" DESC"));
assertTrue(sqlBookmarksAsc.startsWith("(CASE WHEN bookmark_id > -1 THEN 100 ELSE 0 END) + "));
assertTrue(sqlBookmarksDesc.startsWith("(CASE WHEN bookmark_id > -1 THEN 100 ELSE 0 END) + "));
}
@Test
/**
* Test that calculation string is correct for remote visits
* maxFrecency=1, scaleConst=110, correct sql params for visit count and last date
* and that time is converted to microseconds.
*/
public void testGetRemoteFrecencySQL() throws Exception {
long now = 1;
String sql = BrowserContract.getRemoteFrecencySQL(now);
String ageExpr = "(" + now * 1000 + " - remoteDateLastVisited) / 86400000000";
assertEquals(
"remoteVisitCount * MAX(1, 100 * 110 / (" + ageExpr + " * " + ageExpr + " + 110))",
sql
);
}
@Test
/**
* Test that calculation string is correct for remote visits
* maxFrecency=2, scaleConst=225, correct sql params for visit count and last date
* and that time is converted to microseconds.
*/
public void testGetLocalFrecencySQL() throws Exception {
long now = 1;
String sql = BrowserContract.getLocalFrecencySQL(now);
String ageExpr = "(" + now * 1000 + " - localDateLastVisited) / 86400000000";
String visitCountExpr = "(localVisitCount + 2) * (localVisitCount + 2)";
assertEquals(
visitCountExpr + " * MAX(2, 100 * 225 / (" + ageExpr + " * " + ageExpr + " + 225))",
sql
);
}
}