Bug 1159884 - Implement inadjacency with a hardcoded list of hashed sites [r=adw]

This commit is contained in:
Maxim Zhilyaev 2015-05-20 17:21:16 -07:00
parent f61ddc3ffa
commit 99ab481e6c
4 changed files with 3525 additions and 9 deletions

File diff suppressed because it is too large Load Diff

View File

@ -110,6 +110,7 @@ browser.jar:
content/browser/newtab/newTab.xul (content/newtab/newTab.xul)
* content/browser/newtab/newTab.js (content/newtab/newTab.js)
content/browser/newtab/newTab.css (content/newtab/newTab.css)
content/browser/newtab/newTab.inadjacent.json (content/newtab/newTab.inadjacent.json)
* content/browser/pageinfo/pageInfo.xul (content/pageinfo/pageInfo.xul)
content/browser/pageinfo/pageInfo.js (content/pageinfo/pageInfo.js)
content/browser/pageinfo/pageInfo.css (content/pageinfo/pageInfo.css)

View File

@ -28,9 +28,22 @@ XPCOMUtils.defineLazyModuleGetter(this, "Promise",
"resource://gre/modules/Promise.jsm");
XPCOMUtils.defineLazyModuleGetter(this, "UpdateChannel",
"resource://gre/modules/UpdateChannel.jsm");
// Exposes the nsIEffectiveTLDService as the lazily-resolved global "eTLD"
XPCOMUtils.defineLazyServiceGetter(this, "eTLD",
"@mozilla.org/network/effective-tld-service;1",
"nsIEffectiveTLDService");
// Shared TextDecoder, constructed with no arguments on first access
XPCOMUtils.defineLazyGetter(this, "gTextDecoder", () => new TextDecoder());
// Shared nsICryptoHash instance, created the first time it is needed
XPCOMUtils.defineLazyGetter(this, "gCryptoHash", () =>
  Cc["@mozilla.org/security/hash;1"].createInstance(Ci.nsICryptoHash));
// Shared nsIScriptableUnicodeConverter with its charset set to 'utf8',
// created the first time it is needed
XPCOMUtils.defineLazyGetter(this, "gUnicodeConverter", () => {
  let utf8Converter = Cc["@mozilla.org/intl/scriptableunicodeconverter"]
    .createInstance(Ci.nsIScriptableUnicodeConverter);
  utf8Converter.charset = 'utf8';
  return utf8Converter;
});
// The filename where directory links are stored locally
const DIRECTORY_LINKS_FILE = "directoryLinks.json";
@ -108,12 +121,18 @@ const DEFAULT_PRUNE_TIME_DELTA = 10*24*60*60*1000;
// The min number of visible (not blocked) history tiles to have before showing suggested tiles
const MIN_VISIBLE_HISTORY_TILES = 8;
// The max number of leading new tab links to test for inadjacency
// (_checkForInadjacentSites inspects every link type in that range, not only history)
const MAX_VISIBLE_HISTORY_TILES = 15;
// Divide frecency by this amount for pings
const PING_SCORE_DIVISOR = 10000;
// Allowed ping actions remotely stored as columns: case-insensitive [a-z0-9_]
const PING_ACTIONS = ["block", "click", "pin", "sponsored", "sponsored_link", "unpin", "view"];
// Location of the inadjacent sites json (a chrome:// URL); its "domains"
// entries are matched against hashed base domains
const INADJACENCY_SOURCE = "chrome://browser/content/newtab/newTab.inadjacent.json";
/**
* Singleton that serves as the provider of directory links.
* Directory links are a hard-coded set of links shown if a user's link
@ -151,6 +170,23 @@ let DirectoryLinksProvider = {
*/
_topSitesWithSuggestedLinks: new Set(),
/**
* Lookup Set of inadjacent sites. Membership is tested with the value
* returned by _generateHash for a base domain, so entries are hashes
* rather than plain-text domains.
*/
_inadjacentSites: new Set(),
/**
* This flag is set if there is a suggested tile configured to avoid
* inadjacent sites in new tab (i.e. a suggested link with check_inadjacency)
*/
_avoidInadjacentSites: false,
/**
* This flag is set if _avoidInadjacentSites is true and there is
* an inadjacent site in the new tab; it is recomputed by
* _getCurrentTopSiteCount
*/
_newTabHasInadjacentSite: false,
get _observedPrefs() Object.freeze({
enhanced: PREF_NEWTAB_ENHANCED,
linksURL: PREF_DIRECTORY_SOURCE,
@ -274,22 +310,26 @@ let DirectoryLinksProvider = {
uri = uri.replace("%LOCALE%", this.locale);
uri = uri.replace("%CHANNEL%", UpdateChannel.get());
return this._downloadJsonData(uri).then(json => {
return OS.File.writeAtomic(this._directoryFilePath, json, {tmpPath: this._directoryFilePath + ".tmp"});
});
},
/**
* Downloads JSON content from the given uri
* @param download uri
* @return promise resolved to json string, "{}" returned if status != 200
*/
_downloadJsonData: function DirectoryLinksProvider__downloadJsonData(uri) {
let deferred = Promise.defer();
let xmlHttp = this._newXHR();
let self = this;
xmlHttp.onload = function(aResponse) {
let json = this.responseText;
if (this.status && this.status != 200) {
json = "{}";
}
OS.File.writeAtomic(self._directoryFilePath, json, {tmpPath: self._directoryFilePath + ".tmp"})
.then(() => {
deferred.resolve();
},
() => {
deferred.reject("Error writing uri data in profD.");
});
deferred.resolve(json);
};
xmlHttp.onerror = function(e) {
@ -616,6 +656,7 @@ let DirectoryLinksProvider = {
this._enhancedLinks.clear();
this._suggestedLinks.clear();
this._clearCampaignTimeout();
this._avoidInadjacentSites = false;
// Only check base domain for images when using the default pref
let checkBase = !this.__linksURLModified;
@ -640,6 +681,10 @@ let DirectoryLinksProvider = {
link.explanation = link.explanation ? ParserUtils.convertToPlainText(link.explanation, sanitizeFlags, 0) : "";
link.targetedName = ParserUtils.convertToPlainText(link.adgroup_name, sanitizeFlags, 0) || name;
link.lastVisitDate = rawLinks.suggested.length - position;
// check if link wants to avoid inadjacent sites
if (link.check_inadjacency) {
this._avoidInadjacentSites = true;
}
// We cache suggested tiles here but do not push any of them in the links list yet.
// The decision for which suggested tile to include will be made separately.
@ -689,6 +734,8 @@ let DirectoryLinksProvider = {
// setup frequency cap file path
this._frequencyCapFilePath = OS.Path.join(OS.Constants.Path.localProfileDir, FREQUENCY_CAP_FILE);
// setup inadjacent sites URL
this._inadjacentSitesUrl = INADJACENCY_SOURCE;
NewTabUtils.placesProvider.addObserver(this);
NewTabUtils.links.addObserver(this);
@ -704,6 +751,8 @@ let DirectoryLinksProvider = {
yield this._readFrequencyCapFile();
// fetch directory on startup without force
yield this._fetchAndCacheLinksIfNecessary();
// fetch inadjacent sites on startup
yield this._loadInadjacentSites();
}.bind(this));
},
@ -736,6 +785,13 @@ let DirectoryLinksProvider = {
this._topSitesWithSuggestedLinks.add(changedLinkSite);
return true;
}
// always run _updateSuggestedTile if aLink is inadjacent
// and there are tiles configured to avoid it
if (this._avoidInadjacentSites && this._isInadjacentLink(aLink)) {
return true;
}
return false;
},
@ -779,11 +835,17 @@ let DirectoryLinksProvider = {
_getCurrentTopSiteCount: function() {
// Counts "history" and "enhanced" links among the first
// MIN_VISIBLE_HISTORY_TILES new tab links and, as a side effect,
// refreshes this._newTabHasInadjacentSite.
let visibleTopSiteCount = 0;
// NOTE(review): the next line looks like the pre-change loop header kept by
// the diff view; the two lines after it appear to replace it - confirm
for (let link of NewTabUtils.links.getLinks().slice(0, MIN_VISIBLE_HISTORY_TILES)) {
let newTabLinks = NewTabUtils.links.getLinks();
for (let link of newTabLinks.slice(0, MIN_VISIBLE_HISTORY_TILES)) {
// compute visibleTopSiteCount for suggested tiles
// (entries may be empty, hence the check on link itself)
if (link && (link.type == "history" || link.type == "enhanced")) {
visibleTopSiteCount++;
}
}
// since newTabLinks are available, set _newTabHasInadjacentSite here
// note that _shouldUpdateSuggestedTile is called by _updateSuggestedTile
// the flag can only become true while _avoidInadjacentSites is set
this._newTabHasInadjacentSite = this._avoidInadjacentSites && this._checkForInadjacentSites(newTabLinks);
return visibleTopSiteCount;
},
@ -872,6 +934,11 @@ let DirectoryLinksProvider = {
return;
}
// Skip link if it avoids inadjacent sites and newtab has one
if (suggestedLink.check_inadjacency && this._newTabHasInadjacentSite) {
return;
}
possibleLinks.set(url, suggestedLink);
// Keep a map of URL to targeted sites. We later use this to show the user
@ -913,6 +980,66 @@ let DirectoryLinksProvider = {
return chosenSuggestedLink;
},
/**
* Loads inadjacent sites
* @return a promise resolved when lookup Set for sites is built
*/
_loadInadjacentSites: function DirectoryLinksProvider_loadInadjacentSites() {
return this._downloadJsonData(this._inadjacentSitesUrl).then(jsonString => {
let jsonObject = {};
try {
jsonObject = JSON.parse(jsonString);
}
catch (e) {
Cu.reportError(e);
}
this._inadjacentSites = new Set(jsonObject.domains);
});
},
/**
* Genegrates hash suitable for looking up inadjacent site
* @param value to hsh
* @return hased value, base64-ed
*/
_generateHash: function DirectoryLinksProvider_generateHash(value) {
let byteArr = gUnicodeConverter.convertToByteArray(value);
gCryptoHash.init(gCryptoHash.MD5);
gCryptoHash.update(byteArr, byteArr.length);
return gCryptoHash.finish(true);
},
/**
* Checks if link belongs to inadjacent domain
* @param link to check
* @return true for inadjacent domains, false otherwise
*/
_isInadjacentLink: function DirectoryLinksProvider_isInadjacentLink(link) {
let baseDomain = link.baseDomain || NewTabUtils.extractSite(link.url || "");
if (!baseDomain) {
return false;
}
// check if hashed domain is inadjacent
return this._inadjacentSites.has(this._generateHash(baseDomain));
},
/**
* Checks if new tab has inadjacent site
* @param new tab links (or nothing, in which case NewTabUtils.links.getLinks() is called
* @return true if new tab shows has inadjacent site
*/
_checkForInadjacentSites: function DirectoryLinksProvider_checkForInadjacentSites(newTabLink) {
let links = newTabLink || NewTabUtils.links.getLinks();
for (let link of links.slice(0, MAX_VISIBLE_HISTORY_TILES)) {
// check links against inadjacent list - specifically include ALL link types
if (this._isInadjacentLink(link)) {
return true;
}
}
return false;
},
/**
* Reads json file, parses its content, and returns resulting object
* @param json file path

View File

@ -209,6 +209,7 @@ function promiseCleanDirectoryLinksProvider() {
yield promiseDirectoryDownloadOnPrefChange(kLocalePref, "en-US");
yield promiseDirectoryDownloadOnPrefChange(kSourceUrlPref, kTestURL);
yield DirectoryLinksProvider._clearFrequencyCap();
yield DirectoryLinksProvider._loadInadjacentSites();
DirectoryLinksProvider._lastDownloadMS = 0;
DirectoryLinksProvider.reset();
});
@ -1745,3 +1746,181 @@ add_task(function test_sanitizeExplanation() {
do_check_eq(suggestedLink.explanation, "This is an evil tile X muhahaha");
do_check_eq(suggestedLink.targetedName, "WE ARE EVIL ");
});
// Checks that a suggested tile carrying check_inadjacency is withheld while
// the new tab links include a site from the inadjacent list, and is offered
// again once the list no longer matches or the flag is removed.
add_task(function test_inadjecentSites() {
let suggestedTile = Object.assign({
check_inadjacency: true
}, suggestedTile1);
// Initial setup
let topSites = ["1040.com", "site2.com", "hrblock.com", "site4.com", "freetaxusa.com", "site6.com"];
let data = {"suggested": [suggestedTile], "directory": [someOtherSite]};
let dataURI = 'data:application/json,' + JSON.stringify(data);
let testObserver = new TestFirstRun();
DirectoryLinksProvider.addObserver(testObserver);
let origGetFrecentSitesName = DirectoryLinksProvider.getFrecentSitesName;
DirectoryLinksProvider.getFrecentSitesName = () => "";
yield promiseSetupDirectoryLinksProvider({linksURL: dataURI});
let links = yield fetchData();
let origIsTopPlacesSite = NewTabUtils.isTopPlacesSite;
NewTabUtils.isTopPlacesSite = function(site) {
return topSites.indexOf(site) >= 0;
}
let origGetProviderLinks = NewTabUtils.getProviderLinks;
NewTabUtils.getProviderLinks = function(provider) {
return links;
}
// wrap _getCurrentTopSiteCount: still run the original (it updates
// _newTabHasInadjacentSite) but always report 8 visible top sites
let origCurrentTopSiteCount = DirectoryLinksProvider._getCurrentTopSiteCount;
DirectoryLinksProvider._getCurrentTopSiteCount = () => {
origCurrentTopSiteCount.apply(DirectoryLinksProvider);
return 8;
};
// store original inadjacent sites url
let origInadjacentSitesUrl = DirectoryLinksProvider._inadjacentSitesUrl;
// helper: hashes the given sites, points _inadjacentSitesUrl at a data: URI
// holding them and reloads the inadjacent sites list
function setInadjacentSites(sites) {
let badSiteB64 = [];
sites.forEach(site => {
badSiteB64.push(DirectoryLinksProvider._generateHash(site));
});
let theList = {"domains": badSiteB64};
let dataURI = 'data:application/json,' + JSON.stringify(theList);
DirectoryLinksProvider._inadjacentSitesUrl = dataURI;
return DirectoryLinksProvider._loadInadjacentSites();
};
// setup gLinks loader
let gLinks = NewTabUtils.links;
gLinks.addProvider(DirectoryLinksProvider);
// helper: repopulates the links cache and resolves on the next
// allPages update() notification
function updateNewTabCache() {
gLinks.populateCache();
return new Promise(resolve => {
NewTabUtils.allPages.register({
observe: _ => _,
update() {
NewTabUtils.allPages.unregister(this);
resolve();
}
});
});
}
// no suggested tile yet: _updateSuggestedTile() returns undefined
do_check_eq(DirectoryLinksProvider._updateSuggestedTile(), undefined);
// _avoidInadjacentSites should be set, since link.check_inadjacency is on
do_check_true(DirectoryLinksProvider._avoidInadjacentSites);
// make sure example.com is included in inadjacent sites list
do_check_true(DirectoryLinksProvider._isInadjacentLink({baseDomain: "example.com"}));
// observer whose promise resolves on the first onLinkChanged notification
function TestFirstRun() {
this.promise = new Promise(resolve => {
this.onLinkChanged = (directoryLinksProvider, link) => {
do_check_eq(link.url, suggestedTile.url);
do_check_eq(link.type, "affiliate");
resolve();
};
});
}
// Test first call to '_updateSuggestedTile()', called when fetching directory links.
yield testObserver.promise;
DirectoryLinksProvider.removeObserver(testObserver);
// update newtab cache
yield updateNewTabCache();
// the flag should still be set
do_check_true(DirectoryLinksProvider._avoidInadjacentSites);
// there should be a suggested link
let link = DirectoryLinksProvider._updateSuggestedTile();
do_check_eq(link.url, "http://turbotax.com");
// and it should have the check_inadjacency flag
do_check_true(link.check_inadjacency);
// make someothersite.com inadjacent
yield setInadjacentSites(["someothersite.com"]);
// there should be no suggested link
link = DirectoryLinksProvider._updateSuggestedTile();
do_check_false(link);
do_check_true(DirectoryLinksProvider._newTabHasInadjacentSite);
// _handleLinkChanged must return true on inadjacent site
do_check_true(DirectoryLinksProvider._handleLinkChanged({
url: "http://someothersite.com",
type: "history",
}));
// _handleLinkChanged must return false on ok site
do_check_false(DirectoryLinksProvider._handleLinkChanged({
url: "http://foobar.com",
type: "history",
}));
// change inadjacent list to sites not on newtab page
yield setInadjacentSites(["foo.com", "bar.com"]);
link = DirectoryLinksProvider._updateSuggestedTile();
// we should now have a link
do_check_true(link);
do_check_eq(link.url, "http://turbotax.com");
// make newtab offending again
yield setInadjacentSites(["someothersite.com", "foo.com"]);
// there should be no suggested link
link = DirectoryLinksProvider._updateSuggestedTile();
do_check_false(link);
do_check_true(DirectoryLinksProvider._newTabHasInadjacentSite);
// remove check_inadjacency flag from suggested tile and reload json
delete suggestedTile.check_inadjacency;
data = {"suggested": [suggestedTile], "directory": [someOtherSite]};
dataURI = 'data:application/json,' + JSON.stringify(data);
yield promiseDirectoryDownloadOnPrefChange(kSourceUrlPref, dataURI);
yield fetchData();
// inadjacent checking should be disabled
do_check_false(DirectoryLinksProvider._avoidInadjacentSites);
link = DirectoryLinksProvider._updateSuggestedTile();
do_check_true(link);
do_check_eq(link.url, "http://turbotax.com");
do_check_false(DirectoryLinksProvider._newTabHasInadjacentSite);
// _handleLinkChanged should return false now, even if newtab has bad site
do_check_false(DirectoryLinksProvider._handleLinkChanged({
url: "http://someothersite.com",
type: "history",
}));
// test _isInadjacentLink
do_check_true(DirectoryLinksProvider._isInadjacentLink({baseDomain: "someothersite.com"}));
do_check_false(DirectoryLinksProvider._isInadjacentLink({baseDomain: "bar.com"}));
do_check_true(DirectoryLinksProvider._isInadjacentLink({url: "http://www.someothersite.com"}));
do_check_false(DirectoryLinksProvider._isInadjacentLink({url: "http://www.bar.com"}));
// try to crash _isInadjacentLink
do_check_false(DirectoryLinksProvider._isInadjacentLink({baseDomain: ""}));
do_check_false(DirectoryLinksProvider._isInadjacentLink({url: ""}));
do_check_false(DirectoryLinksProvider._isInadjacentLink({url: "http://localhost:8081/"}));
do_check_false(DirectoryLinksProvider._isInadjacentLink({url: "abracodabra"}));
do_check_false(DirectoryLinksProvider._isInadjacentLink({}));
// test _checkForInadjacentSites
do_check_true(DirectoryLinksProvider._checkForInadjacentSites());
// Cleanup
gLinks.removeProvider(DirectoryLinksProvider);
DirectoryLinksProvider._inadjacentSitesUrl = origInadjacentSitesUrl;
DirectoryLinksProvider.getFrecentSitesName = origGetFrecentSitesName;
NewTabUtils.isTopPlacesSite = origIsTopPlacesSite;
NewTabUtils.getProviderLinks = origGetProviderLinks;
DirectoryLinksProvider._getCurrentTopSiteCount = origCurrentTopSiteCount;
yield promiseCleanDirectoryLinksProvider();
});