Bug 1648468 - Part 5 - Move some deduping code from UnifiedComplete to the UrlbarMuxer. r=adw,mak

Differential Revision: https://phabricator.services.mozilla.com/D82800
This commit is contained in:
Harry Twyford 2020-07-11 21:32:19 +00:00
parent 8ece52ef72
commit 2ec05b9282
6 changed files with 131 additions and 74 deletions

View File

@ -131,6 +131,9 @@ class MuxerUnifiedComplete extends UrlbarMuxer {
let resultsWithSuggestedIndex = [];
let formHistoryResults = new Set();
let formHistorySuggestions = new Set();
// Used to dedupe URLs based on their stripped prefix and title. Schema:
// {string} strippedUrl => {prefix, title, rank}
let strippedUrlToTopPrefixAndTitle = new Map();
let maxFormHistoryCount = Math.min(
UrlbarPrefs.get("maxHistoricalSearchSuggestions"),
context.maxResults
@ -177,6 +180,28 @@ class MuxerUnifiedComplete extends UrlbarMuxer {
) {
canShowTailSuggestions = false;
}
if (result.type == UrlbarUtils.RESULT_TYPE.URL && result.payload.url) {
let [strippedUrl, prefix] = UrlbarUtils.stripPrefixAndTrim(
result.payload.url,
{
stripHttp: true,
stripHttps: true,
stripWww: true,
trimEmptyQuery: true,
}
);
let prefixRank = UrlbarUtils.getPrefixRank(prefix);
let topPrefixData = strippedUrlToTopPrefixAndTitle.get(strippedUrl);
let topPrefixRank = topPrefixData ? topPrefixData.rank : -1;
if (topPrefixRank < prefixRank) {
strippedUrlToTopPrefixAndTitle.set(strippedUrl, {
prefix,
title: result.payload.title,
rank: prefixRank,
});
}
}
}
// Do the second pass through results to build the list of unsorted results.
@ -187,6 +212,51 @@ class MuxerUnifiedComplete extends UrlbarMuxer {
continue;
}
// We expect UnifiedComplete sent us the highest-ranked www. and non-www
// origins, if any. Now, compare them to each other and to the heuristic
// result.
// 1. If the heuristic result is lower ranked than both, discard the www
// origin, unless it has a different page title than the non-www
// origin. This is a guard against deduping when www.site.com and
// site.com have different content.
// 2. If the heuristic result is higher than either the www origin or
// non-www origin:
// 2a. If the heuristic is a www origin, discard the non-www origin.
// 2b. If the heuristic is a non-www origin, discard the www origin.
if (
!result.heuristic &&
result.type == UrlbarUtils.RESULT_TYPE.URL &&
result.payload.url
) {
let [strippedUrl, prefix] = UrlbarUtils.stripPrefixAndTrim(
result.payload.url,
{
stripHttp: true,
stripHttps: true,
stripWww: true,
trimEmptyQuery: true,
}
);
let topPrefixData = strippedUrlToTopPrefixAndTitle.get(strippedUrl);
// We don't expect completely identical URLs in the results at this
// point, so if the prefixes are the same, then we're deduping a result
// against itself.
if (topPrefixData && prefix != topPrefixData.prefix) {
let prefixRank = UrlbarUtils.getPrefixRank(prefix);
if (
topPrefixData.rank > prefixRank &&
prefix.endsWith("www.") == topPrefixData.prefix.endsWith("www.")
) {
continue;
} else if (
topPrefixData.rank > prefixRank &&
result.payload?.title == topPrefixData.title
) {
continue;
}
}
}
// Exclude results that dupe autofill.
if (
context.heuristicResult &&

View File

@ -444,6 +444,16 @@ var UrlbarUtils = {
return [submission.uri.spec, submission.postData];
},
// Ranks a URL prefix from 3 - 0 with the following preferences:
// https:// > https://www. > http:// > http://www.
// Higher is better for the purposes of deduping URLs.
// Returns -1 if the prefix does not match any of the above.
getPrefixRank(prefix) {
return ["http://www.", "http://", "https://www.", "https://"].indexOf(
prefix
);
},
/**
* Get the number of rows a result should span in the autocomplete dropdown.
*

View File

@ -31,22 +31,23 @@ add_task(async function dedupe_prefix() {
},
]);
// We should get https://www. as the heuristic result and https:// in the
// We should get https://www. as the heuristic result but https:// in the
// results since the latter's prefix is a higher priority.
await check_autocomplete({
search: "example.com/foo/",
let context = createContext("example.com/foo/", { isPrivate: false });
await check_results({
context,
autofilled: "example.com/foo/",
completed: "https://www.example.com/foo/",
matches: [
{
value: "example.com/foo/",
comment: "https://www.example.com/foo/",
style: ["autofill", "heuristic"],
},
{
value: "https://example.com/foo/",
comment: "Example Page",
},
makeVisitResult(context, {
uri: "https://www.example.com/foo/",
title: "https://www.example.com/foo/",
heuristic: true,
}),
makeVisitResult(context, {
uri: "https://example.com/foo/",
title: "Example Page",
}),
],
});
@ -62,20 +63,21 @@ add_task(async function dedupe_prefix() {
]);
}
await check_autocomplete({
search: "example.com/foo/",
context = createContext("example.com/foo/", { isPrivate: false });
await check_results({
context,
autofilled: "example.com/foo/",
completed: "http://www.example.com/foo/",
matches: [
{
value: "example.com/foo/",
comment: "www.example.com/foo/",
style: ["autofill", "heuristic"],
},
{
value: "https://example.com/foo/",
comment: "Example Page",
},
makeVisitResult(context, {
uri: "http://www.example.com/foo/",
title: "www.example.com/foo/",
heuristic: true,
}),
makeVisitResult(context, {
uri: "https://example.com/foo/",
title: "Example Page",
}),
],
});
@ -92,22 +94,23 @@ add_task(async function dedupe_prefix() {
]);
}
await check_autocomplete({
search: "example.com/foo/",
context = createContext("example.com/foo/", { isPrivate: false });
await check_results({
context,
autofilled: "example.com/foo/",
completed: "https://example.com/foo/",
matches: [
{
value: "example.com/foo/",
comment: "https://example.com/foo/",
style: ["autofill", "heuristic"],
},
{
value: "https://www.example.com/foo/",
comment: "Example Page",
},
makeVisitResult(context, {
uri: "https://example.com/foo/",
title: "https://example.com/foo/",
heuristic: true,
}),
makeVisitResult(context, {
uri: "https://www.example.com/foo/",
title: "Example Page",
}),
],
});
await cleanup();
await cleanupPlaces();
});

View File

@ -15,6 +15,7 @@ support-files =
[test_avoid_middle_complete.js]
[test_avoid_stripping_to_empty_tokens.js]
[test_casing.js]
[test_dedupe_prefix.js]
[test_dupe_urls.js]
[test_encoded_urls.js]
[test_keywords.js]

View File

@ -1336,16 +1336,6 @@ Search.prototype = {
this.notifyResult(true, match.type == UrlbarUtils.RESULT_GROUP.HEURISTIC);
},
// Ranks a URL prefix from 3 - 0 with the following preferences:
// https:// > https://www. > http:// > http://www.
// Higher is better.
// Returns -1 if the prefix does not match any of the above.
_getPrefixRank(prefix) {
return ["http://www.", "http://", "https://www.", "https://"].indexOf(
prefix
);
},
/**
* Check for duplicates and either discard the duplicate or replace the
* original match, in case the new one is more specific. For example,
@ -1411,12 +1401,9 @@ Search.prototype = {
// 1. If the two URLs are the same, dedupe whichever is not the
// heuristic result.
// 2. If they both contain www. or both do not contain it, prefer https.
// 3. If they differ by www.:
// 3a. If the page titles are different, keep both. This is a guard
// against deduping when www.site.com and site.com have different
// content.
// 3b. Otherwise, dedupe based on the priorities in _getPrefixRank.
let prefixRank = this._getPrefixRank(prefix);
// 3. If they differ by www., send both results to the Muxer and allow
// it to decide based on results from other providers.
let prefixRank = UrlbarUtils.getPrefixRank(prefix);
for (let i = 0; i < this._usedURLs.length; ++i) {
if (!this._usedURLs[i]) {
// This is true when the result at [i] is a searchengine result.
@ -1427,10 +1414,9 @@ Search.prototype = {
key: existingKey,
prefix: existingPrefix,
type: existingType,
comment: existingComment,
} = this._usedURLs[i];
let existingPrefixRank = this._getPrefixRank(existingPrefix);
let existingPrefixRank = UrlbarUtils.getPrefixRank(existingPrefix);
if (ObjectUtils.deepEqual(existingKey, urlMapKey)) {
isDupe = true;
@ -1475,26 +1461,13 @@ Search.prototype = {
continue;
}
} else {
// If either result is the heuristic, this will be true and we
// will keep both results.
if (match.comment != existingComment) {
// We have two identical URLs that differ only by www. We need to
// be sure what the heuristic result is before deciding how we
// should dedupe. We mark these as non-duplicates and let the
// muxer handle it.
isDupe = false;
continue;
}
if (prefixRank <= existingPrefixRank) {
break; // Replace match.
} else {
this._usedURLs[i] = {
key: urlMapKey,
action,
type: match.type,
prefix,
comment: match.comment,
};
return { index: i, replace: true };
}
}
}
}
}

View File

@ -31,11 +31,11 @@ add_task(async function test_swap_protocol() {
{ uri: uri8, title: "title" },
]);
// uri1, uri2, and uri5 won't appear since they are duplicates of uri6, except
// for their prefixes.
// uri1 and uri2 won't appear since they are lower-ranked duplicates of uri6.
let allMatches = [
{ uri: uri3, title: "title" },
{ uri: uri4, title: "title" },
{ uri: uri5, title: "title" },
{ uri: uri6, title: "title" },
];