Bug 1648468 - Part 5 - Move some deduping code from UnifiedComplete to the UrlbarMuxer. r=adw,mak

Differential Revision: https://phabricator.services.mozilla.com/D82800
This commit is contained in:
Harry Twyford 2020-07-11 21:32:19 +00:00
parent 8ece52ef72
commit 2ec05b9282
6 changed files with 131 additions and 74 deletions

View File

@ -131,6 +131,9 @@ class MuxerUnifiedComplete extends UrlbarMuxer {
let resultsWithSuggestedIndex = [];
let formHistoryResults = new Set();
let formHistorySuggestions = new Set();
// Used to dedupe URLs based on their stripped prefix and title. Schema:
// {string} strippedUrl => {prefix, title, rank}
let strippedUrlToTopPrefixAndTitle = new Map();
let maxFormHistoryCount = Math.min(
UrlbarPrefs.get("maxHistoricalSearchSuggestions"),
context.maxResults
@ -177,6 +180,28 @@ class MuxerUnifiedComplete extends UrlbarMuxer {
) {
canShowTailSuggestions = false;
}
if (result.type == UrlbarUtils.RESULT_TYPE.URL && result.payload.url) {
let [strippedUrl, prefix] = UrlbarUtils.stripPrefixAndTrim(
result.payload.url,
{
stripHttp: true,
stripHttps: true,
stripWww: true,
trimEmptyQuery: true,
}
);
let prefixRank = UrlbarUtils.getPrefixRank(prefix);
let topPrefixData = strippedUrlToTopPrefixAndTitle.get(strippedUrl);
let topPrefixRank = topPrefixData ? topPrefixData.rank : -1;
if (topPrefixRank < prefixRank) {
strippedUrlToTopPrefixAndTitle.set(strippedUrl, {
prefix,
title: result.payload.title,
rank: prefixRank,
});
}
}
}
// Do the second pass through results to build the list of unsorted results.
@ -187,6 +212,51 @@ class MuxerUnifiedComplete extends UrlbarMuxer {
continue;
}
// We expect UnifiedComplete sent us the highest-ranked www. and non-www
// origins, if any. Now, compare them to each other and to the heuristic
// result.
// 1. If the heuristic result is lower ranked than both, discard the www
// origin, unless it has a different page title than the non-www
// origin. This is a guard against deduping when www.site.com and
// site.com have different content.
// 2. If the heuristic result is higher than either the www origin or
// non-www origin:
// 2a. If the heuristic is a www origin, discard the non-www origin.
// 2b. If the heuristic is a non-www origin, discard the www origin.
if (
!result.heuristic &&
result.type == UrlbarUtils.RESULT_TYPE.URL &&
result.payload.url
) {
let [strippedUrl, prefix] = UrlbarUtils.stripPrefixAndTrim(
result.payload.url,
{
stripHttp: true,
stripHttps: true,
stripWww: true,
trimEmptyQuery: true,
}
);
let topPrefixData = strippedUrlToTopPrefixAndTitle.get(strippedUrl);
// We don't expect completely identical URLs in the results at this
// point, so if the prefixes are the same, then we're deduping a result
// against itself.
if (topPrefixData && prefix != topPrefixData.prefix) {
let prefixRank = UrlbarUtils.getPrefixRank(prefix);
if (
topPrefixData.rank > prefixRank &&
prefix.endsWith("www.") == topPrefixData.prefix.endsWith("www.")
) {
continue;
} else if (
topPrefixData.rank > prefixRank &&
result.payload?.title == topPrefixData.title
) {
continue;
}
}
}
// Exclude results that dupe autofill.
if (
context.heuristicResult &&

View File

@ -444,6 +444,16 @@ var UrlbarUtils = {
return [submission.uri.spec, submission.postData];
},
// Ranks a URL prefix from 3 - 0 with the following preferences:
// https:// > https://www. > http:// > http://www.
// Higher is better for the purposes of deduping URLs.
// Returns -1 if the prefix does not match any of the above.
getPrefixRank(prefix) {
return ["http://www.", "http://", "https://www.", "https://"].indexOf(
prefix
);
},
/**
* Get the number of rows a result should span in the autocomplete dropdown.
*

View File

@ -31,22 +31,23 @@ add_task(async function dedupe_prefix() {
},
]);
// We should get https://www. as the heuristic result and https:// in the
// We should get https://www. as the heuristic result but https:// in the
// results since the latter's prefix is a higher priority.
await check_autocomplete({
search: "example.com/foo/",
let context = createContext("example.com/foo/", { isPrivate: false });
await check_results({
context,
autofilled: "example.com/foo/",
completed: "https://www.example.com/foo/",
matches: [
{
value: "example.com/foo/",
comment: "https://www.example.com/foo/",
style: ["autofill", "heuristic"],
},
{
value: "https://example.com/foo/",
comment: "Example Page",
},
makeVisitResult(context, {
uri: "https://www.example.com/foo/",
title: "https://www.example.com/foo/",
heuristic: true,
}),
makeVisitResult(context, {
uri: "https://example.com/foo/",
title: "Example Page",
}),
],
});
@ -62,20 +63,21 @@ add_task(async function dedupe_prefix() {
]);
}
await check_autocomplete({
search: "example.com/foo/",
context = createContext("example.com/foo/", { isPrivate: false });
await check_results({
context,
autofilled: "example.com/foo/",
completed: "http://www.example.com/foo/",
matches: [
{
value: "example.com/foo/",
comment: "www.example.com/foo/",
style: ["autofill", "heuristic"],
},
{
value: "https://example.com/foo/",
comment: "Example Page",
},
makeVisitResult(context, {
uri: "http://www.example.com/foo/",
title: "www.example.com/foo/",
heuristic: true,
}),
makeVisitResult(context, {
uri: "https://example.com/foo/",
title: "Example Page",
}),
],
});
@ -92,22 +94,23 @@ add_task(async function dedupe_prefix() {
]);
}
await check_autocomplete({
search: "example.com/foo/",
context = createContext("example.com/foo/", { isPrivate: false });
await check_results({
context,
autofilled: "example.com/foo/",
completed: "https://example.com/foo/",
matches: [
{
value: "example.com/foo/",
comment: "https://example.com/foo/",
style: ["autofill", "heuristic"],
},
{
value: "https://www.example.com/foo/",
comment: "Example Page",
},
makeVisitResult(context, {
uri: "https://example.com/foo/",
title: "https://example.com/foo/",
heuristic: true,
}),
makeVisitResult(context, {
uri: "https://www.example.com/foo/",
title: "Example Page",
}),
],
});
await cleanup();
await cleanupPlaces();
});

View File

@ -15,6 +15,7 @@ support-files =
[test_avoid_middle_complete.js]
[test_avoid_stripping_to_empty_tokens.js]
[test_casing.js]
[test_dedupe_prefix.js]
[test_dupe_urls.js]
[test_encoded_urls.js]
[test_keywords.js]

View File

@ -1336,16 +1336,6 @@ Search.prototype = {
this.notifyResult(true, match.type == UrlbarUtils.RESULT_GROUP.HEURISTIC);
},
// Ranks a URL prefix from 3 - 0 with the following preferences:
// https:// > https://www. > http:// > http://www.
// Higher is better.
// Returns -1 if the prefix does not match any of the above.
_getPrefixRank(prefix) {
return ["http://www.", "http://", "https://www.", "https://"].indexOf(
prefix
);
},
/**
* Check for duplicates and either discard the duplicate or replace the
* original match, in case the new one is more specific. For example,
@ -1411,12 +1401,9 @@ Search.prototype = {
// 1. If the two URLs are the same, dedupe whichever is not the
// heuristic result.
// 2. If they both contain www. or both do not contain it, prefer https.
// 3. If they differ by www.:
// 3a. If the page titles are different, keep both. This is a guard
// against deduping when www.site.com and site.com have different
// content.
// 3b. Otherwise, dedupe based on the priorities in _getPrefixRank.
let prefixRank = this._getPrefixRank(prefix);
// 3. If they differ by www., send both results to the Muxer and allow
// it to decide based on results from other providers.
let prefixRank = UrlbarUtils.getPrefixRank(prefix);
for (let i = 0; i < this._usedURLs.length; ++i) {
if (!this._usedURLs[i]) {
// This is true when the result at [i] is a searchengine result.
@ -1427,10 +1414,9 @@ Search.prototype = {
key: existingKey,
prefix: existingPrefix,
type: existingType,
comment: existingComment,
} = this._usedURLs[i];
let existingPrefixRank = this._getPrefixRank(existingPrefix);
let existingPrefixRank = UrlbarUtils.getPrefixRank(existingPrefix);
if (ObjectUtils.deepEqual(existingKey, urlMapKey)) {
isDupe = true;
@ -1475,26 +1461,13 @@ Search.prototype = {
continue;
}
} else {
// If either result is the heuristic, this will be true and we
// will keep both results.
if (match.comment != existingComment) {
// We have two identical URLs that differ only by www. We need to
// be sure what the heuristic result is before deciding how we
// should dedupe. We mark these as non-duplicates and let the
// muxer handle it.
isDupe = false;
continue;
}
if (prefixRank <= existingPrefixRank) {
break; // Replace match.
} else {
this._usedURLs[i] = {
key: urlMapKey,
action,
type: match.type,
prefix,
comment: match.comment,
};
return { index: i, replace: true };
}
}
}
}
}

View File

@ -31,11 +31,11 @@ add_task(async function test_swap_protocol() {
{ uri: uri8, title: "title" },
]);
// uri1, uri2, and uri5 won't appear since they are duplicates of uri6, except
// for their prefixes.
// uri1 and uri2 won't appear since they are lower-ranked duplicates of uri6.
let allMatches = [
{ uri: uri3, title: "title" },
{ uri: uri4, title: "title" },
{ uri: uri5, title: "title" },
{ uri: uri6, title: "title" },
];