Bug 1606731 - Ignore diacritics when searching the browser history. r=jfkthame,mak

Differential Revision: https://phabricator.services.mozilla.com/D58584

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Alex Henrie 2020-01-22 17:39:56 +00:00
parent fa7b42ae1e
commit b670bcef3e
11 changed files with 159 additions and 20 deletions

View File

@ -886,6 +886,9 @@ pref("accessibility.blockautorefresh", false);
// Whether history is enabled or not.
pref("places.history.enabled", true);
// Whether or not diacritics must match in history text searches.
pref("places.search.matchDiacritics", false);
// the (maximum) number of the recent visits to sample
// when calculating frecency
pref("places.frecency.numVisits", 10);

View File

@ -296,18 +296,45 @@ var UrlbarUtils = {
let hits = new Array(str.length).fill(
highlightType == this.HIGHLIGHT.SUGGESTED ? 1 : 0
);
for (let { lowerCaseValue } of tokens) {
for (let { lowerCaseValue: needle } of tokens) {
// Ideally we should never hit the empty token case, but just in case
// the `needle` check protects us from an infinite loop.
for (let index = 0, needle = lowerCaseValue; index >= 0 && needle; ) {
if (!needle) {
continue;
}
let index = 0;
let found = false;
// First try a diacritic-sensitive search.
for (;;) {
index = str.indexOf(needle, index);
if (index >= 0) {
hits.fill(
highlightType == this.HIGHLIGHT.SUGGESTED ? 0 : 1,
index,
index + needle.length
);
index += needle.length;
if (index < 0) {
break;
}
hits.fill(
highlightType == this.HIGHLIGHT.SUGGESTED ? 0 : 1,
index,
index + needle.length
);
index += needle.length;
found = true;
}
// If that fails to match anything, try a (computationally intensive)
// diacritic-insensitive search.
if (!found) {
const options = { sensitivity: "base" };
index = 0;
while (index < str.length) {
let hay = str.substr(index, needle.length);
if (needle.localeCompare(hay, [], options) == 0) {
hits.fill(
highlightType == this.HIGHLIGHT.SUGGESTED ? 0 : 1,
index,
index + needle.length
);
index += needle.length;
} else {
index++;
}
}
}
}

View File

@ -149,6 +149,11 @@ add_task(function test() {
phrase: "mozilla mozzarella momo",
expected: [[0, 2], [8, 2], [19, 4]],
},
{
tokens: ["resume"],
phrase: "résumé",
expected: [[0, 6]],
},
];
for (let { tokens, phrase, expected } of tests) {
tokens = tokens.map(t => ({

View File

@ -199,3 +199,48 @@ add_task(async function test_bookmarkBehaviorDisabled_untagged() {
await PlacesUtils.history.clear();
await PlacesUtils.bookmarks.eraseEverything();
});
add_task(async function test_diacritics() {
Services.prefs.setBoolPref(SUGGEST_PREF, false);
Services.prefs.setBoolPref(SUGGEST_ENABLED_PREF, false);
// Enable the bookmark behavior in UnifiedComplete.
Services.prefs.setBoolPref("browser.urlbar.suggest.bookmark", true);
let controller = UrlbarTestUtils.newMockController();
let searchString = "agui";
let context = createContext(searchString, { isPrivate: false });
await PlacesUtils.bookmarks.insert({
url: "https://bookmark.mozilla.org/%C3%A3gu%C4%A9",
title: "Test bookmark with accents in path",
parentGuid: PlacesUtils.bookmarks.unfiledGuid,
});
await controller.startQuery(context);
info(
"Results:\n" +
context.results.map(m => `${m.title} - ${m.payload.url}`).join("\n")
);
Assert.equal(
context.results.length,
2,
"Found the expected number of matches"
);
Assert.deepEqual(
[UrlbarUtils.RESULT_TYPE.SEARCH, UrlbarUtils.RESULT_TYPE.URL],
context.results.map(m => m.type),
"Check result types"
);
Assert.deepEqual(
[searchString, "Test bookmark with accents in path"],
context.results.map(m => m.title),
"Check match titles"
);
await PlacesUtils.history.clear();
await PlacesUtils.bookmarks.eraseEverything();
});

View File

@ -432,7 +432,8 @@ int32_t CaseInsensitiveCompare(const char* aLeft, const char* aRight,
bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
const char* aLeftEnd, const char* aRightEnd,
const char** aLeftNext,
const char** aRightNext, bool* aErr) {
const char** aRightNext, bool* aErr,
bool aMatchDiacritics) {
NS_ASSERTION(aLeftNext, "Out pointer shouldn't be null.");
NS_ASSERTION(aRightNext, "Out pointer shouldn't be null.");
NS_ASSERTION(aErr, "Out pointer shouldn't be null.");
@ -455,6 +456,11 @@ bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
// Can't have an error past this point.
*aErr = false;
if (!aMatchDiacritics) {
leftChar = ToNaked(leftChar);
rightChar = ToNaked(rightChar);
}
return leftChar == rightChar;
}

View File

@ -126,8 +126,9 @@ uint32_t GetLowerUTF8Codepoint(const char* aStr, const char* aEnd,
/**
* This function determines whether the UTF-8 sequence pointed to by aLeft is
* case-insensitively-equal to the UTF-8 sequence pointed to by aRight, as
* defined by having matching lower-cased codepoints.
* case insensitively equal to the UTF-8 sequence pointed to by aRight (or
* optionally, case and diacritic insensitively equal), as defined by having
* matching (naked) lower-cased codepoints.
*
* aLeftEnd marks the first memory location past aLeft that is not part of
* aLeft; aRightEnd similarly marks the end of aRight.
@ -143,11 +144,15 @@ uint32_t GetLowerUTF8Codepoint(const char* aStr, const char* aEnd,
* false, possibly leaving aLeftNext and aRightNext uninitialized. If the
* function returns true, aErr is guaranteed to be false and both aLeftNext and
* aRightNext are guaranteed to be initialized.
*
* If aMatchDiacritics is false, the comparison is neither case-sensitive nor
* diacritic-sensitive.
*/
bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
const char* aLeftEnd, const char* aRightEnd,
const char** aLeftNext,
const char** aRightNext, bool* aErr);
const char** aRightNext, bool* aErr,
bool aMatchDiacritics = true);
namespace mozilla {

View File

@ -109,7 +109,7 @@ static MOZ_ALWAYS_INLINE bool isOnBoundary(const_char_iterator aPos) {
/**
* Check whether a token string matches a particular position of a source
* string, case insensitively.
* string, case insensitively (or optionally, case and diacritic insensitively).
*
* @param aTokenStart
* An iterator pointing to the start of the token string.
@ -120,6 +120,8 @@ static MOZ_ALWAYS_INLINE bool isOnBoundary(const_char_iterator aPos) {
* matching at.
* @param aSourceEnd
* An iterator pointing past-the-end of the source string.
* @param aMatchDiacritics
* Whether or not the match is diacritic-sensitive.
*
* @return true if the string [aTokenStart, aTokenEnd) matches the start of
* the string [aSourceStart, aSourceEnd, false otherwise.
@ -127,7 +129,8 @@ static MOZ_ALWAYS_INLINE bool isOnBoundary(const_char_iterator aPos) {
static MOZ_ALWAYS_INLINE bool stringMatch(const_char_iterator aTokenStart,
const_char_iterator aTokenEnd,
const_char_iterator aSourceStart,
const_char_iterator aSourceEnd) {
const_char_iterator aSourceEnd,
bool aMatchDiacritics) {
const_char_iterator tokenCur = aTokenStart, sourceCur = aSourceStart;
while (tokenCur < aTokenEnd) {
@ -137,8 +140,8 @@ static MOZ_ALWAYS_INLINE bool stringMatch(const_char_iterator aTokenStart,
bool error;
if (!CaseInsensitiveUTF8CharsEqual(sourceCur, tokenCur, aSourceEnd,
aTokenEnd, &sourceCur, &tokenCur,
&error)) {
aTokenEnd, &sourceCur, &tokenCur, &error,
aMatchDiacritics)) {
return false;
}
}
@ -174,6 +177,9 @@ static bool findInString(const nsDependentCSubstring& aToken,
return false;
}
const nsNavHistory* history = nsNavHistory::GetConstHistoryService();
bool matchDiacritics = history && history->MatchDiacritics();
const_char_iterator tokenStart(aToken.BeginReading()),
tokenEnd(aToken.EndReading()), tokenNext,
sourceStart(aSourceString.BeginReading()),
@ -184,10 +190,15 @@ static bool findInString(const nsDependentCSubstring& aToken,
if (tokenFirstChar == uint32_t(-1)) {
return false;
}
if (!matchDiacritics) {
tokenFirstChar = ToNaked(tokenFirstChar);
}
for (;;) {
// Scan forward to the next viable candidate (if any).
goToNextSearchCandidate(sourceCur, sourceEnd, tokenFirstChar);
if (matchDiacritics) {
// Scan forward to the next viable candidate (if any).
goToNextSearchCandidate(sourceCur, sourceEnd, tokenFirstChar);
}
if (sourceCur == sourceEnd) {
break;
}
@ -200,11 +211,15 @@ static bool findInString(const nsDependentCSubstring& aToken,
if (sourceFirstChar == uint32_t(-1)) {
return false;
}
if (!matchDiacritics) {
sourceFirstChar = ToNaked(sourceFirstChar);
}
if (sourceFirstChar == tokenFirstChar &&
(aBehavior != eFindOnBoundary || sourceCur == sourceStart ||
isOnBoundary(sourceCur)) &&
stringMatch(tokenNext, tokenEnd, sourceNext, sourceEnd)) {
stringMatch(tokenNext, tokenEnd, sourceNext, sourceEnd,
matchDiacritics)) {
return true;
}

View File

@ -54,6 +54,7 @@ using namespace mozilla::places;
// preference ID strings
#define PREF_HISTORY_ENABLED "places.history.enabled"
#define PREF_MATCH_DIACRITICS "places.search.matchDiacritics"
#define PREF_FREC_NUM_VISITS "places.frecency.numVisits"
#define PREF_FREC_NUM_VISITS_DEF 10
@ -140,6 +141,7 @@ using namespace mozilla::places;
#define TOPIC_PROFILE_CHANGE "profile-before-change"
static const char* kObservedPrefs[] = {PREF_HISTORY_ENABLED,
PREF_MATCH_DIACRITICS,
PREF_FREC_NUM_VISITS,
PREF_FREC_FIRST_BUCKET_CUTOFF,
PREF_FREC_SECOND_BUCKET_CUTOFF,
@ -375,6 +377,7 @@ nsNavHistory::nsNavHistory()
mRecentLink(RECENT_EVENTS_INITIAL_CACHE_LENGTH),
mRecentBookmark(RECENT_EVENTS_INITIAL_CACHE_LENGTH),
mHistoryEnabled(true),
mMatchDiacritics(false),
mNumVisitsForFrecency(10),
mDecayFrecencyPendingCount(0),
mTagsFolder(-1),
@ -560,6 +563,7 @@ nsresult nsNavHistory::GetOrCreateIdForPage(nsIURI* aURI, int64_t* _pageId,
void nsNavHistory::LoadPrefs() {
// History preferences.
mHistoryEnabled = Preferences::GetBool(PREF_HISTORY_ENABLED, true);
mMatchDiacritics = Preferences::GetBool(PREF_MATCH_DIACRITICS, false);
// Frecency preferences.
#define FRECENCY_PREF(_prop, _pref) \

View File

@ -176,6 +176,9 @@ class nsNavHistory final : public nsSupportsWeakReference,
// Returns whether history is enabled or not.
bool IsHistoryDisabled() { return !mHistoryEnabled; }
// Returns whether or not diacritics must match in history text searches.
bool MatchDiacritics() const { return mMatchDiacritics; }
// Constants for the columns returned by the above statement.
static const int32_t kGetInfoIndex_PageID;
static const int32_t kGetInfoIndex_URL;
@ -483,6 +486,10 @@ class nsNavHistory final : public nsSupportsWeakReference,
// Will mimic value of the places.history.enabled preference.
bool mHistoryEnabled;
// Whether or not diacritics must match in history text searches.
// Will mimic value of the places.search.matchDiacritics preference.
bool mMatchDiacritics;
// Frecency preferences.
int32_t mNumVisitsForFrecency;
int32_t mFirstBucketCutoffInDays;

View File

@ -0,0 +1,21 @@
/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
let hs = PlacesUtils.history;
/* Test for diacritic-insensitive history search */
add_task(async function test_execute() {
const TEST_URL = "http://example.net/El_%C3%81rea_51";
const SEARCH_TERM = "area";
await PlacesTestUtils.addVisits(uri(TEST_URL));
let query = hs.getNewQuery();
query.searchTerms = SEARCH_TERM;
let options = hs.getNewQueryOptions();
let result = hs.executeQuery(query, options);
result.root.containerOpen = true;
Assert.ok(result.root.childCount == 1);
});

View File

@ -42,6 +42,7 @@ skip-if = appname == "thunderbird"
[test_1085291.js]
[test_1105208.js]
[test_1105866.js]
[test_1606731.js]
[test_annotations.js]
[test_asyncExecuteLegacyQueries.js]
[test_async_transactions.js]