mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-24 05:11:16 +00:00
Bug 1606731 - Ignore diacritics when searching the browser history. r=jfkthame,mak
Differential Revision: https://phabricator.services.mozilla.com/D58584 --HG-- extra : moz-landing-system : lando
This commit is contained in:
parent
fa7b42ae1e
commit
b670bcef3e
@ -886,6 +886,9 @@ pref("accessibility.blockautorefresh", false);
|
||||
// Whether history is enabled or not.
|
||||
pref("places.history.enabled", true);
|
||||
|
||||
// Whether or not diacritics must match in history text searches.
|
||||
pref("places.search.matchDiacritics", false);
|
||||
|
||||
// the (maximum) number of the recent visits to sample
|
||||
// when calculating frecency
|
||||
pref("places.frecency.numVisits", 10);
|
||||
|
@ -296,18 +296,45 @@ var UrlbarUtils = {
|
||||
let hits = new Array(str.length).fill(
|
||||
highlightType == this.HIGHLIGHT.SUGGESTED ? 1 : 0
|
||||
);
|
||||
for (let { lowerCaseValue } of tokens) {
|
||||
for (let { lowerCaseValue: needle } of tokens) {
|
||||
// Ideally we should never hit the empty token case, but just in case
|
||||
// the `needle` check protects us from an infinite loop.
|
||||
for (let index = 0, needle = lowerCaseValue; index >= 0 && needle; ) {
|
||||
if (!needle) {
|
||||
continue;
|
||||
}
|
||||
let index = 0;
|
||||
let found = false;
|
||||
// First try a diacritic-sensitive search.
|
||||
for (;;) {
|
||||
index = str.indexOf(needle, index);
|
||||
if (index >= 0) {
|
||||
hits.fill(
|
||||
highlightType == this.HIGHLIGHT.SUGGESTED ? 0 : 1,
|
||||
index,
|
||||
index + needle.length
|
||||
);
|
||||
index += needle.length;
|
||||
if (index < 0) {
|
||||
break;
|
||||
}
|
||||
hits.fill(
|
||||
highlightType == this.HIGHLIGHT.SUGGESTED ? 0 : 1,
|
||||
index,
|
||||
index + needle.length
|
||||
);
|
||||
index += needle.length;
|
||||
found = true;
|
||||
}
|
||||
// If that fails to match anything, try a (computationally intensive)
|
||||
// diacritic-insensitive search.
|
||||
if (!found) {
|
||||
const options = { sensitivity: "base" };
|
||||
index = 0;
|
||||
while (index < str.length) {
|
||||
let hay = str.substr(index, needle.length);
|
||||
if (needle.localeCompare(hay, [], options) == 0) {
|
||||
hits.fill(
|
||||
highlightType == this.HIGHLIGHT.SUGGESTED ? 0 : 1,
|
||||
index,
|
||||
index + needle.length
|
||||
);
|
||||
index += needle.length;
|
||||
} else {
|
||||
index++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -149,6 +149,11 @@ add_task(function test() {
|
||||
phrase: "mozilla mozzarella momo",
|
||||
expected: [[0, 2], [8, 2], [19, 4]],
|
||||
},
|
||||
{
|
||||
tokens: ["resume"],
|
||||
phrase: "résumé",
|
||||
expected: [[0, 6]],
|
||||
},
|
||||
];
|
||||
for (let { tokens, phrase, expected } of tests) {
|
||||
tokens = tokens.map(t => ({
|
||||
|
@ -199,3 +199,48 @@ add_task(async function test_bookmarkBehaviorDisabled_untagged() {
|
||||
await PlacesUtils.history.clear();
|
||||
await PlacesUtils.bookmarks.eraseEverything();
|
||||
});
|
||||
|
||||
add_task(async function test_diacritics() {
|
||||
Services.prefs.setBoolPref(SUGGEST_PREF, false);
|
||||
Services.prefs.setBoolPref(SUGGEST_ENABLED_PREF, false);
|
||||
|
||||
// Enable the bookmark behavior in UnifiedComplete.
|
||||
Services.prefs.setBoolPref("browser.urlbar.suggest.bookmark", true);
|
||||
|
||||
let controller = UrlbarTestUtils.newMockController();
|
||||
let searchString = "agui";
|
||||
let context = createContext(searchString, { isPrivate: false });
|
||||
|
||||
await PlacesUtils.bookmarks.insert({
|
||||
url: "https://bookmark.mozilla.org/%C3%A3gu%C4%A9",
|
||||
title: "Test bookmark with accents in path",
|
||||
parentGuid: PlacesUtils.bookmarks.unfiledGuid,
|
||||
});
|
||||
|
||||
await controller.startQuery(context);
|
||||
|
||||
info(
|
||||
"Results:\n" +
|
||||
context.results.map(m => `${m.title} - ${m.payload.url}`).join("\n")
|
||||
);
|
||||
Assert.equal(
|
||||
context.results.length,
|
||||
2,
|
||||
"Found the expected number of matches"
|
||||
);
|
||||
|
||||
Assert.deepEqual(
|
||||
[UrlbarUtils.RESULT_TYPE.SEARCH, UrlbarUtils.RESULT_TYPE.URL],
|
||||
context.results.map(m => m.type),
|
||||
"Check result types"
|
||||
);
|
||||
|
||||
Assert.deepEqual(
|
||||
[searchString, "Test bookmark with accents in path"],
|
||||
context.results.map(m => m.title),
|
||||
"Check match titles"
|
||||
);
|
||||
|
||||
await PlacesUtils.history.clear();
|
||||
await PlacesUtils.bookmarks.eraseEverything();
|
||||
});
|
||||
|
@ -432,7 +432,8 @@ int32_t CaseInsensitiveCompare(const char* aLeft, const char* aRight,
|
||||
bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
|
||||
const char* aLeftEnd, const char* aRightEnd,
|
||||
const char** aLeftNext,
|
||||
const char** aRightNext, bool* aErr) {
|
||||
const char** aRightNext, bool* aErr,
|
||||
bool aMatchDiacritics) {
|
||||
NS_ASSERTION(aLeftNext, "Out pointer shouldn't be null.");
|
||||
NS_ASSERTION(aRightNext, "Out pointer shouldn't be null.");
|
||||
NS_ASSERTION(aErr, "Out pointer shouldn't be null.");
|
||||
@ -455,6 +456,11 @@ bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
|
||||
// Can't have an error past this point.
|
||||
*aErr = false;
|
||||
|
||||
if (!aMatchDiacritics) {
|
||||
leftChar = ToNaked(leftChar);
|
||||
rightChar = ToNaked(rightChar);
|
||||
}
|
||||
|
||||
return leftChar == rightChar;
|
||||
}
|
||||
|
||||
|
@ -126,8 +126,9 @@ uint32_t GetLowerUTF8Codepoint(const char* aStr, const char* aEnd,
|
||||
|
||||
/**
|
||||
* This function determines whether the UTF-8 sequence pointed to by aLeft is
|
||||
* case-insensitively-equal to the UTF-8 sequence pointed to by aRight, as
|
||||
* defined by having matching lower-cased codepoints.
|
||||
* case insensitively equal to the UTF-8 sequence pointed to by aRight (or
|
||||
* optionally, case and diacritic insensitively equal), as defined by having
|
||||
* matching (naked) lower-cased codepoints.
|
||||
*
|
||||
* aLeftEnd marks the first memory location past aLeft that is not part of
|
||||
* aLeft; aRightEnd similarly marks the end of aRight.
|
||||
@ -143,11 +144,15 @@ uint32_t GetLowerUTF8Codepoint(const char* aStr, const char* aEnd,
|
||||
* false, possibly leaving aLeftNext and aRightNext uninitialized. If the
|
||||
* function returns true, aErr is guaranteed to be false and both aLeftNext and
|
||||
* aRightNext are guaranteed to be initialized.
|
||||
*
|
||||
* If aMatchDiacritics is false, the comparison is neither case-sensitive nor
|
||||
* diacritic-sensitive.
|
||||
*/
|
||||
bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
|
||||
const char* aLeftEnd, const char* aRightEnd,
|
||||
const char** aLeftNext,
|
||||
const char** aRightNext, bool* aErr);
|
||||
const char** aRightNext, bool* aErr,
|
||||
bool aMatchDiacritics = true);
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
|
@ -109,7 +109,7 @@ static MOZ_ALWAYS_INLINE bool isOnBoundary(const_char_iterator aPos) {
|
||||
|
||||
/**
|
||||
* Check whether a token string matches a particular position of a source
|
||||
* string, case insensitively.
|
||||
* string, case insensitively (or optionally, case and diacritic insensitively).
|
||||
*
|
||||
* @param aTokenStart
|
||||
* An iterator pointing to the start of the token string.
|
||||
@ -120,6 +120,8 @@ static MOZ_ALWAYS_INLINE bool isOnBoundary(const_char_iterator aPos) {
|
||||
* matching at.
|
||||
* @param aSourceEnd
|
||||
* An iterator pointing past-the-end of the source string.
|
||||
* @param aMatchDiacritics
|
||||
* Whether or not the match is diacritic-sensitive.
|
||||
*
|
||||
* @return true if the string [aTokenStart, aTokenEnd) matches the start of
|
||||
* the string [aSourceStart, aSourceEnd, false otherwise.
|
||||
@ -127,7 +129,8 @@ static MOZ_ALWAYS_INLINE bool isOnBoundary(const_char_iterator aPos) {
|
||||
static MOZ_ALWAYS_INLINE bool stringMatch(const_char_iterator aTokenStart,
|
||||
const_char_iterator aTokenEnd,
|
||||
const_char_iterator aSourceStart,
|
||||
const_char_iterator aSourceEnd) {
|
||||
const_char_iterator aSourceEnd,
|
||||
bool aMatchDiacritics) {
|
||||
const_char_iterator tokenCur = aTokenStart, sourceCur = aSourceStart;
|
||||
|
||||
while (tokenCur < aTokenEnd) {
|
||||
@ -137,8 +140,8 @@ static MOZ_ALWAYS_INLINE bool stringMatch(const_char_iterator aTokenStart,
|
||||
|
||||
bool error;
|
||||
if (!CaseInsensitiveUTF8CharsEqual(sourceCur, tokenCur, aSourceEnd,
|
||||
aTokenEnd, &sourceCur, &tokenCur,
|
||||
&error)) {
|
||||
aTokenEnd, &sourceCur, &tokenCur, &error,
|
||||
aMatchDiacritics)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -174,6 +177,9 @@ static bool findInString(const nsDependentCSubstring& aToken,
|
||||
return false;
|
||||
}
|
||||
|
||||
const nsNavHistory* history = nsNavHistory::GetConstHistoryService();
|
||||
bool matchDiacritics = history && history->MatchDiacritics();
|
||||
|
||||
const_char_iterator tokenStart(aToken.BeginReading()),
|
||||
tokenEnd(aToken.EndReading()), tokenNext,
|
||||
sourceStart(aSourceString.BeginReading()),
|
||||
@ -184,10 +190,15 @@ static bool findInString(const nsDependentCSubstring& aToken,
|
||||
if (tokenFirstChar == uint32_t(-1)) {
|
||||
return false;
|
||||
}
|
||||
if (!matchDiacritics) {
|
||||
tokenFirstChar = ToNaked(tokenFirstChar);
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
// Scan forward to the next viable candidate (if any).
|
||||
goToNextSearchCandidate(sourceCur, sourceEnd, tokenFirstChar);
|
||||
if (matchDiacritics) {
|
||||
// Scan forward to the next viable candidate (if any).
|
||||
goToNextSearchCandidate(sourceCur, sourceEnd, tokenFirstChar);
|
||||
}
|
||||
if (sourceCur == sourceEnd) {
|
||||
break;
|
||||
}
|
||||
@ -200,11 +211,15 @@ static bool findInString(const nsDependentCSubstring& aToken,
|
||||
if (sourceFirstChar == uint32_t(-1)) {
|
||||
return false;
|
||||
}
|
||||
if (!matchDiacritics) {
|
||||
sourceFirstChar = ToNaked(sourceFirstChar);
|
||||
}
|
||||
|
||||
if (sourceFirstChar == tokenFirstChar &&
|
||||
(aBehavior != eFindOnBoundary || sourceCur == sourceStart ||
|
||||
isOnBoundary(sourceCur)) &&
|
||||
stringMatch(tokenNext, tokenEnd, sourceNext, sourceEnd)) {
|
||||
stringMatch(tokenNext, tokenEnd, sourceNext, sourceEnd,
|
||||
matchDiacritics)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -54,6 +54,7 @@ using namespace mozilla::places;
|
||||
|
||||
// preference ID strings
|
||||
#define PREF_HISTORY_ENABLED "places.history.enabled"
|
||||
#define PREF_MATCH_DIACRITICS "places.search.matchDiacritics"
|
||||
|
||||
#define PREF_FREC_NUM_VISITS "places.frecency.numVisits"
|
||||
#define PREF_FREC_NUM_VISITS_DEF 10
|
||||
@ -140,6 +141,7 @@ using namespace mozilla::places;
|
||||
#define TOPIC_PROFILE_CHANGE "profile-before-change"
|
||||
|
||||
static const char* kObservedPrefs[] = {PREF_HISTORY_ENABLED,
|
||||
PREF_MATCH_DIACRITICS,
|
||||
PREF_FREC_NUM_VISITS,
|
||||
PREF_FREC_FIRST_BUCKET_CUTOFF,
|
||||
PREF_FREC_SECOND_BUCKET_CUTOFF,
|
||||
@ -375,6 +377,7 @@ nsNavHistory::nsNavHistory()
|
||||
mRecentLink(RECENT_EVENTS_INITIAL_CACHE_LENGTH),
|
||||
mRecentBookmark(RECENT_EVENTS_INITIAL_CACHE_LENGTH),
|
||||
mHistoryEnabled(true),
|
||||
mMatchDiacritics(false),
|
||||
mNumVisitsForFrecency(10),
|
||||
mDecayFrecencyPendingCount(0),
|
||||
mTagsFolder(-1),
|
||||
@ -560,6 +563,7 @@ nsresult nsNavHistory::GetOrCreateIdForPage(nsIURI* aURI, int64_t* _pageId,
|
||||
void nsNavHistory::LoadPrefs() {
|
||||
// History preferences.
|
||||
mHistoryEnabled = Preferences::GetBool(PREF_HISTORY_ENABLED, true);
|
||||
mMatchDiacritics = Preferences::GetBool(PREF_MATCH_DIACRITICS, false);
|
||||
|
||||
// Frecency preferences.
|
||||
#define FRECENCY_PREF(_prop, _pref) \
|
||||
|
@ -176,6 +176,9 @@ class nsNavHistory final : public nsSupportsWeakReference,
|
||||
// Returns whether history is enabled or not.
|
||||
bool IsHistoryDisabled() { return !mHistoryEnabled; }
|
||||
|
||||
// Returns whether or not diacritics must match in history text searches.
|
||||
bool MatchDiacritics() const { return mMatchDiacritics; }
|
||||
|
||||
// Constants for the columns returned by the above statement.
|
||||
static const int32_t kGetInfoIndex_PageID;
|
||||
static const int32_t kGetInfoIndex_URL;
|
||||
@ -483,6 +486,10 @@ class nsNavHistory final : public nsSupportsWeakReference,
|
||||
// Will mimic value of the places.history.enabled preference.
|
||||
bool mHistoryEnabled;
|
||||
|
||||
// Whether or not diacritics must match in history text searches.
|
||||
// Will mimic value of the places.search.matchDiacritics preference.
|
||||
bool mMatchDiacritics;
|
||||
|
||||
// Frecency preferences.
|
||||
int32_t mNumVisitsForFrecency;
|
||||
int32_t mFirstBucketCutoffInDays;
|
||||
|
21
toolkit/components/places/tests/unit/test_1606731.js
Normal file
21
toolkit/components/places/tests/unit/test_1606731.js
Normal file
@ -0,0 +1,21 @@
|
||||
/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*- */
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
let hs = PlacesUtils.history;
|
||||
|
||||
/* Test for diacritic-insensitive history search */
|
||||
|
||||
add_task(async function test_execute() {
|
||||
const TEST_URL = "http://example.net/El_%C3%81rea_51";
|
||||
const SEARCH_TERM = "area";
|
||||
await PlacesTestUtils.addVisits(uri(TEST_URL));
|
||||
let query = hs.getNewQuery();
|
||||
query.searchTerms = SEARCH_TERM;
|
||||
let options = hs.getNewQueryOptions();
|
||||
let result = hs.executeQuery(query, options);
|
||||
result.root.containerOpen = true;
|
||||
Assert.ok(result.root.childCount == 1);
|
||||
});
|
@ -42,6 +42,7 @@ skip-if = appname == "thunderbird"
|
||||
[test_1085291.js]
|
||||
[test_1105208.js]
|
||||
[test_1105866.js]
|
||||
[test_1606731.js]
|
||||
[test_annotations.js]
|
||||
[test_asyncExecuteLegacyQueries.js]
|
||||
[test_async_transactions.js]
|
||||
|
Loading…
Reference in New Issue
Block a user