Bug 1614868 - Ignore combining diacritic characters in history search. r=jfkthame,mak

IsCombiningDiacritic(-1) returns false, so there is no need to specially
handle -1 in GetLowerUTF8Codepoint_inline.

It is no longer necessary for GetNaked to check whether a character is a
combining character because all callers now skip combining diacritics
and GetNaked already makes sure that decomposition removes a diacritic
and not something else.

Differential Revision: https://phabricator.services.mozilla.com/D62533

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Alex Henrie 2020-02-17 20:42:04 +00:00
parent df11faa804
commit 676b1a533d
3 changed files with 25 additions and 11 deletions

View File

@ -212,7 +212,7 @@ add_task(async function test_diacritics() {
let context = createContext(searchString, { isPrivate: false });
await PlacesUtils.bookmarks.insert({
url: "https://bookmark.mozilla.org/%C3%A3gu%C4%A9",
url: "https://bookmark.mozilla.org/%C3%A3g%CC%83u%C4%A9",
title: "Test bookmark with accents in path",
parentGuid: PlacesUtils.bookmarks.unfiledGuid,
});

View File

@ -434,6 +434,23 @@ int32_t CaseInsensitiveCompare(const char* aLeft, const char* aRight,
return 0;
}
/**
 * Decodes a lower-cased codepoint from [aStr, aEnd) using the three-argument
 * GetLowerUTF8Codepoint_inline overload, optionally skipping over combining
 * diacritics.
 *
 * @param aStr             Position at which decoding starts.
 * @param aEnd             End of the buffer, forwarded to the decoder.
 * @param aNext            Out-parameter written by the decoder; on return it
 *                         holds whatever the final decoder call stored there.
 * @param aMatchDiacritics When true, the first decoded codepoint is returned
 *                         as-is. When false, codepoints for which
 *                         mozilla::unicode::IsCombiningDiacritic() is true are
 *                         skipped and decoding continues from *aNext.
 * @return The value produced by the last decoder call. A result that is not a
 *         combining diacritic (including the decoder's uint32_t(-1) error
 *         value) ends the scan and is returned unchanged.
 */
static MOZ_ALWAYS_INLINE uint32_t
GetLowerUTF8Codepoint_inline(const char* aStr, const char* aEnd,
                             const char** aNext, bool aMatchDiacritics) {
  const char* cursor = aStr;
  uint32_t codepoint = GetLowerUTF8Codepoint_inline(cursor, aEnd, aNext);

  // Only consult IsCombiningDiacritic when diacritics are being ignored; keep
  // decoding from where the previous call stopped until a codepoint that is
  // not a combining diacritic is found.
  while (!aMatchDiacritics &&
         mozilla::unicode::IsCombiningDiacritic(codepoint)) {
    cursor = *aNext;
    codepoint = GetLowerUTF8Codepoint_inline(cursor, aEnd, aNext);
  }

  return codepoint;
}
bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
const char* aLeftEnd, const char* aRightEnd,
const char** aLeftNext,
@ -445,14 +462,15 @@ bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
NS_ASSERTION(aLeft < aLeftEnd, "aLeft must be less than aLeftEnd.");
NS_ASSERTION(aRight < aRightEnd, "aRight must be less than aRightEnd.");
uint32_t leftChar = GetLowerUTF8Codepoint_inline(aLeft, aLeftEnd, aLeftNext);
uint32_t leftChar = GetLowerUTF8Codepoint_inline(aLeft, aLeftEnd, aLeftNext,
aMatchDiacritics);
if (MOZ_UNLIKELY(leftChar == uint32_t(-1))) {
*aErr = true;
return false;
}
uint32_t rightChar =
GetLowerUTF8Codepoint_inline(aRight, aRightEnd, aRightNext);
uint32_t rightChar = GetLowerUTF8Codepoint_inline(
aRight, aRightEnd, aRightNext, aMatchDiacritics);
if (MOZ_UNLIKELY(rightChar == uint32_t(-1))) {
*aErr = true;
return false;

View File

@ -315,6 +315,9 @@ uint32_t GetNaked(uint32_t aCh) {
static const UNormalizer2* normalizer;
static HashMap<uint32_t, uint32_t> nakedCharCache;
NS_ASSERTION(!IsCombiningDiacritic(aCh),
"This character needs to be skipped");
HashMap<uint32_t, uint32_t>::Ptr entry = nakedCharCache.lookup(aCh);
if (entry.found()) {
return entry->value();
@ -340,13 +343,6 @@ uint32_t GetNaked(uint32_t aCh) {
return aCh;
}
if (u_getIntPropertyValue(aCh, UCHAR_GENERAL_CATEGORY) & U_GC_M_MASK) {
// The character is itself a combining character, and we don't want to use
// its decomposition into multiple combining characters.
baseChar = aCh;
goto cache;
}
if (NS_IS_HIGH_SURROGATE(decomposition[0])) {
baseChar = SURROGATE_TO_UCS4(decomposition[0], decomposition[1]);
combiners = decomposition + 2;