mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-27 23:02:20 +00:00
Bug 1614868 - Ignore combining diacritic characters in history search. r=jfkthame,mak
IsCombiningDiacritic(-1) returns false, so there is no need to specially handle -1 in GetLowerUTF8Codepoint_inline. It is no longer necessary for GetNaked to check whether a character is a combining character because all callers now skip combining diacritics and GetNaked already makes sure that decomposition removes a diacritic and not something else. Differential Revision: https://phabricator.services.mozilla.com/D62533 --HG-- extra : moz-landing-system : lando
This commit is contained in:
parent
df11faa804
commit
676b1a533d
@ -212,7 +212,7 @@ add_task(async function test_diacritics() {
|
||||
let context = createContext(searchString, { isPrivate: false });
|
||||
|
||||
await PlacesUtils.bookmarks.insert({
|
||||
url: "https://bookmark.mozilla.org/%C3%A3gu%C4%A9",
|
||||
url: "https://bookmark.mozilla.org/%C3%A3g%CC%83u%C4%A9",
|
||||
title: "Test bookmark with accents in path",
|
||||
parentGuid: PlacesUtils.bookmarks.unfiledGuid,
|
||||
});
|
||||
|
@ -434,6 +434,23 @@ int32_t CaseInsensitiveCompare(const char* aLeft, const char* aRight,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Diacritic-aware wrapper around the three-argument
// GetLowerUTF8Codepoint_inline overload.
//
// Decodes one codepoint starting at aStr through that overload, which also
// stores the position following the decoded codepoint in *aNext.
//
// When aMatchDiacritics is true, the first decoded value is returned as-is.
// When it is false, every value for which
// mozilla::unicode::IsCombiningDiacritic() reports true is skipped: decoding
// restarts from *aNext until a value that is not a combining diacritic is
// produced, and that value is returned.
//
// NOTE(review): callers compare the result against uint32_t(-1) to detect
// failure; this relies on IsCombiningDiacritic(uint32_t(-1)) being false so
// that such a value ends the loop and is passed through unchanged -- confirm
// against the IsCombiningDiacritic implementation.
static MOZ_ALWAYS_INLINE uint32_t
GetLowerUTF8Codepoint_inline(const char* aStr, const char* aEnd,
                             const char** aNext, bool aMatchDiacritics) {
  const char* cursor = aStr;
  uint32_t codepoint = GetLowerUTF8Codepoint_inline(cursor, aEnd, aNext);

  // The diacritic test is only evaluated when diacritics are being ignored.
  while (!aMatchDiacritics &&
         mozilla::unicode::IsCombiningDiacritic(codepoint)) {
    // Resume decoding right after the combining mark that was just read.
    cursor = *aNext;
    codepoint = GetLowerUTF8Codepoint_inline(cursor, aEnd, aNext);
  }

  return codepoint;
}
|
||||
|
||||
bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
|
||||
const char* aLeftEnd, const char* aRightEnd,
|
||||
const char** aLeftNext,
|
||||
@ -445,14 +462,15 @@ bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
|
||||
NS_ASSERTION(aLeft < aLeftEnd, "aLeft must be less than aLeftEnd.");
|
||||
NS_ASSERTION(aRight < aRightEnd, "aRight must be less than aRightEnd.");
|
||||
|
||||
uint32_t leftChar = GetLowerUTF8Codepoint_inline(aLeft, aLeftEnd, aLeftNext);
|
||||
uint32_t leftChar = GetLowerUTF8Codepoint_inline(aLeft, aLeftEnd, aLeftNext,
|
||||
aMatchDiacritics);
|
||||
if (MOZ_UNLIKELY(leftChar == uint32_t(-1))) {
|
||||
*aErr = true;
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t rightChar =
|
||||
GetLowerUTF8Codepoint_inline(aRight, aRightEnd, aRightNext);
|
||||
uint32_t rightChar = GetLowerUTF8Codepoint_inline(
|
||||
aRight, aRightEnd, aRightNext, aMatchDiacritics);
|
||||
if (MOZ_UNLIKELY(rightChar == uint32_t(-1))) {
|
||||
*aErr = true;
|
||||
return false;
|
||||
|
@ -315,6 +315,9 @@ uint32_t GetNaked(uint32_t aCh) {
|
||||
static const UNormalizer2* normalizer;
|
||||
static HashMap<uint32_t, uint32_t> nakedCharCache;
|
||||
|
||||
NS_ASSERTION(!IsCombiningDiacritic(aCh),
|
||||
"This character needs to be skipped");
|
||||
|
||||
HashMap<uint32_t, uint32_t>::Ptr entry = nakedCharCache.lookup(aCh);
|
||||
if (entry.found()) {
|
||||
return entry->value();
|
||||
@ -340,13 +343,6 @@ uint32_t GetNaked(uint32_t aCh) {
|
||||
return aCh;
|
||||
}
|
||||
|
||||
if (u_getIntPropertyValue(aCh, UCHAR_GENERAL_CATEGORY) & U_GC_M_MASK) {
|
||||
// The character is itself a combining character, and we don't want to use
|
||||
// its decomposition into multiple combining characters.
|
||||
baseChar = aCh;
|
||||
goto cache;
|
||||
}
|
||||
|
||||
if (NS_IS_HIGH_SURROGATE(decomposition[0])) {
|
||||
baseChar = SURROGATE_TO_UCS4(decomposition[0], decomposition[1]);
|
||||
combiners = decomposition + 2;
|
||||
|
Loading…
Reference in New Issue
Block a user