mirror of
https://github.com/mozilla/gecko-dev.git
synced 2025-02-27 12:50:09 +00:00

The motivation for this patch is to remove a rarely used API from WordBreaker. WordBreaker::BreakInBetween() is used only by nsFind::BreakInBetween() in production code, and it can be replaced by Next(). If a caller wants to know whether there is a word break between two strings, as in the gtest use cases, the preferred way is to join the two strings and pass the result to Next().

Note: I deleted the buggy forward word search algorithm in TestFindWordBreakFromPosition() because, judging from the test expectations, the search is not expected to continue into previous fragments. Also, the buggy part comes from the following code, which had undefined behavior before Part 4 and does nothing after Part 4.

```
wbk->FindWord(prevFragText.get(), prevFragText.Length(), prevFragText.Length());
```

Differential Revision: https://phabricator.services.mozilla.com/D125151
65 lines
1.9 KiB
C++
65 lines
1.9 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
#ifndef mozilla_intl_WordBreaker_h__
|
|
#define mozilla_intl_WordBreaker_h__
|
|
|
|
#include "nscore.h"
|
|
#include "nsISupports.h"
|
|
|
|
// Sentinel returned by WordBreaker::Next() when aPos is already at or beyond
// the end of the text (aPos >= aLen).
#define NS_WORDBREAKER_NEED_MORE_TEXT -1
|
|
|
|
namespace mozilla {
|
|
namespace intl {
|
|
|
|
// Half-open range [mBegin, mEnd) of offsets into the text handed to
// WordBreaker::FindWord().
struct WordRange {
  // Offset of the first character of the word.
  uint32_t mBegin;
  // Offset of the last character of the word plus 1.
  uint32_t mEnd;
};
|
|
|
|
class WordBreaker {
|
|
public:
|
|
NS_INLINE_DECL_REFCOUNTING(WordBreaker)
|
|
|
|
static already_AddRefed<WordBreaker> Create();
|
|
|
|
// Find the word boundary by scanning forward and backward from aPos.
|
|
//
|
|
// @return WordRange where mBegin equals to the offset to first character in
|
|
// the word and mEnd equals to the offset to the last character plus 1. mEnd
|
|
// can be aLen if the desired word is at the end of aText.
|
|
//
|
|
// If aPos is already at the end of aText or beyond, both mBegin and mEnd
|
|
// equals to aLen.
|
|
WordRange FindWord(const char16_t* aText, uint32_t aLen, uint32_t aPos);
|
|
|
|
// Find the next word break opportunity starting from aPos + 1. It can return
|
|
// aLen if there's no break opportunity between [aPos + 1, aLen - 1].
|
|
//
|
|
// If aPos is already at the end of aText or beyond, i.e. aPos >= aLen, return
|
|
// NS_WORDBREAKER_NEED_MORE_TEXT.
|
|
int32_t Next(const char16_t* aText, uint32_t aLen, uint32_t aPos);
|
|
|
|
private:
|
|
~WordBreaker() = default;
|
|
|
|
enum WordBreakClass : uint8_t {
|
|
kWbClassSpace = 0,
|
|
kWbClassAlphaLetter,
|
|
kWbClassPunct,
|
|
kWbClassHanLetter,
|
|
kWbClassKatakanaLetter,
|
|
kWbClassHiraganaLetter,
|
|
kWbClassHWKatakanaLetter,
|
|
kWbClassScriptioContinua
|
|
};
|
|
|
|
static WordBreakClass GetClass(char16_t aChar);
|
|
};
|
|
|
|
} // namespace intl
|
|
} // namespace mozilla
|
|
|
|
#endif /* mozilla_intl_WordBreaker_h__ */
|