2017-08-12 16:38:52 +00:00
|
|
|
/*
 * Copyright (C) 2005, 2007, 2010, 2013, 2016 Apple Inc. All rights reserved.
 * Copyright (C) 2011 Google Inc. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <unicode/ubrk.h>
|
|
|
|
#include <wtf/ASCIICType.h>
|
|
|
|
#include <wtf/StdLibExtras.h>
|
|
|
|
#include <wtf/text/TextBreakIterator.h>
|
|
|
|
#include <wtf/unicode/CharacterNames.h>
|
|
|
|
|
|
|
|
namespace WebCore {
|
|
|
|
|
|
|
|
static const UChar lineBreakTableFirstCharacter = '!';
|
|
|
|
static const UChar lineBreakTableLastCharacter = 127;
|
|
|
|
static const unsigned lineBreakTableColumnCount = (lineBreakTableLastCharacter - lineBreakTableFirstCharacter) / 8 + 1;
|
|
|
|
|
|
|
|
WEBCORE_EXPORT extern const unsigned char lineBreakTable[][lineBreakTableColumnCount];
|
|
|
|
|
|
|
|
// Selects how the break-finding helpers in this file treat U+00A0 (noBreakSpace).
enum class NonBreakingSpaceBehavior {
    // noBreakSpace is not reported as a breakable space.
    IgnoreNonBreakingSpace,
    // noBreakSpace is reported as a breakable space, like ' ', '\n' and '\t'.
    TreatNonBreakingSpaceAsBreak,
};
|
|
|
|
|
2018-08-04 14:51:43 +00:00
|
|
|
// Compile-time switch for nextBreakablePosition(): Yes enables the shouldBreakAfter() table
// shortcut, No sends every position through the line break iterator.
enum class CanUseShortcut { Yes, No };
|
|
|
|
|
2017-08-12 16:38:52 +00:00
|
|
|
template<NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
|
|
|
|
static inline bool isBreakableSpace(UChar character)
|
|
|
|
{
|
|
|
|
switch (character) {
|
|
|
|
case ' ':
|
|
|
|
case '\n':
|
|
|
|
case '\t':
|
|
|
|
return true;
|
|
|
|
case noBreakSpace:
|
|
|
|
return nonBreakingSpaceBehavior == NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Table-driven shortcut deciding whether a line may break between |character| and |nextCharacter|.
// |lastCharacter| is the character preceding |character|; it is only consulted for the '-' rule.
// Returns false when the pair is not covered by lineBreakTable, which leaves the decision to the
// Unicode line breaking algorithm.
inline bool shouldBreakAfter(UChar lastCharacter, UChar character, UChar nextCharacter)
{
    // A '-' directly before a digit may be a minus sign, so only allow the break when the '-'
    // follows an ASCII letter or digit, as in 'ABCD-1234' and '1234-5678' (e.g. inside long URLs).
    if (character == '-' && isASCIIDigit(nextCharacter))
        return isASCIIAlphanumeric(lastCharacter);

    auto isCoveredByTable = [](UChar candidate) {
        return candidate >= lineBreakTableFirstCharacter && candidate <= lineBreakTableLastCharacter;
    };

    // Outside the table's range there is no shortcut; defer to the Unicode algorithm.
    if (!isCoveredByTable(character) || !isCoveredByTable(nextCharacter))
        return false;

    // Both characters are covered: use lineBreakTable for speed and for compatibility with other
    // browsers. Each row packs one bit per following character, eight per byte.
    const unsigned rowIndex = character - lineBreakTableFirstCharacter;
    const unsigned bitIndex = nextCharacter - lineBreakTableFirstCharacter;
    return lineBreakTable[rowIndex][bitIndex / 8] & (1 << (bitIndex % 8));
}
|
|
|
|
|
|
|
|
template<NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
|
|
|
|
inline bool needsLineBreakIterator(UChar character)
|
|
|
|
{
|
|
|
|
if (nonBreakingSpaceBehavior == NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak)
|
|
|
|
return character > lineBreakTableLastCharacter;
|
|
|
|
return character > lineBreakTableLastCharacter && character != noBreakSpace;
|
|
|
|
}
|
|
|
|
|
|
|
|
// When in non-loose mode, we can use the ASCII shortcut table.
|
2018-08-04 14:51:43 +00:00
|
|
|
template<typename CharacterType, NonBreakingSpaceBehavior nonBreakingSpaceBehavior, CanUseShortcut canUseShortcut>
|
|
|
|
inline unsigned nextBreakablePosition(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* string, unsigned length, unsigned startPosition)
|
2017-08-12 16:38:52 +00:00
|
|
|
{
|
2023-01-25 00:42:21 +00:00
|
|
|
Optional<unsigned> nextBreak;
|
2017-08-12 16:38:52 +00:00
|
|
|
|
|
|
|
CharacterType lastLastCharacter = startPosition > 1 ? string[startPosition - 2] : static_cast<CharacterType>(lazyBreakIterator.secondToLastCharacter());
|
|
|
|
CharacterType lastCharacter = startPosition > 0 ? string[startPosition - 1] : static_cast<CharacterType>(lazyBreakIterator.lastCharacter());
|
|
|
|
unsigned priorContextLength = lazyBreakIterator.priorContextLength();
|
|
|
|
for (unsigned i = startPosition; i < length; i++) {
|
|
|
|
CharacterType character = string[i];
|
|
|
|
|
2018-08-04 14:51:43 +00:00
|
|
|
if (isBreakableSpace<nonBreakingSpaceBehavior>(character) || (canUseShortcut == CanUseShortcut::Yes && shouldBreakAfter(lastLastCharacter, lastCharacter, character)))
|
2017-08-12 16:38:52 +00:00
|
|
|
return i;
|
|
|
|
|
2018-08-04 14:51:43 +00:00
|
|
|
if (canUseShortcut == CanUseShortcut::No || needsLineBreakIterator<nonBreakingSpaceBehavior>(character) || needsLineBreakIterator<nonBreakingSpaceBehavior>(lastCharacter)) {
|
2017-08-12 16:38:52 +00:00
|
|
|
if (!nextBreak || nextBreak.value() < i) {
|
|
|
|
// Don't break if positioned at start of primary context and there is no prior context.
|
|
|
|
if (i || priorContextLength) {
|
|
|
|
UBreakIterator* breakIterator = lazyBreakIterator.get(priorContextLength);
|
|
|
|
if (breakIterator) {
|
|
|
|
int candidate = ubrk_following(breakIterator, i - 1 + priorContextLength);
|
|
|
|
if (candidate == UBRK_DONE)
|
2023-01-25 00:42:21 +00:00
|
|
|
nextBreak = WTF::nullopt;
|
2017-08-12 16:38:52 +00:00
|
|
|
else {
|
|
|
|
unsigned result = candidate;
|
|
|
|
ASSERT(result >= priorContextLength);
|
|
|
|
nextBreak = result - priorContextLength;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (i == nextBreak && !isBreakableSpace<nonBreakingSpaceBehavior>(lastCharacter))
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
|
|
|
lastLastCharacter = lastCharacter;
|
|
|
|
lastCharacter = character;
|
|
|
|
}
|
|
|
|
|
|
|
|
return length;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename CharacterType, NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
|
|
|
|
inline unsigned nextBreakablePositionKeepingAllWords(const CharacterType* string, unsigned length, unsigned startPosition)
|
|
|
|
{
|
|
|
|
for (unsigned i = startPosition; i < length; i++) {
|
|
|
|
if (isBreakableSpace<nonBreakingSpaceBehavior>(string[i]))
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
return length;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Keep-all-words search over the iterator's string view, treating noBreakSpace as breakable.
// Dispatches to the 8-bit or 16-bit instantiation depending on how the view stores its characters.
inline unsigned nextBreakablePositionKeepingAllWords(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
{
    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;

    auto text = lazyBreakIterator.stringView();
    return text.is8Bit()
        ? nextBreakablePositionKeepingAllWords<LChar, nbspBehavior>(text.characters8(), text.length(), startPosition)
        : nextBreakablePositionKeepingAllWords<UChar, nbspBehavior>(text.characters16(), text.length(), startPosition);
}
|
|
|
|
|
|
|
|
// Keep-all-words search over the iterator's string view in which noBreakSpace is not breakable.
// Dispatches to the 8-bit or 16-bit instantiation depending on how the view stores its characters.
inline unsigned nextBreakablePositionKeepingAllWordsIgnoringNBSP(LazyLineBreakIterator& iterator, unsigned startPosition)
{
    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::IgnoreNonBreakingSpace;

    auto text = iterator.stringView();
    return text.is8Bit()
        ? nextBreakablePositionKeepingAllWords<LChar, nbspBehavior>(text.characters8(), text.length(), startPosition)
        : nextBreakablePositionKeepingAllWords<UChar, nbspBehavior>(text.characters16(), text.length(), startPosition);
}
|
|
|
|
|
|
|
|
// Finds the next break position in the iterator's string view with the table shortcut enabled
// and noBreakSpace treated as breakable. Picks the instantiation matching the view's storage.
inline unsigned nextBreakablePosition(LazyLineBreakIterator& iterator, unsigned startPosition)
{
    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;
    constexpr auto shortcut = CanUseShortcut::Yes;

    auto text = iterator.stringView();
    return text.is8Bit()
        ? nextBreakablePosition<LChar, nbspBehavior, shortcut>(iterator, text.characters8(), text.length(), startPosition)
        : nextBreakablePosition<UChar, nbspBehavior, shortcut>(iterator, text.characters16(), text.length(), startPosition);
}
|
|
|
|
|
|
|
|
// Finds the next break position in the iterator's string view with the table shortcut enabled
// and noBreakSpace not treated as breakable. Picks the instantiation matching the view's storage.
inline unsigned nextBreakablePositionIgnoringNBSP(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
{
    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::IgnoreNonBreakingSpace;
    constexpr auto shortcut = CanUseShortcut::Yes;

    auto text = lazyBreakIterator.stringView();
    return text.is8Bit()
        ? nextBreakablePosition<LChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters8(), text.length(), startPosition)
        : nextBreakablePosition<UChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters16(), text.length(), startPosition);
}
|
|
|
|
|
2018-08-04 14:51:43 +00:00
|
|
|
// Finds the next break position in the iterator's string view with the table shortcut disabled
// and noBreakSpace treated as breakable. Picks the instantiation matching the view's storage.
inline unsigned nextBreakablePositionWithoutShortcut(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
{
    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;
    constexpr auto shortcut = CanUseShortcut::No;

    auto text = lazyBreakIterator.stringView();
    return text.is8Bit()
        ? nextBreakablePosition<LChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters8(), text.length(), startPosition)
        : nextBreakablePosition<UChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters16(), text.length(), startPosition);
}
|
|
|
|
|
2018-08-04 14:51:43 +00:00
|
|
|
// Finds the next break position in the iterator's string view with the table shortcut disabled
// and noBreakSpace not treated as breakable. Picks the instantiation matching the view's storage.
inline unsigned nextBreakablePositionIgnoringNBSPWithoutShortcut(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
{
    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::IgnoreNonBreakingSpace;
    constexpr auto shortcut = CanUseShortcut::No;

    auto text = lazyBreakIterator.stringView();
    return text.is8Bit()
        ? nextBreakablePosition<LChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters8(), text.length(), startPosition)
        : nextBreakablePosition<UChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters16(), text.length(), startPosition);
}
|
|
|
|
|
2023-01-25 00:42:21 +00:00
|
|
|
// Returns the first character-break boundary strictly after |startPosition| in the iterator's
// string view, or the view's length when no boundary follows.
inline unsigned nextBreakablePositionBreakCharacter(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
{
    auto stringView = lazyBreakIterator.stringView();
    ASSERT(startPosition <= stringView.length());
    // FIXME: Can/Should we implement this using a Shared Iterator (performance issue)
    // https://bugs.webkit.org/show_bug.cgi?id=197876
    NonSharedCharacterBreakIterator iterator(stringView);

    // ubrk_following() signals "no further boundary" with UBRK_DONE (a negative int). Check for
    // it on the signed result: storing it straight into an Optional<unsigned> would produce an
    // engaged optional holding a huge value, so the fallback to the string length never ran.
    int next = ubrk_following(iterator, startPosition);
    if (next == UBRK_DONE)
        return stringView.length();
    return static_cast<unsigned>(next);
}
|
|
|
|
|
|
|
|
// Returns true when a line may break at |startPosition|.
//
// |nextBreakable| is an in/out cache of the most recently computed break position: while it is
// set and not behind |startPosition| it is reused, otherwise it is recomputed with the search
// function selected by the flags:
//   breakAnywhere  - use character-break boundaries (takes precedence over the other flags);
//   keepAllWords   - only breakable spaces count as break opportunities;
//   canUseShortcut - allow the table shortcut in the general search;
//   breakNBSP      - treat noBreakSpace as breakable (the breakAnywhere path does not consult it).
inline bool isBreakable(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition, Optional<unsigned>& nextBreakable, bool breakNBSP, bool canUseShortcut, bool keepAllWords, bool breakAnywhere)
{
    const bool cachedPositionIsUsable = nextBreakable && nextBreakable.value() >= startPosition;

    if (!cachedPositionIsUsable) {
        if (breakAnywhere)
            nextBreakable = nextBreakablePositionBreakCharacter(lazyBreakIterator, startPosition);
        else if (keepAllWords) {
            nextBreakable = breakNBSP
                ? nextBreakablePositionKeepingAllWords(lazyBreakIterator, startPosition)
                : nextBreakablePositionKeepingAllWordsIgnoringNBSP(lazyBreakIterator, startPosition);
        } else if (canUseShortcut) {
            nextBreakable = breakNBSP
                ? nextBreakablePosition(lazyBreakIterator, startPosition)
                : nextBreakablePositionIgnoringNBSP(lazyBreakIterator, startPosition);
        } else {
            nextBreakable = breakNBSP
                ? nextBreakablePositionWithoutShortcut(lazyBreakIterator, startPosition)
                : nextBreakablePositionIgnoringNBSPWithoutShortcut(lazyBreakIterator, startPosition);
        }
    }

    return startPosition == nextBreakable;
}
|
|
|
|
|
|
|
|
} // namespace WebCore
|