/* * Copyright (C) 2005, 2007, 2010, 2013, 2016 Apple Inc. All rights reserved. * Copyright (C) 2011 Google Inc. All rights reserved. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public License * along with this library; see the file COPYING.LIB. If not, write to * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, * Boston, MA 02110-1301, USA. * */ #pragma once #include #include #include #include #include namespace WebCore { static const UChar lineBreakTableFirstCharacter = '!'; static const UChar lineBreakTableLastCharacter = 127; static const unsigned lineBreakTableColumnCount = (lineBreakTableLastCharacter - lineBreakTableFirstCharacter) / 8 + 1; WEBCORE_EXPORT extern const unsigned char lineBreakTable[][lineBreakTableColumnCount]; enum class NonBreakingSpaceBehavior { IgnoreNonBreakingSpace, TreatNonBreakingSpaceAsBreak, }; enum class CanUseShortcut { Yes, No }; template static inline bool isBreakableSpace(UChar character) { switch (character) { case ' ': case '\n': case '\t': return true; case noBreakSpace: return nonBreakingSpaceBehavior == NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak; default: return false; } } inline bool shouldBreakAfter(UChar lastCharacter, UChar character, UChar nextCharacter) { // Don't allow line breaking between '-' and a digit if the '-' may mean a minus sign in the context, // while allow breaking in 'ABCD-1234' and '1234-5678' which may be in long URLs. if (character == '-' && isASCIIDigit(nextCharacter)) return isASCIIAlphanumeric(lastCharacter); // If both ch and nextCh are ASCII characters, use a lookup table for enhanced speed and for compatibility // with other browsers (see comments for asciiLineBreakTable for details). if (character >= lineBreakTableFirstCharacter && character <= lineBreakTableLastCharacter && nextCharacter >= lineBreakTableFirstCharacter && nextCharacter <= lineBreakTableLastCharacter) { const unsigned char* tableRow = lineBreakTable[character - lineBreakTableFirstCharacter]; unsigned nextCharacterIndex = nextCharacter - lineBreakTableFirstCharacter; return tableRow[nextCharacterIndex / 8] & (1 << (nextCharacterIndex % 8)); } // Otherwise defer to the Unicode algorithm by returning false. return false; } template inline bool needsLineBreakIterator(UChar character) { if (nonBreakingSpaceBehavior == NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak) return character > lineBreakTableLastCharacter; return character > lineBreakTableLastCharacter && character != noBreakSpace; } // When in non-loose mode, we can use the ASCII shortcut table. template inline unsigned nextBreakablePosition(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* string, unsigned length, unsigned startPosition) { Optional nextBreak; CharacterType lastLastCharacter = startPosition > 1 ? string[startPosition - 2] : static_cast(lazyBreakIterator.secondToLastCharacter()); CharacterType lastCharacter = startPosition > 0 ? string[startPosition - 1] : static_cast(lazyBreakIterator.lastCharacter()); unsigned priorContextLength = lazyBreakIterator.priorContextLength(); for (unsigned i = startPosition; i < length; i++) { CharacterType character = string[i]; if (isBreakableSpace(character) || (canUseShortcut == CanUseShortcut::Yes && shouldBreakAfter(lastLastCharacter, lastCharacter, character))) return i; if (canUseShortcut == CanUseShortcut::No || needsLineBreakIterator(character) || needsLineBreakIterator(lastCharacter)) { if (!nextBreak || nextBreak.value() < i) { // Don't break if positioned at start of primary context and there is no prior context. if (i || priorContextLength) { UBreakIterator* breakIterator = lazyBreakIterator.get(priorContextLength); if (breakIterator) { int candidate = ubrk_following(breakIterator, i - 1 + priorContextLength); if (candidate == UBRK_DONE) nextBreak = WTF::nullopt; else { unsigned result = candidate; ASSERT(result >= priorContextLength); nextBreak = result - priorContextLength; } } } } if (i == nextBreak && !isBreakableSpace(lastCharacter)) return i; } lastLastCharacter = lastCharacter; lastCharacter = character; } return length; } template inline unsigned nextBreakablePositionKeepingAllWords(const CharacterType* string, unsigned length, unsigned startPosition) { for (unsigned i = startPosition; i < length; i++) { if (isBreakableSpace(string[i])) return i; } return length; } inline unsigned nextBreakablePositionKeepingAllWords(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition) { auto stringView = lazyBreakIterator.stringView(); if (stringView.is8Bit()) return nextBreakablePositionKeepingAllWords(stringView.characters8(), stringView.length(), startPosition); return nextBreakablePositionKeepingAllWords(stringView.characters16(), stringView.length(), startPosition); } inline unsigned nextBreakablePositionKeepingAllWordsIgnoringNBSP(LazyLineBreakIterator& iterator, unsigned startPosition) { auto stringView = iterator.stringView(); if (stringView.is8Bit()) return nextBreakablePositionKeepingAllWords(stringView.characters8(), stringView.length(), startPosition); return nextBreakablePositionKeepingAllWords(stringView.characters16(), stringView.length(), startPosition); } inline unsigned nextBreakablePosition(LazyLineBreakIterator& iterator, unsigned startPosition) { auto stringView = iterator.stringView(); if (stringView.is8Bit()) return nextBreakablePosition(iterator, stringView.characters8(), stringView.length(), startPosition); return nextBreakablePosition(iterator, stringView.characters16(), stringView.length(), startPosition); } inline unsigned nextBreakablePositionIgnoringNBSP(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition) { auto stringView = lazyBreakIterator.stringView(); if (stringView.is8Bit()) return nextBreakablePosition(lazyBreakIterator, stringView.characters8(), stringView.length(), startPosition); return nextBreakablePosition(lazyBreakIterator, stringView.characters16(), stringView.length(), startPosition); } inline unsigned nextBreakablePositionWithoutShortcut(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition) { auto stringView = lazyBreakIterator.stringView(); if (stringView.is8Bit()) return nextBreakablePosition(lazyBreakIterator, stringView.characters8(), stringView.length(), startPosition); return nextBreakablePosition(lazyBreakIterator, stringView.characters16(), stringView.length(), startPosition); } inline unsigned nextBreakablePositionIgnoringNBSPWithoutShortcut(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition) { auto stringView = lazyBreakIterator.stringView(); if (stringView.is8Bit()) return nextBreakablePosition(lazyBreakIterator, stringView.characters8(), stringView.length(), startPosition); return nextBreakablePosition(lazyBreakIterator, stringView.characters16(), stringView.length(), startPosition); } inline unsigned nextBreakablePositionBreakCharacter(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition) { auto stringView = lazyBreakIterator.stringView(); ASSERT(startPosition <= stringView.length()); // FIXME: Can/Should we implement this using a Shared Iterator (performance issue) // https://bugs.webkit.org/show_bug.cgi?id=197876 NonSharedCharacterBreakIterator iterator(stringView); Optional next = ubrk_following(iterator, startPosition); return next.valueOr(stringView.length()); } inline bool isBreakable(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition, Optional& nextBreakable, bool breakNBSP, bool canUseShortcut, bool keepAllWords, bool breakAnywhere) { if (nextBreakable && nextBreakable.value() >= startPosition) return startPosition == nextBreakable; if (breakAnywhere) nextBreakable = nextBreakablePositionBreakCharacter(lazyBreakIterator, startPosition); else if (keepAllWords) { if (breakNBSP) nextBreakable = nextBreakablePositionKeepingAllWords(lazyBreakIterator, startPosition); else nextBreakable = nextBreakablePositionKeepingAllWordsIgnoringNBSP(lazyBreakIterator, startPosition); } else if (!canUseShortcut) { if (breakNBSP) nextBreakable = nextBreakablePositionWithoutShortcut(lazyBreakIterator, startPosition); else nextBreakable = nextBreakablePositionIgnoringNBSPWithoutShortcut(lazyBreakIterator, startPosition); } else { if (breakNBSP) nextBreakable = nextBreakablePosition(lazyBreakIterator, startPosition); else nextBreakable = nextBreakablePositionIgnoringNBSP(lazyBreakIterator, startPosition); } return startPosition == nextBreakable; } } // namespace WebCore