mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-02 15:15:23 +00:00
ac77c942fa
text in runs and changed the text transformer code to do the buffering instead. It was already copying the transformed text into its internal buffer anyway, so this saves the extra copy
1144 lines
32 KiB
C++
1144 lines
32 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
|
*
|
|
* The contents of this file are subject to the Netscape Public
|
|
* License Version 1.1 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS
|
|
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
* implied. See the License for the specific language governing
|
|
* rights and limitations under the License.
|
|
*
|
|
* The Original Code is Mozilla Communicator client code.
|
|
*
|
|
* The Initial Developer of the Original Code is Netscape Communications
|
|
* Corporation. Portions created by Netscape are
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All
|
|
* Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
*/
|
|
#include "nsCOMPtr.h"
|
|
#include "nsTextTransformer.h"
|
|
#include "nsIContent.h"
|
|
#include "nsIFrame.h"
|
|
#include "nsIStyleContext.h"
|
|
#include "nsITextContent.h"
|
|
#include "nsStyleConsts.h"
|
|
#include "nsILineBreaker.h"
|
|
#include "nsIWordBreaker.h"
|
|
#include "nsHTMLIIDs.h"
|
|
#include "nsIServiceManager.h"
|
|
#include "nsUnicharUtilCIID.h"
|
|
#include "nsICaseConversion.h"
|
|
#include "prenv.h"
|
|
|
|
nsAutoTextBuffer::nsAutoTextBuffer()
|
|
: mBuffer(mAutoBuffer),
|
|
mBufferLen(NS_TEXT_TRANSFORMER_AUTO_WORD_BUF_SIZE)
|
|
{
|
|
}
|
|
|
|
nsAutoTextBuffer::~nsAutoTextBuffer()
|
|
{
|
|
if (mBuffer && (mBuffer != mAutoBuffer)) {
|
|
delete [] mBuffer;
|
|
}
|
|
}
|
|
|
|
nsresult
|
|
nsAutoTextBuffer::GrowBy(PRInt32 aAtLeast, PRBool aCopyToHead)
|
|
{
|
|
PRInt32 newSize = mBufferLen * 2;
|
|
if (newSize < mBufferLen + aAtLeast) {
|
|
newSize = mBufferLen + aAtLeast + 100;
|
|
}
|
|
return GrowTo(newSize, aCopyToHead);
|
|
}
|
|
|
|
nsresult
|
|
nsAutoTextBuffer::GrowTo(PRInt32 aNewSize, PRBool aCopyToHead)
|
|
{
|
|
if (aNewSize > mBufferLen) {
|
|
PRUnichar* newBuffer = new PRUnichar[aNewSize];
|
|
if (!newBuffer) {
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
nsCRT::memcpy(&newBuffer[aCopyToHead ? 0 : mBufferLen],
|
|
mBuffer, sizeof(PRUnichar) * mBufferLen);
|
|
if (mBuffer != mAutoBuffer) {
|
|
delete [] mBuffer;
|
|
}
|
|
mBuffer = newBuffer;
|
|
mBufferLen = aNewSize;
|
|
}
|
|
return NS_OK;
|
|
}
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
static NS_DEFINE_IID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
|
|
static NS_DEFINE_IID(kICaseConversionIID, NS_ICASECONVERSION_IID);
|
|
|
|
static nsICaseConversion* gCaseConv = nsnull;
|
|
|
|
nsresult
|
|
nsTextTransformer::Initialize()
|
|
{
|
|
nsresult res = NS_OK;
|
|
if (!gCaseConv) {
|
|
res = nsServiceManager::GetService(kUnicharUtilCID, kICaseConversionIID,
|
|
(nsISupports**)&gCaseConv);
|
|
NS_ASSERTION( NS_SUCCEEDED(res), "cannot get UnicharUtil");
|
|
NS_ASSERTION( gCaseConv != NULL, "cannot get UnicharUtil");
|
|
}
|
|
return res;
|
|
}
|
|
|
|
void
|
|
nsTextTransformer::Shutdown()
|
|
{
|
|
if (gCaseConv) {
|
|
nsServiceManager::ReleaseService(kUnicharUtilCID, gCaseConv);
|
|
gCaseConv = nsnull;
|
|
}
|
|
}
|
|
|
|
// For now, we have only a single character to strip out. If we get
|
|
// any more, change this to use a bitset to lookup into.
|
|
#define IS_DISCARDED(_ch) \
|
|
((_ch) == CH_SHY)
|
|
|
|
#define MAX_UNIBYTE 127
|
|
|
|
MOZ_DECL_CTOR_COUNTER(nsTextTransformer);
|
|
|
|
nsTextTransformer::nsTextTransformer(nsILineBreaker* aLineBreaker,
|
|
nsIWordBreaker* aWordBreaker)
|
|
: mHasMultibyte(PR_FALSE),
|
|
mFrag(nsnull),
|
|
mOffset(0),
|
|
mTextTransform(NS_STYLE_TEXT_TRANSFORM_NONE),
|
|
mMode(eNormal),
|
|
mLineBreaker(aLineBreaker),
|
|
mWordBreaker(aWordBreaker),
|
|
mBufferPos(0)
|
|
{
|
|
MOZ_COUNT_CTOR(nsTextTransformer);
|
|
|
|
#ifdef DEBUG
|
|
static PRBool firstTime = PR_TRUE;
|
|
if (firstTime) {
|
|
firstTime = PR_FALSE;
|
|
SelfTest(aLineBreaker, aWordBreaker);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
nsTextTransformer::~nsTextTransformer()
|
|
{
|
|
MOZ_COUNT_DTOR(nsTextTransformer);
|
|
}
|
|
|
|
nsresult
|
|
nsTextTransformer::Init(nsIFrame* aFrame,
|
|
nsIContent* aContent,
|
|
PRInt32 aStartingOffset)
|
|
{
|
|
// Get the contents text content
|
|
nsresult rv;
|
|
nsCOMPtr<nsITextContent> tc = do_QueryInterface(aContent, &rv);
|
|
if (tc.get()) {
|
|
tc->GetText(&mFrag);
|
|
|
|
// Sanitize aStartingOffset
|
|
if (NS_WARN_IF_FALSE(aStartingOffset >= 0, "bad starting offset")) {
|
|
aStartingOffset = 0;
|
|
}
|
|
else if (NS_WARN_IF_FALSE(aStartingOffset <= mFrag->GetLength(),
|
|
"bad starting offset")) {
|
|
aStartingOffset = mFrag->GetLength();
|
|
}
|
|
mOffset = aStartingOffset;
|
|
|
|
// Get the frames text style information
|
|
const nsStyleText* styleText;
|
|
aFrame->GetStyleData(eStyleStruct_Text, (const nsStyleStruct*&) styleText);
|
|
if (NS_STYLE_WHITESPACE_PRE == styleText->mWhiteSpace) {
|
|
mMode = ePreformatted;
|
|
}
|
|
else if (NS_STYLE_WHITESPACE_MOZ_PRE_WRAP == styleText->mWhiteSpace) {
|
|
mMode = ePreWrap;
|
|
}
|
|
mTextTransform = styleText->mTextTransform;
|
|
}
|
|
return rv;
|
|
}
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
// wordlen==1, contentlen=newOffset-currentOffset, isWhitespace=t
|
|
PRInt32
|
|
nsTextTransformer::ScanNormalWhiteSpace_F()
|
|
{
|
|
const nsTextFragment* frag = mFrag;
|
|
PRInt32 fragLen = frag->GetLength();
|
|
PRInt32 offset = mOffset;
|
|
|
|
for (; offset < fragLen; offset++) {
|
|
PRUnichar ch = frag->CharAt(offset);
|
|
if (!XP_IS_SPACE(ch)) {
|
|
// If character is not discardable then stop looping, otherwise
|
|
// let the discarded character collapse with the other spaces.
|
|
if (!IS_DISCARDED(ch)) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
mTransformBuf.mBuffer[mBufferPos++] = ' ';
|
|
return offset;
|
|
}
|
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
|
|
PRInt32
|
|
nsTextTransformer::ScanNormalAsciiText_F(PRInt32* aWordLen)
|
|
{
|
|
const nsTextFragment* frag = mFrag;
|
|
PRInt32 fragLen = frag->GetLength();
|
|
PRInt32 offset = mOffset;
|
|
PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos;
|
|
PRUnichar* endbp = mTransformBuf.GetBufferEnd();
|
|
PRInt32 prevBufferPos = mBufferPos;
|
|
const unsigned char* cp = (const unsigned char*)frag->Get1b();
|
|
cp += offset;
|
|
|
|
for (; offset < fragLen; offset++) {
|
|
PRUnichar ch = *cp++;
|
|
if (XP_IS_SPACE(ch)) {
|
|
break;
|
|
}
|
|
if (CH_NBSP == ch) {
|
|
ch = ' ';
|
|
}
|
|
else if (IS_DISCARDED(ch)) {
|
|
// Strip discarded characters from the transformed output
|
|
continue;
|
|
}
|
|
if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE;
|
|
if (bp == endbp) {
|
|
PRInt32 oldLength = bp - mTransformBuf.GetBuffer();
|
|
nsresult rv = mTransformBuf.GrowBy(1000);
|
|
if (NS_FAILED(rv)) {
|
|
// If we run out of space (unlikely) then just chop the input
|
|
break;
|
|
}
|
|
bp = mTransformBuf.GetBuffer() + oldLength;
|
|
endbp = mTransformBuf.GetBufferEnd();
|
|
}
|
|
*bp++ = ch;
|
|
mBufferPos++;
|
|
}
|
|
|
|
*aWordLen = mBufferPos - prevBufferPos;
|
|
return offset;
|
|
}
|
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
|
|
PRInt32
|
|
nsTextTransformer::ScanNormalUnicodeText_F(PRBool aForLineBreak,
|
|
PRInt32* aWordLen)
|
|
{
|
|
const nsTextFragment* frag = mFrag;
|
|
const PRUnichar* cp0 = frag->Get2b();
|
|
PRInt32 fragLen = frag->GetLength();
|
|
PRInt32 offset = mOffset;
|
|
|
|
PRUnichar firstChar = frag->CharAt(offset++);
|
|
if (CH_NBSP == firstChar) {
|
|
firstChar = ' ';
|
|
}
|
|
mTransformBuf.mBuffer[mBufferPos++] = firstChar;
|
|
if (firstChar > MAX_UNIBYTE) mHasMultibyte = PR_TRUE;
|
|
|
|
// Only evaluate complex breaking logic if there are more characters
|
|
// beyond the first to look at.
|
|
PRInt32 numChars = 1;
|
|
if (offset < fragLen) {
|
|
const PRUnichar* cp = cp0 + offset;
|
|
PRBool breakBetween = PR_FALSE;
|
|
if (aForLineBreak) {
|
|
mLineBreaker->BreakInBetween(&firstChar, 1, cp, (fragLen-offset), &breakBetween);
|
|
}
|
|
else {
|
|
mWordBreaker->BreakInBetween(&firstChar, 1, cp, (fragLen-offset), &breakBetween);
|
|
}
|
|
|
|
if (!breakBetween) {
|
|
// Find next position
|
|
PRBool tryNextFrag;
|
|
PRUint32 next;
|
|
if (aForLineBreak) {
|
|
mLineBreaker->Next(cp0, fragLen, offset, &next, &tryNextFrag);
|
|
}
|
|
else {
|
|
mWordBreaker->Next(cp0, fragLen, offset, &next, &tryNextFrag);
|
|
}
|
|
numChars = (PRInt32) (next - (PRUint32) offset) + 1;
|
|
|
|
// Since we know the number of characters we're adding grow the buffer
|
|
// now before we start copying
|
|
nsresult rv = mTransformBuf.GrowTo(mBufferPos + numChars);
|
|
if (NS_FAILED(rv)) {
|
|
numChars = mTransformBuf.GetBufferLength() - mBufferPos;
|
|
}
|
|
|
|
offset += numChars - 1;
|
|
|
|
// 1. convert nbsp into space
|
|
// 2. check for discarded characters
|
|
// 3. check mHasMultibyte flag
|
|
// 4. copy buffer
|
|
PRUnichar* bp = &mTransformBuf.mBuffer[mBufferPos];
|
|
const PRUnichar* end = cp + numChars - 1;
|
|
while (cp < end) {
|
|
PRUnichar ch = *cp++;
|
|
if (CH_NBSP == ch) {
|
|
ch = ' ';
|
|
}
|
|
else if (IS_DISCARDED(ch) || (ch == 0x0a) || (ch == 0x0d)) {
|
|
// Strip discarded characters from the transformed output
|
|
numChars--;
|
|
continue;
|
|
}
|
|
if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE;
|
|
*bp++ = ch;
|
|
mBufferPos++;
|
|
}
|
|
}
|
|
}
|
|
|
|
*aWordLen = numChars;
|
|
return offset;
|
|
}
|
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=t
|
|
PRInt32
|
|
nsTextTransformer::ScanPreWrapWhiteSpace_F(PRInt32* aWordLen)
|
|
{
|
|
const nsTextFragment* frag = mFrag;
|
|
PRInt32 fragLen = frag->GetLength();
|
|
PRInt32 offset = mOffset;
|
|
PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos;
|
|
PRUnichar* endbp = mTransformBuf.GetBufferEnd();
|
|
PRInt32 prevBufferPos = mBufferPos;
|
|
|
|
for (; offset < fragLen; offset++) {
|
|
// This function is used for both Unicode and ascii strings so don't
|
|
// make any assumptions about what kind of data it is
|
|
PRUnichar ch = frag->CharAt(offset);
|
|
if (!XP_IS_SPACE(ch) || (ch == '\t') || (ch == '\n')) {
|
|
if (IS_DISCARDED(ch)) {
|
|
// Keep looping if this is a discarded character
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
if (bp == endbp) {
|
|
PRInt32 oldLength = bp - mTransformBuf.GetBuffer();
|
|
nsresult rv = mTransformBuf.GrowBy(1000);
|
|
if (NS_FAILED(rv)) {
|
|
// If we run out of space (unlikely) then just chop the input
|
|
break;
|
|
}
|
|
bp = mTransformBuf.GetBuffer() + oldLength;
|
|
endbp = mTransformBuf.GetBufferEnd();
|
|
}
|
|
*bp++ = ' ';
|
|
mBufferPos++;
|
|
}
|
|
|
|
*aWordLen = mBufferPos - prevBufferPos;
|
|
return offset;
|
|
}
|
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
|
|
PRInt32
|
|
nsTextTransformer::ScanPreData_F(PRInt32* aWordLen)
|
|
{
|
|
const nsTextFragment* frag = mFrag;
|
|
PRInt32 fragLen = frag->GetLength();
|
|
PRInt32 offset = mOffset;
|
|
PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos;
|
|
PRUnichar* endbp = mTransformBuf.GetBufferEnd();
|
|
PRInt32 prevBufferPos = mBufferPos;
|
|
|
|
for (; offset < fragLen; offset++) {
|
|
// This function is used for both Unicode and ascii strings so don't
|
|
// make any assumptions about what kind of data it is
|
|
PRUnichar ch = frag->CharAt(offset);
|
|
if ((ch == '\t') || (ch == '\n')) {
|
|
break;
|
|
}
|
|
if (CH_NBSP == ch) {
|
|
ch = ' ';
|
|
}
|
|
else if (IS_DISCARDED(ch)) {
|
|
continue;
|
|
}
|
|
if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE;
|
|
if (bp == endbp) {
|
|
PRInt32 oldLength = bp - mTransformBuf.GetBuffer();
|
|
nsresult rv = mTransformBuf.GrowBy(1000);
|
|
if (NS_FAILED(rv)) {
|
|
// If we run out of space (unlikely) then just chop the input
|
|
break;
|
|
}
|
|
bp = mTransformBuf.GetBuffer() + oldLength;
|
|
endbp = mTransformBuf.GetBufferEnd();
|
|
}
|
|
*bp++ = ch;
|
|
mBufferPos++;
|
|
}
|
|
|
|
*aWordLen = mBufferPos - prevBufferPos;
|
|
return offset;
|
|
}
|
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
|
|
PRInt32
|
|
nsTextTransformer::ScanPreAsciiData_F(PRInt32* aWordLen)
|
|
{
|
|
const nsTextFragment* frag = mFrag;
|
|
PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos;
|
|
PRUnichar* endbp = mTransformBuf.GetBufferEnd();
|
|
const unsigned char* cp = (const unsigned char*) frag->Get1b();
|
|
const unsigned char* end = cp + frag->GetLength();
|
|
PRInt32 prevBufferPos = mBufferPos;
|
|
cp += mOffset;
|
|
|
|
while (cp < end) {
|
|
PRUnichar ch = (PRUnichar) *cp++;
|
|
if ((ch == '\t') || (ch == '\n')) {
|
|
cp--;
|
|
break;
|
|
}
|
|
if (CH_NBSP == ch) {
|
|
ch = ' ';
|
|
}
|
|
else if (IS_DISCARDED(ch)) {
|
|
continue;
|
|
}
|
|
if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE;
|
|
if (bp == endbp) {
|
|
PRInt32 oldLength = bp - mTransformBuf.GetBuffer();
|
|
nsresult rv = mTransformBuf.GrowBy(1000);
|
|
if (NS_FAILED(rv)) {
|
|
// If we run out of space (unlikely) then just chop the input
|
|
break;
|
|
}
|
|
bp = mTransformBuf.GetBuffer() + oldLength;
|
|
endbp = mTransformBuf.GetBufferEnd();
|
|
}
|
|
*bp++ = ch;
|
|
mBufferPos++;
|
|
}
|
|
|
|
*aWordLen = mBufferPos - prevBufferPos;
|
|
return cp - ((const unsigned char*)frag->Get1b());
|
|
}
|
|
|
|
//----------------------------------------
|
|
|
|
PRUnichar*
|
|
nsTextTransformer::GetNextWord(PRBool aInWord,
|
|
PRInt32* aWordLenResult,
|
|
PRInt32* aContentLenResult,
|
|
PRBool* aIsWhiteSpaceResult,
|
|
PRBool aResetTransformBuf,
|
|
PRBool aForLineBreak)
|
|
{
|
|
const nsTextFragment* frag = mFrag;
|
|
PRInt32 fragLen = frag->GetLength();
|
|
PRInt32 offset = mOffset;
|
|
PRInt32 wordLen = 0;
|
|
PRBool isWhitespace = PR_FALSE;
|
|
PRUnichar* result = nsnull;
|
|
PRBool prevBufferPos;
|
|
|
|
// See if we should reset the current buffer position back to the
|
|
// beginning of the buffer
|
|
if (aResetTransformBuf) {
|
|
mBufferPos = 0;
|
|
}
|
|
prevBufferPos = mBufferPos;
|
|
|
|
// Fix word breaking problem w/ PREFORMAT and PREWRAP
|
|
// for word breaking, we should really go to the normal code
|
|
if((! aForLineBreak) && (eNormal != mMode))
|
|
mMode = eNormal;
|
|
|
|
while (offset < fragLen) {
|
|
PRUnichar firstChar = frag->CharAt(offset);
|
|
|
|
// Eat up any discarded characters before dispatching
|
|
if (IS_DISCARDED(firstChar)) {
|
|
offset++;
|
|
continue;
|
|
}
|
|
|
|
switch (mMode) {
|
|
default:
|
|
case eNormal:
|
|
if (XP_IS_SPACE(firstChar)) {
|
|
offset = ScanNormalWhiteSpace_F();
|
|
wordLen = 1;
|
|
isWhitespace = PR_TRUE;
|
|
}
|
|
else if (frag->Is2b()) {
|
|
offset = ScanNormalUnicodeText_F(aForLineBreak, &wordLen);
|
|
}
|
|
else {
|
|
offset = ScanNormalAsciiText_F(&wordLen);
|
|
}
|
|
break;
|
|
|
|
case ePreformatted:
|
|
if (('\n' == firstChar) || ('\t' == firstChar)) {
|
|
mTransformBuf.mBuffer[mBufferPos++] = firstChar;
|
|
offset++;
|
|
wordLen = 1;
|
|
isWhitespace = PR_TRUE;
|
|
}
|
|
else if (frag->Is2b()) {
|
|
offset = ScanPreData_F(&wordLen);
|
|
}
|
|
else {
|
|
offset = ScanPreAsciiData_F(&wordLen);
|
|
}
|
|
break;
|
|
|
|
case ePreWrap:
|
|
if (XP_IS_SPACE(firstChar)) {
|
|
if (('\n' == firstChar) || ('\t' == firstChar)) {
|
|
mTransformBuf.mBuffer[mBufferPos++] = firstChar;
|
|
offset++;
|
|
wordLen = 1;
|
|
}
|
|
else {
|
|
offset = ScanPreWrapWhiteSpace_F(&wordLen);
|
|
}
|
|
isWhitespace = PR_TRUE;
|
|
}
|
|
else if (frag->Is2b()) {
|
|
offset = ScanNormalUnicodeText_F(aForLineBreak, &wordLen);
|
|
}
|
|
else {
|
|
offset = ScanNormalAsciiText_F(&wordLen);
|
|
}
|
|
break;
|
|
}
|
|
result = &mTransformBuf.mBuffer[prevBufferPos];
|
|
|
|
if (!isWhitespace) {
|
|
switch (mTextTransform) {
|
|
case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
|
|
gCaseConv->ToTitle(result, result, wordLen, !aInWord);
|
|
break;
|
|
case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
|
|
gCaseConv->ToLower(result, result, wordLen);
|
|
break;
|
|
case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
|
|
gCaseConv->ToUpper(result, result, wordLen);
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
*aWordLenResult = wordLen;
|
|
*aContentLenResult = offset - mOffset;
|
|
*aIsWhiteSpaceResult = isWhitespace;
|
|
|
|
mOffset = offset;
|
|
|
|
return result;
|
|
}
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
// wordlen==1, contentlen=newOffset-currentOffset, isWhitespace=t
|
|
PRInt32
|
|
nsTextTransformer::ScanNormalWhiteSpace_B()
|
|
{
|
|
const nsTextFragment* frag = mFrag;
|
|
PRInt32 offset = mOffset;
|
|
|
|
while (--offset >= 0) {
|
|
PRUnichar ch = frag->CharAt(offset);
|
|
if (!XP_IS_SPACE(ch)) {
|
|
// If character is not discardable then stop looping, otherwise
|
|
// let the discarded character collapse with the other spaces.
|
|
if (!IS_DISCARDED(ch)) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = ' ';
|
|
return offset;
|
|
}
|
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
|
|
PRInt32
|
|
nsTextTransformer::ScanNormalAsciiText_B(PRInt32* aWordLen)
|
|
{
|
|
const nsTextFragment* frag = mFrag;
|
|
PRInt32 offset = mOffset;
|
|
PRUnichar* bp = mTransformBuf.GetBufferEnd();
|
|
PRUnichar* startbp = mTransformBuf.GetBuffer();
|
|
|
|
while (--offset >= 0) {
|
|
PRUnichar ch = frag->CharAt(offset);
|
|
if (XP_IS_SPACE(ch)) {
|
|
break;
|
|
}
|
|
if (CH_NBSP == ch) {
|
|
ch = ' ';
|
|
}
|
|
else if (IS_DISCARDED(ch)) {
|
|
continue;
|
|
}
|
|
if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE;
|
|
if (bp == startbp) {
|
|
PRInt32 oldLength = mTransformBuf.mBufferLen;
|
|
nsresult rv = mTransformBuf.GrowBy(1000);
|
|
if (NS_FAILED(rv)) {
|
|
// If we run out of space (unlikely) then just chop the input
|
|
break;
|
|
}
|
|
bp = mTransformBuf.GetBufferEnd() - oldLength;
|
|
startbp = mTransformBuf.GetBuffer();
|
|
}
|
|
*--bp = ch;
|
|
}
|
|
|
|
*aWordLen = mTransformBuf.GetBufferEnd() - bp;
|
|
return offset;
|
|
}
|
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
|
|
PRInt32
|
|
nsTextTransformer::ScanNormalUnicodeText_B(PRBool aForLineBreak,
|
|
PRInt32* aWordLen)
|
|
{
|
|
const nsTextFragment* frag = mFrag;
|
|
const PRUnichar* cp0 = frag->Get2b();
|
|
PRInt32 offset = mOffset - 1;
|
|
|
|
PRUnichar firstChar = frag->CharAt(offset);
|
|
if (CH_NBSP == firstChar) {
|
|
firstChar = ' ';
|
|
}
|
|
mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = firstChar;
|
|
if (firstChar > MAX_UNIBYTE) mHasMultibyte = PR_TRUE;
|
|
|
|
PRInt32 numChars = 1;
|
|
if (offset > 0) {
|
|
const PRUnichar* cp = cp0 + offset;
|
|
PRBool breakBetween = PR_FALSE;
|
|
if (aForLineBreak) {
|
|
mLineBreaker->BreakInBetween(cp0, offset + 1,
|
|
mTransformBuf.GetBufferEnd()-1, 1,
|
|
&breakBetween);
|
|
}
|
|
else {
|
|
mWordBreaker->BreakInBetween(cp0, offset + 1,
|
|
mTransformBuf.GetBufferEnd()-1, 1,
|
|
&breakBetween);
|
|
}
|
|
|
|
if (!breakBetween) {
|
|
// Find next position
|
|
PRBool tryPrevFrag;
|
|
PRUint32 prev;
|
|
if (aForLineBreak) {
|
|
mLineBreaker->Prev(cp0, offset, offset, &prev, &tryPrevFrag);
|
|
}
|
|
else {
|
|
mWordBreaker->Prev(cp0, offset, offset, &prev, &tryPrevFrag);
|
|
}
|
|
numChars = (PRInt32) ((PRUint32) offset - prev) + 1;
|
|
|
|
// Grow buffer before copying
|
|
nsresult rv = mTransformBuf.GrowTo(numChars);
|
|
if (NS_FAILED(rv)) {
|
|
numChars = mTransformBuf.GetBufferLength();
|
|
}
|
|
|
|
// 1. convert nbsp into space
|
|
// 2. check mHasMultibyte flag
|
|
// 3. copy buffer
|
|
PRUnichar* bp = mTransformBuf.GetBufferEnd() - 1;
|
|
const PRUnichar* end = cp - numChars + 1;
|
|
while (cp > end) {
|
|
PRUnichar ch = *--cp;
|
|
if (CH_NBSP == ch) {
|
|
ch = ' ';
|
|
}
|
|
else if (IS_DISCARDED(ch)) {
|
|
continue;
|
|
}
|
|
if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE;
|
|
*--bp = ch;
|
|
}
|
|
|
|
// Recompute offset and numChars in case we stripped something
|
|
offset = offset - numChars;
|
|
numChars = mTransformBuf.GetBufferEnd() - bp;
|
|
}
|
|
}
|
|
|
|
*aWordLen = numChars;
|
|
return offset;
|
|
}
|
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=t
|
|
PRInt32
|
|
nsTextTransformer::ScanPreWrapWhiteSpace_B(PRInt32* aWordLen)
|
|
{
|
|
const nsTextFragment* frag = mFrag;
|
|
PRInt32 offset = mOffset;
|
|
PRUnichar* bp = mTransformBuf.GetBufferEnd();
|
|
PRUnichar* startbp = mTransformBuf.GetBuffer();
|
|
|
|
while (--offset >= 0) {
|
|
PRUnichar ch = frag->CharAt(offset);
|
|
if (!XP_IS_SPACE(ch) || (ch == '\t') || (ch == '\n')) {
|
|
// Keep looping if this is a discarded character
|
|
if (IS_DISCARDED(ch)) {
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
if (bp == startbp) {
|
|
PRInt32 oldLength = mTransformBuf.mBufferLen;
|
|
nsresult rv = mTransformBuf.GrowBy(1000);
|
|
if (NS_FAILED(rv)) {
|
|
// If we run out of space (unlikely) then just chop the input
|
|
break;
|
|
}
|
|
bp = mTransformBuf.GetBufferEnd() - oldLength;
|
|
startbp = mTransformBuf.GetBuffer();
|
|
}
|
|
*--bp = ' ';
|
|
}
|
|
|
|
*aWordLen = mTransformBuf.GetBufferEnd() - bp;
|
|
return offset;
|
|
}
|
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
|
|
PRInt32
|
|
nsTextTransformer::ScanPreData_B(PRInt32* aWordLen)
|
|
{
|
|
const nsTextFragment* frag = mFrag;
|
|
PRInt32 offset = mOffset;
|
|
PRUnichar* bp = mTransformBuf.GetBufferEnd();
|
|
PRUnichar* startbp = mTransformBuf.GetBuffer();
|
|
|
|
while (--offset >= 0) {
|
|
PRUnichar ch = frag->CharAt(offset);
|
|
if ((ch == '\t') || (ch == '\n')) {
|
|
break;
|
|
}
|
|
if (CH_NBSP == ch) {
|
|
ch = ' ';
|
|
}
|
|
else if (IS_DISCARDED(ch)) {
|
|
continue;
|
|
}
|
|
if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE;
|
|
if (bp == startbp) {
|
|
PRInt32 oldLength = mTransformBuf.mBufferLen;
|
|
nsresult rv = mTransformBuf.GrowBy(1000);
|
|
if (NS_FAILED(rv)) {
|
|
// If we run out of space (unlikely) then just chop the input
|
|
offset++;
|
|
break;
|
|
}
|
|
bp = mTransformBuf.GetBufferEnd() - oldLength;
|
|
startbp = mTransformBuf.GetBuffer();
|
|
}
|
|
*--bp = ch;
|
|
}
|
|
|
|
*aWordLen = mTransformBuf.GetBufferEnd() - bp;
|
|
return offset;
|
|
}
|
|
|
|
//----------------------------------------
|
|
|
|
PRUnichar*
|
|
nsTextTransformer::GetPrevWord(PRBool aInWord,
|
|
PRInt32* aWordLenResult,
|
|
PRInt32* aContentLenResult,
|
|
PRBool* aIsWhiteSpaceResult,
|
|
PRBool aForLineBreak)
|
|
{
|
|
const nsTextFragment* frag = mFrag;
|
|
PRInt32 offset = mOffset;
|
|
PRInt32 wordLen = 0;
|
|
PRBool isWhitespace = PR_FALSE;
|
|
PRUnichar* result = nsnull;
|
|
|
|
// Fix word breaking problem w/ PREFORMAT and PREWRAP
|
|
// for word breaking, we should really go to the normal code
|
|
if((! aForLineBreak) && (eNormal != mMode))
|
|
mMode = eNormal;
|
|
|
|
while (--offset >= 0) {
|
|
PRUnichar firstChar = frag->CharAt(offset);
|
|
|
|
// Eat up any discarded characters before dispatching
|
|
if (IS_DISCARDED(firstChar)) {
|
|
continue;
|
|
}
|
|
|
|
switch (mMode) {
|
|
default:
|
|
case eNormal:
|
|
if (XP_IS_SPACE(firstChar)) {
|
|
offset = ScanNormalWhiteSpace_B();
|
|
wordLen = 1;
|
|
isWhitespace = PR_TRUE;
|
|
}
|
|
else if (frag->Is2b()) {
|
|
offset = ScanNormalUnicodeText_B(aForLineBreak, &wordLen);
|
|
}
|
|
else {
|
|
offset = ScanNormalAsciiText_B(&wordLen);
|
|
}
|
|
break;
|
|
|
|
case ePreformatted:
|
|
if (('\n' == firstChar) || ('\t' == firstChar)) {
|
|
mTransformBuf.mBuffer[mTransformBuf.mBufferLen-1] = firstChar;
|
|
offset--; // make sure we overshoot
|
|
wordLen = 1;
|
|
isWhitespace = PR_TRUE;
|
|
}
|
|
else {
|
|
offset = ScanPreData_B(&wordLen);
|
|
}
|
|
break;
|
|
|
|
case ePreWrap:
|
|
if (XP_IS_SPACE(firstChar)) {
|
|
if (('\n' == firstChar) || ('\t' == firstChar)) {
|
|
mTransformBuf.mBuffer[mTransformBuf.mBufferLen-1] = firstChar;
|
|
offset--; // make sure we overshoot
|
|
wordLen = 1;
|
|
}
|
|
else {
|
|
offset = ScanPreWrapWhiteSpace_B(&wordLen);
|
|
}
|
|
isWhitespace = PR_TRUE;
|
|
}
|
|
else if (frag->Is2b()) {
|
|
offset = ScanNormalUnicodeText_B(aForLineBreak, &wordLen);
|
|
}
|
|
else {
|
|
offset = ScanNormalAsciiText_B(&wordLen);
|
|
}
|
|
break;
|
|
}
|
|
|
|
// Backwards scanning routines *always* overshoot by one for the
|
|
// returned offset value.
|
|
offset = offset + 1;
|
|
|
|
result = mTransformBuf.GetBufferEnd() - wordLen;
|
|
|
|
if (!isWhitespace) {
|
|
switch (mTextTransform) {
|
|
case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
|
|
gCaseConv->ToTitle(result, result, wordLen, !aInWord);
|
|
break;
|
|
case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
|
|
gCaseConv->ToLower(result, result, wordLen);
|
|
break;
|
|
case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
|
|
gCaseConv->ToUpper(result, result, wordLen);
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
*aWordLenResult = wordLen;
|
|
*aContentLenResult = mOffset - offset;
|
|
*aIsWhiteSpaceResult = isWhitespace;
|
|
|
|
mOffset = offset;
|
|
|
|
return result;
|
|
}
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
// Self test logic for this class. This will (hopefully) make sure
|
|
// that the forward and backward word iterator methods continue to
|
|
// function as people change things...
|
|
|
|
#ifdef DEBUG
|
|
struct SelfTestSection {
|
|
int length;
|
|
int* data;
|
|
};
|
|
|
|
#define NUM_MODES 3
|
|
|
|
struct SelfTestData {
|
|
const PRUnichar* text;
|
|
SelfTestSection modes[NUM_MODES];
|
|
};
|
|
|
|
static PRUint8 preModeValue[NUM_MODES] = {
|
|
NS_STYLE_WHITESPACE_NORMAL,
|
|
NS_STYLE_WHITESPACE_PRE,
|
|
NS_STYLE_WHITESPACE_MOZ_PRE_WRAP
|
|
};
|
|
|
|
static PRUnichar test1text[] = {
|
|
'o', 'n', 'c', 'e', ' ', 'u', 'p', 'o', 'n', '\t',
|
|
'a', ' ', 's', 'h', 'o', 'r', 't', ' ', 't', 'i', 'm', 'e', 0
|
|
};
|
|
static int test1Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
|
|
static int test1PreResults[] = { 9, 1, 12 };
|
|
static int test1PreWrapResults[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
|
|
|
|
static PRUnichar test2text[] = {
|
|
0xF6, 'n', 'c', 'e', ' ', 0xFB, 'p', 'o', 'n', '\t',
|
|
0xE3, ' ', 's', 'h', 0xF3, 'r', 't', ' ', 't', 0xEE, 'm', 'e', ' ', 0
|
|
};
|
|
static int test2Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4, 1 };
|
|
static int test2PreResults[] = { 9, 1, 13 };
|
|
static int test2PreWrapResults[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4, 1 };
|
|
|
|
static PRUnichar test3text[] = {
|
|
0x0152, 'n', 'c', 'e', ' ', 'x', 'y', '\t', 'z', 'y', ' ', 0
|
|
};
|
|
static int test3Results[] = { 4, 1, 2, 1, 2, 1, };
|
|
static int test3PreResults[] = { 7, 1, 3, };
|
|
static int test3PreWrapResults[] = { 4, 1, 2, 1, 2, 1, };
|
|
|
|
static PRUnichar test4text[] = {
|
|
'o', 'n', CH_SHY, 'c', 'e', ' ', CH_SHY, ' ', 'u', 'p', 'o', 'n', '\t',
|
|
'a', ' ', 's', 'h', 'o', 'r', 't', ' ', 't', 'i', 'm', 'e', 0
|
|
};
|
|
static int test4Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
|
|
static int test4PreResults[] = { 10, 1, 12 };
|
|
static int test4PreWrapResults[] = { 4, 2, 4, 1, 1, 1, 5, 1, 4 };
|
|
|
|
static PRUnichar test5text[] = {
|
|
CH_SHY, 0
|
|
};
|
|
static int test5Results[] = { 0 };
|
|
static int test5PreResults[] = { 0 };
|
|
static int test5PreWrapResults[] = { 0 };
|
|
|
|
#if 0
|
|
static PRUnichar test6text[] = {
|
|
0x30d5, 0x30b8, 0x30c6, 0x30ec, 0x30d3, 0x306e, 0x97f3, 0x697d,
|
|
0x756a, 0x7d44, 0x300c, 'H', 'E', 'Y', '!', ' ', 'H', 'E', 'Y', '!',
|
|
'\t', 'H', 'E', 'Y', '!', 0x300d, 0x306e, 0x30db, 0x30fc, 0x30e0,
|
|
0x30da, 0x30fc, 0x30b8, 0x3002, 0
|
|
};
|
|
static int test6Results[] = { 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1,
|
|
5, 1, 4, 1, 5,
|
|
1, 2, 1, 2, 2 };
|
|
static int test6PreResults[] = { 20, 1, 13 };
|
|
static int test6PreWrapResults[] = { 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1,
|
|
5, 1, 4, 1, 5,
|
|
1, 2, 1, 2, 2 };
|
|
#endif
|
|
|
|
static SelfTestData tests[] = {
|
|
{ test1text,
|
|
{ { sizeof(test1Results)/sizeof(int), test1Results, },
|
|
{ sizeof(test1PreResults)/sizeof(int), test1PreResults, },
|
|
{ sizeof(test1PreWrapResults)/sizeof(int), test1PreWrapResults, } }
|
|
},
|
|
{ test2text,
|
|
{ { sizeof(test2Results)/sizeof(int), test2Results, },
|
|
{ sizeof(test2PreResults)/sizeof(int), test2PreResults, },
|
|
{ sizeof(test2PreWrapResults)/sizeof(int), test2PreWrapResults, } }
|
|
},
|
|
{ test3text,
|
|
{ { sizeof(test3Results)/sizeof(int), test3Results, },
|
|
{ sizeof(test3PreResults)/sizeof(int), test3PreResults, },
|
|
{ sizeof(test3PreWrapResults)/sizeof(int), test3PreWrapResults, } }
|
|
},
|
|
{ test4text,
|
|
{ { sizeof(test4Results)/sizeof(int), test4Results, },
|
|
{ sizeof(test4PreResults)/sizeof(int), test4PreResults, },
|
|
{ sizeof(test4PreWrapResults)/sizeof(int), test4PreWrapResults, } }
|
|
},
|
|
{ test5text,
|
|
{ { sizeof(test5Results)/sizeof(int), test5Results, },
|
|
{ sizeof(test5PreResults)/sizeof(int), test5PreResults, },
|
|
{ sizeof(test5PreWrapResults)/sizeof(int), test5PreWrapResults, } }
|
|
},
|
|
#if 0
|
|
{ test6text,
|
|
{ { sizeof(test6Results)/sizeof(int), test6Results, },
|
|
{ sizeof(test6PreResults)/sizeof(int), test6PreResults, },
|
|
{ sizeof(test6PreWrapResults)/sizeof(int), test6PreWrapResults, } }
|
|
},
|
|
#endif
|
|
};
|
|
|
|
#define NUM_TESTS (sizeof(tests) / sizeof(tests[0]))
|
|
|
|
void
|
|
nsTextTransformer::SelfTest(nsILineBreaker* aLineBreaker,
|
|
nsIWordBreaker* aWordBreaker)
|
|
{
|
|
PRBool gNoisy = PR_FALSE;
|
|
if (PR_GetEnv("GECKO_TEXT_TRANSFORMER_NOISY_SELF_TEST")) {
|
|
gNoisy = PR_TRUE;
|
|
}
|
|
|
|
PRBool error = PR_FALSE;
|
|
PRInt32 testNum = 0;
|
|
SelfTestData* st = tests;
|
|
SelfTestData* last = st + NUM_TESTS;
|
|
for (; st < last; st++) {
|
|
PRUnichar* bp;
|
|
PRInt32 wordLen, contentLen;
|
|
PRBool ws;
|
|
|
|
PRBool isAsciiTest = PR_TRUE;
|
|
const PRUnichar* cp = st->text;
|
|
while (*cp) {
|
|
if (*cp > 255) {
|
|
isAsciiTest = PR_FALSE;
|
|
break;
|
|
}
|
|
cp++;
|
|
}
|
|
|
|
nsTextFragment frag(st->text);
|
|
nsTextTransformer tx(aLineBreaker, aWordBreaker);
|
|
|
|
for (PRInt32 preMode = 0; preMode < NUM_MODES; preMode++) {
|
|
// Do forwards test
|
|
if (gNoisy) {
|
|
nsAutoString uc2(st->text);
|
|
printf("%s forwards test: '", isAsciiTest ? "ascii" : "unicode");
|
|
fputs(uc2, stdout);
|
|
printf("'\n");
|
|
}
|
|
tx.Init2(&frag, 0, preModeValue[preMode], NS_STYLE_TEXT_TRANSFORM_NONE);
|
|
|
|
int* expectedResults = st->modes[preMode].data;
|
|
int resultsLen = st->modes[preMode].length;
|
|
|
|
while ((bp = tx.GetNextWord(PR_FALSE, &wordLen, &contentLen, &ws))) {
|
|
if (gNoisy) {
|
|
nsAutoString tmp(bp, wordLen);
|
|
printf(" '");
|
|
fputs(tmp, stdout);
|
|
printf("': ws=%s wordLen=%d (%d) contentLen=%d (offset=%d)\n",
|
|
ws ? "yes" : "no",
|
|
wordLen, *expectedResults, contentLen, tx.mOffset);
|
|
}
|
|
if (*expectedResults != wordLen) {
|
|
error = PR_TRUE;
|
|
break;
|
|
}
|
|
expectedResults++;
|
|
}
|
|
if (expectedResults != st->modes[preMode].data + resultsLen) {
|
|
if (st->modes[preMode].data[0] != 0) {
|
|
error = PR_TRUE;
|
|
}
|
|
}
|
|
|
|
// Do backwards test
|
|
if (gNoisy) {
|
|
nsAutoString uc2(st->text);
|
|
printf("%s backwards test: '", isAsciiTest ? "ascii" : "unicode");
|
|
fputs(uc2, stdout);
|
|
printf("'\n");
|
|
}
|
|
tx.Init2(&frag, frag.GetLength(), NS_STYLE_WHITESPACE_NORMAL,
|
|
NS_STYLE_TEXT_TRANSFORM_NONE);
|
|
expectedResults = st->modes[preMode].data + resultsLen;
|
|
while ((bp = tx.GetPrevWord(PR_FALSE, &wordLen, &contentLen, &ws))) {
|
|
--expectedResults;
|
|
if (gNoisy) {
|
|
nsAutoString tmp(bp, wordLen);
|
|
printf(" '");
|
|
fputs(tmp, stdout);
|
|
printf("': ws=%s wordLen=%d contentLen=%d (offset=%d)\n",
|
|
ws ? "yes" : "no",
|
|
wordLen, contentLen, tx.mOffset);
|
|
}
|
|
if (*expectedResults != wordLen) {
|
|
error = PR_TRUE;
|
|
break;
|
|
}
|
|
}
|
|
if (expectedResults != st->modes[preMode].data) {
|
|
if (st->modes[preMode].data[0] != 0) {
|
|
error = PR_TRUE;
|
|
}
|
|
}
|
|
|
|
if (error) {
|
|
fprintf(stderr, "nsTextTransformer: self test %d failed\n", testNum);
|
|
}
|
|
testNum++;
|
|
}
|
|
}
|
|
if (error) {
|
|
NS_ABORT();
|
|
}
|
|
}
|
|
|
|
nsresult
|
|
nsTextTransformer::Init2(const nsTextFragment* aFrag,
|
|
PRInt32 aStartingOffset,
|
|
PRUint8 aWhiteSpace,
|
|
PRUint8 aTextTransform)
|
|
{
|
|
mFrag = aFrag;
|
|
|
|
// Sanitize aStartingOffset
|
|
if (NS_WARN_IF_FALSE(aStartingOffset >= 0, "bad starting offset")) {
|
|
aStartingOffset = 0;
|
|
}
|
|
else if (NS_WARN_IF_FALSE(aStartingOffset <= mFrag->GetLength(),
|
|
"bad starting offset")) {
|
|
aStartingOffset = mFrag->GetLength();
|
|
}
|
|
mOffset = aStartingOffset;
|
|
|
|
// Get the frames text style information
|
|
if (NS_STYLE_WHITESPACE_PRE == aWhiteSpace) {
|
|
mMode = ePreformatted;
|
|
}
|
|
else if (NS_STYLE_WHITESPACE_MOZ_PRE_WRAP == aWhiteSpace) {
|
|
mMode = ePreWrap;
|
|
}
|
|
mTextTransform = aTextTransform;
|
|
|
|
return NS_OK;
|
|
}
|
|
#endif /* DEBUG */
|