gecko-dev/layout/generic/nsTextTransformer.cpp

1460 lines
42 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Netscape Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is Netscape Communications
* Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All
* Rights Reserved.
*
* Contributor(s):
*/
#include <ctype.h>
#include "nsCOMPtr.h"
#include "nsTextTransformer.h"
#include "nsIContent.h"
#include "nsIFrame.h"
#include "nsIStyleContext.h"
#include "nsITextContent.h"
#include "nsStyleConsts.h"
#include "nsILineBreaker.h"
#include "nsIWordBreaker.h"
#include "nsHTMLIIDs.h"
#include "nsIServiceManager.h"
#include "nsUnicharUtilCIID.h"
#include "nsICaseConversion.h"
#include "prenv.h"
nsAutoTextBuffer::nsAutoTextBuffer()
: mBuffer(mAutoBuffer),
mBufferLen(NS_TEXT_TRANSFORMER_AUTO_WORD_BUF_SIZE)
{
}
nsAutoTextBuffer::~nsAutoTextBuffer()
{
if (mBuffer && (mBuffer != mAutoBuffer)) {
delete [] mBuffer;
}
}
nsresult
nsAutoTextBuffer::GrowBy(PRInt32 aAtLeast, PRBool aCopyToHead)
{
PRInt32 newSize = mBufferLen * 2;
if (newSize < mBufferLen + aAtLeast) {
newSize = mBufferLen + aAtLeast + 100;
}
return GrowTo(newSize, aCopyToHead);
}
nsresult
nsAutoTextBuffer::GrowTo(PRInt32 aNewSize, PRBool aCopyToHead)
{
if (aNewSize > mBufferLen) {
PRUnichar* newBuffer = new PRUnichar[aNewSize];
if (!newBuffer) {
return NS_ERROR_OUT_OF_MEMORY;
}
nsCRT::memcpy(&newBuffer[aCopyToHead ? 0 : mBufferLen],
mBuffer, sizeof(PRUnichar) * mBufferLen);
if (mBuffer != mAutoBuffer) {
delete [] mBuffer;
}
mBuffer = newBuffer;
mBufferLen = aNewSize;
}
return NS_OK;
}
//----------------------------------------------------------------------
static NS_DEFINE_IID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
static NS_DEFINE_IID(kICaseConversionIID, NS_ICASECONVERSION_IID);
static nsICaseConversion* gCaseConv = nsnull;
nsresult
nsTextTransformer::Initialize()
{
nsresult res = NS_OK;
if (!gCaseConv) {
res = nsServiceManager::GetService(kUnicharUtilCID, kICaseConversionIID,
(nsISupports**)&gCaseConv);
NS_ASSERTION( NS_SUCCEEDED(res), "cannot get UnicharUtil");
NS_ASSERTION( gCaseConv != NULL, "cannot get UnicharUtil");
}
return res;
}
void
nsTextTransformer::Shutdown()
{
if (gCaseConv) {
nsServiceManager::ReleaseService(kUnicharUtilCID, gCaseConv);
gCaseConv = nsnull;
}
}
// For now, we have only a single character to strip out. If we get
// any more, change this to use a bitset to lookup into.
// CH_SHY - soft hyphen (discretionary hyphen)
#define IS_DISCARDED(_ch) \
((_ch) == CH_SHY)
#define MAX_UNIBYTE 127
MOZ_DECL_CTOR_COUNTER(nsTextTransformer);
nsTextTransformer::nsTextTransformer(nsILineBreaker* aLineBreaker,
nsIWordBreaker* aWordBreaker)
: mFrag(nsnull),
mOffset(0),
mMode(eNormal),
mLineBreaker(aLineBreaker),
mWordBreaker(aWordBreaker),
mBufferPos(0),
mFlags(0),
mTextTransform(NS_STYLE_TEXT_TRANSFORM_NONE)
{
MOZ_COUNT_CTOR(nsTextTransformer);
if (aLineBreaker == nsnull && aWordBreaker == nsnull )
NS_ASSERTION(0, "invalid creation of nsTextTransformer");
#ifdef DEBUG
static PRBool firstTime = PR_TRUE;
if (firstTime) {
firstTime = PR_FALSE;
SelfTest(aLineBreaker, aWordBreaker);
}
#endif
}
nsTextTransformer::~nsTextTransformer()
{
MOZ_COUNT_DTOR(nsTextTransformer);
}
nsresult
nsTextTransformer::Init(nsIFrame* aFrame,
nsIContent* aContent,
PRInt32 aStartingOffset,
PRBool aLeaveAsAscii)
{
// Get the contents text content
nsresult rv;
nsCOMPtr<nsITextContent> tc = do_QueryInterface(aContent, &rv);
if (tc.get()) {
tc->GetText(&mFrag);
// Sanitize aStartingOffset
if (NS_WARN_IF_FALSE(aStartingOffset >= 0, "bad starting offset")) {
aStartingOffset = 0;
}
else if (NS_WARN_IF_FALSE(aStartingOffset <= mFrag->GetLength(),
"bad starting offset")) {
aStartingOffset = mFrag->GetLength();
}
mOffset = aStartingOffset;
// Get the frames text style information
const nsStyleText* styleText;
aFrame->GetStyleData(eStyleStruct_Text, (const nsStyleStruct*&) styleText);
if (NS_STYLE_WHITESPACE_PRE == styleText->mWhiteSpace) {
mMode = ePreformatted;
}
else if (NS_STYLE_WHITESPACE_MOZ_PRE_WRAP == styleText->mWhiteSpace) {
mMode = ePreWrap;
}
mTextTransform = styleText->mTextTransform;
if (aLeaveAsAscii) { // See if the text fragment is 1-byte text
SetLeaveAsAscii(PR_TRUE);
// XXX Currently we only leave it as ascii for normal text and not for preformatted
// or preformatted wrapped text
if (mFrag->Is2b() || (eNormal != mMode))
// We don't step down from Unicode to ascii
SetLeaveAsAscii(PR_FALSE);
}
else
SetLeaveAsAscii(PR_FALSE);
}
return rv;
}
//----------------------------------------------------------------------
// wordlen==1, contentlen=newOffset-currentOffset, isWhitespace=t
PRInt32
nsTextTransformer::ScanNormalWhiteSpace_F()
{
const nsTextFragment* frag = mFrag;
PRInt32 fragLen = frag->GetLength();
PRInt32 offset = mOffset;
for (; offset < fragLen; offset++) {
PRUnichar ch = frag->CharAt(offset);
if (!XP_IS_SPACE(ch)) {
// If character is not discardable then stop looping, otherwise
// let the discarded character collapse with the other spaces.
if (!IS_DISCARDED(ch)) {
break;
}
}
}
// Make sure we have enough room in the transform buffer
if (mBufferPos >= mTransformBuf.mBufferLen) {
mTransformBuf.GrowBy(128);
}
if (TransformedTextIsAscii()) {
unsigned char* bp = (unsigned char*)mTransformBuf.mBuffer;
bp[mBufferPos++] = ' ';
} else {
mTransformBuf.mBuffer[mBufferPos++] = PRUnichar(' ');
}
return offset;
}
void
nsTextTransformer::ConvertTransformedTextToUnicode()
{
// Go backwards over the characters and convert them.
PRInt32 lastChar = mBufferPos - 1;
unsigned char* cp1 = (unsigned char*)mTransformBuf.mBuffer + lastChar;
PRUnichar* cp2 = mTransformBuf.mBuffer + lastChar;
NS_ASSERTION(mTransformBuf.mBufferLen >= mBufferPos,
"transform buffer is too small");
for (PRInt32 count = mBufferPos; count > 0; count--) {
*cp2-- = PRUnichar(*cp1--);
}
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
PRInt32
nsTextTransformer::ScanNormalAsciiText_F(PRInt32* aWordLen,
PRBool* aWasTransformed)
{
const nsTextFragment* frag = mFrag;
PRInt32 fragLen = frag->GetLength();
PRInt32 offset = mOffset;
PRInt32 prevBufferPos = mBufferPos;
const unsigned char* cp = (const unsigned char*)frag->Get1b() + offset;
union {
unsigned char* bp1;
PRUnichar* bp2;
};
bp2 = mTransformBuf.GetBuffer();
if (TransformedTextIsAscii()) {
bp1 += mBufferPos;
} else {
bp2 += mBufferPos;
}
for (; offset < fragLen; offset++) {
unsigned char ch = *cp++;
if (XP_IS_SPACE(ch)) {
break;
}
if (CH_NBSP == ch) {
ch = ' ';
*aWasTransformed = PR_TRUE;
}
else if (IS_DISCARDED(ch)) {
// Strip discarded characters from the transformed output
continue;
}
if (ch > MAX_UNIBYTE) {
// The text has a multibyte character so we can no longer leave the
// text as ascii text
SetHasMultibyte(PR_TRUE);
if (TransformedTextIsAscii()) {
SetTransformedTextIsAscii(PR_FALSE);
*aWasTransformed = PR_TRUE;
// Transform any existing ascii text to Unicode
if (mBufferPos > 0) {
ConvertTransformedTextToUnicode();
bp2 = mTransformBuf.GetBuffer() + mBufferPos;
}
}
}
if (mBufferPos >= mTransformBuf.mBufferLen) {
nsresult rv = mTransformBuf.GrowBy(128);
if (NS_FAILED(rv)) {
// If we run out of space then just truncate the text
break;
}
bp2 = mTransformBuf.GetBuffer();
if (TransformedTextIsAscii()) {
bp1 += mBufferPos;
} else {
bp2 += mBufferPos;
}
}
if (TransformedTextIsAscii()) {
*bp1++ = ch;
} else {
*bp2++ = PRUnichar(ch);
}
mBufferPos++;
}
*aWordLen = mBufferPos - prevBufferPos;
return offset;
}
PRInt32
nsTextTransformer::ScanNormalAsciiText_F_ForWordBreak(PRInt32* aWordLen,
PRBool* aWasTransformed)
{
const nsTextFragment* frag = mFrag;
PRInt32 fragLen = frag->GetLength();
PRInt32 offset = mOffset;
PRInt32 prevBufferPos = mBufferPos;
PRBool breakAfterThis = PR_FALSE;
const unsigned char* cp = (const unsigned char*)frag->Get1b() + offset;
union {
unsigned char* bp1;
PRUnichar* bp2;
};
bp2 = mTransformBuf.GetBuffer();
if (TransformedTextIsAscii()) {
bp1 += mBufferPos;
} else {
bp2 += mBufferPos;
}
for (; offset < fragLen && !breakAfterThis; offset++) {
unsigned char ch = *cp++;
if (CH_NBSP == ch) {
ch = ' ';
*aWasTransformed = PR_TRUE;
if (offset == mOffset)
breakAfterThis = PR_TRUE;
else
break;
}
else if (XP_IS_SPACE(ch)) {
break;
}
else if (IS_DISCARDED(ch)) {
// Strip discarded characters from the transformed output
continue;
}
if (ch > MAX_UNIBYTE) {
// The text has a multibyte character so we can no longer leave the
// text as ascii text
SetHasMultibyte(PR_TRUE);
if (TransformedTextIsAscii()) {
SetTransformedTextIsAscii(PR_FALSE);
*aWasTransformed = PR_TRUE;
// Transform any existing ascii text to Unicode
if (mBufferPos > 0) {
ConvertTransformedTextToUnicode();
bp2 = mTransformBuf.GetBuffer() + mBufferPos;
}
}
}
if (mBufferPos >= mTransformBuf.mBufferLen) {
nsresult rv = mTransformBuf.GrowBy(128);
if (NS_FAILED(rv)) {
// If we run out of space then just truncate the text
break;
}
bp2 = mTransformBuf.GetBuffer();
if (TransformedTextIsAscii()) {
bp1 += mBufferPos;
} else {
bp2 += mBufferPos;
}
}
if (TransformedTextIsAscii()) {
*bp1++ = ch;
} else {
*bp2++ = PRUnichar(ch);
}
mBufferPos++;
}
*aWordLen = mBufferPos - prevBufferPos;
return offset;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
PRInt32
nsTextTransformer::ScanNormalUnicodeText_F(PRBool aForLineBreak,
PRInt32* aWordLen,
PRBool* aWasTransformed)
{
const nsTextFragment* frag = mFrag;
const PRUnichar* cp0 = frag->Get2b();
PRInt32 fragLen = frag->GetLength();
PRInt32 offset = mOffset;
PRUnichar firstChar = frag->CharAt(offset++);
if (firstChar > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
// Only evaluate complex breaking logic if there are more characters
// beyond the first to look at.
PRInt32 numChars = 1;
if (offset < fragLen) {
const PRUnichar* cp = cp0 + offset;
PRBool breakBetween = PR_FALSE;
if (aForLineBreak) {
mLineBreaker->BreakInBetween(&firstChar, 1, cp, (fragLen-offset), &breakBetween);
}
else {
mWordBreaker->BreakInBetween(&firstChar, 1, cp, (fragLen-offset), &breakBetween);
}
// don't transform the first character until after BreakInBetween is called
// Kipp originally did this at the top of the function, which was too early.
// see bug 14280
if (CH_NBSP == firstChar) {
firstChar = ' ';
*aWasTransformed = PR_TRUE;
}
mTransformBuf.mBuffer[mBufferPos++] = firstChar;
if (!breakBetween) {
// Find next position
PRBool tryNextFrag;
PRUint32 next;
if (aForLineBreak) {
mLineBreaker->Next(cp0, fragLen, offset, &next, &tryNextFrag);
}
else {
mWordBreaker->Next(cp0, fragLen, offset, &next, &tryNextFrag);
}
numChars = (PRInt32) (next - (PRUint32) offset) + 1;
// Since we know the number of characters we're adding grow the buffer
// now before we start copying
nsresult rv = mTransformBuf.GrowTo(mBufferPos + numChars);
if (NS_FAILED(rv)) {
numChars = mTransformBuf.GetBufferLength() - mBufferPos;
}
offset += numChars - 1;
// 1. convert nbsp into space
// 2. check for discarded characters
// 3. check mHasMultibyte flag
// 4. copy buffer
PRUnichar* bp = &mTransformBuf.mBuffer[mBufferPos];
const PRUnichar* end = cp + numChars - 1;
while (cp < end) {
PRUnichar ch = *cp++;
if (CH_NBSP == ch) {
ch = ' ';
}
else if (IS_DISCARDED(ch) || (ch == 0x0a) || (ch == 0x0d)) {
// Strip discarded characters from the transformed output
numChars--;
continue;
}
if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
*bp++ = ch;
mBufferPos++;
}
}
}
else
{ // transform the first character
// we do this here, rather than at the top of the function (like Kipp originally had it)
// because if we must call BreakInBetween, then we must do so before the transformation
// this is the case where BreakInBetween does not need to be called at all.
// see bug 14280
if (CH_NBSP == firstChar) {
firstChar = ' ';
*aWasTransformed = PR_TRUE;
}
mTransformBuf.mBuffer[mBufferPos++] = firstChar;
}
*aWordLen = numChars;
return offset;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=t
PRInt32
nsTextTransformer::ScanPreWrapWhiteSpace_F(PRInt32* aWordLen)
{
const nsTextFragment* frag = mFrag;
PRInt32 fragLen = frag->GetLength();
PRInt32 offset = mOffset;
PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos;
PRUnichar* endbp = mTransformBuf.GetBufferEnd();
PRInt32 prevBufferPos = mBufferPos;
for (; offset < fragLen; offset++) {
// This function is used for both Unicode and ascii strings so don't
// make any assumptions about what kind of data it is
PRUnichar ch = frag->CharAt(offset);
if (!XP_IS_SPACE(ch) || (ch == '\t') || (ch == '\n')) {
if (IS_DISCARDED(ch)) {
// Keep looping if this is a discarded character
continue;
}
break;
}
if (bp == endbp) {
PRInt32 oldLength = bp - mTransformBuf.GetBuffer();
nsresult rv = mTransformBuf.GrowBy(1000);
if (NS_FAILED(rv)) {
// If we run out of space (unlikely) then just chop the input
break;
}
bp = mTransformBuf.GetBuffer() + oldLength;
endbp = mTransformBuf.GetBufferEnd();
}
*bp++ = ' ';
mBufferPos++;
}
*aWordLen = mBufferPos - prevBufferPos;
return offset;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
PRInt32
nsTextTransformer::ScanPreData_F(PRInt32* aWordLen,
PRBool* aWasTransformed)
{
const nsTextFragment* frag = mFrag;
PRInt32 fragLen = frag->GetLength();
PRInt32 offset = mOffset;
PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos;
PRUnichar* endbp = mTransformBuf.GetBufferEnd();
PRInt32 prevBufferPos = mBufferPos;
for (; offset < fragLen; offset++) {
// This function is used for both Unicode and ascii strings so don't
// make any assumptions about what kind of data it is
PRUnichar ch = frag->CharAt(offset);
if ((ch == '\t') || (ch == '\n')) {
break;
}
if (CH_NBSP == ch) {
ch = ' ';
*aWasTransformed = PR_TRUE;
}
else if (IS_DISCARDED(ch)) {
continue;
}
if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
if (bp == endbp) {
PRInt32 oldLength = bp - mTransformBuf.GetBuffer();
nsresult rv = mTransformBuf.GrowBy(1000);
if (NS_FAILED(rv)) {
// If we run out of space (unlikely) then just chop the input
break;
}
bp = mTransformBuf.GetBuffer() + oldLength;
endbp = mTransformBuf.GetBufferEnd();
}
*bp++ = ch;
mBufferPos++;
}
*aWordLen = mBufferPos - prevBufferPos;
return offset;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
PRInt32
nsTextTransformer::ScanPreAsciiData_F(PRInt32* aWordLen,
PRBool* aWasTransformed)
{
const nsTextFragment* frag = mFrag;
PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos;
PRUnichar* endbp = mTransformBuf.GetBufferEnd();
const unsigned char* cp = (const unsigned char*) frag->Get1b();
const unsigned char* end = cp + frag->GetLength();
PRInt32 prevBufferPos = mBufferPos;
cp += mOffset;
while (cp < end) {
PRUnichar ch = (PRUnichar) *cp++;
if ((ch == '\t') || (ch == '\n')) {
cp--;
break;
}
if (CH_NBSP == ch) {
ch = ' ';
*aWasTransformed = PR_TRUE;
}
else if (IS_DISCARDED(ch)) {
continue;
}
if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
if (bp == endbp) {
PRInt32 oldLength = bp - mTransformBuf.GetBuffer();
nsresult rv = mTransformBuf.GrowBy(1000);
if (NS_FAILED(rv)) {
// If we run out of space (unlikely) then just chop the input
break;
}
bp = mTransformBuf.GetBuffer() + oldLength;
endbp = mTransformBuf.GetBufferEnd();
}
*bp++ = ch;
mBufferPos++;
}
*aWordLen = mBufferPos - prevBufferPos;
return cp - ((const unsigned char*)frag->Get1b());
}
//----------------------------------------
static void
AsciiToLowerCase(unsigned char* aText, PRInt32 aWordLen)
{
while (aWordLen-- > 0) {
*aText = tolower(*aText);
aText++;
}
}
static void
AsciiToUpperCase(unsigned char* aText, PRInt32 aWordLen)
{
while (aWordLen-- > 0) {
*aText = toupper(*aText);
aText++;
}
}
#define kSzlig 0x00DF
static PRInt32 CountGermanSzlig(const PRUnichar* aText, PRInt32 len)
{
PRInt32 i,cnt;
for(i=0,cnt=0; i<len; i++, aText++)
{
if(kSzlig == *aText)
cnt++;
}
return cnt;
}
static void ReplaceGermanSzligToSS(PRUnichar* aText, PRInt32 len, PRInt32 szCnt)
{
PRUnichar *src, *dest;
src = aText + len - 1;
dest = src + szCnt;
while( (src!=dest) && (src >= aText) )
{
if(kSzlig == *src )
{
*dest-- = PRUnichar('S');
*dest-- = PRUnichar('S');
src--;
} else {
*dest-- = *src--;
}
}
}
PRUnichar*
nsTextTransformer::GetNextWord(PRBool aInWord,
PRInt32* aWordLenResult,
PRInt32* aContentLenResult,
PRBool* aIsWhiteSpaceResult,
PRBool* aWasTransformed,
PRBool aResetTransformBuf,
PRBool aForLineBreak)
{
const nsTextFragment* frag = mFrag;
PRInt32 fragLen = frag->GetLength();
PRInt32 offset = mOffset;
PRInt32 wordLen = 0;
PRBool isWhitespace = PR_FALSE;
PRUnichar* result = nsnull;
PRBool prevBufferPos;
// Initialize OUT parameter
*aWasTransformed = PR_FALSE;
// See if we should reset the current buffer position back to the
// beginning of the buffer
if (aResetTransformBuf) {
mBufferPos = 0;
SetTransformedTextIsAscii(LeaveAsAscii());
}
prevBufferPos = mBufferPos;
// Fix word breaking problem w/ PREFORMAT and PREWRAP
// for word breaking, we should really go to the normal code
if((! aForLineBreak) && (eNormal != mMode))
mMode = eNormal;
while (offset < fragLen) {
PRUnichar firstChar = frag->CharAt(offset);
// Eat up any discarded characters before dispatching
if (IS_DISCARDED(firstChar)) {
offset++;
continue;
}
switch (mMode) {
default:
case eNormal:
if (XP_IS_SPACE(firstChar)) {
offset = ScanNormalWhiteSpace_F();
if (firstChar != ' ') {
*aWasTransformed = PR_TRUE;
}
wordLen = 1;
isWhitespace = PR_TRUE;
}
else if (CH_NBSP == firstChar && !aForLineBreak) {
wordLen = 1;
isWhitespace = PR_TRUE;
*aWasTransformed = PR_TRUE;
// Make sure we have enough room in the transform buffer
if (mBufferPos >= mTransformBuf.mBufferLen) {
mTransformBuf.GrowBy(128);
}
offset++;
if (TransformedTextIsAscii()) {
((unsigned char*)mTransformBuf.mBuffer)[mBufferPos++] = ' ';
} else {
mTransformBuf.mBuffer[mBufferPos++] = PRUnichar(' ');
}
}
else if (frag->Is2b()) {
offset = ScanNormalUnicodeText_F(aForLineBreak, &wordLen, aWasTransformed);
}
else {
if (!aForLineBreak)
offset = ScanNormalAsciiText_F_ForWordBreak(&wordLen, aWasTransformed);
else
offset = ScanNormalAsciiText_F(&wordLen, aWasTransformed);
}
break;
case ePreformatted:
if (('\n' == firstChar) || ('\t' == firstChar)) {
mTransformBuf.mBuffer[mBufferPos++] = firstChar;
offset++;
wordLen = 1;
isWhitespace = PR_TRUE;
}
else if (frag->Is2b()) {
offset = ScanPreData_F(&wordLen, aWasTransformed);
}
else {
offset = ScanPreAsciiData_F(&wordLen, aWasTransformed);
}
break;
case ePreWrap:
if (XP_IS_SPACE(firstChar)) {
if (('\n' == firstChar) || ('\t' == firstChar)) {
mTransformBuf.mBuffer[mBufferPos++] = firstChar;
offset++;
wordLen = 1;
}
else {
offset = ScanPreWrapWhiteSpace_F(&wordLen);
}
isWhitespace = PR_TRUE;
}
else if (frag->Is2b()) {
offset = ScanNormalUnicodeText_F(aForLineBreak, &wordLen, aWasTransformed);
}
else {
if (!aForLineBreak)
offset = ScanNormalAsciiText_F_ForWordBreak(&wordLen, aWasTransformed);
else
offset = ScanNormalAsciiText_F(&wordLen, aWasTransformed);
}
break;
}
if (TransformedTextIsAscii()) {
unsigned char* wordPtr = (unsigned char*)mTransformBuf.mBuffer + prevBufferPos;
if (!isWhitespace) {
switch (mTextTransform) {
case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
*wordPtr = toupper(*wordPtr);
break;
case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
AsciiToLowerCase(wordPtr, wordLen);
break;
case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
AsciiToUpperCase(wordPtr, wordLen);
break;
}
}
result = (PRUnichar*)wordPtr;
} else {
result = &mTransformBuf.mBuffer[prevBufferPos];
if (!isWhitespace) {
switch (mTextTransform) {
case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
gCaseConv->ToTitle(result, result, wordLen, !aInWord);
// if the first character is szlig
if(kSzlig == *result)
{
if ((prevBufferPos + wordLen + 1) >= mTransformBuf.mBufferLen) {
mTransformBuf.GrowBy(128);
result = &mTransformBuf.mBuffer[prevBufferPos];
}
PRUnichar* src = result + wordLen;
while(src>result)
{
*(src+1) = *src;
src--;
}
result[0] = PRUnichar('S');
result[1] = PRUnichar('S');
wordLen++;
}
break;
case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
gCaseConv->ToLower(result, result, wordLen);
break;
case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
{
gCaseConv->ToUpper(result, result, wordLen);
// first we search for German Szlig
PRInt32 szligCnt = CountGermanSzlig(result, wordLen);
if(szligCnt > 0) {
// Make sure we have enough room in the transform buffer
if ((prevBufferPos + wordLen + szligCnt) >= mTransformBuf.mBufferLen)
{
mTransformBuf.GrowBy(128);
result = &mTransformBuf.mBuffer[prevBufferPos];
}
ReplaceGermanSzligToSS(result, wordLen, szligCnt);
wordLen += szligCnt;
}
}
break;
}
}
}
break;
}
*aWordLenResult = wordLen;
*aContentLenResult = offset - mOffset;
*aIsWhiteSpaceResult = isWhitespace;
// If the word length doesn't match the content length then we transformed
// the text
if ((mTextTransform != NS_STYLE_TEXT_TRANSFORM_NONE) ||
(*aWordLenResult != *aContentLenResult)) {
*aWasTransformed = PR_TRUE;
}
mOffset = offset;
return result;
}
//----------------------------------------------------------------------
// wordlen==1, contentlen=newOffset-currentOffset, isWhitespace=t
PRInt32
nsTextTransformer::ScanNormalWhiteSpace_B()
{
const nsTextFragment* frag = mFrag;
PRInt32 offset = mOffset;
while (--offset >= 0) {
PRUnichar ch = frag->CharAt(offset);
if (!XP_IS_SPACE(ch)) {
// If character is not discardable then stop looping, otherwise
// let the discarded character collapse with the other spaces.
if (!IS_DISCARDED(ch)) {
break;
}
}
}
mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = ' ';
return offset;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
PRInt32
nsTextTransformer::ScanNormalAsciiText_B(PRInt32* aWordLen)
{
const nsTextFragment* frag = mFrag;
PRInt32 offset = mOffset;
PRUnichar* bp = mTransformBuf.GetBufferEnd();
PRUnichar* startbp = mTransformBuf.GetBuffer();
while (--offset >= 0) {
PRUnichar ch = frag->CharAt(offset);
if (CH_NBSP == ch) {
ch = ' ';
}
if (XP_IS_SPACE(ch)) {
break;
}
else if (IS_DISCARDED(ch)) {
continue;
}
if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
if (bp == startbp) {
PRInt32 oldLength = mTransformBuf.mBufferLen;
nsresult rv = mTransformBuf.GrowBy(1000);
if (NS_FAILED(rv)) {
// If we run out of space (unlikely) then just chop the input
break;
}
bp = mTransformBuf.GetBufferEnd() - oldLength;
startbp = mTransformBuf.GetBuffer();
}
*--bp = ch;
}
*aWordLen = mTransformBuf.GetBufferEnd() - bp;
return offset;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
PRInt32
nsTextTransformer::ScanNormalUnicodeText_B(PRBool aForLineBreak,
PRInt32* aWordLen)
{
const nsTextFragment* frag = mFrag;
const PRUnichar* cp0 = frag->Get2b();
PRInt32 offset = mOffset - 1;
PRUnichar firstChar = frag->CharAt(offset);
mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = firstChar;
if (firstChar > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
PRInt32 numChars = 1;
if (offset > 0) {
const PRUnichar* cp = cp0 + offset;
PRBool breakBetween = PR_FALSE;
if (aForLineBreak) {
mLineBreaker->BreakInBetween(cp0, offset + 1,
mTransformBuf.GetBufferEnd()-1, 1,
&breakBetween);
}
else {
mWordBreaker->BreakInBetween(cp0, offset + 1,
mTransformBuf.GetBufferEnd()-1, 1,
&breakBetween);
}
if (!breakBetween) {
// Find next position
PRBool tryPrevFrag;
PRUint32 prev;
if (aForLineBreak) {
mLineBreaker->Prev(cp0, offset, offset, &prev, &tryPrevFrag);
}
else {
mWordBreaker->Prev(cp0, offset, offset, &prev, &tryPrevFrag);
}
numChars = (PRInt32) ((PRUint32) offset - prev) + 1;
// Grow buffer before copying
nsresult rv = mTransformBuf.GrowTo(numChars);
if (NS_FAILED(rv)) {
numChars = mTransformBuf.GetBufferLength();
}
// 1. convert nbsp into space
// 2. check mHasMultibyte flag
// 3. copy buffer
PRUnichar* bp = mTransformBuf.GetBufferEnd() - 1;
const PRUnichar* end = cp - numChars + 1;
while (cp > end) {
PRUnichar ch = *--cp;
if (CH_NBSP == ch) {
ch = ' ';
}
else if (IS_DISCARDED(ch)) {
continue;
}
if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
*--bp = ch;
}
// Recompute offset and numChars in case we stripped something
offset = offset - numChars;
numChars = mTransformBuf.GetBufferEnd() - bp;
}
}
else
offset--;
*aWordLen = numChars;
return offset;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=t
PRInt32
nsTextTransformer::ScanPreWrapWhiteSpace_B(PRInt32* aWordLen)
{
const nsTextFragment* frag = mFrag;
PRInt32 offset = mOffset;
PRUnichar* bp = mTransformBuf.GetBufferEnd();
PRUnichar* startbp = mTransformBuf.GetBuffer();
while (--offset >= 0) {
PRUnichar ch = frag->CharAt(offset);
if (!XP_IS_SPACE(ch) || (ch == '\t') || (ch == '\n')) {
// Keep looping if this is a discarded character
if (IS_DISCARDED(ch)) {
continue;
}
break;
}
if (bp == startbp) {
PRInt32 oldLength = mTransformBuf.mBufferLen;
nsresult rv = mTransformBuf.GrowBy(1000);
if (NS_FAILED(rv)) {
// If we run out of space (unlikely) then just chop the input
break;
}
bp = mTransformBuf.GetBufferEnd() - oldLength;
startbp = mTransformBuf.GetBuffer();
}
*--bp = ' ';
}
*aWordLen = mTransformBuf.GetBufferEnd() - bp;
return offset;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
PRInt32
nsTextTransformer::ScanPreData_B(PRInt32* aWordLen)
{
const nsTextFragment* frag = mFrag;
PRInt32 offset = mOffset;
PRUnichar* bp = mTransformBuf.GetBufferEnd();
PRUnichar* startbp = mTransformBuf.GetBuffer();
while (--offset >= 0) {
PRUnichar ch = frag->CharAt(offset);
if ((ch == '\t') || (ch == '\n')) {
break;
}
if (CH_NBSP == ch) {
ch = ' ';
}
else if (IS_DISCARDED(ch)) {
continue;
}
if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
if (bp == startbp) {
PRInt32 oldLength = mTransformBuf.mBufferLen;
nsresult rv = mTransformBuf.GrowBy(1000);
if (NS_FAILED(rv)) {
// If we run out of space (unlikely) then just chop the input
offset++;
break;
}
bp = mTransformBuf.GetBufferEnd() - oldLength;
startbp = mTransformBuf.GetBuffer();
}
*--bp = ch;
}
*aWordLen = mTransformBuf.GetBufferEnd() - bp;
return offset;
}
//----------------------------------------
PRUnichar*
nsTextTransformer::GetPrevWord(PRBool aInWord,
PRInt32* aWordLenResult,
PRInt32* aContentLenResult,
PRBool* aIsWhiteSpaceResult,
PRBool aForLineBreak)
{
const nsTextFragment* frag = mFrag;
PRInt32 offset = mOffset;
PRInt32 wordLen = 0;
PRBool isWhitespace = PR_FALSE;
PRUnichar* result = nsnull;
// Fix word breaking problem w/ PREFORMAT and PREWRAP
// for word breaking, we should really go to the normal code
if((! aForLineBreak) && (eNormal != mMode))
mMode = eNormal;
while (--offset >= 0) {
PRUnichar firstChar = frag->CharAt(offset);
// Eat up any discarded characters before dispatching
if (IS_DISCARDED(firstChar)) {
continue;
}
switch (mMode) {
default:
case eNormal:
if (XP_IS_SPACE(firstChar)) {
offset = ScanNormalWhiteSpace_B();
wordLen = 1;
isWhitespace = PR_TRUE;
}
else if (CH_NBSP == firstChar && !aForLineBreak) {
wordLen = 1;
isWhitespace = PR_TRUE;
mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = ' ';
offset--;
} else if (frag->Is2b()) {
offset = ScanNormalUnicodeText_B(aForLineBreak, &wordLen);
}
else {
offset = ScanNormalAsciiText_B(&wordLen);
}
break;
case ePreformatted:
if (('\n' == firstChar) || ('\t' == firstChar)) {
mTransformBuf.mBuffer[mTransformBuf.mBufferLen-1] = firstChar;
offset--; // make sure we overshoot
wordLen = 1;
isWhitespace = PR_TRUE;
}
else {
offset = ScanPreData_B(&wordLen);
}
break;
case ePreWrap:
if (XP_IS_SPACE(firstChar)) {
if (('\n' == firstChar) || ('\t' == firstChar)) {
mTransformBuf.mBuffer[mTransformBuf.mBufferLen-1] = firstChar;
offset--; // make sure we overshoot
wordLen = 1;
}
else {
offset = ScanPreWrapWhiteSpace_B(&wordLen);
}
isWhitespace = PR_TRUE;
}
else if (frag->Is2b()) {
offset = ScanNormalUnicodeText_B(aForLineBreak, &wordLen);
}
else {
offset = ScanNormalAsciiText_B(&wordLen);
}
break;
}
// Backwards scanning routines *always* overshoot by one for the
// returned offset value.
offset = offset + 1;
result = mTransformBuf.GetBufferEnd() - wordLen;
if (!isWhitespace) {
switch (mTextTransform) {
case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
gCaseConv->ToTitle(result, result, wordLen, !aInWord);
break;
case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
gCaseConv->ToLower(result, result, wordLen);
break;
case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
gCaseConv->ToUpper(result, result, wordLen);
break;
}
}
break;
}
*aWordLenResult = wordLen;
*aContentLenResult = mOffset - offset;
*aIsWhiteSpaceResult = isWhitespace;
mOffset = offset;
return result;
}
//----------------------------------------------------------------------
// Self test logic for this class. This will (hopefully) make sure
// that the forward and backward word iterator methods continue to
// function as people change things...
#ifdef DEBUG
struct SelfTestSection {
int length;
int* data;
};
#define NUM_MODES 3
struct SelfTestData {
const PRUnichar* text;
SelfTestSection modes[NUM_MODES];
};
static PRUint8 preModeValue[NUM_MODES] = {
NS_STYLE_WHITESPACE_NORMAL,
NS_STYLE_WHITESPACE_PRE,
NS_STYLE_WHITESPACE_MOZ_PRE_WRAP
};
static PRUnichar test1text[] = {
'o', 'n', 'c', 'e', ' ', 'u', 'p', 'o', 'n', '\t',
'a', ' ', 's', 'h', 'o', 'r', 't', ' ', 't', 'i', 'm', 'e', 0
};
static int test1Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
static int test1PreResults[] = { 9, 1, 12 };
static int test1PreWrapResults[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
static PRUnichar test2text[] = {
0xF6, 'n', 'c', 'e', ' ', 0xFB, 'p', 'o', 'n', '\t',
0xE3, ' ', 's', 'h', 0xF3, 'r', 't', ' ', 't', 0xEE, 'm', 'e', ' ', 0
};
static int test2Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4, 1 };
static int test2PreResults[] = { 9, 1, 13 };
static int test2PreWrapResults[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4, 1 };
static PRUnichar test3text[] = {
0x0152, 'n', 'c', 'e', ' ', 'x', 'y', '\t', 'z', 'y', ' ', 0
};
static int test3Results[] = { 4, 1, 2, 1, 2, 1, };
static int test3PreResults[] = { 7, 1, 3, };
static int test3PreWrapResults[] = { 4, 1, 2, 1, 2, 1, };
static PRUnichar test4text[] = {
'o', 'n', CH_SHY, 'c', 'e', ' ', CH_SHY, ' ', 'u', 'p', 'o', 'n', '\t',
'a', ' ', 's', 'h', 'o', 'r', 't', ' ', 't', 'i', 'm', 'e', 0
};
static int test4Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
static int test4PreResults[] = { 10, 1, 12 };
static int test4PreWrapResults[] = { 4, 2, 4, 1, 1, 1, 5, 1, 4 };
static PRUnichar test5text[] = {
CH_SHY, 0
};
static int test5Results[] = { 0 };
static int test5PreResults[] = { 0 };
static int test5PreWrapResults[] = { 0 };
#if 0
static PRUnichar test6text[] = {
0x30d5, 0x30b8, 0x30c6, 0x30ec, 0x30d3, 0x306e, 0x97f3, 0x697d,
0x756a, 0x7d44, 0x300c, 'H', 'E', 'Y', '!', ' ', 'H', 'E', 'Y', '!',
'\t', 'H', 'E', 'Y', '!', 0x300d, 0x306e, 0x30db, 0x30fc, 0x30e0,
0x30da, 0x30fc, 0x30b8, 0x3002, 0
};
static int test6Results[] = { 1, 1, 1, 1, 1,
1, 1, 1, 1, 1,
5, 1, 4, 1, 5,
1, 2, 1, 2, 2 };
static int test6PreResults[] = { 20, 1, 13 };
static int test6PreWrapResults[] = { 1, 1, 1, 1, 1,
1, 1, 1, 1, 1,
5, 1, 4, 1, 5,
1, 2, 1, 2, 2 };
#endif
static SelfTestData tests[] = {
{ test1text,
{ { sizeof(test1Results)/sizeof(int), test1Results, },
{ sizeof(test1PreResults)/sizeof(int), test1PreResults, },
{ sizeof(test1PreWrapResults)/sizeof(int), test1PreWrapResults, } }
},
{ test2text,
{ { sizeof(test2Results)/sizeof(int), test2Results, },
{ sizeof(test2PreResults)/sizeof(int), test2PreResults, },
{ sizeof(test2PreWrapResults)/sizeof(int), test2PreWrapResults, } }
},
{ test3text,
{ { sizeof(test3Results)/sizeof(int), test3Results, },
{ sizeof(test3PreResults)/sizeof(int), test3PreResults, },
{ sizeof(test3PreWrapResults)/sizeof(int), test3PreWrapResults, } }
},
{ test4text,
{ { sizeof(test4Results)/sizeof(int), test4Results, },
{ sizeof(test4PreResults)/sizeof(int), test4PreResults, },
{ sizeof(test4PreWrapResults)/sizeof(int), test4PreWrapResults, } }
},
{ test5text,
{ { sizeof(test5Results)/sizeof(int), test5Results, },
{ sizeof(test5PreResults)/sizeof(int), test5PreResults, },
{ sizeof(test5PreWrapResults)/sizeof(int), test5PreWrapResults, } }
},
#if 0
{ test6text,
{ { sizeof(test6Results)/sizeof(int), test6Results, },
{ sizeof(test6PreResults)/sizeof(int), test6PreResults, },
{ sizeof(test6PreWrapResults)/sizeof(int), test6PreWrapResults, } }
},
#endif
};
#define NUM_TESTS (sizeof(tests) / sizeof(tests[0]))
void
nsTextTransformer::SelfTest(nsILineBreaker* aLineBreaker,
nsIWordBreaker* aWordBreaker)
{
PRBool gNoisy = PR_FALSE;
if (PR_GetEnv("GECKO_TEXT_TRANSFORMER_NOISY_SELF_TEST")) {
gNoisy = PR_TRUE;
}
PRBool error = PR_FALSE;
PRInt32 testNum = 0;
SelfTestData* st = tests;
SelfTestData* last = st + NUM_TESTS;
for (; st < last; st++) {
PRUnichar* bp;
PRInt32 wordLen, contentLen;
PRBool ws, transformed;
PRBool isAsciiTest = PR_TRUE;
const PRUnichar* cp = st->text;
while (*cp) {
if (*cp > 255) {
isAsciiTest = PR_FALSE;
break;
}
cp++;
}
nsTextFragment frag(st->text);
nsTextTransformer tx(aLineBreaker, aWordBreaker);
for (PRInt32 preMode = 0; preMode < NUM_MODES; preMode++) {
// Do forwards test
if (gNoisy) {
nsAutoString uc2(st->text);
printf("%s forwards test: '", isAsciiTest ? "ascii" : "unicode");
fputs(uc2, stdout);
printf("'\n");
}
tx.Init2(&frag, 0, preModeValue[preMode], NS_STYLE_TEXT_TRANSFORM_NONE);
int* expectedResults = st->modes[preMode].data;
int resultsLen = st->modes[preMode].length;
while ((bp = tx.GetNextWord(PR_FALSE, &wordLen, &contentLen, &ws, &transformed))) {
if (gNoisy) {
nsAutoString tmp(bp, wordLen);
printf(" '");
fputs(tmp, stdout);
printf("': ws=%s wordLen=%d (%d) contentLen=%d (offset=%d)\n",
ws ? "yes" : "no",
wordLen, *expectedResults, contentLen, tx.mOffset);
}
if (*expectedResults != wordLen) {
error = PR_TRUE;
break;
}
expectedResults++;
}
if (expectedResults != st->modes[preMode].data + resultsLen) {
if (st->modes[preMode].data[0] != 0) {
error = PR_TRUE;
}
}
// Do backwards test
if (gNoisy) {
nsAutoString uc2(st->text);
printf("%s backwards test: '", isAsciiTest ? "ascii" : "unicode");
fputs(uc2, stdout);
printf("'\n");
}
tx.Init2(&frag, frag.GetLength(), NS_STYLE_WHITESPACE_NORMAL,
NS_STYLE_TEXT_TRANSFORM_NONE);
expectedResults = st->modes[preMode].data + resultsLen;
while ((bp = tx.GetPrevWord(PR_FALSE, &wordLen, &contentLen, &ws))) {
--expectedResults;
if (gNoisy) {
nsAutoString tmp(bp, wordLen);
printf(" '");
fputs(tmp, stdout);
printf("': ws=%s wordLen=%d contentLen=%d (offset=%d)\n",
ws ? "yes" : "no",
wordLen, contentLen, tx.mOffset);
}
if (*expectedResults != wordLen) {
error = PR_TRUE;
break;
}
}
if (expectedResults != st->modes[preMode].data) {
if (st->modes[preMode].data[0] != 0) {
error = PR_TRUE;
}
}
if (error) {
fprintf(stderr, "nsTextTransformer: self test %d failed\n", testNum);
}
testNum++;
}
}
if (error) {
NS_ABORT();
}
}
nsresult
nsTextTransformer::Init2(const nsTextFragment* aFrag,
PRInt32 aStartingOffset,
PRUint8 aWhiteSpace,
PRUint8 aTextTransform)
{
mFrag = aFrag;
// Sanitize aStartingOffset
if (NS_WARN_IF_FALSE(aStartingOffset >= 0, "bad starting offset")) {
aStartingOffset = 0;
}
else if (NS_WARN_IF_FALSE(aStartingOffset <= mFrag->GetLength(),
"bad starting offset")) {
aStartingOffset = mFrag->GetLength();
}
mOffset = aStartingOffset;
// Get the frames text style information
if (NS_STYLE_WHITESPACE_PRE == aWhiteSpace) {
mMode = ePreformatted;
}
else if (NS_STYLE_WHITESPACE_MOZ_PRE_WRAP == aWhiteSpace) {
mMode = ePreWrap;
}
mTextTransform = aTextTransform;
return NS_OK;
}
#endif /* DEBUG */