/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* ***** BEGIN LICENSE BLOCK ***** * Version: NPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Netscape Public License * Version 1.1 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at * http://www.mozilla.org/NPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Mozilla Communicator client code. * * The Initial Developer of the Original Code is * Netscape Communications Corporation. * Portions created by the Initial Developer are Copyright (C) 1998 * the Initial Developer. All Rights Reserved. * * Contributor(s): * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the NPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the NPL, the GPL or the LGPL. 
* * ***** END LICENSE BLOCK ***** */ #include #include "nsCOMPtr.h" #include "nsTextTransformer.h" #include "nsIContent.h" #include "nsIFrame.h" #include "nsITextContent.h" #include "nsStyleConsts.h" #include "nsILineBreaker.h" #include "nsIWordBreaker.h" #include "nsIServiceManager.h" #include "nsUnicharUtilCIID.h" #include "nsUnicharUtils.h" #include "nsICaseConversion.h" #include "prenv.h" #include "nsIPrefBranch.h" #include "nsIPrefService.h" #ifdef IBMBIDI #include "nsLayoutAtoms.h" #endif PRPackedBool nsTextTransformer::sWordSelectPrefInited = PR_FALSE; PRPackedBool nsTextTransformer::sWordSelectStopAtPunctuation = PR_FALSE; nsAutoTextBuffer::nsAutoTextBuffer() : mBuffer(mAutoBuffer), mBufferLen(NS_TEXT_TRANSFORMER_AUTO_WORD_BUF_SIZE) { } nsAutoTextBuffer::~nsAutoTextBuffer() { if (mBuffer && (mBuffer != mAutoBuffer)) { delete [] mBuffer; } } nsresult nsAutoTextBuffer::GrowBy(PRInt32 aAtLeast, PRBool aCopyToHead) { PRInt32 newSize = mBufferLen * 2; if (newSize < mBufferLen + aAtLeast) { newSize = mBufferLen + aAtLeast + 100; } return GrowTo(newSize, aCopyToHead); } nsresult nsAutoTextBuffer::GrowTo(PRInt32 aNewSize, PRBool aCopyToHead) { if (aNewSize > mBufferLen) { PRUnichar* newBuffer = new PRUnichar[aNewSize]; if (!newBuffer) { return NS_ERROR_OUT_OF_MEMORY; } memcpy(&newBuffer[aCopyToHead ? 
0 : mBufferLen], mBuffer, sizeof(PRUnichar) * mBufferLen); if (mBuffer != mAutoBuffer) { delete [] mBuffer; } mBuffer = newBuffer; mBufferLen = aNewSize; } return NS_OK; } //---------------------------------------------------------------------- static NS_DEFINE_CID(kUnicharUtilCID, NS_UNICHARUTIL_CID); static nsICaseConversion* gCaseConv = nsnull; nsresult nsTextTransformer::Initialize() { // read in our global word selection prefs if ( !sWordSelectPrefInited ) { nsCOMPtr prefBranch = do_GetService( NS_PREFSERVICE_CONTRACTID ); if ( prefBranch ) { PRBool temp = PR_FALSE; prefBranch->GetBoolPref("layout.word_select.stop_at_punctuation", &temp); sWordSelectStopAtPunctuation = temp; } sWordSelectPrefInited = PR_TRUE; } return NS_OK; } static nsresult EnsureCaseConv() { nsresult res = NS_OK; if (!gCaseConv) { res = nsServiceManager::GetService(kUnicharUtilCID, NS_GET_IID(nsICaseConversion), (nsISupports**)&gCaseConv); NS_ASSERTION( NS_SUCCEEDED(res), "cannot get UnicharUtil"); NS_ASSERTION( gCaseConv != NULL, "cannot get UnicharUtil"); } return res; } void nsTextTransformer::Shutdown() { if (gCaseConv) { nsServiceManager::ReleaseService(kUnicharUtilCID, gCaseConv); gCaseConv = nsnull; } } // For now, we have only a couple of characters to strip out. If we get // any more, change this to use a bitset to lookup into. 
// CH_SHY - soft hyphen (discretionary hyphen) #ifdef IBMBIDI // added BIDI formatting codes #define IS_DISCARDED(_ch) \ (((_ch) == CH_SHY) || ((_ch) == '\r') || IS_BIDI_CONTROL(_ch)) #else #define IS_DISCARDED(_ch) \ (((_ch) == CH_SHY) || ((_ch) == '\r')) #endif #define MAX_UNIBYTE 127 MOZ_DECL_CTOR_COUNTER(nsTextTransformer) nsTextTransformer::nsTextTransformer(nsILineBreaker* aLineBreaker, nsIWordBreaker* aWordBreaker, nsIPresContext* aPresContext) : mFrag(nsnull), mOffset(0), mMode(eNormal), mLineBreaker(aLineBreaker), mWordBreaker(aWordBreaker), mBufferPos(0), mTextTransform(NS_STYLE_TEXT_TRANSFORM_NONE), mFlags(0) { MOZ_COUNT_CTOR(nsTextTransformer); aPresContext-> GetLanguageSpecificTransformType(&mLanguageSpecificTransformType); #ifdef IBMBIDI mPresContext = aPresContext; #endif if (aLineBreaker == nsnull && aWordBreaker == nsnull ) NS_ASSERTION(0, "invalid creation of nsTextTransformer"); #ifdef DEBUG static PRBool firstTime = PR_TRUE; if (firstTime) { firstTime = PR_FALSE; SelfTest(aLineBreaker, aWordBreaker, aPresContext); } #endif } nsTextTransformer::~nsTextTransformer() { MOZ_COUNT_DTOR(nsTextTransformer); } nsresult nsTextTransformer::Init(nsIFrame* aFrame, nsIContent* aContent, PRInt32 aStartingOffset, PRBool aForceArabicShaping, PRBool aLeaveAsAscii) { /* * If the document has Bidi content, check whether we need to do * Arabic shaping. * * Does the frame contains Arabic characters * (mCharType == eCharType_RightToLeftArabic)? * Are we rendering character by character (aForceArabicShaping == * PR_TRUE)? If so, we always do our own Arabic shaping, even if * the platform has native shaping support. Otherwise, we only do * shaping if the platform has no shaping support. * * We do numeric shaping in all Bidi documents. 
*/ PRBool bidiEnabled; mPresContext->GetBidiEnabled(&bidiEnabled); if (bidiEnabled) { aFrame->GetBidiProperty(mPresContext, nsLayoutAtoms::charType, (void**)&mCharType, sizeof(mCharType)); if (mCharType == eCharType_RightToLeftArabic) { if (aForceArabicShaping) { SetNeedsArabicShaping(PR_TRUE); } else { PRBool isBidiSystem; mPresContext->GetIsBidiSystem(isBidiSystem); if (!isBidiSystem) { SetNeedsArabicShaping(PR_TRUE); } } } SetNeedsNumericShaping(PR_TRUE); } // Get the contents text content nsresult rv; nsCOMPtr tc = do_QueryInterface(aContent, &rv); if (tc.get()) { tc->GetText(&mFrag); // Sanitize aStartingOffset if (aStartingOffset < 0) { NS_WARNING("bad starting offset"); aStartingOffset = 0; } else if (aStartingOffset > mFrag->GetLength()) { NS_WARNING("bad starting offset"); aStartingOffset = mFrag->GetLength(); } mOffset = aStartingOffset; // Get the frames text style information const nsStyleText* styleText = aFrame->GetStyleText(); if (NS_STYLE_WHITESPACE_PRE == styleText->mWhiteSpace) { mMode = ePreformatted; } else if (NS_STYLE_WHITESPACE_MOZ_PRE_WRAP == styleText->mWhiteSpace) { mMode = ePreWrap; } mTextTransform = styleText->mTextTransform; if (aLeaveAsAscii) { // See if the text fragment is 1-byte text SetLeaveAsAscii(PR_TRUE); // XXX Currently we only leave it as ascii for normal text and not for preformatted // or preformatted wrapped text or language specific transforms if (mFrag->Is2b() || (eNormal != mMode) || (mLanguageSpecificTransformType != eLanguageSpecificTransformType_None)) // We don't step down from Unicode to ascii SetLeaveAsAscii(PR_FALSE); } else SetLeaveAsAscii(PR_FALSE); } return rv; } //---------------------------------------------------------------------- // wordlen==1, contentlen=newOffset-currentOffset, isWhitespace=t PRInt32 nsTextTransformer::ScanNormalWhiteSpace_F() { const nsTextFragment* frag = mFrag; PRInt32 fragLen = frag->GetLength(); PRInt32 offset = mOffset; for (; offset < fragLen; offset++) { PRUnichar ch = 
frag->CharAt(offset); if (!XP_IS_SPACE(ch)) { // If character is not discardable then stop looping, otherwise // let the discarded character collapse with the other spaces. if (!IS_DISCARDED(ch)) { break; } } } // Make sure we have enough room in the transform buffer if (mBufferPos >= mTransformBuf.mBufferLen) { mTransformBuf.GrowBy(128); } if (TransformedTextIsAscii()) { unsigned char* bp = (unsigned char*)mTransformBuf.mBuffer; bp[mBufferPos++] = ' '; } else { mTransformBuf.mBuffer[mBufferPos++] = PRUnichar(' '); } return offset; } void nsTextTransformer::ConvertTransformedTextToUnicode() { // Go backwards over the characters and convert them. PRInt32 lastChar = mBufferPos - 1; unsigned char* cp1 = (unsigned char*)mTransformBuf.mBuffer + lastChar; PRUnichar* cp2 = mTransformBuf.mBuffer + lastChar; NS_ASSERTION(mTransformBuf.mBufferLen >= mBufferPos, "transform buffer is too small"); for (PRInt32 count = mBufferPos; count > 0; count--) { *cp2-- = PRUnichar(*cp1--); } } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f PRInt32 nsTextTransformer::ScanNormalAsciiText_F(PRInt32* aWordLen, PRBool* aWasTransformed) { const nsTextFragment* frag = mFrag; PRInt32 fragLen = frag->GetLength(); PRInt32 offset = mOffset; PRInt32 prevBufferPos = mBufferPos; const unsigned char* cp = (const unsigned char*)frag->Get1b() + offset; union { unsigned char* bp1; PRUnichar* bp2; }; bp2 = mTransformBuf.GetBuffer(); if (TransformedTextIsAscii()) { bp1 += mBufferPos; } else { bp2 += mBufferPos; } for (; offset < fragLen; offset++) { unsigned char ch = *cp++; if (XP_IS_SPACE(ch)) { break; } if (CH_NBSP == ch) { ch = ' '; *aWasTransformed = PR_TRUE; } else if (IS_DISCARDED(ch)) { // Strip discarded characters from the transformed output continue; } if (ch > MAX_UNIBYTE) { // The text has a multibyte character so we can no longer leave the // text as ascii text SetHasMultibyte(PR_TRUE); if (TransformedTextIsAscii()) { SetTransformedTextIsAscii(PR_FALSE); 
*aWasTransformed = PR_TRUE; // Transform any existing ascii text to Unicode if (mBufferPos > 0) { ConvertTransformedTextToUnicode(); bp2 = mTransformBuf.GetBuffer() + mBufferPos; } } } if (mBufferPos >= mTransformBuf.mBufferLen) { nsresult rv = mTransformBuf.GrowBy(128); if (NS_FAILED(rv)) { // If we run out of space then just truncate the text break; } bp2 = mTransformBuf.GetBuffer(); if (TransformedTextIsAscii()) { bp1 += mBufferPos; } else { bp2 += mBufferPos; } } if (TransformedTextIsAscii()) { *bp1++ = ch; } else { *bp2++ = PRUnichar(ch); } mBufferPos++; } *aWordLen = mBufferPos - prevBufferPos; return offset; } PRInt32 nsTextTransformer::ScanNormalAsciiText_F_ForWordBreak(PRInt32* aWordLen, PRBool* aWasTransformed, PRBool aIsKeyboardSelect) { const nsTextFragment* frag = mFrag; PRInt32 fragLen = frag->GetLength(); PRInt32 offset = mOffset; PRInt32 prevBufferPos = mBufferPos; PRBool breakAfterThis = PR_FALSE; const unsigned char* cp = (const unsigned char*)frag->Get1b() + offset; union { unsigned char* bp1; PRUnichar* bp2; }; bp2 = mTransformBuf.GetBuffer(); if (TransformedTextIsAscii()) { bp1 += mBufferPos; } else { bp2 += mBufferPos; } PRBool readingAlphaNumeric = PR_TRUE; //only used in sWordSelectStopAtPunctuation // We must know if we are starting in alpha numerics. // Treat high bit chars as alphanumeric, otherwise we get stuck on accented letters // We can't trust isalnum() results for isalnum() // Therefore we don't stop at non-ascii (high bit) punctuation, // which is just fine. The punctuation we care about is low bit. 
if (sWordSelectStopAtPunctuation && offset < fragLen) readingAlphaNumeric = isalnum((unsigned char)*cp) || !IS_ASCII_CHAR(*cp); for (; offset < fragLen && !breakAfterThis; offset++) { unsigned char ch = *cp++; if (CH_NBSP == ch) { ch = ' '; *aWasTransformed = PR_TRUE; if (offset == mOffset) breakAfterThis = PR_TRUE; else break; } else if (XP_IS_SPACE(ch)) { break; } else if (sWordSelectStopAtPunctuation && readingAlphaNumeric && !isalnum(ch) && IS_ASCII_CHAR(ch)) { if (!aIsKeyboardSelect) break; // For keyboard move-by-word, need to pass by at least // one alphanumeric char before stopping at punct readingAlphaNumeric = PR_FALSE; } else if (sWordSelectStopAtPunctuation && !readingAlphaNumeric && (isalnum(ch) || !IS_ASCII_CHAR(ch))) { // On some platforms, punctuation breaks for word selection break; } else if (IS_DISCARDED(ch)) { // Strip discarded characters from the transformed output continue; } if (ch > MAX_UNIBYTE) { // The text has a multibyte character so we can no longer leave the // text as ascii text SetHasMultibyte(PR_TRUE); if (TransformedTextIsAscii()) { SetTransformedTextIsAscii(PR_FALSE); *aWasTransformed = PR_TRUE; // Transform any existing ascii text to Unicode if (mBufferPos > 0) { ConvertTransformedTextToUnicode(); bp2 = mTransformBuf.GetBuffer() + mBufferPos; } } } if (mBufferPos >= mTransformBuf.mBufferLen) { nsresult rv = mTransformBuf.GrowBy(128); if (NS_FAILED(rv)) { // If we run out of space then just truncate the text break; } bp2 = mTransformBuf.GetBuffer(); if (TransformedTextIsAscii()) { bp1 += mBufferPos; } else { bp2 += mBufferPos; } } if (TransformedTextIsAscii()) { *bp1++ = ch; } else { *bp2++ = PRUnichar(ch); } mBufferPos++; } *aWordLen = mBufferPos - prevBufferPos; return offset; } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f PRInt32 nsTextTransformer::ScanNormalUnicodeText_F(PRBool aForLineBreak, PRInt32* aWordLen, PRBool* aWasTransformed) { const nsTextFragment* frag = mFrag; const PRUnichar* cp0 = 
frag->Get2b(); PRInt32 fragLen = frag->GetLength(); #ifdef IBMBIDI if (*aWordLen > 0 && *aWordLen < fragLen) { fragLen = *aWordLen; } #endif PRInt32 offset = mOffset; PRUnichar firstChar = frag->CharAt(offset++); #ifdef IBMBIDI // Need to strip BIDI controls even when those are 'firstChars'. // This doesn't seem to produce bug 14280 (or similar bugs). while (offset < fragLen && IS_BIDI_CONTROL(firstChar) ) { firstChar = frag->CharAt(offset++); } #endif // IBMBIDI if (firstChar > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE); // Only evaluate complex breaking logic if there are more characters // beyond the first to look at. PRInt32 numChars = 1; if (offset < fragLen) { const PRUnichar* cp = cp0 + offset; PRBool breakBetween = PR_FALSE; if (aForLineBreak) { mLineBreaker->BreakInBetween(&firstChar, 1, cp, (fragLen-offset), &breakBetween); } else { mWordBreaker->BreakInBetween(&firstChar, 1, cp, (fragLen-offset), &breakBetween); } // don't transform the first character until after BreakInBetween is called // Kipp originally did this at the top of the function, which was too early. // see bug 14280 if (CH_NBSP == firstChar) { firstChar = ' '; *aWasTransformed = PR_TRUE; } nsresult rv = mTransformBuf.GrowTo(mBufferPos + 1); if (NS_FAILED(rv)) { *aWordLen = 0; return offset - 1; } mTransformBuf.mBuffer[mBufferPos++] = firstChar; if (!breakBetween) { // Find next position PRBool tryNextFrag; PRUint32 next; if (aForLineBreak) { mLineBreaker->Next(cp0, fragLen, offset, &next, &tryNextFrag); } else { mWordBreaker->NextWord(cp0, fragLen, offset, &next, &tryNextFrag); } numChars = (PRInt32) (next - (PRUint32) offset) + 1; // Since we know the number of characters we're adding grow the buffer // now before we start copying nsresult rv = mTransformBuf.GrowTo(mBufferPos + numChars); if (NS_FAILED(rv)) { numChars = mTransformBuf.GetBufferLength() - mBufferPos; } offset += numChars - 1; // 1. convert nbsp into space // 2. check for discarded characters // 3. check mHasMultibyte flag // 4. 
copy buffer PRUnichar* bp = &mTransformBuf.mBuffer[mBufferPos]; const PRUnichar* end = cp + numChars - 1; while (cp < end) { PRUnichar ch = *cp++; if (CH_NBSP == ch) { ch = ' '; } else if (IS_DISCARDED(ch) || (ch == 0x0a) || (ch == 0x0d)) { // Strip discarded characters from the transformed output numChars--; continue; } if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE); *bp++ = ch; mBufferPos++; } } } else { // transform the first character // we do this here, rather than at the top of the function (like Kipp originally had it) // because if we must call BreakInBetween, then we must do so before the transformation // this is the case where BreakInBetween does not need to be called at all. // see bug 14280 if (CH_NBSP == firstChar) { firstChar = ' '; *aWasTransformed = PR_TRUE; } nsresult rv = mTransformBuf.GrowTo(mBufferPos + 1); if (NS_FAILED(rv)) { *aWordLen = 0; return offset - 1; } mTransformBuf.mBuffer[mBufferPos++] = firstChar; } *aWordLen = numChars; return offset; } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=t PRInt32 nsTextTransformer::ScanPreWrapWhiteSpace_F(PRInt32* aWordLen) { const nsTextFragment* frag = mFrag; PRInt32 fragLen = frag->GetLength(); PRInt32 offset = mOffset; PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos; PRUnichar* endbp = mTransformBuf.GetBufferEnd(); PRInt32 prevBufferPos = mBufferPos; for (; offset < fragLen; offset++) { // This function is used for both Unicode and ascii strings so don't // make any assumptions about what kind of data it is PRUnichar ch = frag->CharAt(offset); if (!XP_IS_SPACE(ch) || (ch == '\t') || (ch == '\n')) { if (IS_DISCARDED(ch)) { // Keep looping if this is a discarded character continue; } break; } if (bp == endbp) { PRInt32 oldLength = bp - mTransformBuf.GetBuffer(); nsresult rv = mTransformBuf.GrowBy(1000); if (NS_FAILED(rv)) { // If we run out of space (unlikely) then just chop the input break; } bp = mTransformBuf.GetBuffer() + oldLength; endbp = 
mTransformBuf.GetBufferEnd(); } *bp++ = ' '; mBufferPos++; } *aWordLen = mBufferPos - prevBufferPos; return offset; } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f PRInt32 nsTextTransformer::ScanPreData_F(PRInt32* aWordLen, PRBool* aWasTransformed) { const nsTextFragment* frag = mFrag; PRInt32 fragLen = frag->GetLength(); PRInt32 offset = mOffset; PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos; PRUnichar* endbp = mTransformBuf.GetBufferEnd(); PRInt32 prevBufferPos = mBufferPos; for (; offset < fragLen; offset++) { // This function is used for both Unicode and ascii strings so don't // make any assumptions about what kind of data it is PRUnichar ch = frag->CharAt(offset); if ((ch == '\t') || (ch == '\n')) { break; } if (CH_NBSP == ch) { ch = ' '; *aWasTransformed = PR_TRUE; } else if (IS_DISCARDED(ch)) { continue; } if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE); if (bp == endbp) { PRInt32 oldLength = bp - mTransformBuf.GetBuffer(); nsresult rv = mTransformBuf.GrowBy(1000); if (NS_FAILED(rv)) { // If we run out of space (unlikely) then just chop the input break; } bp = mTransformBuf.GetBuffer() + oldLength; endbp = mTransformBuf.GetBufferEnd(); } *bp++ = ch; mBufferPos++; } *aWordLen = mBufferPos - prevBufferPos; return offset; } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f PRInt32 nsTextTransformer::ScanPreAsciiData_F(PRInt32* aWordLen, PRBool* aWasTransformed) { const nsTextFragment* frag = mFrag; PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos; PRUnichar* endbp = mTransformBuf.GetBufferEnd(); const unsigned char* cp = (const unsigned char*) frag->Get1b(); const unsigned char* end = cp + frag->GetLength(); PRInt32 prevBufferPos = mBufferPos; cp += mOffset; while (cp < end) { PRUnichar ch = (PRUnichar) *cp++; if ((ch == '\t') || (ch == '\n')) { cp--; break; } if (CH_NBSP == ch) { ch = ' '; *aWasTransformed = PR_TRUE; } else if (IS_DISCARDED(ch)) { continue; } if (ch > MAX_UNIBYTE) 
SetHasMultibyte(PR_TRUE); if (bp == endbp) { PRInt32 oldLength = bp - mTransformBuf.GetBuffer(); nsresult rv = mTransformBuf.GrowBy(1000); if (NS_FAILED(rv)) { // If we run out of space (unlikely) then just chop the input break; } bp = mTransformBuf.GetBuffer() + oldLength; endbp = mTransformBuf.GetBufferEnd(); } *bp++ = ch; mBufferPos++; } *aWordLen = mBufferPos - prevBufferPos; return cp - ((const unsigned char*)frag->Get1b()); } //---------------------------------------- static void AsciiToLowerCase(unsigned char* aText, PRInt32 aWordLen) { while (aWordLen-- > 0) { *aText = tolower(*aText); aText++; } } static void AsciiToUpperCase(unsigned char* aText, PRInt32 aWordLen) { while (aWordLen-- > 0) { *aText = toupper(*aText); aText++; } } #define kSzlig 0x00DF static PRInt32 CountGermanSzlig(const PRUnichar* aText, PRInt32 len) { PRInt32 i,cnt; for(i=0,cnt=0; i= aText) ) { if(kSzlig == *src ) { *dest-- = PRUnichar('S'); *dest-- = PRUnichar('S'); src--; } else { *dest-- = *src--; } } } void nsTextTransformer::LanguageSpecificTransform(PRUnichar* aText, PRInt32 aLen, PRBool* aWasTransformed) { if (mLanguageSpecificTransformType == eLanguageSpecificTransformType_Japanese) { for (PRInt32 i = 0; i < aLen; i++) { if (aText[i] == 0x5C) { // BACKSLASH aText[i] = 0xA5; // YEN SIGN SetHasMultibyte(PR_TRUE); *aWasTransformed = PR_TRUE; } #if 0 /* * We considered doing this, but since some systems may not have fonts * with this OVERLINE glyph, we decided not to do this. 
*/ else if (aText[i] == 0x7E) { // TILDE aText[i] = 0x203E; // OVERLINE SetHasMultibyte(PR_TRUE); *aWasTransformed = PR_TRUE; } #endif } } /* we once do transformation for Korean, but later decide to remove it */ /* see bug 88050 for more information */ } PRUnichar* nsTextTransformer::GetNextWord(PRBool aInWord, PRInt32* aWordLenResult, PRInt32* aContentLenResult, PRBool* aIsWhiteSpaceResult, PRBool* aWasTransformed, PRBool aResetTransformBuf, PRBool aForLineBreak, PRBool aIsKeyboardSelect) { const nsTextFragment* frag = mFrag; PRInt32 fragLen = frag->GetLength(); #ifdef IBMBIDI if (*aWordLenResult > 0 && *aWordLenResult < fragLen) { fragLen = *aWordLenResult; } #endif PRInt32 offset = mOffset; PRInt32 wordLen = 0; PRBool isWhitespace = PR_FALSE; PRUnichar* result = nsnull; PRBool prevBufferPos; PRBool skippedWhitespace = PR_FALSE; // Initialize OUT parameter *aWasTransformed = PR_FALSE; // See if we should reset the current buffer position back to the // beginning of the buffer if (aResetTransformBuf) { mBufferPos = 0; SetTransformedTextIsAscii(LeaveAsAscii()); } prevBufferPos = mBufferPos; // Fix word breaking problem w/ PREFORMAT and PREWRAP // for word breaking, we should really go to the normal code if((! aForLineBreak) && (eNormal != mMode)) mMode = eNormal; while (offset < fragLen) { PRUnichar firstChar = frag->CharAt(offset); // Eat up any discarded characters before dispatching if (IS_DISCARDED(firstChar)) { offset++; continue; } switch (mMode) { default: case eNormal: if (XP_IS_SPACE(firstChar)) { offset = ScanNormalWhiteSpace_F(); // if this is just a '\n', and characters before and after it are CJK chars, // we will skip this one. 
if (firstChar == '\n' && offset - mOffset == 1 && mOffset > 0 && offset < fragLen) { PRUnichar lastChar = frag->CharAt(mOffset - 1); PRUnichar nextChar = frag->CharAt(offset); if (IS_CJ_CHAR(lastChar) && IS_CJ_CHAR(nextChar)) { skippedWhitespace = PR_TRUE; --mBufferPos; mOffset = offset; continue; } } if (firstChar != ' ') { *aWasTransformed = PR_TRUE; } wordLen = 1; isWhitespace = PR_TRUE; } else if (CH_NBSP == firstChar && !aForLineBreak) { wordLen = 1; isWhitespace = PR_TRUE; *aWasTransformed = PR_TRUE; // Make sure we have enough room in the transform buffer if (mBufferPos >= mTransformBuf.mBufferLen) { mTransformBuf.GrowBy(128); } offset++; if (TransformedTextIsAscii()) { ((unsigned char*)mTransformBuf.mBuffer)[mBufferPos++] = ' '; } else { mTransformBuf.mBuffer[mBufferPos++] = PRUnichar(' '); } } else if (frag->Is2b()) { #ifdef IBMBIDI wordLen = *aWordLenResult; #endif offset = ScanNormalUnicodeText_F(aForLineBreak, &wordLen, aWasTransformed); } else { if (!aForLineBreak) offset = ScanNormalAsciiText_F_ForWordBreak(&wordLen, aWasTransformed, aIsKeyboardSelect); else offset = ScanNormalAsciiText_F(&wordLen, aWasTransformed); } break; case ePreformatted: if (('\n' == firstChar) || ('\t' == firstChar)) { mTransformBuf.mBuffer[mBufferPos++] = firstChar; offset++; wordLen = 1; isWhitespace = PR_TRUE; } else if (frag->Is2b()) { offset = ScanPreData_F(&wordLen, aWasTransformed); } else { offset = ScanPreAsciiData_F(&wordLen, aWasTransformed); } break; case ePreWrap: if (XP_IS_SPACE(firstChar)) { if (('\n' == firstChar) || ('\t' == firstChar)) { mTransformBuf.mBuffer[mBufferPos++] = firstChar; offset++; wordLen = 1; } else { offset = ScanPreWrapWhiteSpace_F(&wordLen); } isWhitespace = PR_TRUE; } else if (frag->Is2b()) { #ifdef IBMBIDI wordLen = *aWordLenResult; #endif offset = ScanNormalUnicodeText_F(aForLineBreak, &wordLen, aWasTransformed); } else { if (!aForLineBreak) offset = ScanNormalAsciiText_F_ForWordBreak(&wordLen, aWasTransformed, aIsKeyboardSelect); else 
offset = ScanNormalAsciiText_F(&wordLen, aWasTransformed); } break; } if (TransformedTextIsAscii()) { unsigned char* wordPtr = (unsigned char*)mTransformBuf.mBuffer + prevBufferPos; if (!isWhitespace) { switch (mTextTransform) { case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: *wordPtr = toupper(*wordPtr); break; case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: AsciiToLowerCase(wordPtr, wordLen); break; case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: AsciiToUpperCase(wordPtr, wordLen); break; } NS_ASSERTION(mLanguageSpecificTransformType == eLanguageSpecificTransformType_None, "should not be ASCII for language specific transforms"); } result = (PRUnichar*)wordPtr; } else { result = &mTransformBuf.mBuffer[prevBufferPos]; if (!isWhitespace) { switch (mTextTransform) { case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: if(NS_SUCCEEDED(EnsureCaseConv())) gCaseConv->ToTitle(result, result, wordLen, !aInWord); // if the first character is szlig if(kSzlig == *result) { if ((prevBufferPos + wordLen + 1) >= mTransformBuf.mBufferLen) { mTransformBuf.GrowBy(128); result = &mTransformBuf.mBuffer[prevBufferPos]; } PRUnichar* src = result + wordLen; while(src>result) { *(src+1) = *src; src--; } result[0] = PRUnichar('S'); result[1] = PRUnichar('S'); wordLen++; } break; case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: if(NS_SUCCEEDED(EnsureCaseConv())) gCaseConv->ToLower(result, result, wordLen); break; case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: { if(NS_SUCCEEDED(EnsureCaseConv())) gCaseConv->ToUpper(result, result, wordLen); // first we search for German Szlig PRInt32 szligCnt = CountGermanSzlig(result, wordLen); if(szligCnt > 0) { // Make sure we have enough room in the transform buffer if ((prevBufferPos + wordLen + szligCnt) >= mTransformBuf.mBufferLen) { mTransformBuf.GrowBy(128); result = &mTransformBuf.mBuffer[prevBufferPos]; } ReplaceGermanSzligToSS(result, wordLen, szligCnt); wordLen += szligCnt; } } break; } if (mLanguageSpecificTransformType != eLanguageSpecificTransformType_None) { 
LanguageSpecificTransform(result, wordLen, aWasTransformed); } if (NeedsArabicShaping()) { DoArabicShaping(result, wordLen, aWasTransformed); } if (NeedsNumericShaping()) { DoNumericShaping(result, wordLen, aWasTransformed); } } } break; } *aIsWhiteSpaceResult = isWhitespace; *aWordLenResult = wordLen; *aContentLenResult = offset - mOffset; // we need to adjust the length if a '\n' has been skip between CJK chars *aContentLenResult += (skippedWhitespace ? 1 : 0); // If the word length doesn't match the content length then we transformed // the text if ((mTextTransform != NS_STYLE_TEXT_TRANSFORM_NONE) || (*aWordLenResult != *aContentLenResult)) { *aWasTransformed = PR_TRUE; mBufferPos = prevBufferPos + *aWordLenResult; } mOffset = offset; NS_ASSERTION(mBufferPos == prevBufferPos + *aWordLenResult, "internal error"); return result; } //---------------------------------------------------------------------- // wordlen==1, contentlen=newOffset-currentOffset, isWhitespace=t PRInt32 nsTextTransformer::ScanNormalWhiteSpace_B() { const nsTextFragment* frag = mFrag; PRInt32 offset = mOffset; while (--offset >= 0) { PRUnichar ch = frag->CharAt(offset); if (!XP_IS_SPACE(ch)) { // If character is not discardable then stop looping, otherwise // let the discarded character collapse with the other spaces. 
if (!IS_DISCARDED(ch)) { break; } } } mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = ' '; return offset; } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f PRInt32 nsTextTransformer::ScanNormalAsciiText_B(PRInt32* aWordLen, PRBool aIsKeyboardSelect) { const nsTextFragment* frag = mFrag; PRInt32 offset = mOffset; PRUnichar* bp = mTransformBuf.GetBufferEnd(); PRUnichar* startbp = mTransformBuf.GetBuffer(); PRUnichar ch = frag->CharAt(offset - 1); // Treat high bit chars as alphanumeric, otherwise we get stuck on accented letters // We can't trust isalnum() results for isalnum() // Therefore we don't stop at non-ascii (high bit) punctuation, // which is just fine. The punctuation we care about is low bit. PRBool readingAlphaNumeric = isalnum(ch) || !IS_ASCII_CHAR(ch); while (--offset >= 0) { PRUnichar ch = frag->CharAt(offset); if (CH_NBSP == ch) { ch = ' '; } if (XP_IS_SPACE(ch)) { break; } else if (IS_DISCARDED(ch)) { continue; } else if (sWordSelectStopAtPunctuation && readingAlphaNumeric && !isalnum(ch) && IS_ASCII_CHAR(ch)) { // Break on ascii punctuation break; } else if (sWordSelectStopAtPunctuation && !readingAlphaNumeric && (isalnum(ch) || !IS_ASCII_CHAR(ch))) { if (!aIsKeyboardSelect) break; readingAlphaNumeric = PR_TRUE; } if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE); if (bp == startbp) { PRInt32 oldLength = mTransformBuf.mBufferLen; nsresult rv = mTransformBuf.GrowBy(1000); if (NS_FAILED(rv)) { // If we run out of space (unlikely) then just chop the input break; } bp = mTransformBuf.GetBufferEnd() - oldLength; startbp = mTransformBuf.GetBuffer(); } *--bp = ch; } *aWordLen = mTransformBuf.GetBufferEnd() - bp; return offset; } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f PRInt32 nsTextTransformer::ScanNormalUnicodeText_B(PRBool aForLineBreak, PRInt32* aWordLen) { const nsTextFragment* frag = mFrag; const PRUnichar* cp0 = frag->Get2b(); PRInt32 offset = mOffset - 1; PRUnichar firstChar = 
frag->CharAt(offset); #ifdef IBMBIDI PRInt32 limit = (*aWordLen > 0) ? *aWordLen : 0; while (offset > limit && IS_BIDI_CONTROL(firstChar) ) { firstChar = frag->CharAt(--offset); } #endif mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = firstChar; if (firstChar > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE); PRInt32 numChars = 1; #ifdef IBMBIDI if (offset > limit) { #else if (offset > 0) { #endif const PRUnichar* cp = cp0 + offset; PRBool breakBetween = PR_FALSE; if (aForLineBreak) { mLineBreaker->BreakInBetween(cp0, offset + 1, mTransformBuf.GetBufferEnd()-1, 1, &breakBetween); } else { mWordBreaker->BreakInBetween(cp0, offset + 1, mTransformBuf.GetBufferEnd()-1, 1, &breakBetween); } if (!breakBetween) { // Find next position PRBool tryPrevFrag; PRUint32 prev; if (aForLineBreak) { mLineBreaker->Prev(cp0, offset, offset, &prev, &tryPrevFrag); } else { mWordBreaker->PrevWord(cp0, offset, offset, &prev, &tryPrevFrag); } numChars = (PRInt32) ((PRUint32) offset - prev) + 1; // Grow buffer before copying nsresult rv = mTransformBuf.GrowTo(numChars); if (NS_FAILED(rv)) { numChars = mTransformBuf.GetBufferLength(); } // 1. convert nbsp into space // 2. check mHasMultibyte flag // 3. 
copy buffer PRUnichar* bp = mTransformBuf.GetBufferEnd() - 1; const PRUnichar* end = cp - numChars + 1; while (cp > end) { PRUnichar ch = *--cp; if (CH_NBSP == ch) { ch = ' '; } else if (IS_DISCARDED(ch)) { continue; } if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE); *--bp = ch; } // Recompute offset and numChars in case we stripped something offset = offset - numChars; numChars = mTransformBuf.GetBufferEnd() - bp; } } else offset--; *aWordLen = numChars; return offset; } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=t PRInt32 nsTextTransformer::ScanPreWrapWhiteSpace_B(PRInt32* aWordLen) { const nsTextFragment* frag = mFrag; PRInt32 offset = mOffset; PRUnichar* bp = mTransformBuf.GetBufferEnd(); PRUnichar* startbp = mTransformBuf.GetBuffer(); while (--offset >= 0) { PRUnichar ch = frag->CharAt(offset); if (!XP_IS_SPACE(ch) || (ch == '\t') || (ch == '\n')) { // Keep looping if this is a discarded character if (IS_DISCARDED(ch)) { continue; } break; } if (bp == startbp) { PRInt32 oldLength = mTransformBuf.mBufferLen; nsresult rv = mTransformBuf.GrowBy(1000); if (NS_FAILED(rv)) { // If we run out of space (unlikely) then just chop the input break; } bp = mTransformBuf.GetBufferEnd() - oldLength; startbp = mTransformBuf.GetBuffer(); } *--bp = ' '; } *aWordLen = mTransformBuf.GetBufferEnd() - bp; return offset; } // wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f PRInt32 nsTextTransformer::ScanPreData_B(PRInt32* aWordLen) { const nsTextFragment* frag = mFrag; PRInt32 offset = mOffset; PRUnichar* bp = mTransformBuf.GetBufferEnd(); PRUnichar* startbp = mTransformBuf.GetBuffer(); while (--offset >= 0) { PRUnichar ch = frag->CharAt(offset); if ((ch == '\t') || (ch == '\n')) { break; } if (CH_NBSP == ch) { ch = ' '; } else if (IS_DISCARDED(ch)) { continue; } if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE); if (bp == startbp) { PRInt32 oldLength = mTransformBuf.mBufferLen; nsresult rv = mTransformBuf.GrowBy(1000); if 
(NS_FAILED(rv)) { // If we run out of space (unlikely) then just chop the input offset++; break; } bp = mTransformBuf.GetBufferEnd() - oldLength; startbp = mTransformBuf.GetBuffer(); } *--bp = ch; } *aWordLen = mTransformBuf.GetBufferEnd() - bp; return offset; } //---------------------------------------- PRUnichar* nsTextTransformer::GetPrevWord(PRBool aInWord, PRInt32* aWordLenResult, PRInt32* aContentLenResult, PRBool* aIsWhiteSpaceResult, PRBool aForLineBreak, PRBool aIsKeyboardSelect) { const nsTextFragment* frag = mFrag; PRInt32 offset = mOffset; PRInt32 wordLen = 0; PRBool isWhitespace = PR_FALSE; PRUnichar* result = nsnull; // Fix word breaking problem w/ PREFORMAT and PREWRAP // for word breaking, we should really go to the normal code if((! aForLineBreak) && (eNormal != mMode)) mMode = eNormal; #ifdef IBMBIDI PRInt32 limit = (*aWordLenResult > 0) ? *aWordLenResult : 0; while (--offset >= limit) { #else while (--offset >= 0) { #endif PRUnichar firstChar = frag->CharAt(offset); // Eat up any discarded characters before dispatching if (IS_DISCARDED(firstChar)) { continue; } switch (mMode) { default: case eNormal: if (XP_IS_SPACE(firstChar)) { offset = ScanNormalWhiteSpace_B(); wordLen = 1; isWhitespace = PR_TRUE; } else if (CH_NBSP == firstChar && !aForLineBreak) { wordLen = 1; isWhitespace = PR_TRUE; mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = ' '; offset--; } else if (frag->Is2b()) { #ifdef IBMBIDI wordLen = *aWordLenResult; #endif offset = ScanNormalUnicodeText_B(aForLineBreak, &wordLen); } else { offset = ScanNormalAsciiText_B(&wordLen, aIsKeyboardSelect); } break; case ePreformatted: if (('\n' == firstChar) || ('\t' == firstChar)) { mTransformBuf.mBuffer[mTransformBuf.mBufferLen-1] = firstChar; offset--; // make sure we overshoot wordLen = 1; isWhitespace = PR_TRUE; } else { offset = ScanPreData_B(&wordLen); } break; case ePreWrap: if (XP_IS_SPACE(firstChar)) { if (('\n' == firstChar) || ('\t' == firstChar)) { 
mTransformBuf.mBuffer[mTransformBuf.mBufferLen-1] = firstChar; offset--; // make sure we overshoot wordLen = 1; } else { offset = ScanPreWrapWhiteSpace_B(&wordLen); } isWhitespace = PR_TRUE; } else if (frag->Is2b()) { #ifdef IBMBIDI wordLen = *aWordLenResult; #endif offset = ScanNormalUnicodeText_B(aForLineBreak, &wordLen); } else { offset = ScanNormalAsciiText_B(&wordLen, aIsKeyboardSelect); } break; } // Backwards scanning routines *always* overshoot by one for the // returned offset value. offset = offset + 1; result = mTransformBuf.GetBufferEnd() - wordLen; if (!isWhitespace) { switch (mTextTransform) { case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: if(NS_SUCCEEDED(EnsureCaseConv())) gCaseConv->ToTitle(result, result, wordLen, !aInWord); break; case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: if(NS_SUCCEEDED(EnsureCaseConv())) gCaseConv->ToLower(result, result, wordLen); break; case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: if(NS_SUCCEEDED(EnsureCaseConv())) gCaseConv->ToUpper(result, result, wordLen); break; } } break; } *aWordLenResult = wordLen; *aContentLenResult = mOffset - offset; *aIsWhiteSpaceResult = isWhitespace; mOffset = offset; return result; } void nsTextTransformer::DoArabicShaping(PRUnichar* aText, PRInt32& aTextLength, PRBool* aWasTransformed) { if (aTextLength <= 0) return; PRInt32 newLen; PRBool isVisual; mPresContext->IsVisualMode(isVisual); nsAutoString buf; buf.SetLength(aTextLength); PRUnichar* buffer = (PRUnichar*)buf.get(); ArabicShaping(aText, buf.Length(), buffer, (PRUint32 *)&newLen, !isVisual, !isVisual); aTextLength = newLen; *aWasTransformed = PR_TRUE; StripZeroWidthJoinControls(buffer, aText, aTextLength, aWasTransformed); } void nsTextTransformer::DoNumericShaping(PRUnichar* aText, PRInt32& aTextLength, PRBool* aWasTransformed) { if (aTextLength <= 0) return; PRUint32 bidiOptions; mPresContext->GetBidi(&bidiOptions); switch (GET_BIDI_OPTION_NUMERAL(bidiOptions)) { case IBMBIDI_NUMERAL_HINDI: HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI); 
break; case IBMBIDI_NUMERAL_ARABIC: HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC); break; case IBMBIDI_NUMERAL_REGULAR: switch (mCharType) { case eCharType_EuropeanNumber: HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC); break; case eCharType_ArabicNumber: HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI); break; default: break; } break; case IBMBIDI_NUMERAL_HINDICONTEXT: if (((GET_BIDI_OPTION_DIRECTION(bidiOptions)==IBMBIDI_TEXTDIRECTION_RTL) && (IS_ARABIC_DIGIT (aText[0]))) || (eCharType_ArabicNumber == mCharType)) HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI); else if (eCharType_EuropeanNumber == mCharType) HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC); break; case IBMBIDI_NUMERAL_NOMINAL: default: break; } } void nsTextTransformer::StripZeroWidthJoinControls(PRUnichar* aSource, PRUnichar* aTarget, PRInt32& aTextLength, PRBool* aWasTransformed) { PRUnichar *src, *dest; PRInt32 stripped = 0; src = aSource; dest = aTarget; for (PRInt32 i = 0; i < aTextLength; ++i) { while (*src == CH_ZWNJ || *src == CH_ZWJ) { ++stripped; ++src; *aWasTransformed = PR_TRUE; } *dest++ = *src++; } aTextLength -= stripped; } //---------------------------------------------------------------------- // Self test logic for this class. This will (hopefully) make sure // that the forward and backward word iterator methods continue to // function as people change things... 
#ifdef DEBUG
// Expected word lengths for one whitespace mode of one test string.
struct SelfTestSection {
  int length;   // number of entries in |data|
  int* data;    // expected word lengths, in forward order
};

#define NUM_MODES 3

// One test string plus the expected results for each whitespace mode,
// indexed in the same order as preModeValue.
struct SelfTestData {
  const PRUnichar* text;
  SelfTestSection modes[NUM_MODES];
};

// Whitespace style passed to Init2 for each mode index.
static PRUint8 preModeValue[NUM_MODES] = {
  NS_STYLE_WHITESPACE_NORMAL,
  NS_STYLE_WHITESPACE_PRE,
  NS_STYLE_WHITESPACE_MOZ_PRE_WRAP
};

static PRUnichar test1text[] = {
  'o', 'n', 'c', 'e', ' ', 'u', 'p', 'o', 'n', '\t',
  'a', ' ', 's', 'h', 'o', 'r', 't', ' ', 't', 'i', 'm', 'e', 0
};
static int test1Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
static int test1PreResults[] = { 9, 1, 12 };
static int test1PreWrapResults[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };

static PRUnichar test2text[] = {
  0xF6, 'n', 'c', 'e', ' ', 0xFB, 'p', 'o', 'n', '\t',
  0xE3, ' ', 's', 'h', 0xF3, 'r', 't', ' ', 't', 0xEE, 'm', 'e', ' ', 0
};
static int test2Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4, 1 };
static int test2PreResults[] = { 9, 1, 13 };
static int test2PreWrapResults[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4, 1 };

static PRUnichar test3text[] = {
  0x0152, 'n', 'c', 'e', ' ', 'x', 'y', '\t', 'z', 'y', ' ', 0
};
static int test3Results[] = { 4, 1, 2, 1, 2, 1, };
static int test3PreResults[] = { 7, 1, 3, };
static int test3PreWrapResults[] = { 4, 1, 2, 1, 2, 1, };

static PRUnichar test4text[] = {
  'o', 'n', CH_SHY, 'c', 'e', ' ', CH_SHY, ' ', 'u', 'p', 'o', 'n', '\t',
  'a', ' ', 's', 'h', 'o', 'r', 't', ' ', 't', 'i', 'm', 'e', 0
};
static int test4Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
static int test4PreResults[] = { 10, 1, 12 };
static int test4PreWrapResults[] = { 4, 2, 4, 1, 1, 1, 5, 1, 4 };

// A leading expected value of 0 tells SelfTest that producing fewer words
// than listed is not an error for this entry.
static PRUnichar test5text[] = {
  CH_SHY, 0
};
static int test5Results[] = { 0 };
static int test5PreResults[] = { 0 };
static int test5PreWrapResults[] = { 0 };

#if 0
static PRUnichar test6text[] = {
  0x30d5, 0x30b8, 0x30c6, 0x30ec, 0x30d3, 0x306e, 0x97f3, 0x697d, 0x756a,
  0x7d44, 0x300c, 'H', 'E', 'Y', '!', ' ', 'H', 'E', 'Y', '!', '\t',
  'H', 'E', 'Y', '!', 0x300d, 0x306e, 0x30db, 0x30fc, 0x30e0, 0x30da,
  0x30fc, 0x30b8, 0x3002, 0
};
static int test6Results[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 4, 1, 5, 1, 2, 1, 2, 2 };
static int test6PreResults[] = { 20, 1, 13 };
static int test6PreWrapResults[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 4, 1, 5, 1, 2, 1, 2, 2 };
#endif

static SelfTestData tests[] = {
  { test1text,
    { { sizeof(test1Results)/sizeof(int), test1Results, },
      { sizeof(test1PreResults)/sizeof(int), test1PreResults, },
      { sizeof(test1PreWrapResults)/sizeof(int), test1PreWrapResults, } }
  },
  { test2text,
    { { sizeof(test2Results)/sizeof(int), test2Results, },
      { sizeof(test2PreResults)/sizeof(int), test2PreResults, },
      { sizeof(test2PreWrapResults)/sizeof(int), test2PreWrapResults, } }
  },
  { test3text,
    { { sizeof(test3Results)/sizeof(int), test3Results, },
      { sizeof(test3PreResults)/sizeof(int), test3PreResults, },
      { sizeof(test3PreWrapResults)/sizeof(int), test3PreWrapResults, } }
  },
  { test4text,
    { { sizeof(test4Results)/sizeof(int), test4Results, },
      { sizeof(test4PreResults)/sizeof(int), test4PreResults, },
      { sizeof(test4PreWrapResults)/sizeof(int), test4PreWrapResults, } }
  },
  { test5text,
    { { sizeof(test5Results)/sizeof(int), test5Results, },
      { sizeof(test5PreResults)/sizeof(int), test5PreResults, },
      { sizeof(test5PreWrapResults)/sizeof(int), test5PreWrapResults, } }
  },
#if 0
  { test6text,
    { { sizeof(test6Results)/sizeof(int), test6Results, },
      { sizeof(test6PreResults)/sizeof(int), test6PreResults, },
      { sizeof(test6PreWrapResults)/sizeof(int), test6PreWrapResults, } }
  },
#endif
};

#define NUM_TESTS (sizeof(tests) / sizeof(tests[0]))

// Runs every entry of |tests| through GetNextWord (forwards) and GetPrevWord
// (backwards) in each whitespace mode and compares the reported word lengths
// with the expected tables. Calls NS_ABORT() if any comparison failed.
//
// Setting the environment variable GECKO_TEXT_TRANSFORMER_NOISY_SELF_TEST
// makes the test print every word it sees.
//
// @param aLineBreaker, aWordBreaker, aPresContext  forwarded to the
//        nsTextTransformer constructor
void
nsTextTransformer::SelfTest(nsILineBreaker* aLineBreaker,
                            nsIWordBreaker* aWordBreaker,
                            nsIPresContext* aPresContext)
{
  PRBool gNoisy = PR_FALSE;
  if (PR_GetEnv("GECKO_TEXT_TRANSFORMER_NOISY_SELF_TEST")) {
    gNoisy = PR_TRUE;
  }

  // Note: |error| is sticky; once a test fails every later test is reported
  // as failed too.
  PRBool error = PR_FALSE;
  PRInt32 testNum = 0;
  SelfTestData* st = tests;
  SelfTestData* last = st + NUM_TESTS;
  for (; st < last; st++) {
    PRUnichar* bp;
    PRInt32 wordLen, contentLen;
    PRBool ws, transformed;

    // Only used to label the noisy output.
    PRBool isAsciiTest = PR_TRUE;
    const PRUnichar* cp = st->text;
    while (*cp) {
      if (*cp > 255) {
        isAsciiTest = PR_FALSE;
        break;
      }
      cp++;
    }

    nsTextFragment frag(st->text);
    nsTextTransformer tx(aLineBreaker, aWordBreaker, aPresContext);

    for (PRInt32 preMode = 0; preMode < NUM_MODES; preMode++) {
      // Do forwards test
      if (gNoisy) {
        nsAutoString uc2(st->text);
        printf("%s forwards test: '", isAsciiTest ? "ascii" : "unicode");
        fputs(NS_ConvertUCS2toUTF8(uc2).get(), stdout);
        printf("'\n");
      }
      tx.Init2(&frag, 0, preModeValue[preMode], NS_STYLE_TEXT_TRANSFORM_NONE);

      int resultsLen = st->modes[preMode].length;
      int* const expectedBegin = st->modes[preMode].data;
      int* const expectedEnd = expectedBegin + resultsLen;
      int* expectedResults = expectedBegin;

#ifdef IBMBIDI
      wordLen = -1;
#endif
      while ((bp = tx.GetNextWord(PR_FALSE, &wordLen, &contentLen, &ws, &transformed))) {
        // Never dereference past the end of the expectation table; -1 is
        // printed when the transformer produced more words than expected.
        const PRBool haveExpected = (expectedResults < expectedEnd);
        if (gNoisy) {
          nsAutoString tmp(bp, wordLen);
          printf(" '");
          fputs(NS_ConvertUCS2toUTF8(tmp).get(), stdout);
          printf("': ws=%s wordLen=%d (%d) contentLen=%d (offset=%d)\n",
                 ws ? "yes" : "no",
                 wordLen, haveExpected ? *expectedResults : -1,
                 contentLen, tx.mOffset);
        }
        if (!haveExpected || *expectedResults != wordLen) {
          error = PR_TRUE;
          break;
        }
        expectedResults++;
#ifdef IBMBIDI
        wordLen = -1;
#endif
      }
      if (expectedResults != expectedEnd) {
        if (expectedBegin[0] != 0) {
          error = PR_TRUE;
        }
      }

      // Do backwards test
      if (gNoisy) {
        nsAutoString uc2(st->text);
        printf("%s backwards test: '", isAsciiTest ? "ascii" : "unicode");
        fputs(NS_ConvertUCS2toUTF8(uc2).get(), stdout);
        printf("'\n");
      }
      // Select the mode explicitly: the expectations compared below are the
      // ones for |preMode|, so the backwards scan has to run in that mode.
      tx.Init2(&frag, frag.GetLength(), preModeValue[preMode],
               NS_STYLE_TEXT_TRANSFORM_NONE);
      expectedResults = expectedEnd;
#ifdef IBMBIDI
      wordLen = -1;
#endif
      while ((bp = tx.GetPrevWord(PR_FALSE, &wordLen, &contentLen, &ws))) {
        if (gNoisy) {
          nsAutoString tmp(bp, wordLen);
          printf(" '");
          fputs(NS_ConvertUCS2toUTF8(tmp).get(), stdout);
          printf("': ws=%s wordLen=%d contentLen=%d (offset=%d)\n",
                 ws ? "yes" : "no",
                 wordLen, contentLen, tx.mOffset);
        }
        // More words than expected: fail instead of stepping before the
        // start of the expectation table.
        if (expectedResults == expectedBegin) {
          error = PR_TRUE;
          break;
        }
        --expectedResults;
        if (*expectedResults != wordLen) {
          error = PR_TRUE;
          break;
        }
#ifdef IBMBIDI
        wordLen = -1;
#endif
      }
      if (expectedResults != expectedBegin) {
        if (expectedBegin[0] != 0) {
          error = PR_TRUE;
        }
      }

      if (error) {
        fprintf(stderr, "nsTextTransformer: self test %d failed\n", testNum);
      }
      else if (gNoisy) {
        fprintf(stdout, "nsTextTransformer: self test %d succeeded\n", testNum);
      }
      testNum++;
    }
  }
  if (error) {
    NS_ABORT();
  }
}

// Debug-only initializer used by SelfTest: points the transformer at aFrag
// without needing a frame.
//
// @param aFrag            text fragment to scan
// @param aStartingOffset  initial mOffset; clamped to [0, aFrag->GetLength()]
//                         with a warning when out of range
// @param aWhiteSpace      NS_STYLE_WHITESPACE_* value selecting the scan mode;
//                         anything other than PRE / MOZ_PRE_WRAP selects
//                         eNormal
// @param aTextTransform   NS_STYLE_TEXT_TRANSFORM_* value stored in
//                         mTextTransform
// @return always NS_OK
nsresult
nsTextTransformer::Init2(const nsTextFragment* aFrag,
                         PRInt32 aStartingOffset,
                         PRUint8 aWhiteSpace,
                         PRUint8 aTextTransform)
{
  mFrag = aFrag;

  // Sanitize aStartingOffset
  if (aStartingOffset < 0) {
    NS_WARNING("bad starting offset");
    aStartingOffset = 0;
  }
  else if (aStartingOffset > mFrag->GetLength()) {
    NS_WARNING("bad starting offset");
    aStartingOffset = mFrag->GetLength();
  }
  mOffset = aStartingOffset;

  // Get the frames text style information
  if (NS_STYLE_WHITESPACE_PRE == aWhiteSpace) {
    mMode = ePreformatted;
  }
  else if (NS_STYLE_WHITESPACE_MOZ_PRE_WRAP == aWhiteSpace) {
    mMode = ePreWrap;
  }
  else {
    // Reset explicitly so a re-initialized transformer does not keep the
    // mode chosen by an earlier Init2 call.
    mMode = eNormal;
  }
  mTextTransform = aTextTransform;

  return NS_OK;
}
#endif /* DEBUG */