/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* * The contents of this file are subject to the Netscape Public License * Version 1.0 (the "NPL"); you may not use this file except in * compliance with the NPL. You may obtain a copy of the NPL at * http://www.mozilla.org/NPL/ * * Software distributed under the NPL is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL * for the specific language governing rights and limitations under the * NPL. * * The Initial Developer of this code under the NPL is Netscape * Communications Corporation. Portions created by Netscape are * Copyright (C) 1998 Netscape Communications Corporation. All Rights * Reserved. */ #include #include #include #include "nsScanner.h" #include "nsToken.h" #include "nsHTMLTokens.h" #include "nsIParser.h" #include "prtypes.h" #include "nsDebug.h" #include "nsHTMLTags.h" #include "nsHTMLEntities.h" #include "nsCRT.h" #include "nsStr.h" //#define GESS_MACHINE #ifdef GESS_MACHINE #include "nsEntityEx.cpp" #endif static const char* gUserdefined = "userdefined"; const PRInt32 kMAXNAMELEN=10; /************************************************************** And now for the token classes... **************************************************************/ /* * default constructor * * @update gess 3/25/98 * @param * @return */ CHTMLToken::CHTMLToken(const nsString& aName,eHTMLTags aTag) : CToken(aName) { mTypeID=aTag; } /* * constructor from tag id * * @update gess 3/25/98 * @param * @return */ CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(aTag) { } /** * Setter method that changes the string value of this token * @update gess5/11/98 * @param name is a char* value containing new string value */ void CHTMLToken::SetStringValue(const char* name){ if(name) { mTextValue=name; mTypeID = NS_TagToEnum(name); } } /* * constructor from tag id * * @update gess 3/25/98 * @param * @return */ CStartToken::CStartToken(eHTMLTags aTag) : CHTMLToken(aTag) { mAttributed=PR_FALSE; mEmpty=PR_FALSE; } /* * constructor from tag id * * @update gess 3/25/98 * @param * @return */ CStartToken::CStartToken(nsString& aString,eHTMLTags aTag) : CHTMLToken(aString,aTag) { mAttributed=PR_FALSE; mEmpty=PR_FALSE; } /** * * @update gess8/4/98 * @param * @return */ void CStartToken::Reinitialize(PRInt32 aTag, const nsString& aString){ CToken::Reinitialize(aTag,aString); mAttributed=PR_FALSE; mEmpty=PR_FALSE; } /* * This method returns the typeid (the tag type) for this token. * * @update gess 3/25/98 * @param * @return */ PRInt32 CStartToken::GetTypeID(){ if(eHTMLTag_unknown==mTypeID) { nsAutoString tmp(mTextValue); char cbuf[20]; tmp.ToCString(cbuf, sizeof(cbuf)); mTypeID = NS_TagToEnum(cbuf); } return mTypeID; } /* * * * @update gess 3/25/98 * @param * @return */ const char* CStartToken::GetClassName(void) { return "start"; } /* * * * @update gess 3/25/98 * @param * @return */ PRInt32 CStartToken::GetTokenType(void) { return eToken_start; } /* * * * @update gess 3/25/98 * @param * @return */ void CStartToken::SetAttributed(PRBool aValue) { mAttributed=aValue; } /* * * * @update gess 3/25/98 * @param * @return */ PRBool CStartToken::IsAttributed(void) { return mAttributed; } /* * * * @update gess 3/25/98 * @param * @return */ void CStartToken::SetEmpty(PRBool aValue) { mEmpty=aValue; } /* * * * @update gess 3/25/98 * @param * @return */ PRBool CStartToken::IsEmpty(void) { return mEmpty; } static nsString& GetIdentChars(void) { static nsString gIdentChars("-0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"); return gIdentChars; } static nsString& GetNumericChars(void) { static nsString gNumChars("0123456789ABCDEFabcdef"); return gNumChars; } /* * Consume the identifier portion of the start tag * * @update gess 3/25/98 * @param aChar -- last char consumed from stream * @param aScanner -- controller of underlying input source * @return error result */ nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner) { //if you're here, we've already Consumed the < char, and are //ready to Consume the rest of the open tag identifier. //Stop consuming as soon as you see a space or a '>'. //NOTE: We don't Consume the tag attributes here, nor do we eat the ">" mTextValue=aChar; nsresult result=aScanner.ReadWhile(mTextValue,GetIdentChars(),PR_TRUE,PR_FALSE); char buffer[300]; mTextValue.ToCString(buffer,sizeof(buffer)-1); mTypeID = NS_TagToEnum(buffer); if(eHTMLTag_image==mTypeID){ mTypeID=eHTMLTag_img; } //Good. Now, let's skip whitespace after the identifier, //and see if the next char is ">". If so, we have a complete //tag without attributes. if(NS_OK==result) { result=aScanner.SkipWhitespace(); if(NS_OK==result) { result=aScanner.GetChar(aChar); if(NS_OK==result) { if(kGreaterThan!=aChar) { //look for '>' //push that char back, since we apparently have attributes... result=aScanner.PutBack(aChar); mAttributed=PR_TRUE; } //if } //if }//if } return result; } /* * Dump contents of this token to givne output stream * * @update gess 3/25/98 * @param out -- ostream to output content * @return */ void CStartToken::DebugDumpSource(ostream& out) { char buffer[1000]; mTextValue.ToCString(buffer,sizeof(buffer)-1); out << "<" << buffer; if(!mAttributed) out << ">"; } /* * constructor from tag id * * @update gess 3/25/98 * @param * @return */ CEndToken::CEndToken(eHTMLTags aTag) : CHTMLToken(aTag) { } /* * default constructor for end token * * @update gess 3/25/98 * @param aName -- char* containing token name * @return */ CEndToken::CEndToken(const nsString& aName) : CHTMLToken(aName) { } /* * Consume the identifier portion of the end tag * * @update gess 3/25/98 * @param aChar -- last char consumed from stream * @param aScanner -- controller of underlying input source * @return error result */ nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner) { //if you're here, we've already Consumed the '. //NOTE: We don't Consume the tag attributes here, nor do we eat the ">" mTextValue=""; nsresult result=aScanner.ReadUntil(mTextValue,kGreaterThan,PR_FALSE); if(NS_OK==result){ char buffer[20]; PRInt32 theIndex=mTextValue.FindCharInSet(" \r\n\t\b",0); PRInt32 theMaxLen=(kNotFound==theIndex) ? sizeof(buffer)-1 : theIndex; mTextValue.ToCString(buffer,theMaxLen+1); buffer[theMaxLen]=0; mTypeID= NS_TagToEnum(buffer); result=aScanner.GetChar(aChar); //eat the closing '>; } return result; } /* * Asks the token to determine the HTMLTag type of * the token. This turns around and looks up the tag name * in the tag dictionary. * * @update gess 3/25/98 * @param * @return eHTMLTag id of this endtag */ PRInt32 CEndToken::GetTypeID(){ if(eHTMLTag_unknown==mTypeID) { nsAutoString tmp(mTextValue); char cbuf[200]; tmp.ToCString(cbuf, sizeof(cbuf)); mTypeID = NS_TagToEnum(cbuf); switch(mTypeID) { case eHTMLTag_dir: case eHTMLTag_menu: mTypeID=eHTMLTag_ul; break; default: break; } } return mTypeID; } /* * * * @update gess 3/25/98 * @param * @return */ const char* CEndToken::GetClassName(void) { return "/end"; } /* * * * @update gess 3/25/98 * @param * @return */ PRInt32 CEndToken::GetTokenType(void) { return eToken_end; } /* * Dump contents of this token to givne output stream * * @update gess 3/25/98 * @param out -- ostream to output content * @return */ void CEndToken::DebugDumpSource(ostream& out) { char buffer[1000]; mTextValue.ToCString(buffer,sizeof(buffer)-1); out << ""; } /* * default constructor * * @update gess 3/25/98 * @param aName -- string to init token name with * @return */ CTextToken::CTextToken() : CHTMLToken(eHTMLTag_text) { } /* * string based constructor * * @update gess 3/25/98 * @param aName -- string to init token name with * @return */ CTextToken::CTextToken(const nsString& aName) : CHTMLToken(aName) { mTypeID=eHTMLTag_text; } /* * * * @update gess 3/25/98 * @param * @return */ const char* CTextToken::GetClassName(void) { return "text"; } /* * * * @update gess 3/25/98 * @param * @return */ PRInt32 CTextToken::GetTokenType(void) { return eToken_text; } /* * Consume as much clear text from scanner as possible. * * @update gess 3/25/98 * @param aChar -- last char consumed from stream * @param aScanner -- controller of underlying input source * @return error result */ nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner) { static nsAutoString terminals("&<\r"); nsresult result=NS_OK; PRBool done=PR_FALSE; while((NS_OK==result) && (!done)) { result=aScanner.ReadUntil(mTextValue,terminals,PR_FALSE,PR_FALSE); if(NS_OK==result) { result=aScanner.Peek(aChar); if((kCR==aChar) && (NS_OK==result)) { result=aScanner.GetChar(aChar); //strip off the \r result=aScanner.Peek(aChar); //then see what's next. if(NS_OK==result) { switch(aChar) { case kCR: result=aScanner.GetChar(aChar); //strip off the \r mTextValue.Append("\n\n"); break; case kNewLine: //which means we saw \r\n, which becomes \n result=aScanner.GetChar(aChar); //strip off the \n //now fall through on purpose... default: mTextValue.Append("\n"); break; }//switch }//if } else done=PR_TRUE; } } return result; } /* * default constructor * * @update vidur 11/12/98 * @param aName -- string to init token name with * @return */ CCDATASectionToken::CCDATASectionToken() : CHTMLToken(eHTMLTag_unknown) { } /* * string based constructor * * @update vidur 11/12/98 * @param aName -- string to init token name with * @return */ CCDATASectionToken::CCDATASectionToken(const nsString& aName) : CHTMLToken(aName) { mTypeID=eHTMLTag_unknown; } /* * * * @update vidur 11/12/98 * @param * @return */ const char* CCDATASectionToken::GetClassName(void) { return "cdatasection"; } /* * * @update vidur 11/12/98 * @param * @return */ PRInt32 CCDATASectionToken::GetTokenType(void) { return eToken_cdatasection; } /* * Consume as much marked test from scanner as possible. * * @update vidur 11/12/98 * @param aChar -- last char consumed from stream * @param aScanner -- controller of underlying input source * @return error result */ nsresult CCDATASectionToken::Consume(PRUnichar aChar, nsScanner& aScanner) { static nsAutoString terminals("]\r"); nsresult result=NS_OK; PRBool done=PR_FALSE; while((NS_OK==result) && (!done)) { result=aScanner.ReadUntil(mTextValue,terminals,PR_FALSE,PR_FALSE); if(NS_OK==result) { result=aScanner.Peek(aChar); if((kCR==aChar) && (NS_OK==result)) { result=aScanner.GetChar(aChar); //strip off the \r result=aScanner.Peek(aChar); //then see what's next. if(NS_OK==result) { switch(aChar) { case kCR: result=aScanner.GetChar(aChar); //strip off the \r mTextValue.Append("\n\n"); break; case kNewLine: //which means we saw \r\n, which becomes \n result=aScanner.GetChar(aChar); //strip off the \n //now fall through on purpose... default: mTextValue.Append("\n"); break; } //switch } //if } else if (kRightSquareBracket==aChar) { result=aScanner.GetChar(aChar); //strip off the ] result=aScanner.Peek(aChar); //then see what's next. if((NS_OK==result) && (kRightSquareBracket==aChar)) { result=aScanner.GetChar(aChar); //strip off the second ] result=aScanner.Peek(aChar); //then see what's next. if(NS_OK==result) { if (kGreaterThan==aChar) { result=aScanner.GetChar(aChar); //strip off the > done=PR_TRUE; } else { // This isn't the end of the CDATA section so go on mTextValue.Append("]"); } }//if } else { // This isn't the end of the CDATA section so go on mTextValue.Append("]"); } } else done=PR_TRUE; } } return result; } /* * Default constructor * * @update gess 3/25/98 * @param aName -- string to init token name with * @return */ CCommentToken::CCommentToken() : CHTMLToken(eHTMLTag_comment) { } /* * Copy constructor * * @update gess 3/25/98 * @param * @return */ CCommentToken::CCommentToken(const nsString& aName) : CHTMLToken(aName) { mTypeID=eHTMLTag_comment; } /* * This method consumes a comment using the (CORRECT) comment parsing * algorithm supplied by W3C. * * @update gess 01/04/99 * @param * @param * @return */ static nsresult ConsumeStrictComment(PRUnichar aChar, nsScanner& aScanner,nsString& aString) { static nsAutoString gMinus("-"); nsresult result=NS_OK; /********************************************************* NOTE: This algorithm does a fine job of handling comments when they're formatted per spec, but if they're not we don't handle them well. For example, we gack on the following: *********************************************************/ aString=" aString+=aChar; result=aScanner.ReadWhile(aString,gMinus,PR_TRUE,PR_FALSE); //get all available '---' if(NS_OK==result) { PRInt32 findpos=-1; nsAutoString temp(""); //Read to the first ending sequence '--' while((kNotFound==findpos) && (NS_OK==result)) { result=aScanner.ReadUntil(temp,kMinus,PR_TRUE); findpos=temp.RFind("--"); } aString+=temp; if(NS_OK==result) { result=aScanner.ReadWhile(aString,gMinus,PR_TRUE,PR_FALSE); //get all available '---' if(NS_OK==result) { temp="->"; result=aScanner.ReadUntil(aString,temp,PR_FALSE,PR_FALSE); } } } } // else break; //go find '>' } }//if else if(kGreaterThan==aChar) { return result; } else break; //go find '>' }//if }//while if(NS_OK==result) { //if you're here, we're consuming a "short-form" comment result=aScanner.ReadUntil(aString,kGreaterThan,PR_TRUE); } return result; } /* * This method consumes a comment using common (actually non-standard) * algorithm that seems to work against the content on the web. * * @update gess 01/04/99 * @param * @param * @return */ static nsresult ConsumeComment(PRUnichar aChar, nsScanner& aScanner,nsString& aString) { static nsAutoString gEdibles("!-"); static nsAutoString gMinus("-"); static nsAutoString gWhitespace("\b\t\n\r "); nsresult result=NS_OK; /********************************************************* NOTE: This algorithm does a fine job of handling comments commonly used, but it doesn't really consume them per spec (But then, neither does IE or Nav). *********************************************************/ aString=" aString+=aChar; PRBool done=PR_FALSE; PRInt32 findpos=kNotFound; result=aScanner.ReadWhile(aString,gMinus,PR_TRUE,PR_TRUE); //get all available '---' findpos=aString.RFind("-->"); while((kNotFound==findpos) && (NS_OK==result)) { result=aScanner.ReadUntil(aString,kMinus,PR_TRUE); if(NS_OK==result) { result=aScanner.ReadWhile(aString,gMinus,PR_TRUE,PR_FALSE); //get all available '---' if(NS_OK==result) result=aScanner.ReadWhile(aString,gWhitespace,PR_TRUE,PR_FALSE); //get all available whitespace } if(NS_OK==result) { result=aScanner.GetChar(aChar); aString+=aChar; } theRightChars.Truncate(0); aString.Right(theRightChars,5); theRightChars.StripChars(" "); findpos=theRightChars.RFind("-->"); if(kNotFound==findpos) findpos=theRightChars.RFind("!>"); } //while return result; } //if }//if }//if }//if if(NS_OK==result) { //Read up to the closing '>' result=aScanner.ReadUntil(aString,kGreaterThan,PR_TRUE); } return result; } /* * Consume the identifier portion of the comment. * Note that we've already eaten the ""); nsresult result=aScanner.ReadUntil(aString,terminals,PR_FALSE,PR_FALSE); //Let's force quotes if either the first or last char is quoted. PRUnichar theLast=aString.Last(); PRUnichar theFirst=aString.First(); if(kQuote==theLast) { if(kQuote!=theFirst) { aString.Insert(kQuote,0);; } } else if(kQuote==theFirst) { if(kQuote!=theLast) { aString+=kQuote; } } return result; } /* * Consume the key and value portions of the attribute. * * @update gess 3/25/98 * @param aChar -- last char consumed from stream * @param aScanner -- controller of underlying input source * @return error result */ nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner) { nsresult result=aScanner.SkipWhitespace(); //skip leading whitespace if(NS_OK==result) { result=aScanner.Peek(aChar); if(NS_OK==result) { if(kQuote==aChar) { //if you're here, handle quoted key... result=aScanner.GetChar(aChar); //skip the quote sign... if(NS_OK==result) { result=aScanner.Peek(aChar); //peek ahead to make sure the next char is a legal attr-key if(NS_OK==result) { if(nsString::IsAlpha(aChar) || nsString::IsDigit(aChar)){ mTextKey=aChar; result=ConsumeQuotedString(aChar,mTextKey,aScanner); } else { return NS_ERROR_HTMLPARSER_BADATTRIBUTE; } } //if }//if } else if(kHashsign==aChar) { result=aScanner.GetChar(aChar); //skip the hash sign... if(NS_OK==result) { mTextKey=aChar; static nsAutoString gDigits("0123456789"); result=aScanner.ReadWhile(mTextKey,gDigits,PR_TRUE,PR_FALSE); } } else { //If you're here, handle an unquoted key. //Don't forget to reduce entities inline! static nsAutoString terminals("\b\t\n\r \"=>"); result=aScanner.ReadUntil(mTextKey,terminals,PR_TRUE,PR_FALSE); } //now it's time to Consume the (optional) value... if(NS_OK==result) { result=aScanner.SkipWhitespace(); if(NS_OK==result) { result=aScanner.Peek(aChar); //Skip ahead until you find an equal sign or a '>'... if(NS_OK==result) { if(kEqual==aChar){ result=aScanner.GetChar(aChar); //skip the equal sign... if(NS_OK==result) { result=aScanner.SkipWhitespace(); //now skip any intervening whitespace if(NS_OK==result) { result=aScanner.GetChar(aChar); //and grab the next char. if(NS_OK==result) { if((kQuote==aChar) || (kApostrophe==aChar)) { mTextValue=aChar; result=ConsumeQuotedString(aChar,mTextValue,aScanner); } else if(kGreaterThan==aChar){ result=aScanner.PutBack(aChar); } else { mTextValue=aChar; //it's an alphanum attribute... result=ConsumeAttributeValueText(aChar,mTextValue,aScanner); } }//if if(NS_OK==result) result=aScanner.SkipWhitespace(); }//if }//if }//if else { //This is where we have to handle fairly busted content. //If you're here, it means we saw an attribute name, but couldn't find //the following equal sign. REALLY ugly. //My best guess is to grab the next non-ws char. We know it's not '=', //so let's see what it is. If it's a '"', then assume we're reading //from the middle of the value. Try stripping the quote and continuing... if(kQuote==aChar){ result=aScanner.SkipOver(aChar); //strip quote. } } }//if } //if }//if if(NS_OK==result) { result=aScanner.Peek(aChar); mLastAttribute= PRBool((kGreaterThan==aChar) || (kEOF==result)); } } //if }//if return result; } /* * Dump contents of this token to givne output stream * * @update gess 3/25/98 * @param out -- ostream to output content * @return */ void CAttributeToken::DebugDumpSource(ostream& out) { static char buffer[1000]; mTextKey.ToCString(buffer,sizeof(buffer)-1); out << " " << buffer; if(mTextValue.Length()){ mTextValue.ToCString(buffer,sizeof(buffer)-1); out << "=" << buffer; } if(mLastAttribute) out<<">"; } /* * default constructor * * @update gess 3/25/98 * @param aName -- string to init token name with * @return */ CWhitespaceToken::CWhitespaceToken() : CHTMLToken(eHTMLTag_whitespace) { } /* * default constructor * * @update gess 3/25/98 * @param aName -- string value to init token name with * @return */ CWhitespaceToken::CWhitespaceToken(const nsString& aName) : CHTMLToken(aName) { mTypeID=eHTMLTag_whitespace; } /* * * * @update gess 3/25/98 * @param * @return */ const char* CWhitespaceToken::GetClassName(void) { return "ws"; } /* * * * @update gess 3/25/98 * @param * @return */ PRInt32 CWhitespaceToken::GetTokenType(void) { return eToken_whitespace; } /* * This general purpose method is used when you want to * consume an aribrary sequence of whitespace. * * @update gess 3/25/98 * @param aChar -- last char consumed from stream * @param aScanner -- controller of underlying input source * @return error result */ nsresult CWhitespaceToken::Consume(PRUnichar aChar, nsScanner& aScanner) { mTextValue=aChar; static nsAutoString theWhitespace("\b\t "); nsresult result=aScanner.ReadWhile(mTextValue,theWhitespace,PR_FALSE,PR_FALSE); if(NS_OK==result) { mTextValue.StripChars("\r"); } return result; } /* * default constructor * * @update gess 3/25/98 * @param aName -- string to init token name with * @return */ CEntityToken::CEntityToken() : CHTMLToken(eHTMLTag_entity) { } /* * default constructor * * @update gess 3/25/98 * @param aName -- string value to init token name with * @return */ CEntityToken::CEntityToken(const nsString& aName) : CHTMLToken(aName) { mTypeID=eHTMLTag_entity; #ifdef VERBOSE_DEBUG if(!VerifyEntityTable()) { cout<<"Entity table is invalid!" << endl; } #endif } /* * Consume the rest of the entity. We've already eaten the "&". * * @update gess 3/25/98 * @param aChar -- last char consumed from stream * @param aScanner -- controller of underlying input source * @return error result */ nsresult CEntityToken::Consume(PRUnichar aChar, nsScanner& aScanner) { if(aChar) mTextValue=aChar; nsresult result=ConsumeEntity(aChar,mTextValue,aScanner); return result; } /* * * * @update gess 3/25/98 * @param * @return */ const char* CEntityToken::GetClassName(void) { return "&entity"; } /* * * * @update gess 3/25/98 * @param * @return */ PRInt32 CEntityToken::GetTokenType(void) { return eToken_entity; } /* * This general purpose method is used when you want to * consume an entity &xxxx;. Keep in mind that entities * are not reduced inline. * * @update gess 3/25/98 * @param aChar -- last char consumed from stream * @param aScanner -- controller of underlying input source * @return error result */ PRInt32 CEntityToken::ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner){ PRUnichar theChar=0; PRInt32 result=aScanner.Peek(theChar); if(NS_OK==result) { if(kLeftBrace==theChar) { //you're consuming a script entity... static nsAutoString terminals("}>"); result=aScanner.ReadUntil(aString,terminals,PR_FALSE,PR_FALSE); if(NS_OK==result) { result=aScanner.Peek(theChar); if(NS_OK==result) { if(kRightBrace==theChar) { aString+=kRightBrace; //append rightbrace, and... result=aScanner.GetChar(theChar);//yank the closing right-brace } } } } //if else { if(kHashsign==aChar) { if('X'==(toupper((char)theChar))) { result=aScanner.GetChar(theChar); aString+=theChar; } if(NS_OK==result){ result=aScanner.ReadWhile(aString,GetNumericChars(),PR_TRUE,PR_FALSE); } } else result=aScanner.ReadWhile(aString,GetIdentChars(),PR_TRUE,PR_FALSE); if(NS_OK==result) { result=aScanner.Peek(theChar); if(NS_OK==result) { if (kSemicolon == theChar) { // consume semicolon that stopped the scan result=aScanner.GetChar(theChar); } } }//if } //else } //if return result; } #define PA_REMAP_128_TO_160_ILLEGAL_NCR 1 #ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR /** * Map some illegal but commonly used numeric entities into their * appropriate unicode value. */ #define NOT_USED 0xfffd static PRUint16 PA_HackTable[] = { NOT_USED, NOT_USED, 0x201a, /* SINGLE LOW-9 QUOTATION MARK */ 0x0192, /* LATIN SMALL LETTER F WITH HOOK */ 0x201e, /* DOUBLE LOW-9 QUOTATION MARK */ 0x2026, /* HORIZONTAL ELLIPSIS */ 0x2020, /* DAGGER */ 0x2021, /* DOUBLE DAGGER */ 0x02c6, /* MODIFIER LETTER CIRCUMFLEX ACCENT */ 0x2030, /* PER MILLE SIGN */ 0x0160, /* LATIN CAPITAL LETTER S WITH CARON */ 0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */ 0x0152, /* LATIN CAPITAL LIGATURE OE */ NOT_USED, 0x017D, /* CAPITAL Z HACEK */ NOT_USED, NOT_USED, 0x2018, /* LEFT SINGLE QUOTATION MARK */ 0x2019, /* RIGHT SINGLE QUOTATION MARK */ 0x201c, /* LEFT DOUBLE QUOTATION MARK */ 0x201d, /* RIGHT DOUBLE QUOTATION MARK */ 0x2022, /* BULLET */ 0x2013, /* EN DASH */ 0x2014, /* EM DASH */ 0x02dc, /* SMALL TILDE */ 0x2122, /* TRADE MARK SIGN */ 0x0161, /* LATIN SMALL LETTER S WITH CARON */ 0x203a, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */ 0x0153, /* LATIN SMALL LIGATURE OE */ NOT_USED, NOT_USED, 0x0178 /* LATIN CAPITAL LETTER Y WITH DIAERESIS */ }; #endif /* PA_REMAP_128_TO_160_ILLEGAL_NCR */ /* * This method converts this entity into its underlying * unicode equivalent. * * @update gess 3/25/98 * @param aString will hold the resulting string value * @return numeric (unichar) value */ PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) { PRInt32 value=0; PRInt32 theRadix[2]={16,10}; if(mTextValue.Length()>1) { PRUnichar theChar0=mTextValue.CharAt(0); PRBool isDigit0=nsString::IsDigit(theChar0); if(kHashsign==theChar0) { PRInt32 err=0; PRUnichar theChar1=mTextValue.CharAt(1); PRBool isDigit1=nsString::IsDigit(theChar1); value=mTextValue.ToInteger(&err,theRadix[isDigit1]); if(0==err) { #ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR /* for some illegal, but popular usage */ if ((value >= 0x0080) && (value <= 0x009f)) { value = PA_HackTable[value - 0x0080]; } #endif aString.Append(PRUnichar(value)); }//if } else{ char cbuf[30]; mTextValue.ToCString(cbuf, sizeof(cbuf)-1); value = NS_EntityToUnicode(cbuf); if(-1). * That means we have to look for quote-pairs, and ignore the * content inside them. * * @update gess 7/25/98 * @param aScanner -- controller of underlying input source * @return error result */ nsresult CSkippedContentToken::Consume(PRUnichar aChar,nsScanner& aScanner) { PRBool done=PR_FALSE; nsresult result=NS_OK; nsString temp; PRUnichar theChar; //We're going to try a new algorithm here. Rather than scan for the matching //end tag like we used to do, we're now going to scan for whitespace and comments. //If we find either, just eat them. If we find text or a tag, then go to the //target endtag, or the start of another comment. static nsAutoString theWhitespace2("\b\t "); while((!done) && (NS_OK==result)) { result=aScanner.GetChar(aChar); if((NS_OK==result) && (kLessThan==aChar)) { //we're reading a tag or a comment... result=aScanner.GetChar(theChar); if((NS_OK==result) && (kExclamation==theChar)) { //read a comment... static CCommentToken theComment; result=theComment.Consume(aChar,aScanner); if(NS_OK==result) { //result=aScanner.SkipWhitespace(); temp.Append(theComment.GetStringValueXXX()); } } else { //read a tag... temp+=aChar; temp+=theChar; result=aScanner.ReadUntil(temp,kGreaterThan,PR_TRUE); } } else if(0<=theWhitespace2.BinarySearch(aChar)) { static CWhitespaceToken theWS; result=theWS.Consume(aChar,aScanner); if(NS_OK==result) { temp.Append(theWS.GetStringValueXXX()); } } else { temp+=aChar; result=aScanner.ReadUntil(temp,kLessThan,PR_FALSE); } nsAutoString temp2; temp.Right(temp2,mTextValue.Length()); done=PRBool(0==temp2.Compare(mTextValue.GetUnicode(),PR_TRUE,mTextValue.Length())); } int len=temp.Length(); temp.Truncate(len-mTextValue.Length()); mTextKey=temp; return result; } /* * Dump contents of this token to givne output stream * * @update gess 3/25/98 * @param out -- ostream to output content * @return */ void CSkippedContentToken::DebugDumpSource(ostream& out) { static char buffer[1000]; mTextKey.ToCString(buffer,sizeof(buffer)-1); out << " " << buffer; if(mLastAttribute) out<<">"; } /** * * @update gess4/25/98 * @param * @return */ const char* GetTagName(PRInt32 aTag) { const char* result = NS_EnumToTag((nsHTMLTag) aTag); if (0 == result) { if(aTag>=eHTMLTag_userdefined) result = gUserdefined; else result=0; } return result; } /** * * * @update gess 9/23/98 * @param * @return */ CInstructionToken::CInstructionToken() : CHTMLToken(eHTMLTag_unknown) { } /** * * * @update gess 9/23/98 * @param * @return */ CInstructionToken::CInstructionToken(const nsString& aString) : CHTMLToken(aString) { } /** * * * @update gess 9/23/98 * @param * @return */ nsresult CInstructionToken::Consume(PRUnichar aChar,nsScanner& aScanner){ mTextValue="