Make CTextToken::ConsumeUntil not consume too much text. Bug 258082, patch by

Blake Kaplan <mrbkap@rice.edu>, r=bzbarsky, sr=jst
2024-10-12 12:55:46 +00:00 · 2004-09-12 01:50:53 +00:00 · 2004-09-12 01:50:53 +00:00 · c266ff1e12
commit c266ff1e12
parent da46873f59
3 changed files with 47 additions and 18 deletions
--- a/parser/htmlparser/public/nsHTMLTokens.h
+++ b/parser/htmlparser/public/nsHTMLTokens.h
@@ -283,7 +283,7 @@ public:
  CTextToken(const nsAString& aString);
  virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
  nsresult ConsumeUntil(PRUnichar aChar, PRBool aIgnoreComments,
-                        nsScanner& aScanner, nsString& aEndTagName,
+                        nsScanner& aScanner, const nsAString& aEndTagName,
                        PRInt32 aFlag, PRBool& aFlushTokens);
  virtual PRInt32 GetTokenType(void);
  virtual PRInt32 GetTextLength(void);
--- a/parser/htmlparser/src/nsHTMLTokenizer.cpp
+++ b/parser/htmlparser/src/nsHTMLTokenizer.cpp
@@ -752,12 +752,17 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan
        
        //if((eHTMLTag_style==theTag) || (eHTMLTag_script==theTag)) {
        if(gHTMLElements[theTag].CanContainType(kCDATA)) {
-          nsAutoString endTagName; 
-          endTagName.Assign(nsHTMLTags::GetStringValue(theTag));
+          nsDependentString endTagName(nsHTMLTags::GetStringValue(theTag)); 

          CToken*     text=theAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text);
          CTextToken* textToken=NS_STATIC_CAST(CTextToken*,text);
-          result=textToken->ConsumeUntil(0,theTag!=eHTMLTag_script,aScanner,endTagName,mFlags,aFlushTokens);  //tell new token to finish consuming text...    
+
+          //tell new token to finish consuming text...    
+          result=textToken->ConsumeUntil(0,theTag!=eHTMLTag_script,
+                                         aScanner,
+                                         endTagName,
+                                         mFlags,
+                                         aFlushTokens);
          
          // Fix bug 44186
          // Support XML like syntax, i.e., <script src="external.js"/> == <script src="external.js"></script>
@@ -768,10 +773,31 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan
          // it is.
          if((!(mFlags & NS_IPARSER_FLAG_PRESERVE_CONTENT) &&
              !theStartToken->IsEmpty()) || aFlushTokens) {
-            theStartToken->SetEmpty(PR_FALSE); // Setting this would make cases like <script/>d.w("text");</script> work.
-            CToken* endToken=theAllocator->CreateTokenOfType(eToken_end,theTag,endTagName);
+            // Setting this would make cases like <script/>d.w("text");</script> work.
+            theStartToken->SetEmpty(PR_FALSE);
+            // do this up here so we can just add the end token later on
            AddToken(text,result,&mTokenDeque,theAllocator);
-            AddToken(endToken,result,&mTokenDeque,theAllocator);
+
+            CToken* endToken=nsnull;
+            
+            if (NS_SUCCEEDED(result) && aFlushTokens) {
+              PRUnichar theChar;
+              // Get the <
+              result = aScanner.GetChar(theChar);
+              NS_ASSERTION(NS_SUCCEEDED(result) && theChar == kLessThan,
+                           "CTextToken::ConsumeUntil is broken!");
+#ifdef DEBUG
+              // Ensure the char after the < (peeked into tempChar) is a /
+              PRUnichar tempChar;  // Don't change non-debug vars in debug-only code
+              result = aScanner.Peek(tempChar);
+              NS_ASSERTION(NS_SUCCEEDED(result) && tempChar == kForwardSlash,
+                           "CTextToken::ConsumeUntil is broken!");
+#endif
+              result = ConsumeEndTag(PRUnichar('/'),endToken,aScanner);
+            } else if (!(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE)) {
+              endToken=theAllocator->CreateTokenOfType(eToken_end,theTag,endTagName);
+              AddToken(endToken,result,&mTokenDeque,theAllocator);
+            }
          }
          else {
            IF_FREE(text, mTokenAllocator);
--- a/parser/htmlparser/src/nsHTMLTokens.cpp
+++ b/parser/htmlparser/src/nsHTMLTokens.cpp
@@ -489,6 +489,7 @@ nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)

 /*
 *  Consume as much clear text from scanner as possible.
+ *  The scanner is left on the < of the perceived end tag.
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream
@@ -496,7 +497,8 @@ nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)
 *  @return  error result
 */
 nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner,
-                                  nsString& aEndTagName,PRInt32 aFlag,PRBool& aFlushTokens){
+                                  const nsAString& aEndTagName,PRInt32 aFlag,
+                                  PRBool& aFlushTokens){
  nsresult      result=NS_OK;
  nsScannerIterator theStartOffset, theCurrOffset, theTermStrPos, theStartCommentPos, theAltTermStrPos, endPos;
  PRBool        done=PR_FALSE;
@@ -584,15 +586,10 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann
        }
      }

-      // Make sure to preserve the end tag's representation if needed
-      if(aFlag & (NS_IPARSER_FLAG_VIEW_SOURCE | NS_IPARSER_FLAG_PRESERVE_CONTENT)) {
-        CopyUnicodeTo(ltOffset.advance(2),gtOffset,aEndTagName);
-      }
-
      aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos);
-      aScanner.SetPosition(gtOffset.advance(1));
+      aScanner.SetPosition(ltOffset);
      
-      // We found </SCRIPT>...permit flushing -> Ref: Bug 22485
+      // We found </SCRIPT> or </STYLE>...permit flushing -> Ref: Bug 22485
      aFlushTokens=PR_TRUE;
      done = PR_TRUE;
    }
@@ -1633,9 +1630,6 @@ nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 a

  nsresult result;
 
-  //I changed a bit of this method to use aRetain so that we do the right
-  //thing in viewsource. The ws/cr/lf sequences are now maintained, and viewsource looks good.
-
  nsScannerIterator wsstart, wsend;
  
  if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
@@ -1750,6 +1744,15 @@ nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 a

    if (NS_OK==result) {
      result=aScanner.Peek(aChar);
+
+      if (mTextValue.Length() == 0 && mTextKey.Length() == 0 && 
+          aChar == kLessThan) {
+        // This attribute is completely bogus, tell the tokenizer.
+        // This happens when we have stuff like:
+        // <script>foo()</script  <p>....
+        return NS_ERROR_HTMLPARSER_BADATTRIBUTE;
+      }
+
 #ifdef DEBUG
      mLastAttribute = (kGreaterThan == aChar || kEOF == result);
 #endif