bug 299036: Fix crash in CNavDTD by moving newline stripping into the tokenizer. This reduces our reliance on the invariants that the tokenizer tries to provide. r+sr=jst a=chofmann

2024-10-12 12:55:46 +00:00 · 2005-06-29 02:46:45 +00:00 · 2005-06-29 02:46:45 +00:00 · 47de12372b
commit 47de12372b
parent 6b6f99f32b
4 changed files with 46 additions and 60 deletions
--- a/parser/htmlparser/public/nsHTMLTokens.h
+++ b/parser/htmlparser/public/nsHTMLTokens.h
@@ -291,15 +291,14 @@ public:
                    nsScannerIterator& aEnd);
  virtual void Bind(const nsAString& aStr);

-  nsresult ConsumeCharacterData(PRUnichar aChar,
-                                PRBool aConservativeConsume,
+  nsresult ConsumeCharacterData(PRBool aConservativeConsume,
                                PRBool aIgnoreComments,
                                nsScanner& aScanner,
                                const nsAString& aEndTagName,
                                PRInt32 aFlag,
                                PRBool& aFlushTokens);

-  nsresult ConsumeParsedCharacterData(PRUnichar aChar,
+  nsresult ConsumeParsedCharacterData(PRBool aDiscardFirstNewline,
                                      PRBool aConservativeConsume,
                                      nsScanner& aScanner,
                                      const nsAString& aEndTagName,
--- a/parser/htmlparser/src/CNavDTD.cpp
+++ b/parser/htmlparser/src/CNavDTD.cpp
@@ -1017,49 +1017,6 @@ nsresult CNavDTD::DidHandleStartTag(nsIParserNode& aNode,eHTMLTags aChildTag){
        }//if
      }
      break;
-    case eHTMLTag_textarea:
-      {
-        // In HTML, we need to strip the first newline from the textarea's text.
-        CToken* theNextToken = mTokenizer->PeekToken();
-        if (theNextToken) {
-#ifdef DEBUG
-          eHTMLTokenTypes theType = eHTMLTokenTypes(theNextToken->GetTokenType());
-          NS_ASSERTION(eToken_text == theType, "Textareas should always have at "
-                                             "least one text token as a child.");
-#endif
-          CTextToken* text = NS_STATIC_CAST(CTextToken*, theNextToken);
-          const nsSubstring &content = text->GetStringValue();
-          PRBool chop = PR_FALSE;
-
-          if (!content.IsEmpty()) {
-            nsSubstring::const_iterator start, end;
-            content.BeginReading(start);
-            content.EndReading(end);
-
-            if (*start == nsCRT::CR) {
-              ++start;
-
-              if (start != end && *start == nsCRT::LF) {
-                ++start;
-              }
-
-              chop = PR_TRUE;
-            }
-            else if (*start == nsCRT::LF) {
-              ++start;
-              chop = PR_TRUE;
-            }
-
-            if (chop) {
-              // XXX See bug 294599 for why the nsAutoString is necessary.
-              nsAutoString chopped(Substring(start, end));
-              text->Bind(chopped);
-              ++mLineNumber;
-            }
-          }
-        }
-        break;
-      }
    default:
      break;
  }//switch 
--- a/parser/htmlparser/src/nsHTMLTokenizer.cpp
+++ b/parser/htmlparser/src/nsHTMLTokenizer.cpp
@@ -872,8 +872,7 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,
          if (isCDATA) {
            // The only tags that consume conservatively are <script> and
            // <style>, the rest all consume until the end of the document.
-            result = textToken->ConsumeCharacterData(0,
-                                                     theTag==eHTMLTag_script ||
+            result = textToken->ConsumeCharacterData(theTag==eHTMLTag_script ||
                                                     theTag==eHTMLTag_style,
                                                     theTag!=eHTMLTag_script,
                                                     aScanner,
@@ -888,12 +887,13 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,
          else if (isPCDATA) {
            // Title is consumed conservatively in order to not regress
            // bug 42945
-            result = textToken->ConsumeParsedCharacterData(0,
-                                                           theTag==eHTMLTag_title,
-                                                           aScanner,
-                                                           endTagName,
-                                                           mFlags,
-                                                           done);
+            result = textToken->ConsumeParsedCharacterData(
+                                                        theTag==eHTMLTag_textarea,
+                                                        theTag==eHTMLTag_title,
+                                                        aScanner,
+                                                        endTagName,
+                                                        mFlags,
+                                                        done);

            // Note: we *don't* set aFlushTokens here.
          }
--- a/parser/htmlparser/src/nsHTMLTokens.cpp
+++ b/parser/htmlparser/src/nsHTMLTokens.cpp
@@ -643,8 +643,7 @@ nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)
 *  @param   aFlushTokens -- PR_TRUE if we found the terminal tag.
 *  @return  error result
 */
-nsresult CTextToken::ConsumeCharacterData(PRUnichar aChar,
-                                          PRBool aConservativeConsume,
+nsresult CTextToken::ConsumeCharacterData(PRBool aConservativeConsume,
                                          PRBool aIgnoreComments,
                                          nsScanner& aScanner,
                                          const nsAString& aEndTagName,
@@ -798,7 +797,7 @@ nsresult CTextToken::ConsumeCharacterData(PRUnichar aChar,
 *  @param   aFlushTokens -- PR_TRUE if we found the terminal tag.
 *  @return  error result
 */
-nsresult CTextToken::ConsumeParsedCharacterData(PRUnichar aChar,
+nsresult CTextToken::ConsumeParsedCharacterData(PRBool aDiscardFirstNewline,
                                                PRBool aConservativeConsume,
                                                nsScanner& aScanner,
                                                const nsAString& aEndTagName,
@@ -845,6 +844,39 @@ nsresult CTextToken::ConsumeParsedCharacterData(PRUnichar aChar,
    result = ConsumeUntil(theContent, mNewlineCount, aScanner, 
                          theEndCondition, PR_TRUE, aFlag);

+    if (aDiscardFirstNewline && 
+        (NS_SUCCEEDED(result) || !aScanner.IsIncremental()) &&
+        !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
+      // Check if the very first character is a newline, and if so discard it.
+      // Note that we don't want to discard it in view source!
+      // Also note that this has to happen here (as opposed to before the
+      // ConsumeUntil) because we have to expand any entities.
+      // XXX It would be nice to be able to do this without calling
+      // writable()!
+      const nsSubstring &firstChunk = theContent.str();
+      if (!firstChunk.IsEmpty()) {
+        PRUint32 where = 0;
+        PRUnichar newline = firstChunk.First();
+
+        if (newline == kCR || newline == kNewLine) {
+          ++where;
+
+          if (firstChunk.Length() > 1) {
+            if (newline == kCR && firstChunk.CharAt(1) == kNewLine) {
+              // Handle \r\n = 1 newline.
+              ++where;
+            }
+            // Note: \n\r = 2 newlines.
+          }
+        }
+
+        if (where != 0) {
+          theContent.writable() = Substring(firstChunk, where);
+        }
+      }
+    }
+    aDiscardFirstNewline = PR_FALSE;
+
    if (NS_FAILED(result)) {
      if (kEOF == result && !aScanner.IsIncremental()) {
        aFound = PR_TRUE; // this is as good as it gets.
@@ -895,9 +927,7 @@ nsresult CTextToken::ConsumeParsedCharacterData(PRUnichar aChar,
        }
      }
    }
-    // IE only consumes <!-- --> as comments in PCDATA. We'll accept a bit
-    // more in quirks mode, but lets ensure that this really is a comment
-    // start to maintain the illusion of compatability.
+    // IE only consumes <!-- --> as comments in PCDATA.
    if (Distance(currPos, endPos) >= commentStartLen) {
      nsScannerIterator start(currPos), end(currPos);
      end.advance(commentStartLen);