bug 70282: view-source loses the last tag in a document if the tag is unclosed. r=rbs sr=roc

2025-02-27 21:00:50 +00:00 · 2004-11-11 03:41:52 +00:00 · 2004-11-11 03:41:52 +00:00 · 08819824f9
commit 08819824f9
parent a4eea6f5b5
3 changed files with 65 additions and 18 deletions
--- a/parser/htmlparser/src/nsHTMLTokenizer.cpp
+++ b/parser/htmlparser/src/nsHTMLTokenizer.cpp
@ -697,6 +697,10 @@ nsresult nsHTMLTokenizer::ConsumeAttributes(PRUnichar aChar,

  if (NS_FAILED(result)) {
    aToken->SetInError(PR_TRUE);
+
+    if (!aScanner.IsIncremental()) {
+      result = NS_OK;
+    }
  }

  aToken->SetAttributeCount(theAttrCount);
@ -854,6 +858,7 @@ nsresult nsHTMLTokenizer::ConsumeEndTag(PRUnichar aChar,CToken*& aToken,nsScanne

  nsTokenAllocator* theAllocator=this->GetTokenAllocator();
  aToken=theAllocator->CreateTokenOfType(eToken_end,eHTMLTag_unknown);
+  PRInt32 theDequeSize=mTokenDeque.GetSize(); //remember this for later in case you have to unwind...
  nsresult result=NS_OK;
  
  if(aToken) {
@ -869,7 +874,6 @@ nsresult nsHTMLTokenizer::ConsumeEndTag(PRUnichar aChar,CToken*& aToken,nsScanne

    if(kGreaterThan != aChar) {
      result = ConsumeAttributes(aChar, aToken, aScanner);
-      NS_ENSURE_SUCCESS(result, result);
    }
    else {
      aScanner.GetChar(aChar);
@ -883,6 +887,16 @@ nsresult nsHTMLTokenizer::ConsumeEndTag(PRUnichar aChar,CToken*& aToken,nsScanne
        mFlags &= ~NS_IPARSER_FLAG_PRESERVE_CONTENT;
      }
    }
+
+    // Do the same thing as we do in ConsumeStartTag. Basically, if we've run
+    // out of room in this *section* of the document, pop all of the tokens
+    // we've consumed this round and wait for more data.
+    if(NS_FAILED(result)) {
+      while(mTokenDeque.GetSize()>theDequeSize) {
+        CToken* theToken=(CToken*)mTokenDeque.Pop();
+        IF_FREE(theToken, mTokenAllocator);
+      }
+    }
  } //if
  return result;
 }
--- a/parser/htmlparser/src/nsHTMLTokens.cpp
+++ b/parser/htmlparser/src/nsHTMLTokens.cpp
@ -191,10 +191,6 @@ nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag
    }
  }
  else {
-    //added PR_TRUE to readId() call below to fix bug 46083. The problem was that the tag given
-    //was written <title_> but since we didn't respect the '_', we only saw <title>. Then 
-    //we searched for end title, which never comes (they give </title_>). 
-
    result=aScanner.ReadTagIdentifier(mTextValue);  
    mTypeID = nsHTMLTags::LookupTag(mTextValue);
  }
@ -203,6 +199,11 @@ nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag
    result = aScanner.SkipWhitespace(mNewlineCount);
  }

+  if (kEOF == result && !aScanner.IsIncremental()) {
+    // Take what we can get.
+    result = NS_OK;
+  }
+
  return result;
 }

@ -285,7 +286,6 @@ nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)
  if (aFlag & NS_IPARSER_FLAG_HTML) {
    nsAutoString theSubstr;
    result=aScanner.ReadTagIdentifier(theSubstr);
-    NS_ENSURE_SUCCESS(result, result);
    
    mTypeID = (PRInt32)nsHTMLTags::LookupTag(theSubstr);
    // Save the original tag string if this is user-defined or if we
@ -297,16 +297,20 @@ nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)
  }
  else {
    result = aScanner.ReadTagIdentifier(mTextValue);
-    NS_ENSURE_SUCCESS(result, result);

    mTypeID = nsHTMLTags::LookupTag(mTextValue);
  }

-  if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
+  if (NS_SUCCEEDED(result) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
    result = aScanner.SkipWhitespace(mNewlineCount);
    NS_ENSURE_SUCCESS(result, result);
  }

+  if (kEOF == result && !aScanner.IsIncremental()) {
+    // Take what we can get.
+    result = NS_OK;
+  }
+
  return result;
 }

@ -1674,6 +1678,11 @@ nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 a
  
  if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
    result = aScanner.ReadWhitespace(wsstart, wsend, mNewlineCount);
+    if (kEOF == result && wsstart != wsend) {
+      // Do this here so if this is the final token in the document, we don't
+      // lose the whitespace.
+      aScanner.BindSubstring(mTextKey, wsstart, wsend);
+    }
  }
  else {
    result = aScanner.SkipWhitespace(mNewlineCount);
@ -1694,6 +1703,11 @@ nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 a

    if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
      aScanner.BindSubstring(mTextKey, start, end);
+    } 
+    else if (kEOF == result && wsstart != end) {
+      //Capture all of the text (from the beginning of the whitespace to the
+      //end of the document).
+      aScanner.BindSubstring(mTextKey, wsstart, end);
    }

    //now it's time to Consume the (optional) value...
@ -1769,6 +1783,11 @@ nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 a
                  }
                }
              }//if
+              else {
+                //We saw an equal sign but ran out of room looking for a value.
+                mHasEqualWithoutValue=PR_TRUE;
+                mInError=PR_TRUE;
+              }
            }//if
          }//if
          else {
@ -1819,6 +1838,19 @@ nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 a
 #endif
    }
  }//if
+
+  if (kEOF == result && !aScanner.IsIncremental()) {
+    // This is our run-of-the-mill "don't lose content at the end of a
+    // document" with a slight twist: we don't want to bother returning an
+    // empty attribute key, even if this is the end of the document.
+    if (mTextKey.Length() == 0) {
+      result = NS_ERROR_HTMLPARSER_BADATTRIBUTE;
+    }
+    else {
+      result = NS_OK;
+    }
+  }
+
  return result;
 }

--- a/parser/htmlparser/src/nsScanner.cpp
+++ b/parser/htmlparser/src/nsScanner.cpp
@ -784,10 +784,11 @@ nsresult nsScanner::ReadTagIdentifier(nsString& aString) {
  current = mCurrentPosition;
  end = mEndPosition;

-  while(current != end) {
+  // Loop until we find an illegal character. Everything is then appended
+  // later.
+  while(current != end && !found) {
    theChar=*current;

-    found = PR_TRUE;
    switch(theChar) {
      case '\n':
      case '\r':
@ -800,20 +801,20 @@ nsresult nsScanner::ReadTagIdentifier(nsString& aString) {
      case '>':
      case '/':
      case '\0':
-        found = PR_FALSE;
+        found = PR_TRUE;
        break;
      default:
        break;
    }

-    if(!found) {
-      // If the current character isn't a valid character for
-      // the identifier, we're done. Append the results to
-      // the string passed in.
-      AppendUnicodeTo(mCurrentPosition, current, aString);
-      break;
+    if (!found) {
+      ++current;
    }
-    ++current;
+  }
+
+  // Don't bother appending nothing.
+  if (current != mCurrentPosition) {
+    AppendUnicodeTo(mCurrentPosition, current, aString);
  }

  // Drop NULs on the floor since nobody really likes them.