Bug 499642 - Split the HTML5 parser into stream parser, doc.write parser and tree op executor. r=bnewman, sr=mrbkap.

This commit is contained in:
Henri Sivonen 2009-09-18 12:21:47 +03:00
parent aa1ce3f484
commit 217db1634c
38 changed files with 2235 additions and 1507 deletions

View File

@ -925,7 +925,7 @@ nsHTMLDocument::StartDocumentLoad(const char* aCommand,
// Set the parser as the stream listener for the document loader...
if (mParser) {
rv = CallQueryInterface(mParser, aDocListener);
rv = mParser->GetStreamListener(aDocListener);
if (NS_FAILED(rv)) {
return rv;
}

View File

@ -68,6 +68,8 @@ CPPSRCS = \
nsHtml5MetaScanner.cpp \
nsHtml5TreeOperation.cpp \
nsHtml5StateSnapshot.cpp \
nsHtml5TreeOpExecutor.cpp \
nsHtml5StreamParser.cpp \
$(NULL)
FORCE_STATIC_LIB = 1

View File

@ -36,7 +36,7 @@
#
# ***** END LICENSE BLOCK *****
SVN_BASE=http://svn.versiondude.net/whattf/htmlparser/trunk
SVN_BASE=https://whattf.svn.cvsdude.com/htmlparser/trunk
sync:: \
; ../sync-src.sh $(SVN_BASE)/src/nu/validator/htmlparser/impl

View File

@ -36,7 +36,7 @@
#
# ***** END LICENSE BLOCK *****
SVN_BASE=http://svn.versiondude.net/whattf/htmlparser/trunk
SVN_BASE=https://whattf.svn.cvsdude.com/htmlparser/trunk
libs:: \
; mkdir -p bin && \

View File

@ -77,22 +77,23 @@ public class CppTypes {
"nsIDocument", "nsTraceRefcnt", "jArray", "nsHtml5DocumentMode",
"nsHtml5ArrayCopy", "nsHtml5NamedCharacters", "nsHtml5Parser",
"nsHtml5Atoms", "nsHtml5ByteReadable", "nsHtml5TreeOperation",
"nsHtml5PendingNotification", "nsHtml5StateSnapshot", "nsHtml5StackNode" };
"nsHtml5PendingNotification", "nsHtml5StateSnapshot", "nsHtml5StackNode",
"nsHtml5TreeOpExecutor", "nsHtml5StreamParser" };
private static final String[] INCLUDES = { "prtypes", "nsIAtom",
"nsString", "nsINameSpaceManager", "nsIContent", "nsIDocument",
"nsTraceRefcnt", "jArray", "nsHtml5DocumentMode",
"nsHtml5ArrayCopy", "nsHtml5NamedCharacters",
"nsHtml5Atoms", "nsHtml5ByteReadable", };
"nsHtml5Atoms", "nsHtml5ByteReadable", "nsIUnicodeDecoder", };
private static final String[] OTHER_DECLATIONS = {};
private static final String[] TREE_BUILDER_OTHER_DECLATIONS = { "typedef nsIContent* nsIContentPtr;" };
private static final String[] TREE_BUILDER_OTHER_DECLATIONS = { };
private static final String[] NAMED_CHARACTERS_INCLUDES = { "prtypes",
"jArray", "nscore" };
private static final String[] FORWARD_DECLARATIONS = { "nsHtml5Parser", };
private static final String[] FORWARD_DECLARATIONS = { "nsHtml5StreamParser", };
private static final String[] CLASSES_THAT_NEED_SUPPLEMENT = {
"MetaScanner",
@ -174,7 +175,7 @@ public class CppTypes {
}
public String encodingDeclarationHandlerType() {
return "nsHtml5Parser*";
return "nsHtml5StreamParser*";
}
public String nodeType() {
@ -275,7 +276,7 @@ public class CppTypes {
}
public String documentModeHandlerType() {
return "nsHtml5Parser*";
return "nsHtml5TreeBuilder*";
}
public String documentModeType() {

View File

@ -303,7 +303,7 @@ public class CppVisitor extends AnnotationHelperVisitor<LocalSymbolTable> {
} else if ("HTML_LOCAL".equals(n.getName())) {
printer.print(cppTypes.localForLiteral("html"));
} else if ("documentModeHandler".equals(n.getName())) {
printer.print("parser");
printer.print("this");
} else {
String prefixedName = javaClassName + "." + n.getName();
String constant = symbolTable.cppDefinesByJavaNames.get(prefixedName);
@ -718,12 +718,7 @@ public class CppVisitor extends AnnotationHelperVisitor<LocalSymbolTable> {
}
public void visit(VariableDeclaratorId n, LocalSymbolTable arg) {
String name = n.getName();
if ("documentModeHandler".equals(name)) {
printer.print("parser");
} else {
printer.print(n.getName());
}
printer.print(n.getName());
if (noLength()) {
for (int i = 0; i < currentArrayCount; i++) {
if (inPrimitiveNoLengthFieldDeclarator) {

View File

@ -40,6 +40,7 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"

View File

@ -41,8 +41,9 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
class nsHtml5Parser;
class nsHtml5StreamParser;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;

View File

@ -40,6 +40,7 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"

View File

@ -41,8 +41,9 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
class nsHtml5Parser;
class nsHtml5StreamParser;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;

View File

@ -41,6 +41,7 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"

View File

@ -42,8 +42,9 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
class nsHtml5Parser;
class nsHtml5StreamParser;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;

View File

@ -41,6 +41,7 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"

View File

@ -42,8 +42,9 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
class nsHtml5Parser;
class nsHtml5StreamParser;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;

File diff suppressed because it is too large Load Diff

View File

@ -55,85 +55,18 @@
#include "nsIHTMLDocument.h"
#include "nsIUnicharStreamListener.h"
#include "nsCycleCollectionParticipant.h"
#include "nsAutoPtr.h"
#include "nsIInputStream.h"
#include "nsIUnicodeDecoder.h"
#include "nsICharsetDetectionObserver.h"
#include "nsDetectionConfident.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsHtml5MetaScanner.h"
#include "nsHtml5TreeOpExecutor.h"
#include "nsHtml5StreamParser.h"
#define NS_HTML5_PARSER_READ_BUFFER_SIZE 1024
#define NS_HTML5_PARSER_SNIFFING_BUFFER_SIZE 512
enum eHtml5ParserLifecycle {
/**
* The parser hasn't told the tokenizer to start yet.
*/
NOT_STARTED = 0,
/**
* The parser has started the tokenizer and the stream hasn't ended yet.
*/
PARSING = 1,
/**
* The parser hasn't told the tokenizer to emit EOF yet, but the network
* stream has been exhausted or document.close() called.
*/
STREAM_ENDING = 2,
/**
* The parser has told the tokenizer to emit EOF.
*/
TERMINATED = 3
};
enum eBomState {
/**
* BOM sniffing hasn't started.
*/
BOM_SNIFFING_NOT_STARTED = 0,
/**
* BOM sniffing is ongoing, and the first byte of an UTF-16LE BOM has been
* seen.
*/
SEEN_UTF_16_LE_FIRST_BYTE = 1,
/**
* BOM sniffing is ongoing, and the first byte of an UTF-16BE BOM has been
* seen.
*/
SEEN_UTF_16_BE_FIRST_BYTE = 2,
/**
* BOM sniffing is ongoing, and the first byte of an UTF-8 BOM has been
* seen.
*/
SEEN_UTF_8_FIRST_BYTE = 3,
/**
* BOM sniffing is ongoing, and the first and second bytes of an UTF-8 BOM
* have been seen.
*/
SEEN_UTF_8_SECOND_BYTE = 4,
/**
* BOM sniffing was started but is now over for whatever reason.
*/
BOM_SNIFFING_OVER = 5
};
class nsHtml5Parser : public nsIParser,
public nsIStreamListener,
public nsICharsetDetectionObserver,
public nsIContentSink,
public nsContentSink {
class nsHtml5Parser : public nsIParser {
public:
NS_DECL_AND_IMPL_ZEROING_OPERATOR_NEW
NS_DECL_ISUPPORTS_INHERITED
NS_DECL_CYCLE_COLLECTION_CLASS_INHERITED(nsHtml5Parser, nsContentSink)
NS_DECL_CYCLE_COLLECTING_ISUPPORTS
NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsHtml5Parser, nsIParser)
nsHtml5Parser();
virtual ~nsHtml5Parser();
@ -174,12 +107,11 @@ class nsHtml5Parser : public nsIParser,
NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, PRInt32 aSource);
/**
* Getter for backwards compat.
* Don't call. For interface compat only.
*/
NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, PRInt32& aSource)
{
aCharset = mCharset;
aSource = mCharsetSource;
NS_NOTREACHED("No one should call this.");
}
/**
@ -199,6 +131,11 @@ class nsHtml5Parser : public nsIParser,
*/
NS_IMETHOD GetDTD(nsIDTD** aDTD);
/**
* Get the stream parser for this parser
*/
NS_IMETHOD GetStreamListener(nsIStreamListener** aListener);
/**
* Unblocks parser and calls ContinueInterruptedParsing()
*/
@ -211,7 +148,7 @@ class nsHtml5Parser : public nsIParser,
NS_IMETHOD ContinueInterruptedParsing();
/**
* Don't call. For interface backwards compat only.
* Blocks the parser.
*/
NS_IMETHOD_(void) BlockParser();
@ -314,234 +251,51 @@ class nsHtml5Parser : public nsIParser,
virtual PRBool CanInterrupt();
/* End nsIParser */
//*********************************************
// These methods are callback methods used by
// net lib to let us know about our inputstream.
//*********************************************
// nsIRequestObserver methods:
NS_DECL_NSIREQUESTOBSERVER
// nsIStreamListener methods:
NS_DECL_NSISTREAMLISTENER
/**
* Fired when the continue parse event is triggered.
*/
void HandleParserContinueEvent(class nsHtml5ParserContinueEvent *);
// EncodingDeclarationHandler
/**
* Tree builder uses this to report a late <meta charset>
*/
void internalEncodingDeclaration(nsString* aEncoding);
// DocumentModeHandler
/**
* Tree builder uses this to report quirkiness of the document
*/
void documentMode(nsHtml5DocumentMode m);
// nsICharsetDetectionObserver
/**
* Chardet calls this to report the detection result
*/
NS_IMETHOD Notify(const char* aCharset, nsDetectionConfident aConf);
// nsIContentSink
/**
* Unimplemented. For interface compat only.
*/
NS_IMETHOD WillParse();
/**
* Unimplemented. For interface compat only.
*/
NS_IMETHOD WillBuildModel(nsDTDMode aDTDMode);
/**
* Emits EOF.
*/
NS_IMETHOD DidBuildModel();
/**
* Forwards to nsContentSink
*/
NS_IMETHOD WillInterrupt();
/**
* Unimplemented. For interface compat only.
*/
NS_IMETHOD WillResume();
/**
* Unimplemented. For interface compat only.
*/
NS_IMETHOD SetParser(nsIParser* aParser);
/**
* No-op for backwards compat.
*/
virtual void FlushPendingNotifications(mozFlushType aType);
/**
* Sets mCharset
*/
NS_IMETHOD SetDocumentCharset(nsACString& aCharset);
/**
* Returns the document.
*/
virtual nsISupports *GetTarget();
// Not from an external interface
// Non-inherited methods
public:
// nsContentSink methods
/**
* Initializes the parser to load from a channel.
*/
virtual nsresult Initialize(nsIDocument* aDoc,
nsIURI* aURI,
nsISupports* aContainer,
nsIChannel* aChannel);
virtual nsresult ProcessBASETag(nsIContent* aContent);
virtual void UpdateChildCounts();
virtual nsresult FlushTags();
virtual void PostEvaluateScript(nsIScriptElement *aElement);
using nsContentSink::Notify;
// Non-inherited methods
/**
* <meta charset> scan failed. Try chardet if applicable. After this, the
* parser will have some encoding, even if only a last-resort fallback.
*
* @param aFromSegment The current network buffer or null if the sniffing
* buffer is being flushed due to network stream ending.
* @param aCount The number of bytes in aFromSegment (ignored if
* aFromSegment is null)
* @param aWriteCount Return value for how many bytes got read from the
* buffer.
* @param aCountToSniffingLimit The number of unfilled slots in
* mSniffingBuffer
*/
nsresult FinalizeSniffing(const PRUint8* aFromSegment,
PRUint32 aCount,
PRUint32* aWriteCount,
PRUint32 aCountToSniffingLimit);
/**
* Set up the Unicode decoder and write the sniffing buffer into it
* followed by the current network buffer.
*
* @param aFromSegment The current network buffer or null if the sniffing
* buffer is being flushed due to network stream ending.
* @param aCount The number of bytes in aFromSegment (ignored if
* aFromSegment is null)
* @param aWriteCount Return value for how many bytes got read from the
* buffer.
*/
nsresult SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const PRUint8* aFromSegment,
PRUint32 aCount,
PRUint32* aWriteCount);
/**
* Write the sniffing buffer into the Unicode decoder followed by the
* current network buffer.
*
* @param aFromSegment The current network buffer or null if the sniffing
* buffer is being flushed due to network stream ending.
* @param aCount The number of bytes in aFromSegment (ignored if
* aFromSegment is null)
* @param aWriteCount Return value for how many bytes got read from the
* buffer.
*/
nsresult WriteSniffingBufferAndCurrentSegment(const PRUint8* aFromSegment,
PRUint32 aCount,
PRUint32* aWriteCount);
/**
* Initialize the Unicode decoder, mark the BOM as the source and
* drop the sniffer.
*
* @param aCharsetName The charset name to report to the outside (UTF-16
* or UTF-8)
* @param aDecoderCharsetName The actual name for the decoder's charset
* (UTF-16BE, UTF-16LE or UTF-8; the BOM has
* been swallowed)
*/
nsresult SetupDecodingFromBom(const char* aCharsetName,
const char* aDecoderCharsetName);
/**
* True when there is a Unicode decoder already
*/
PRBool HasDecoder() {
return !!mUnicodeDecoder;
}
/**
* Push bytes from network when there is no Unicode decoder yet
*/
nsresult SniffStreamBytes(const PRUint8* aFromSegment,
PRUint32 aCount,
PRUint32* aWriteCount);
/**
* Push bytes from network when there is a Unicode decoder already
*/
nsresult WriteStreamBytes(const PRUint8* aFromSegment,
PRUint32 aCount,
PRUint32* aWriteCount);
/**
* Request event loop spin as soon as the tokenizer returns
*/
void Suspend();
/**
* Request execution of the script element when the tokenizer returns
*/
void SetScriptElement(nsIContent* aScript);
/**
* Sets up style sheet load / parse
*/
void UpdateStyleSheet(nsIContent* aElement);
// Getters and setters for fields from nsContentSink
nsIDocument* GetDocument() {
return mDocument;
inline nsHtml5Tokenizer* GetTokenizer() {
return mTokenizer;
}
nsNodeInfoManager* GetNodeInfoManager() {
return mNodeInfoManager;
}
nsIDocShell* GetDocShell() {
return mDocShell;
}
private:
/**
* Runs mScriptElement
*/
void ExecuteScript();
/**
* Posts a continue event if there isn't one already
*/
void MaybePostContinueEvent();
void DropStreamParser() {
mStreamParser = nsnull;
}
/**
* Renavigates to the document with a different charset
*/
nsresult PerformCharsetSwitch();
private:
/**
* Parse until pending data is exhausted or tree builder suspends
*/
void ParseUntilSuspend();
private:
// State variables
/**
* Call to PerformCharsetSwitch() needed
*/
PRBool mNeedsCharsetSwitch;
/**
* Whether the last character tokenized was a carriage return (for CRLF)
@ -562,75 +316,13 @@ class nsHtml5Parser : public nsIParser,
* The event loop will spin ASAP
*/
PRBool mSuspending;
/**
* Whether EOF needs to be suppressed
*/
PRBool mSuppressEOF;
/**
* The current point on parser life cycle
*/
eHtml5ParserLifecycle mLifeCycle;
// script execution
/**
* Script to run ASAP
*/
nsCOMPtr<nsIContent> mScriptElement;
/**
*
*/
PRBool mUninterruptibleDocWrite;
// Gecko integration
void* mRootContextKey;
nsCOMPtr<nsIRequest> mRequest;
nsCOMPtr<nsIRequestObserver> mObserver;
nsIRunnable* mContinueEvent; // weak ref
// encoding-related stuff
/**
* The source (confidence) of the character encoding in use
*/
PRInt32 mCharsetSource;
/**
* The character encoding in use
*/
nsCString mCharset;
/**
* The character encoding to which to switch in a late <meta> renavigation
*/
nsCString mPendingCharset;
/**
* The Unicode decoder
*/
nsCOMPtr<nsIUnicodeDecoder> mUnicodeDecoder;
/**
* The buffer for sniffing the character encoding
*/
nsAutoArrayPtr<PRUint8> mSniffingBuffer;
/**
* The number of meaningful bytes in mSniffingBuffer
*/
PRUint32 mSniffingLength;
/**
* BOM sniffing state
*/
eBomState mBomState;
/**
* <meta> prescan implementation
*/
nsAutoPtr<nsHtml5MetaScanner> mMetaScanner;
// Portable parser objects
/**
* The first buffer in the pending UTF-16 buffer queue
@ -643,28 +335,25 @@ class nsHtml5Parser : public nsIParser,
nsHtml5UTF16Buffer* mLastBuffer; // weak ref; always points to
// a buffer of the size NS_HTML5_PARSER_READ_BUFFER_SIZE
/**
* The tree operation executor
*/
nsRefPtr<nsHtml5TreeOpExecutor> mExecutor;
/**
* The HTML5 tree builder
*/
nsAutoPtr<nsHtml5TreeBuilder> mTreeBuilder;
const nsAutoPtr<nsHtml5TreeBuilder> mTreeBuilder;
/**
* The HTML5 tokenizer
*/
nsAutoPtr<nsHtml5Tokenizer> mTokenizer;
const nsAutoPtr<nsHtml5Tokenizer> mTokenizer;
#ifdef DEBUG
/**
* For asserting stream life cycle
* The stream parser.
*/
eStreamState mStreamListenerState;
#endif
nsRefPtr<nsHtml5StreamParser> mStreamParser;
#ifdef GATHER_DOCWRITE_STATISTICS
nsHtml5StateSnapshot* mSnapshot;
static PRUint32 sUnsafeDocWrites;
static PRUint32 sTokenSafeDocWrites;
static PRUint32 sTreeSafeDocWrites;
#endif
};
#endif

View File

@ -41,8 +41,9 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
class nsHtml5Parser;
class nsHtml5StreamParser;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;

View File

@ -41,6 +41,7 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"

View File

@ -42,8 +42,9 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
class nsHtml5Parser;
class nsHtml5StreamParser;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;

View File

@ -40,6 +40,7 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"

View File

@ -41,8 +41,9 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
class nsHtml5Parser;
class nsHtml5StreamParser;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;

View File

@ -0,0 +1,628 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set sw=2 ts=2 et tw=79: */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Pierre Phaneuf <pp@ludusdesign.com>
* Henri Sivonen <hsivonen@iki.fi>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nsHtml5StreamParser.h"
#include "nsICharsetConverterManager.h"
#include "nsServiceManagerUtils.h"
#include "nsEncoderDecoderUtils.h"
#include "nsContentUtils.h"
#include "nsICharsetDetector.h"
#include "nsHtml5Tokenizer.h"
#include "nsIHttpChannel.h"
#include "nsHtml5Parser.h"
// Class ID constant for the charset alias service.
static NS_DEFINE_CID(kCharsetAliasCID, NS_CHARSETALIAS_CID);
// Cycle-collecting AddRef/Release implementations for the stream parser.
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5StreamParser)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5StreamParser)
// QueryInterface table: the stream parser is reachable as
// nsIStreamListener and as nsICharsetDetectionObserver.
NS_INTERFACE_TABLE_HEAD(nsHtml5StreamParser)
NS_INTERFACE_TABLE2(nsHtml5StreamParser,
nsIStreamListener,
nsICharsetDetectionObserver)
NS_INTERFACE_TABLE_TO_MAP_SEGUE_CYCLE_COLLECTION(nsHtml5StreamParser)
NS_INTERFACE_MAP_END
// Members handed to the cycle collector: mObserver, mRequest and mOwner.
NS_IMPL_CYCLE_COLLECTION_3(nsHtml5StreamParser, mObserver, mRequest, mOwner)
/**
 * Constructs a stream parser.
 *
 * @param aTokenizer stored in mTokenizer
 * @param aExecutor  stored in mExecutor
 * @param aOwner     stored in mOwner
 *
 * Allocates the first UTF-16 buffer of
 * NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE and points mLastBuffer at it, so
 * the buffer queue starts out with exactly one buffer.
 *
 * NOTE(review): mLastBuffer(mFirstBuffer) relies on mFirstBuffer being
 * declared before mLastBuffer in the class -- confirm in the header.
 * NOTE(review): mCharsetSource, mSniffingLength and mBomState are not
 * initialized here; presumably the class uses a zeroing operator new --
 * confirm in the header.
 */
nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5Tokenizer* aTokenizer,
nsHtml5TreeOpExecutor* aExecutor,
nsHtml5Parser* aOwner)
: mFirstBuffer(new nsHtml5UTF16Buffer(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE))
, mLastBuffer(mFirstBuffer)
, mExecutor(aExecutor)
, mTokenizer(aTokenizer)
, mOwner(aOwner)
{
}
/**
 * Clears every member reference and frees the whole UTF-16 buffer queue.
 */
nsHtml5StreamParser::~nsHtml5StreamParser()
{
  // Network- and encoding-related state.
  mRequest = nsnull;
  mObserver = nsnull;
  mUnicodeDecoder = nsnull;
  mSniffingBuffer = nsnull;
  mMetaScanner = nsnull;

  // Walk the buffer chain from the head, unlinking each buffer before it is
  // deleted so that mFirstBuffer never points at freed memory.
  while (nsHtml5UTF16Buffer* doomed = mFirstBuffer) {
    mFirstBuffer = doomed->next;
    delete doomed;
  }

  // Parser objects and the back-pointer to the owner.
  mExecutor = nsnull;
  mTreeBuilder = nsnull;
  mTokenizer = nsnull;
  mOwner = nsnull;
}
/**
 * Hands out the current request as an nsIChannel.
 *
 * @param aChannel out-param filled by QueryInterface on mRequest
 * @return NS_ERROR_NOT_AVAILABLE when there is no request; otherwise the
 *         result of the QueryInterface call
 */
nsresult
nsHtml5StreamParser::GetChannel(nsIChannel** aChannel)
{
  if (!mRequest) {
    return NS_ERROR_NOT_AVAILABLE;
  }
  return CallQueryInterface(mRequest, aChannel);
}
/**
 * nsICharsetDetectionObserver callback: the charset detector reports its
 * result here.
 *
 * Only eBestAnswer and eSureAnswer are acted upon; any other confidence
 * level leaves the current charset untouched.
 *
 * @param aCharset the detected charset name
 * @param aConf    the detector's confidence in aCharset
 * @return always NS_OK
 */
NS_IMETHODIMP
nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
{
  if (aConf != eBestAnswer && aConf != eSureAnswer) {
    return NS_OK;
  }
  mCharset.Assign(aCharset);
  mCharsetSource = kCharsetFromAutoDetection;
  mExecutor->SetDocumentCharset(mCharset);
  return NS_OK;
}
/**
 * Instantiates the Unicode decoder for mCharset and then pushes the sniffing
 * buffer followed by the current network segment through it.
 *
 * If no converter exists for mCharset (NS_ERROR_UCONV_NOCONV), the charset
 * is replaced with windows-1252 and the executor is told about the change.
 *
 * @param aFromSegment the current network buffer; may be null
 * @param aCount       number of bytes in aFromSegment
 * @param aWriteCount  out-param passed on to
 *                     WriteSniffingBufferAndCurrentSegment
 */
nsresult
nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const PRUint8* aFromSegment, // can be null
                                                                          PRUint32 aCount,
                                                                          PRUint32* aWriteCount)
{
  nsresult rv = NS_OK;
  nsCOMPtr<nsICharsetConverterManager> converterManager =
    do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = converterManager->GetUnicodeDecoder(mCharset.get(),
                                           getter_AddRefs(mUnicodeDecoder));
  if (rv == NS_ERROR_UCONV_NOCONV) {
    // Unknown charset: fall back to windows-1252.
    mCharset.Assign("windows-1252"); // lower case is the raw form
    mCharsetSource = kCharsetFromWeakDocTypeDefault;
    rv = converterManager->GetUnicodeDecoderRaw(mCharset.get(),
                                                getter_AddRefs(mUnicodeDecoder));
    mExecutor->SetDocumentCharset(mCharset);
  }
  NS_ENSURE_SUCCESS(rv, rv);

  mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
  return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
}
/**
 * Feeds the buffered sniffing bytes (if any) and then the current network
 * segment (if any) to WriteStreamBytes. The sniffing buffer and the meta
 * scanner are dropped along the way.
 *
 * @param aFromSegment the current network buffer; may be null
 * @param aCount       number of bytes in aFromSegment
 * @param aWriteCount  out-param; only written when aFromSegment is non-null
 */
nsresult
nsHtml5StreamParser::WriteSniffingBufferAndCurrentSegment(const PRUint8* aFromSegment, // can be null
                                                          PRUint32 aCount,
                                                          PRUint32* aWriteCount)
{
  nsresult rv = NS_OK;
  if (mSniffingBuffer) {
    // The count for the sniffed bytes is not reported to the caller.
    PRUint32 sniffedBytesWritten;
    rv = WriteStreamBytes(mSniffingBuffer, mSniffingLength, &sniffedBytesWritten);
    NS_ENSURE_SUCCESS(rv, rv);
    mSniffingBuffer = nsnull;
  }
  mMetaScanner = nsnull;
  if (!aFromSegment) {
    return rv;
  }
  return WriteStreamBytes(aFromSegment, aCount, aWriteCount);
}
/**
 * Initializes the Unicode decoder after a byte order mark has been seen,
 * records the BOM as the charset source and drops the sniffing state.
 *
 * @param aCharsetName        the charset name stored in mCharset and reported
 *                            to the executor
 * @param aDecoderCharsetName the charset name used to look up the decoder
 * @return a failure code if the converter manager or the decoder cannot be
 *         obtained; otherwise NS_OK
 */
nsresult
nsHtml5StreamParser::SetupDecodingFromBom(const char* aCharsetName, const char* aDecoderCharsetName)
{
  nsresult rv = NS_OK;
  nsCOMPtr<nsICharsetConverterManager> convManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
  NS_ENSURE_SUCCESS(rv, rv);
  rv = convManager->GetUnicodeDecoderRaw(aDecoderCharsetName, getter_AddRefs(mUnicodeDecoder));
  NS_ENSURE_SUCCESS(rv, rv);
  // Configure the decoder the same way as the other decoder-setup paths in
  // this file, so that error handling does not depend on how the encoding
  // was discovered.
  mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
  mCharset.Assign(aCharsetName);
  mCharsetSource = kCharsetFromByteOrderMark;
  mExecutor->SetDocumentCharset(mCharset);
  // Sniffing is over: neither the buffered bytes nor the meta scanner are
  // needed any more.
  mSniffingBuffer = nsnull;
  mMetaScanner = nsnull;
  mBomState = BOM_SNIFFING_OVER;
  return rv;
}
/**
 * Called when the <meta> prescan did not produce a decoder. Optionally runs
 * the configured charset detector over the sniffed bytes, falls back to
 * windows-1252 if the charset is still uninitialized, and then sets up
 * decoding and writes out the buffered data.
 *
 * @param aFromSegment          the current network buffer; may be null
 * @param aCount                number of bytes in aFromSegment
 * @param aWriteCount           out-param passed on to the decoding setup
 * @param aCountToSniffingLimit number of bytes of aFromSegment that are fed
 *                              to the charset detector
 */
nsresult
nsHtml5StreamParser::FinalizeSniffing(const PRUint8* aFromSegment, // can be null
                                      PRUint32 aCount,
                                      PRUint32* aWriteCount,
                                      PRUint32 aCountToSniffingLimit)
{
  // meta scan failed.
  if (mCharsetSource >= kCharsetFromHintPrevDoc) {
    // The existing charset information is used as is; no detector run.
    return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
  }

  // maybe try chardet now; instantiation copied from nsDOMFile
  const nsAdoptingString& detectorPref =
    nsContentUtils::GetLocalizedStringPref("intl.charset.detector");
  if (!detectorPref.IsEmpty()) {
    nsCAutoString contractId;
    contractId.AssignLiteral(NS_CHARSET_DETECTOR_CONTRACTID_BASE);
    AppendUTF16toUTF8(detectorPref, contractId);
    nsCOMPtr<nsICharsetDetector> chardet = do_CreateInstance(contractId.get());
    if (!chardet) {
      NS_ERROR("Could not instantiate charset detector.");
    } else {
      // The detector reports back through Notify() on this object.
      nsresult rv = chardet->Init(this);
      NS_ENSURE_SUCCESS(rv, rv);

      PRBool stopFeeding = PR_FALSE;
      if (mSniffingBuffer) {
        rv = chardet->DoIt(reinterpret_cast<const char*>(mSniffingBuffer.get()),
                           mSniffingLength,
                           &stopFeeding);
        NS_ENSURE_SUCCESS(rv, rv);
      }
      if (!stopFeeding && aFromSegment) {
        rv = chardet->DoIt(reinterpret_cast<const char*>(aFromSegment),
                           aCountToSniffingLimit,
                           &stopFeeding);
        NS_ENSURE_SUCCESS(rv, rv);
      }
      rv = chardet->Done();
      NS_ENSURE_SUCCESS(rv, rv);
      // fall thru; callback may have changed charset
    }
  }

  if (mCharsetSource == kCharsetUninitialized) {
    // Hopefully this case is never needed, but dealing with it anyway
    mCharset.Assign("windows-1252");
    mCharsetSource = kCharsetFromWeakDocTypeDefault;
    mExecutor->SetDocumentCharset(mCharset);
  }
  return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
}
/**
 * Push bytes from the network while there is no Unicode decoder yet.
 *
 * First runs the BOM state machine over the segment. If a complete BOM is
 * recognized, the decoder is set up from it and the bytes after the BOM are
 * written straight to the decoder. Otherwise the segment is handed to the
 * <meta> prescan. If that does not produce a decoder either, the bytes are
 * buffered in mSniffingBuffer until
 * NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE bytes have been seen, at which
 * point sniffing is finalized.
 *
 * @param aFromSegment the current network buffer
 * @param aCount       number of bytes in aFromSegment
 * @param aWriteCount  out-param: number of bytes consumed from aFromSegment
 */
nsresult
nsHtml5StreamParser::SniffStreamBytes(const PRUint8* aFromSegment,
                                      PRUint32 aCount,
                                      PRUint32* aWriteCount)
{
  nsresult rv = NS_OK;
  PRUint32 writeCount;
  for (PRUint32 i = 0; i < aCount; i++) {
    switch (mBomState) {
      case BOM_SNIFFING_NOT_STARTED:
        NS_ASSERTION(i == 0, "Bad BOM sniffing state.");
        // Index with i like every other state does; i is 0 here because this
        // state is always left after the first byte examined.
        switch (aFromSegment[i]) {
          case 0xEF:
            mBomState = SEEN_UTF_8_FIRST_BYTE;
            break;
          case 0xFF:
            mBomState = SEEN_UTF_16_LE_FIRST_BYTE;
            break;
          case 0xFE:
            mBomState = SEEN_UTF_16_BE_FIRST_BYTE;
            break;
          default:
            mBomState = BOM_SNIFFING_OVER;
            break;
        }
        break;
      case SEEN_UTF_16_LE_FIRST_BYTE:
        if (aFromSegment[i] == 0xFE) {
          rv = SetupDecodingFromBom("UTF-16", "UTF-16LE"); // upper case is the raw form
          NS_ENSURE_SUCCESS(rv, rv);
          // Everything after the BOM goes straight to the decoder.
          PRUint32 count = aCount - (i + 1);
          rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
          NS_ENSURE_SUCCESS(rv, rv);
          *aWriteCount = writeCount + (i + 1);
          return rv;
        }
        mBomState = BOM_SNIFFING_OVER;
        break;
      case SEEN_UTF_16_BE_FIRST_BYTE:
        if (aFromSegment[i] == 0xFF) {
          rv = SetupDecodingFromBom("UTF-16", "UTF-16BE"); // upper case is the raw form
          NS_ENSURE_SUCCESS(rv, rv);
          PRUint32 count = aCount - (i + 1);
          rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
          NS_ENSURE_SUCCESS(rv, rv);
          *aWriteCount = writeCount + (i + 1);
          return rv;
        }
        mBomState = BOM_SNIFFING_OVER;
        break;
      case SEEN_UTF_8_FIRST_BYTE:
        if (aFromSegment[i] == 0xBB) {
          mBomState = SEEN_UTF_8_SECOND_BYTE;
        } else {
          mBomState = BOM_SNIFFING_OVER;
        }
        break;
      case SEEN_UTF_8_SECOND_BYTE:
        if (aFromSegment[i] == 0xBF) {
          rv = SetupDecodingFromBom("UTF-8", "UTF-8"); // upper case is the raw form
          NS_ENSURE_SUCCESS(rv, rv);
          PRUint32 count = aCount - (i + 1);
          rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
          NS_ENSURE_SUCCESS(rv, rv);
          *aWriteCount = writeCount + (i + 1);
          return rv;
        }
        mBomState = BOM_SNIFFING_OVER;
        break;
      default:
        // BOM_SNIFFING_OVER: nothing more for the state machine to do.
        goto bom_loop_end;
    }
  }
  // if we get here, there either was no BOM or the BOM sniffing isn't complete yet
  bom_loop_end:

  if (!mMetaScanner) {
    mMetaScanner = new nsHtml5MetaScanner();
  }

  if (mSniffingLength + aCount >= NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE) {
    // this is the last buffer
    // Only the part of the segment up to the sniffing limit is prescanned.
    PRUint32 countToSniffingLimit = NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE - mSniffingLength;
    nsHtml5ByteReadable readable(aFromSegment, aFromSegment + countToSniffingLimit);
    mMetaScanner->sniff(&readable, getter_AddRefs(mUnicodeDecoder), mCharset);
    if (mUnicodeDecoder) {
      mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
      // meta scan successful
      mCharsetSource = kCharsetFromMetaPrescan;
      mExecutor->SetDocumentCharset(mCharset);
      mMetaScanner = nsnull;
      return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
    }
    return FinalizeSniffing(aFromSegment, aCount, aWriteCount, countToSniffingLimit);
  }

  // not the last buffer
  nsHtml5ByteReadable readable(aFromSegment, aFromSegment + aCount);
  mMetaScanner->sniff(&readable, getter_AddRefs(mUnicodeDecoder), mCharset);
  if (mUnicodeDecoder) {
    // Configure the decoder exactly as the "last buffer" path above does, so
    // error handling does not depend on which segment contained the <meta>.
    mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
    // meta scan successful
    mCharsetSource = kCharsetFromMetaPrescan;
    mExecutor->SetDocumentCharset(mCharset);
    mMetaScanner = nsnull;
    return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
  }

  // No decision yet: stash the bytes so they can be replayed into the decoder
  // once the encoding is known. The size check above guarantees they fit.
  if (!mSniffingBuffer) {
    mSniffingBuffer = new PRUint8[NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE];
  }
  memcpy(mSniffingBuffer + mSniffingLength, aFromSegment, aCount);
  mSniffingLength += aCount;
  *aWriteCount = aCount;
  return NS_OK;
}
/**
 * Push bytes from the network through the Unicode decoder into the UTF-16
 * buffer queue. New buffers are appended to the queue as the current last
 * buffer fills up.
 *
 * When the decoder reports a failure, one input byte is skipped (if any
 * remain), a U+FFFD REPLACEMENT CHARACTER is appended to the output and the
 * decoder is reset before conversion continues.
 *
 * @param aFromSegment the bytes to decode
 * @param aCount       number of bytes in aFromSegment
 * @param aWriteCount  out-param: number of bytes consumed from aFromSegment
 * @return always NS_OK
 */
nsresult
nsHtml5StreamParser::WriteStreamBytes(const PRUint8* aFromSegment,
                                      PRUint32 aCount,
                                      PRUint32* aWriteCount)
{
  // mLastBuffer always points to a buffer of the size NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE.
  if (mLastBuffer->getEnd() == NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE) {
    mLastBuffer = (mLastBuffer->next = new nsHtml5UTF16Buffer(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE));
  }
  PRUint32 totalByteCount = 0;
  for (;;) {
    PRInt32 end = mLastBuffer->getEnd();
    // In: bytes available / UTF-16 units of free space.
    // Out: bytes consumed / UTF-16 units written.
    PRInt32 byteCount = aCount - totalByteCount;
    PRInt32 utf16Count = NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE - end;

    NS_ASSERTION(utf16Count, "Trying to convert into a buffer with no free space!");

    nsresult convResult = mUnicodeDecoder->Convert((const char*)aFromSegment, &byteCount, mLastBuffer->getBuffer() + end, &utf16Count);

    end += utf16Count;
    mLastBuffer->setEnd(end);
    totalByteCount += byteCount;
    aFromSegment += byteCount;

    NS_ASSERTION(mLastBuffer->getEnd() <= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE, "The Unicode decoder wrote too much data.");

    if (NS_FAILED(convResult)) {
      if (totalByteCount < aCount) { // mimicking nsScanner even though this seems wrong
        ++totalByteCount;
        ++aFromSegment;
      }
      // If the decoder filled the buffer completely before reporting the
      // error, there is no room left for the replacement character: start a
      // fresh buffer instead of writing one past the end of the full one.
      if (end == NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE) {
        mLastBuffer = (mLastBuffer->next = new nsHtml5UTF16Buffer(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE));
        end = 0;
      }
      mLastBuffer->getBuffer()[end] = 0xFFFD;
      ++end;
      mLastBuffer->setEnd(end);
      if (end == NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE) {
        // Keep the invariant that the last buffer has free space.
        mLastBuffer = (mLastBuffer->next = new nsHtml5UTF16Buffer(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE));
      }
      mUnicodeDecoder->Reset();
      if (totalByteCount == aCount) {
        *aWriteCount = totalByteCount;
        return NS_OK;
      }
    } else if (convResult == NS_PARTIAL_MORE_OUTPUT) {
      // Output buffer exhausted with input remaining: continue in a new one.
      mLastBuffer = (mLastBuffer->next = new nsHtml5UTF16Buffer(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE));
      NS_ASSERTION(totalByteCount < aCount, "The Unicode decoder has consumed too many bytes.");
    } else {
      NS_ASSERTION(totalByteCount == aCount, "The Unicode decoder consumed the wrong number of bytes.");
      *aWriteCount = totalByteCount;
      return NS_OK;
    }
  }
}
// nsIRequestObserver methods:
/**
 * nsIRequestObserver callback for the start of the network request.
 *
 * Forwards the notification to mObserver (if any), remembers the request,
 * starts the tokenizer and moves the executor's life cycle to PARSING. If the
 * charset source is at least kCharsetFromChannel, the Unicode decoder for
 * mCharset is instantiated right away; otherwise the decoder is left
 * uninstantiated.
 *
 * @param aRequest the request that is starting; stored in mRequest
 * @param aContext passed through to mObserver
 * @return NS_OK, or a failure code if the converter manager or the decoder
 *         cannot be obtained
 */
nsresult
nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
{
NS_PRECONDITION(eNone == mStreamListenerState,
"Parser's nsIStreamListener API was not setup "
"correctly in constructor.");
NS_PRECONDITION(mExecutor->GetLifeCycle() == NOT_STARTED,
"Got OnStartRequest at the wrong stage in the life cycle.");
// The observer's return value is not checked.
if (mObserver) {
mObserver->OnStartRequest(aRequest, aContext);
}
#ifdef DEBUG
mStreamListenerState = eOnStart;
#endif
mRequest = aRequest;
/*
* If you move the following line, be very careful not to cause
* WillBuildModel to be called before the document has had its
* script global object set.
*/
mTokenizer->start();
mExecutor->SetLifeCycle(PARSING);
if (mCharsetSource < kCharsetFromChannel) {
// we aren't ready to commit to an encoding yet
// leave converter uninstantiated for now
return NS_OK;
}
// The charset source is at least kCharsetFromChannel: set up decoding now.
nsresult rv = NS_OK;
nsCOMPtr<nsICharsetConverterManager> convManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
NS_ENSURE_SUCCESS(rv, rv);
rv = convManager->GetUnicodeDecoder(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
NS_ENSURE_SUCCESS(rv, rv);
mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
return NS_OK;
}
/**
* nsIRequestObserver: the network stream has ended.
*
* Flushes the executor if needed, finalizes encoding sniffing when no
* Unicode decoder exists yet, moves the executor to STREAM_ENDING (unless
* it is already TERMINATED), parses whatever is buffered unless a script is
* executing, and finally forwards the notification to mObserver.
*
* @param aRequest the request that ended; expected to equal mRequest
* @param aContext passed through to the observer
* @param status passed through to the observer
* @return NS_OK, or the failure code from FinalizeSniffing
*/
nsresult
nsHtml5StreamParser::OnStopRequest(nsIRequest* aRequest,
nsISupports* aContext,
nsresult status)
{
mExecutor->MaybeFlush();
NS_ASSERTION(mRequest == aRequest, "Got Stop on wrong stream.");
nsresult rv = NS_OK;
if (!mUnicodeDecoder) {
// The stream ended while still sniffing: flush the sniffing buffer with a
// null network segment.
PRUint32 writeCount;
rv = FinalizeSniffing(nsnull, 0, &writeCount, 0);
// NOTE(review): on failure this returns before the life cycle is advanced
// and before mObserver->OnStopRequest is forwarded -- confirm intended.
NS_ENSURE_SUCCESS(rv, rv);
}
switch (mExecutor->GetLifeCycle()) {
case TERMINATED:
break;
case NOT_STARTED:
NS_NOTREACHED("OnStopRequest before calling Parse() on the owner.");
break;
case STREAM_ENDING:
NS_ERROR("OnStopRequest when the stream lifecycle was already ending.");
break;
default:
mExecutor->SetLifeCycle(STREAM_ENDING);
break;
}
#ifdef DEBUG
mStreamListenerState = eOnStop;
#endif
// Parsing is not re-entered here while a script is executing.
if (!mExecutor->IsScriptExecuting()) {
ParseUntilSuspend();
}
if (mObserver) {
mObserver->OnStopRequest(aRequest, aContext, status);
}
return NS_OK;
}
// nsIStreamListener method:
/*
 * Segment writer passed to the input stream's ReadSegments() method by
 * OnDataAvailable. ReadSegments calls it once for each contiguous buffer of
 * data in the underlying stream or pipe, which lets the bytes be consumed
 * in place instead of being copied out of the stream first.
 *
 * aHtml5StreamParser is the closure and must point to the stream parser.
 * The segment is handed to WriteStreamBytes when a Unicode decoder already
 * exists and to SniffStreamBytes otherwise; the result of that call is
 * returned unchanged.
 */
NS_METHOD
nsHtml5StreamParser::ParserWriteFunc(nsIInputStream* aInStream,
                                     void* aHtml5StreamParser,
                                     const char* aFromSegment,
                                     PRUint32 aToOffset,
                                     PRUint32 aCount,
                                     PRUint32* aWriteCount)
{
  nsHtml5StreamParser* self =
    static_cast<nsHtml5StreamParser*>(aHtml5StreamParser);
  const PRUint8* bytes = reinterpret_cast<const PRUint8*>(aFromSegment);

  if (!self->HasDecoder()) {
    // Still deciding on the encoding.
    return self->SniffStreamBytes(bytes, aCount, aWriteCount);
  }
  return self->WriteStreamBytes(bytes, aCount, aWriteCount);
}
/**
 * nsIStreamListener: bytes are available on the network stream.
 *
 * Flushes the executor if needed, drains aLength bytes from aInStream
 * through ParserWriteFunc (which either sniffs or decodes them) and then,
 * unless a script is currently executing, parses the buffered data.
 *
 * @param aRequest the request delivering data; expected to equal mRequest
 * @param aContext unused here
 * @param aInStream the stream to read from
 * @param aSourceOffset unused here
 * @param aLength the number of bytes to read from aInStream
 * @return the result of aInStream->ReadSegments()
 */
nsresult
nsHtml5StreamParser::OnDataAvailable(nsIRequest* aRequest,
                                     nsISupports* aContext,
                                     nsIInputStream* aInStream,
                                     PRUint32 aSourceOffset,
                                     PRUint32 aLength)
{
  mExecutor->MaybeFlush();
  NS_PRECONDITION(eOnStart == mStreamListenerState ||
                  eOnDataAvail == mStreamListenerState,
                  "Error: OnStartRequest() must be called before OnDataAvailable()");
  NS_ASSERTION(mRequest == aRequest, "Got data on wrong stream.");
  // Start from zero so that the assertion below never reads an indeterminate
  // value when ReadSegments fails before storing a count.
  PRUint32 totalRead = 0;
  nsresult rv = aInStream->ReadSegments(nsHtml5StreamParser::ParserWriteFunc,
                                        static_cast<void*> (this),
                                        aLength,
                                        &totalRead);
  NS_ASSERTION(totalRead == aLength, "ReadSegments read the wrong number of bytes.");
  // Parsing is not re-entered here while a script is executing.
  if (!mExecutor->IsScriptExecuting()) {
    ParseUntilSuspend();
  }
  return rv;
}
/**
 * EncodingDeclarationHandler: reacts to an encoding declared inside the
 * document.
 *
 * Does nothing when the charset source is already kCharsetFromMetaTag or
 * better. When the declared encoding is an alias of the one in use, the
 * charset source is raised to kCharsetFromMetaTag. When it differs, a
 * charset switch is requested from the executor, except for HTTP requests
 * whose method is not GET.
 *
 * @param aEncoding the declared encoding label
 */
void
nsHtml5StreamParser::internalEncodingDeclaration(nsString* aEncoding)
{
  // this threshold corresponds to "confident" in the HTML5 spec
  if (mCharsetSource >= kCharsetFromMetaTag) {
    return;
  }

  nsresult rv = NS_OK;
  nsCOMPtr<nsICharsetAlias> aliasService(do_GetService(kCharsetAliasCID, &rv));
  if (NS_FAILED(rv)) {
    return;
  }

  nsCAutoString declaredCharset;
  CopyUTF16toUTF8(*aEncoding, declaredCharset);

  PRBool sameCharset;
  rv = aliasService->Equals(declaredCharset, mCharset, &sameCharset);
  if (NS_FAILED(rv)) {
    return;
  }
  if (sameCharset) {
    // The declaration confirms the encoding in use: become confident.
    mCharsetSource = kCharsetFromMetaTag;
    return;
  }

  // XXX check HTML5 non-IANA aliases here

  // The encodings are different. We want to reparse.
  nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(mRequest, &rv));
  if (NS_SUCCEEDED(rv)) {
    nsCAutoString method;
    httpChannel->GetRequestMethod(method);
    // XXX does Necko have a way to renavigate POST, etc. without hitting
    // the network?
    if (!method.EqualsLiteral("GET")) {
      // This is the old Gecko behavior but the spec disagrees.
      // Don't reparse on POST.
      return;
    }
  }

  // Either not an HTTP request or a GET: still want to reparse.
  mExecutor->NeedsCharsetSwitchTo(declaredCharset);
}
/**
* Tokenizes the pending UTF-16 buffer queue until the queue runs dry, the
* parser becomes blocked, the executor is terminated, a charset switch
* stops the parse, or suspension has been requested.
*
* Exhausted buffers other than the last one are deleted as the loop walks
* the queue. The last buffer is never released; its indices are reset so
* that it can be refilled.
*/
void
nsHtml5StreamParser::ParseUntilSuspend()
{
NS_PRECONDITION(!mExecutor->NeedsCharsetSwitch(), "ParseUntilSuspend called when charset switch needed.");
if (mBlocked) {
return;
}
switch (mExecutor->GetLifeCycle()) {
case TERMINATED:
return;
case NOT_STARTED:
NS_NOTREACHED("Bad life cycle!");
break;
default:
break;
}
mExecutor->WillResume();
mSuspending = PR_FALSE;
for (;;) {
if (!mFirstBuffer->hasMore()) {
if (mFirstBuffer == mLastBuffer) {
// The only remaining buffer is exhausted; what happens next depends on
// whether more network data is expected.
switch (mExecutor->GetLifeCycle()) {
case TERMINATED:
// something like cache manifests stopped the parse in mid-flight
return;
case PARSING:
// never release the last buffer. instead just zero its indices for refill
mFirstBuffer->setStart(0);
mFirstBuffer->setEnd(0);
return; // no more data for now but expecting more
case STREAM_ENDING:
mDone = PR_TRUE;
if (mExecutor->ReadyToCallDidBuildModel(PR_FALSE)) {
mExecutor->DidBuildModel();
}
return; // no more data and not expecting more
default:
NS_NOTREACHED("It should be impossible to reach this.");
return;
}
} else {
// Drop the exhausted head of the queue and move on to the next buffer.
nsHtml5UTF16Buffer* oldBuf = mFirstBuffer;
mFirstBuffer = mFirstBuffer->next;
delete oldBuf;
continue;
}
}
if (mBlocked || (mExecutor->GetLifeCycle() == TERMINATED)) {
return;
}
// now we have a non-empty buffer
mFirstBuffer->adjust(mLastWasCR);
mLastWasCR = PR_FALSE;
// adjust() may have consumed input, so re-check before tokenizing.
if (mFirstBuffer->hasMore()) {
mLastWasCR = mTokenizer->tokenizeBuffer(mFirstBuffer);
NS_ASSERTION(!(mExecutor->HasScriptElement() && mExecutor->NeedsCharsetSwitch()), "Can't have both script and charset switch.");
mExecutor->MaybeExecuteScript();
if (mExecutor->MaybePerformCharsetSwitch() == NS_ERROR_HTMLPARSER_STOPPARSING) {
return;
}
if (mBlocked) {
mExecutor->WillInterrupt();
return;
}
// XXX we may now have document.written stuff in the other buffer
// queue
if (mSuspending) {
// Suspension was requested: have the owner schedule a continuation.
mOwner->MaybePostContinueEvent();
mExecutor->WillInterrupt();
return;
}
}
continue;
}
}

View File

@ -0,0 +1,350 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Henri Sivonen <hsivonen@iki.fi>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef nsHtml5StreamParser_h__
#define nsHtml5StreamParser_h__
#include "nsAutoPtr.h"
#include "nsCOMPtr.h"
#include "nsIStreamListener.h"
#include "nsICharsetDetectionObserver.h"
#include "nsHtml5MetaScanner.h"
#include "nsIUnicodeDecoder.h"
#include "nsHtml5TreeOpExecutor.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsIInputStream.h"
#include "nsICharsetAlias.h"
class nsHtml5Parser;
#define NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE 1024
#define NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE 512
/**
 * States of the byte order mark (BOM) sniffer. The enumerators are numbered
 * consecutively starting from zero.
 */
enum eBomState {
  /** No byte has been examined for a BOM yet. */
  BOM_SNIFFING_NOT_STARTED = 0,

  /** Sniffing in progress: the first byte of a UTF-16LE BOM was seen. */
  SEEN_UTF_16_LE_FIRST_BYTE,

  /** Sniffing in progress: the first byte of a UTF-16BE BOM was seen. */
  SEEN_UTF_16_BE_FIRST_BYTE,

  /** Sniffing in progress: the first byte of a UTF-8 BOM was seen. */
  SEEN_UTF_8_FIRST_BYTE,

  /** Sniffing in progress: the first two bytes of a UTF-8 BOM were seen. */
  SEEN_UTF_8_SECOND_BYTE,

  /** Sniffing had started but has ended, for whatever reason. */
  BOM_SNIFFING_OVER
};
/**
* Stream listener of the HTML5 parser. It receives bytes from the network,
* sniffs the character encoding when no Unicode decoder exists yet, decodes
* the bytes into a queue of UTF-16 buffers and feeds that queue to the
* tokenizer.
*/
class nsHtml5StreamParser : public nsIStreamListener,
public nsICharsetDetectionObserver {
public:
NS_DECL_AND_IMPL_ZEROING_OPERATOR_NEW
NS_DECL_CYCLE_COLLECTING_ISUPPORTS
NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsHtml5StreamParser, nsIStreamListener)
nsHtml5StreamParser(nsHtml5Tokenizer* aTokenizer,
nsHtml5TreeOpExecutor* aExecutor,
nsHtml5Parser* aOwner);
virtual ~nsHtml5StreamParser();
// nsIRequestObserver methods:
NS_DECL_NSIREQUESTOBSERVER
// nsIStreamListener methods:
NS_DECL_NSISTREAMLISTENER
// nsICharsetDetectionObserver
/**
* Chardet calls this to report the detection result
*/
NS_IMETHOD Notify(const char* aCharset, nsDetectionConfident aConf);
// EncodingDeclarationHandler
/**
* Tree builder uses this to report a late <meta charset>
*/
void internalEncodingDeclaration(nsString* aEncoding);
// Not from an external interface
/**
* Call this method once you've created a parser, and want to instruct it
* about what charset to load
*
* @param aCharset the charset of a document
* @param aSource the source of the charset
*/
inline void SetDocumentCharset(const nsACString& aCharset, PRInt32 aSource) {
mCharset = aCharset;
mCharsetSource = aSource;
}
/**
* Sets the request observer that OnStartRequest and OnStopRequest are
* forwarded to.
*/
inline void SetObserver(nsIRequestObserver* aObserver) {
mObserver = aObserver;
}
nsresult GetChannel(nsIChannel** aChannel);
/**
* Marks the parser as blocked; ParseUntilSuspend returns without parsing
* while the flag is set.
*/
inline void Block() {
mBlocked = PR_TRUE;
}
/**
* Clears the blocked flag set by Block().
*/
inline void Unblock() {
mBlocked = PR_FALSE;
}
/**
* Requests that ParseUntilSuspend return after the current tokenizer pass.
*/
inline void Suspend() {
mSuspending = PR_TRUE;
}
void ParseUntilSuspend();
/**
* Whether the stream parser is done (see mDone).
*/
PRBool IsDone() {
return mDone;
}
private:
/**
* Segment writer passed to nsIInputStream::ReadSegments; the closure
* argument is the stream parser.
*/
static NS_METHOD ParserWriteFunc(nsIInputStream* aInStream,
void* aHtml5StreamParser,
const char* aFromSegment,
PRUint32 aToOffset,
PRUint32 aCount,
PRUint32* aWriteCount);
/**
* True when there is a Unicode decoder already
*/
inline PRBool HasDecoder() {
return !!mUnicodeDecoder;
}
/**
* Push bytes from network when there is no Unicode decoder yet
*/
nsresult SniffStreamBytes(const PRUint8* aFromSegment,
PRUint32 aCount,
PRUint32* aWriteCount);
/**
* Push bytes from network when there is a Unicode decoder already
*/
nsresult WriteStreamBytes(const PRUint8* aFromSegment,
PRUint32 aCount,
PRUint32* aWriteCount);
/**
* <meta charset> scan failed. Try chardet if applicable. After this, the
* parser will have some encoding even if only a last-resort fallback.
*
* @param aFromSegment The current network buffer or null if the sniffing
* buffer is being flushed due to network stream ending.
* @param aCount The number of bytes in aFromSegment (ignored if
* aFromSegment is null)
* @param aWriteCount Return value for how many bytes got read from the
* buffer.
* @param aCountToSniffingLimit The number of unfilled slots in
* mSniffingBuffer
*/
nsresult FinalizeSniffing(const PRUint8* aFromSegment,
PRUint32 aCount,
PRUint32* aWriteCount,
PRUint32 aCountToSniffingLimit);
/**
* Set up the Unicode decoder and write the sniffing buffer into it
* followed by the current network buffer.
*
* @param aFromSegment The current network buffer or null if the sniffing
* buffer is being flushed due to network stream ending.
* @param aCount The number of bytes in aFromSegment (ignored if
* aFromSegment is null)
* @param aWriteCount Return value for how many bytes got read from the
* buffer.
*/
nsresult SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const PRUint8* aFromSegment,
PRUint32 aCount,
PRUint32* aWriteCount);
/**
* Write the sniffing buffer into the Unicode decoder followed by the
* current network buffer.
*
* @param aFromSegment The current network buffer or null if the sniffing
* buffer is being flushed due to network stream ending.
* @param aCount The number of bytes in aFromSegment (ignored if
* aFromSegment is null)
* @param aWriteCount Return value for how many bytes got read from the
* buffer.
*/
nsresult WriteSniffingBufferAndCurrentSegment(const PRUint8* aFromSegment,
PRUint32 aCount,
PRUint32* aWriteCount);
/**
* Initialize the Unicode decoder, mark the BOM as the source and
* drop the sniffer.
*
* @param aCharsetName The charset name to report to the outside (UTF-16
* or UTF-8)
* @param aDecoderCharsetName The actual name for the decoder's charset
* (UTF-16BE, UTF-16LE or UTF-8; the BOM has
* been swallowed)
*/
nsresult SetupDecodingFromBom(const char* aCharsetName,
const char* aDecoderCharsetName);
/**
* The request whose data is being parsed; set in OnStartRequest
*/
nsCOMPtr<nsIRequest> mRequest;
/**
* Observer that request start and stop notifications are forwarded to
*/
nsCOMPtr<nsIRequestObserver> mObserver;
/**
* The Unicode decoder
*/
nsCOMPtr<nsIUnicodeDecoder> mUnicodeDecoder;
/**
* The buffer for sniffing the character encoding
*/
nsAutoArrayPtr<PRUint8> mSniffingBuffer;
/**
* The number of meaningful bytes in mSniffingBuffer
*/
PRUint32 mSniffingLength;
/**
* BOM sniffing state
*/
eBomState mBomState;
/**
* <meta> prescan implementation
*/
nsAutoPtr<nsHtml5MetaScanner> mMetaScanner;
// encoding-related stuff
/**
* The source (confidence) of the character encoding in use
*/
PRInt32 mCharsetSource;
/**
* The character encoding in use
*/
nsCString mCharset;
// Portable parser objects
/**
* The first buffer in the pending UTF-16 buffer queue
*/
nsHtml5UTF16Buffer* mFirstBuffer; // manually managed strong ref
/**
* The last buffer in the pending UTF-16 buffer queue
*/
nsHtml5UTF16Buffer* mLastBuffer; // weak ref; always points to
// a buffer of the size NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE
/**
* The tree operation executor
*/
nsHtml5TreeOpExecutor* mExecutor;
/**
* The HTML5 tree builder
*/
nsHtml5TreeBuilder* mTreeBuilder;
/**
* The HTML5 tokenizer
*/
nsHtml5Tokenizer* mTokenizer;
/**
* The owning parser; asked to post a continue event when parsing is
* suspended
*/
nsCOMPtr<nsHtml5Parser> mOwner;
/**
* Whether the last character tokenized was a carriage return (for CRLF)
*/
PRBool mLastWasCR;
/**
* The parser is blocking on a script
*/
PRBool mBlocked;
/**
* The event loop will spin ASAP
*/
PRBool mSuspending;
/**
* Whether the stream parser is done
*/
PRBool mDone;
#ifdef DEBUG
/**
* For asserting stream life cycle
*/
eStreamState mStreamListenerState;
#endif
};
#endif // nsHtml5StreamParser_h__

View File

@ -43,6 +43,7 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5MetaScanner.h"
@ -3331,7 +3332,7 @@ nsHtml5Tokenizer::isInDataState()
}
void
nsHtml5Tokenizer::setEncodingDeclarationHandler(nsHtml5Parser* encodingDeclarationHandler)
nsHtml5Tokenizer::setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler)
{
this->encodingDeclarationHandler = encodingDeclarationHandler;
}

View File

@ -44,8 +44,9 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
class nsHtml5Parser;
class nsHtml5StreamParser;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
@ -81,7 +82,7 @@ class nsHtml5Tokenizer
static jArray<PRUnichar,PRInt32> NOFRAMES_ARR;
protected:
nsHtml5TreeBuilder* tokenHandler;
nsHtml5Parser* encodingDeclarationHandler;
nsHtml5StreamParser* encodingDeclarationHandler;
PRBool lastCR;
PRInt32 stateSave;
private:
@ -264,7 +265,7 @@ class nsHtml5Tokenizer
PRInt32 getLine();
PRInt32 getCol();
PRBool isInDataState();
void setEncodingDeclarationHandler(nsHtml5Parser* encodingDeclarationHandler);
void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler);
static void initializeStatics();
static void releaseStatics();
};

View File

@ -49,6 +49,8 @@
#include "nsHtml5PendingNotification.h"
#include "nsHtml5StateSnapshot.h"
#include "nsHtml5StackNode.h"
#include "nsHtml5TreeOpExecutor.h"
#include "nsHtml5StreamParser.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5MetaScanner.h"
@ -2691,8 +2693,8 @@ void
nsHtml5TreeBuilder::documentModeInternal(nsHtml5DocumentMode m, nsString* publicIdentifier, nsString* systemIdentifier, PRBool html4SpecificAdditionalErrorChecks)
{
quirks = (m == QUIRKS_MODE);
if (!!parser) {
parser->documentMode(m);
if (!!this) {
this->documentMode(m);
}
}

View File

@ -50,8 +50,10 @@
#include "nsHtml5PendingNotification.h"
#include "nsHtml5StateSnapshot.h"
#include "nsHtml5StackNode.h"
#include "nsHtml5TreeOpExecutor.h"
#include "nsHtml5StreamParser.h"
class nsHtml5Parser;
class nsHtml5StreamParser;
class nsHtml5Tokenizer;
class nsHtml5MetaScanner;
@ -62,7 +64,6 @@ class nsHtml5UTF16Buffer;
class nsHtml5StateSnapshot;
class nsHtml5Portability;
typedef nsIContent* nsIContentPtr;
class nsHtml5TreeBuilder
{
@ -75,7 +76,7 @@ class nsHtml5TreeBuilder
protected:
nsHtml5Tokenizer* tokenizer;
private:
nsHtml5Parser* parser;
nsHtml5TreeBuilder* documentModeHandler;
PRBool scriptingEnabled;
PRBool needToDropLF;
PRBool fragment;

View File

@ -54,24 +54,17 @@
#include "nsTraceRefcnt.h"
#include "mozAutoDocUpdate.h"
#include "nsIScriptElement.h"
#define NS_HTML5_TREE_BUILDER_MAX_QUEUE_TIME 3000UL // milliseconds
#define NS_HTML5_TREE_BUILDER_DEFAULT_QUEUE_LENGTH 200
#define NS_HTML5_TREE_BUILDER_MIN_QUEUE_LENGTH 100
#define NS_HTML5_TREE_BUILDER_MAX_TIME_WITHOUT_FLUSH 5000 // milliseconds
#include "nsIDTD.h"
// this really should be autogenerated...
jArray<PRUnichar,PRInt32> nsHtml5TreeBuilder::ISINDEX_PROMPT = jArray<PRUnichar,PRInt32>();
nsHtml5TreeBuilder::nsHtml5TreeBuilder(nsHtml5Parser* aParser)
: parser(aParser)
, scriptingEnabled(PR_FALSE)
nsHtml5TreeBuilder::nsHtml5TreeBuilder(nsHtml5TreeOpExecutor* aExec)
: scriptingEnabled(PR_FALSE)
, fragment(PR_FALSE)
, contextNode(nsnull)
, formPointer(nsnull)
, headPointer(nsnull)
, mNeedsFlush(PR_FALSE)
, mFlushTimer(do_CreateInstance("@mozilla.org/timer;1"))
, mHasProcessedBase(PR_FALSE)
, mExecutor(aExec)
#ifdef DEBUG
, mActive(PR_FALSE)
#endif
@ -84,16 +77,13 @@ nsHtml5TreeBuilder::~nsHtml5TreeBuilder()
MOZ_COUNT_DTOR(nsHtml5TreeBuilder);
NS_ASSERTION(!mActive, "nsHtml5TreeBuilder deleted without ever calling end() on it!");
mOpQueue.Clear();
if (mFlushTimer) {
mFlushTimer->Cancel(); // XXX why is this even necessary? it is, though.
}
}
nsIContent*
nsHtml5TreeBuilder::createElement(PRInt32 aNamespace, nsIAtom* aName, nsHtml5HtmlAttributes* aAttributes)
{
nsIContent* newContent;
nsCOMPtr<nsINodeInfo> nodeInfo = parser->GetNodeInfoManager()->GetNodeInfo(aName, nsnull, aNamespace);
nsCOMPtr<nsINodeInfo> nodeInfo = mExecutor->GetNodeInfoManager()->GetNodeInfo(aName, nsnull, aNamespace);
NS_ASSERTION(nodeInfo, "Got null nodeinfo.");
NS_NewElement(&newContent, nodeInfo->NamespaceID(), nodeInfo, PR_TRUE);
NS_ASSERTION(newContent, "Element creation created null pointer.");
@ -181,7 +171,7 @@ void
nsHtml5TreeBuilder::insertFosterParentedCharacters(PRUnichar* aBuffer, PRInt32 aStart, PRInt32 aLength, nsIContent* aTable, nsIContent* aStackParent)
{
nsCOMPtr<nsIContent> text;
NS_NewTextNode(getter_AddRefs(text), parser->GetNodeInfoManager());
NS_NewTextNode(getter_AddRefs(text), mExecutor->GetNodeInfoManager());
// XXX nsresult and comment null check?
text->SetText(aBuffer + aStart, aLength, PR_FALSE);
// XXX nsresult
@ -202,7 +192,7 @@ void
nsHtml5TreeBuilder::appendCharacters(nsIContent* aParent, PRUnichar* aBuffer, PRInt32 aStart, PRInt32 aLength)
{
nsCOMPtr<nsIContent> text;
NS_NewTextNode(getter_AddRefs(text), parser->GetNodeInfoManager());
NS_NewTextNode(getter_AddRefs(text), mExecutor->GetNodeInfoManager());
// XXX nsresult and comment null check?
text->SetText(aBuffer + aStart, aLength, PR_FALSE);
// XXX nsresult
@ -215,7 +205,7 @@ void
nsHtml5TreeBuilder::appendComment(nsIContent* aParent, PRUnichar* aBuffer, PRInt32 aStart, PRInt32 aLength)
{
nsCOMPtr<nsIContent> comment;
NS_NewCommentNode(getter_AddRefs(comment), parser->GetNodeInfoManager());
NS_NewCommentNode(getter_AddRefs(comment), mExecutor->GetNodeInfoManager());
// XXX nsresult and comment null check?
comment->SetText(aBuffer + aStart, aLength, PR_FALSE);
// XXX nsresult
@ -228,7 +218,7 @@ void
nsHtml5TreeBuilder::appendCommentToDocument(PRUnichar* aBuffer, PRInt32 aStart, PRInt32 aLength)
{
nsCOMPtr<nsIContent> comment;
NS_NewCommentNode(getter_AddRefs(comment), parser->GetNodeInfoManager());
NS_NewCommentNode(getter_AddRefs(comment), mExecutor->GetNodeInfoManager());
// XXX nsresult and comment null check?
comment->SetText(aBuffer + aStart, aLength, PR_FALSE);
// XXX nsresult
@ -260,14 +250,15 @@ void
nsHtml5TreeBuilder::start(PRBool fragment)
{
// XXX check that timer creation didn't fail in constructor
if (fragment) {
mHasProcessedBase = PR_TRUE;
} else {
mHasProcessedBase = PR_FALSE;
parser->WillBuildModelImpl();
parser->GetDocument()->BeginLoad(); // XXX fragment?
if (!fragment) {
/*
* If you move the following line, be very careful not to cause
* WillBuildModel to be called before the document has had its
* script global object set.
*/
mExecutor->WillBuildModel(eDTDMode_unknown);
}
mNeedsFlush = PR_FALSE;
mExecutor->Start();
#ifdef DEBUG
mActive = PR_TRUE;
#endif
@ -276,18 +267,11 @@ nsHtml5TreeBuilder::start(PRBool fragment)
void
nsHtml5TreeBuilder::end()
{
mFlushTimer->Cancel();
mExecutor->End();
mOpQueue.Clear();
#ifdef DEBUG
mActive = PR_FALSE;
#endif
#ifdef DEBUG_hsivonen
printf("MAX INSERTION BATCH LEN: %d\n", sInsertionBatchMaxLength);
printf("MAX NOTIFICATION BATCH LEN: %d\n", sAppendBatchMaxSize);
if (sAppendBatchExaminations != 0) {
printf("AVERAGE SLOTS EXAMINED: %d\n", sAppendBatchSlotsExamined / sAppendBatchExaminations);
}
#endif
}
void
@ -299,7 +283,7 @@ nsHtml5TreeBuilder::appendDoctypeToDocument(nsIAtom* aName, nsString* aPublicId,
nsAutoString voidString;
voidString.SetIsVoid(PR_TRUE);
NS_NewDOMDocumentType(getter_AddRefs(docType),
parser->GetNodeInfoManager(),
mExecutor->GetNodeInfoManager(),
nsnull,
aName,
nsnull,
@ -350,7 +334,7 @@ nsHtml5TreeBuilder::elementPushed(PRInt32 aNamespace, nsIAtom* aName, nsIContent
}
}
#endif
MaybeSuspend();
mExecutor->MaybeSuspend();
}
void
@ -359,14 +343,14 @@ nsHtml5TreeBuilder::elementPopped(PRInt32 aNamespace, nsIAtom* aName, nsIContent
NS_ASSERTION(aNamespace == kNameSpaceID_XHTML || aNamespace == kNameSpaceID_SVG || aNamespace == kNameSpaceID_MathML, "Element isn't HTML, SVG or MathML!");
NS_ASSERTION(aName, "Element doesn't have local name!");
NS_ASSERTION(aElement, "No element!");
MaybeSuspend();
mExecutor->MaybeSuspend();
if (aNamespace == kNameSpaceID_MathML) {
return;
}
// we now have only SVG and HTML
if (aName == nsHtml5Atoms::script) {
requestSuspension();
parser->SetScriptElement(aElement);
mExecutor->SetScriptElement(aElement);
return;
}
if (aName == nsHtml5Atoms::title) {
@ -455,59 +439,10 @@ nsHtml5TreeBuilder::accumulateCharacters(PRUnichar* aBuf, PRInt32 aStart, PRInt3
charBufferLen = newFillLen;
}
static void
TimerCallbackFunc(nsITimer* aTimer, void* aClosure)
{
(static_cast<nsHtml5TreeBuilder*> (aClosure))->DeferredTimerFlush();
}
void
nsHtml5TreeBuilder::Flush()
{
mNeedsFlush = PR_FALSE;
MOZ_AUTO_DOC_UPDATE(parser->GetDocument(), UPDATE_CONTENT_MODEL, PR_TRUE);
PRIntervalTime flushStart = 0;
PRUint32 opQueueLength = mOpQueue.Length();
if (opQueueLength > NS_HTML5_TREE_BUILDER_MIN_QUEUE_LENGTH) { // avoid computing averages with too few ops
flushStart = PR_IntervalNow();
}
mElementsSeenInThisAppendBatch.SetCapacity(opQueueLength * 2);
// XXX alloc failure
const nsHtml5TreeOperation* start = mOpQueue.Elements();
const nsHtml5TreeOperation* end = start + opQueueLength;
for (nsHtml5TreeOperation* iter = (nsHtml5TreeOperation*)start; iter < end; ++iter) {
iter->Perform(this);
}
FlushPendingAppendNotifications();
#ifdef DEBUG_hsivonen
if (mOpQueue.Length() > sInsertionBatchMaxLength) {
sInsertionBatchMaxLength = opQueueLength;
}
#endif
mOpQueue.Clear();
if (flushStart) {
PRUint32 delta = PR_IntervalToMilliseconds(PR_IntervalNow() - flushStart);
sTreeOpQueueMaxLength = delta ?
(PRUint32)((NS_HTML5_TREE_BUILDER_MAX_QUEUE_TIME * (PRUint64)opQueueLength) / delta) :
0;
if (sTreeOpQueueMaxLength < NS_HTML5_TREE_BUILDER_MIN_QUEUE_LENGTH) {
sTreeOpQueueMaxLength = NS_HTML5_TREE_BUILDER_MIN_QUEUE_LENGTH;
}
#ifdef DEBUG_hsivonen
printf("QUEUE MAX LENGTH: %d\n", sTreeOpQueueMaxLength);
#endif
}
mFlushTimer->InitWithFuncCallback(TimerCallbackFunc, static_cast<void*> (this), NS_HTML5_TREE_BUILDER_MAX_TIME_WITHOUT_FLUSH, nsITimer::TYPE_ONE_SHOT);
}
void
nsHtml5TreeBuilder::DoUnlink()
{
nsHtml5TreeBuilder* tmp = this;
if (mFlushTimer) {
mFlushTimer->Cancel();
}
NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mFlushTimer);
NS_IF_RELEASE(contextNode);
NS_IF_RELEASE(formPointer);
NS_IF_RELEASE(headPointer);
@ -528,7 +463,6 @@ void
nsHtml5TreeBuilder::DoTraverse(nsCycleCollectionTraversalCallback &cb)
{
nsHtml5TreeBuilder* tmp = this;
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mFlushTimer);
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_RAWPTR(contextNode);
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_RAWPTR(formPointer);
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_RAWPTR(headPointer);
@ -560,10 +494,11 @@ nsHtml5TreeBuilder::DoTraverse(nsCycleCollectionTraversalCallback &cb)
}
}
#ifdef DEBUG_hsivonen
PRUint32 nsHtml5TreeBuilder::sInsertionBatchMaxLength = 0;
PRUint32 nsHtml5TreeBuilder::sAppendBatchMaxSize = 0;
PRUint32 nsHtml5TreeBuilder::sAppendBatchSlotsExamined = 0;
PRUint32 nsHtml5TreeBuilder::sAppendBatchExaminations = 0;
#endif
PRUint32 nsHtml5TreeBuilder::sTreeOpQueueMaxLength = NS_HTML5_TREE_BUILDER_DEFAULT_QUEUE_LENGTH;
// DocumentModeHandler
/**
 * DocumentModeHandler: queues a tree operation that carries the document
 * mode reported by the tree builder.
 *
 * @param m the document mode to record
 */
void
nsHtml5TreeBuilder::documentMode(nsHtml5DocumentMode m)
{
  nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
  if (!treeOp) {
    // The queue could not grow (out of memory). Drop the operation instead
    // of dereferencing a null pointer.
    NS_ERROR("Out of memory: could not queue the document mode operation.");
    return;
  }
  treeOp->Init(m);
}

View File

@ -37,133 +37,36 @@
private:
#ifdef DEBUG_hsivonen
static PRUint32 sInsertionBatchMaxLength;
static PRUint32 sAppendBatchMaxSize;
static PRUint32 sAppendBatchSlotsExamined;
static PRUint32 sAppendBatchExaminations;
#endif
static PRUint32 sTreeOpQueueMaxLength;
PRBool mNeedsFlush;
nsCOMPtr<nsITimer> mFlushTimer;
PRBool mHasProcessedBase;
#ifdef DEBUG
PRBool mActive;
#endif
nsTArray<nsHtml5TreeOperation> mOpQueue;
nsTArray<nsIContentPtr> mElementsSeenInThisAppendBatch;
nsTArray<nsHtml5PendingNotification> mPendingNotifications;
inline void MaybeSuspend() {
if (!mNeedsFlush) {
mNeedsFlush = !!(mOpQueue.Length() >= sTreeOpQueueMaxLength);
}
if (parser->DidProcessATokenImpl() == NS_ERROR_HTMLPARSER_INTERRUPTED || mNeedsFlush) {
// We've been in the parser for too long and/or the op queue is becoming too
// long to flush in one go it it grows further.
parser->Suspend();
requestSuspension();
}
}
nsHtml5TreeOpExecutor* mExecutor;
#ifdef DEBUG
PRBool mActive;
#endif
public:
nsHtml5TreeBuilder(nsHtml5Parser* aParser);
nsHtml5TreeBuilder(nsHtml5TreeOpExecutor* aExec);
~nsHtml5TreeBuilder();
void Flush();
inline void MaybeFlush() {
if (mNeedsFlush) {
Flush();
}
}
inline void DeferredTimerFlush() {
if (!mOpQueue.IsEmpty()) {
mNeedsFlush = PR_TRUE;
}
}
inline void PostPendingAppendNotification(nsIContent* aParent, nsIContent* aChild) {
PRBool newParent = PR_TRUE;
const nsIContentPtr* first = mElementsSeenInThisAppendBatch.Elements();
const nsIContentPtr* last = first + mElementsSeenInThisAppendBatch.Length() - 1;
for (const nsIContentPtr* iter = last; iter >= first; --iter) {
#ifdef DEBUG_hsivonen
sAppendBatchSlotsExamined++;
#endif
if (*iter == aParent) {
newParent = PR_FALSE;
break;
}
}
if (aChild->IsNodeOfType(nsINode::eELEMENT)) {
mElementsSeenInThisAppendBatch.AppendElement(aChild);
}
mElementsSeenInThisAppendBatch.AppendElement(aParent);
if (newParent) {
mPendingNotifications.AppendElement(aParent);
}
#ifdef DEBUG_hsivonen
sAppendBatchExaminations++;
#endif
}
inline void FlushPendingAppendNotifications() {
const nsHtml5PendingNotification* start = mPendingNotifications.Elements();
const nsHtml5PendingNotification* end = start + mPendingNotifications.Length();
for (nsHtml5PendingNotification* iter = (nsHtml5PendingNotification*)start; iter < end; ++iter) {
iter->Fire();
}
mPendingNotifications.Clear();
#ifdef DEBUG_hsivonen
if (mElementsSeenInThisAppendBatch.Length() > sAppendBatchMaxSize) {
sAppendBatchMaxSize = mElementsSeenInThisAppendBatch.Length();
}
#endif
mElementsSeenInThisAppendBatch.Clear();
}
inline nsIDocument* GetDocument() {
return parser->GetDocument();
}
inline void SetScriptElement(nsIContent* aScript) {
parser->SetScriptElement(aScript);
}
inline void UpdateStyleSheet(nsIContent* aSheet) {
parser->UpdateStyleSheet(aSheet);
}
inline nsresult ProcessBase(nsIContent* aBase) {
if (!mHasProcessedBase) {
nsresult rv = parser->ProcessBASETag(aBase);
NS_ENSURE_SUCCESS(rv, rv);
mHasProcessedBase = PR_TRUE;
}
return NS_OK;
}
inline nsresult ProcessMeta(nsIContent* aMeta) {
return parser->ProcessMETATag(aMeta);
}
inline nsresult ProcessOfflineManifest(nsIContent* aHtml) {
parser->ProcessOfflineManifest(aHtml);
return NS_OK;
}
inline void StartLayout() {
nsIDocument* doc = GetDocument();
if (doc) {
FlushPendingAppendNotifications();
parser->StartLayout(PR_FALSE);
}
}
void DoUnlink();
void DoTraverse(nsCycleCollectionTraversalCallback &cb);
// DocumentModeHandler
/**
* Tree builder uses this to report quirkiness of the document
*/
void documentMode(nsHtml5DocumentMode m);
/**
* Returns the number of tree operations currently held in mOpQueue.
*/
inline PRUint32 GetOpQueueLength() {
return mOpQueue.Length();
}
/**
* Swaps the contents of mOpQueue with aOtherQueue.
*
* @param aOtherQueue the array to exchange elements with
*/
inline void SwapQueue(nsTArray<nsHtml5TreeOperation>& aOtherQueue) {
mOpQueue.SwapElements(aOtherQueue);
}
/**
* Forwards to requestSuspension().
*/
inline void ReqSuspension() {
requestSuspension();
}

View File

@ -0,0 +1,546 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set sw=2 ts=2 et tw=79: */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Pierre Phaneuf <pp@ludusdesign.com>
* Henri Sivonen <hsivonen@iki.fi>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nsHtml5TreeOpExecutor.h"
#include "nsScriptLoader.h"
#include "nsIMarkupDocumentViewer.h"
#include "nsIContentViewer.h"
#include "nsIDocShellTreeItem.h"
#include "nsIStyleSheetLinkingElement.h"
#include "nsIDocShell.h"
#include "nsIScriptGlobalObject.h"
#include "nsIScriptGlobalObjectOwner.h"
#include "nsIScriptSecurityManager.h"
#include "nsIWebShellServices.h"
#include "nsContentUtils.h"
#include "mozAutoDocUpdate.h"
#include "nsNetUtil.h"
#include "nsHtml5Parser.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"
#include "nsHtml5StreamParser.h"
// Time budget that Flush() multiplies by the number of ops it just executed
// and divides by the measured elapsed time to derive sTreeOpQueueMaxLength.
#define NS_HTML5_TREE_OP_EXECUTOR_MAX_QUEUE_TIME 3000UL // milliseconds
// Initial value of sTreeOpQueueMaxLength, used until Flush() has measured one.
#define NS_HTML5_TREE_OP_EXECUTOR_DEFAULT_QUEUE_LENGTH 200
// Lower bound for sTreeOpQueueMaxLength; Flush() also only times batches that
// are longer than this, to avoid computing averages with too few ops.
#define NS_HTML5_TREE_OP_EXECUTOR_MIN_QUEUE_LENGTH 100
// Delay of the one-shot timer armed at the end of Flush(); when it fires,
// DeferredTimerFlush() requests a flush if the tree builder has queued ops.
#define NS_HTML5_TREE_OP_EXECUTOR_MAX_TIME_WITHOUT_FLUSH 5000 // milliseconds
// XPCOM boilerplate: cycle-collection participant, interface table and
// refcounting, all layered on top of nsContentSink.
NS_IMPL_CYCLE_COLLECTION_CLASS(nsHtml5TreeOpExecutor)
NS_INTERFACE_TABLE_HEAD_CYCLE_COLLECTION_INHERITED(nsHtml5TreeOpExecutor)
NS_INTERFACE_TABLE_INHERITED1(nsHtml5TreeOpExecutor,
nsIContentSink)
NS_INTERFACE_TABLE_TAIL_INHERITING(nsContentSink)
NS_IMPL_ADDREF_INHERITED(nsHtml5TreeOpExecutor, nsContentSink)
NS_IMPL_RELEASE_INHERITED(nsHtml5TreeOpExecutor, nsContentSink)
// Traverse: report the strong references this class adds on top of
// nsContentSink (the flush timer and the pending script element).
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN_INHERITED(nsHtml5TreeOpExecutor, nsContentSink)
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mFlushTimer);
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mScriptElement)
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
// Unlink: cancel the timer before dropping it so that its callback, which
// holds a raw pointer to this object as its closure, cannot fire afterwards.
NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN_INHERITED(nsHtml5TreeOpExecutor, nsContentSink)
if (tmp->mFlushTimer) {
tmp->mFlushTimer->Cancel();
}
NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mFlushTimer);
NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mScriptElement)
NS_IMPL_CYCLE_COLLECTION_UNLINK_END
/**
 * Constructs an executor with all of its boolean flags cleared and a freshly
 * created timer instance for deferred flushes.
 *
 * Members that are not listed here (mStreamParser, mLifeCycle, mTreeBuilder)
 * are not set by this constructor.
 * NOTE(review): presumably they rely on the zeroing operator new declared in
 * the class -- confirm.
 */
nsHtml5TreeOpExecutor::nsHtml5TreeOpExecutor()
: mSuppressEOF(PR_FALSE)
, mHasProcessedBase(PR_FALSE)
, mNeedsFlush(PR_FALSE)
// The result is not checked here; the destructor null-checks it before use.
, mFlushTimer(do_CreateInstance("@mozilla.org/timer;1"))
, mNeedsCharsetSwitch(PR_FALSE)
{
}
/**
 * Destroys the executor. The op queue is expected to have been drained by
 * now; any still-armed flush timer is cancelled so that its callback cannot
 * run against a dead object.
 */
nsHtml5TreeOpExecutor::~nsHtml5TreeOpExecutor()
{
  NS_ASSERTION(mOpQueue.IsEmpty(), "Somehow there's stuff in the op queue.");
  if (!mFlushTimer) {
    return;
  }
  // XXX why is this even necessary? it is, though.
  mFlushTimer->Cancel();
}
/**
 * nsITimer function callback: the closure is the nsHtml5TreeOpExecutor that
 * armed the timer in Flush(); forward the tick to its DeferredTimerFlush().
 */
static void
TimerCallbackFunc(nsITimer* aTimer, void* aClosure)
{
  nsHtml5TreeOpExecutor* executor =
    static_cast<nsHtml5TreeOpExecutor*>(aClosure);
  executor->DeferredTimerFlush();
}
// nsIContentSink

/**
 * Not supported by this sink: asserts in debug builds and always returns
 * NS_ERROR_NOT_IMPLEMENTED.
 */
NS_IMETHODIMP
nsHtml5TreeOpExecutor::WillParse()
{
NS_NOTREACHED("No one should call this");
return NS_ERROR_NOT_IMPLEMENTED;
}
/**
 * Called when tree construction has ended.
 *
 * Marks the life cycle as TERMINATED, emits EOF through the tokenizer and
 * flushes the resulting ops (unless EOF is suppressed), ends the tokenizer and
 * then tears down the load: detaches this object as script-loader and document
 * observer, starts layout, scrolls to the ref, ends the document load and
 * finally drops the stream parser, pending parsing events and the parser.
 *
 * @return always NS_OK
 */
NS_IMETHODIMP
nsHtml5TreeOpExecutor::DidBuildModel()
{
NS_ASSERTION(mLifeCycle == STREAM_ENDING, "Bad life cycle.");
mLifeCycle = TERMINATED;
if (!mSuppressEOF) {
// Let the tokenizer emit EOF and execute the tree ops that it produced.
GetTokenizer()->eof();
Flush();
}
GetTokenizer()->end();
// The rest of this sequence comes from nsXMLContentSink
DidBuildModelImpl();
mDocument->ScriptLoader()->RemoveObserver(this);
nsContentSink::StartLayout(PR_FALSE);
ScrollToRef();
mDocument->RemoveObserver(this);
mDocument->EndLoad();
// mParser is still needed for the two calls below, so it is dropped last.
static_cast<nsHtml5Parser*> (mParser.get())->DropStreamParser();
static_cast<nsHtml5Parser*> (mParser.get())->CancelParsingEvents();
DropParserAndPerfHint();
#ifdef GATHER_DOCWRITE_STATISTICS
printf("UNSAFE SCRIPTS: %d\n", sUnsafeDocWrites);
printf("TOKENIZER-SAFE SCRIPTS: %d\n", sTokenSafeDocWrites);
printf("TREEBUILDER-SAFE SCRIPTS: %d\n", sTreeSafeDocWrites);
#endif
#ifdef DEBUG_hsivonen
printf("MAX INSERTION BATCH LEN: %d\n", sInsertionBatchMaxLength);
printf("MAX NOTIFICATION BATCH LEN: %d\n", sAppendBatchMaxSize);
// Guard against dividing by zero when no append batch was ever examined.
if (sAppendBatchExaminations != 0) {
printf("AVERAGE SLOTS EXAMINED: %d\n", sAppendBatchSlotsExamined / sAppendBatchExaminations);
}
#endif
return NS_OK;
}
/**
 * nsIContentSink hook; delegates to nsContentSink's WillInterruptImpl() and
 * returns its result unchanged.
 */
NS_IMETHODIMP
nsHtml5TreeOpExecutor::WillInterrupt()
{
  const nsresult rv = WillInterruptImpl();
  return rv;
}
/**
 * nsIContentSink hook; runs nsContentSink's WillResumeImpl() and then returns
 * whatever WillParseImpl() reports.
 */
NS_IMETHODIMP
nsHtml5TreeOpExecutor::WillResume()
{
  WillResumeImpl();
  const nsresult rv = WillParseImpl();
  return rv;
}
/**
 * Stores the parser that drives this sink in mParser.
 *
 * @param aParser the parser to remember; other methods of this class
 *                static_cast it to nsHtml5Parser
 * @return always NS_OK
 */
NS_IMETHODIMP
nsHtml5TreeOpExecutor::SetParser(nsIParser* aParser)
{
mParser = aParser;
return NS_OK;
}
/**
 * Intentionally a no-op; aType is ignored.
 */
void
nsHtml5TreeOpExecutor::FlushPendingNotifications(mozFlushType aType)
{
}
/**
 * Records aCharset on the document and, when a docshell is available, as the
 * "previous document charset" of the relevant markup document viewer.
 *
 * The viewer lookup mirrors nsHTMLDocument::StartDocumentLoad: use the
 * docshell's own content viewer if it has one, otherwise fall back to the
 * content viewer of the same-type parent docshell. A missing parent or parent
 * viewer is legal and simply skips the viewer update.
 *
 * @param aCharset the charset name to propagate
 * @return NS_ERROR_FAILURE if the docshell cannot be used as a tree item,
 *         NS_OK otherwise
 */
NS_IMETHODIMP
nsHtml5TreeOpExecutor::SetDocumentCharset(nsACString& aCharset)
{
  if (mDocShell) {
    // SetPrevDocCharacterSet must be called here in case the charset was
    // detected by the parser (DetectMetaTag).
    nsCOMPtr<nsIMarkupDocumentViewer> markupViewer;
    nsCOMPtr<nsIContentViewer> ownViewer;
    mDocShell->GetContentViewer(getter_AddRefs(ownViewer));
    if (ownViewer) {
      markupViewer = do_QueryInterface(ownViewer);
    } else {
      nsCOMPtr<nsIDocShellTreeItem> docShellAsItem =
        do_QueryInterface(mDocShell);
      NS_ENSURE_TRUE(docShellAsItem, NS_ERROR_FAILURE);

      nsCOMPtr<nsIDocShellTreeItem> parentItem;
      docShellAsItem->GetSameTypeParent(getter_AddRefs(parentItem));

      nsCOMPtr<nsIDocShell> parentShell(do_QueryInterface(parentItem));
      if (parentShell) {
        nsCOMPtr<nsIContentViewer> parentViewer;
        nsresult rv =
          parentShell->GetContentViewer(getter_AddRefs(parentViewer));
        if (NS_SUCCEEDED(rv) && parentViewer) {
          markupViewer = do_QueryInterface(parentViewer);
        }
      }
    }

    if (markupViewer) {
      markupViewer->SetPrevDocCharacterSet(aCharset);
    }
  }

  if (mDocument) {
    mDocument->SetDocumentCharacterSet(aCharset);
  }
  return NS_OK;
}
/**
 * Returns the document this sink builds into (mDocument), as an nsISupports
 * pointer.
 */
nsISupports*
nsHtml5TreeOpExecutor::GetTarget()
{
return mDocument;
}
// nsContentSink overrides

/**
 * nsContentSink override that deliberately does nothing in this sink.
 */
void
nsHtml5TreeOpExecutor::UpdateChildCounts()
{
// No-op
}
/**
 * nsContentSink override that performs no work here.
 *
 * @return always NS_OK
 */
nsresult
nsHtml5TreeOpExecutor::FlushTags()
{
return NS_OK;
}
/**
 * Tells the HTML document that aElement has finished executing.
 *
 * @param aElement the script element that was evaluated
 */
void
nsHtml5TreeOpExecutor::PostEvaluateScript(nsIScriptElement *aElement)
{
  nsCOMPtr<nsIHTMLDocument> htmlDocument(do_QueryInterface(mDocument));
  NS_ASSERTION(htmlDocument, "Document didn't QI into HTML document.");
  htmlDocument->ScriptExecuted(aElement);
}
/**
 * Kicks off the style sheet load/parse for aElement if it is a style sheet
 * linking element.
 *
 * Re-enables updates on the element and asks it to update its sheet with this
 * sink as the observer. If the update succeeded, will notify later and is not
 * for an alternate sheet, the sheet is counted as pending and script execution
 * is blocked via the script loader.
 *
 * @param aElement the element to check; elements that do not implement
 *                 nsIStyleSheetLinkingElement are ignored
 */
void
nsHtml5TreeOpExecutor::UpdateStyleSheet(nsIContent* aElement)
{
  nsCOMPtr<nsIStyleSheetLinkingElement> linkingElement(do_QueryInterface(aElement));
  if (!linkingElement) {
    return;
  }

  linkingElement->SetEnableUpdates(PR_TRUE);

  PRBool notifyPending;
  PRBool alternate;
  nsresult rv = linkingElement->UpdateStyleSheet(this, &notifyPending, &alternate);
  // The out-params are only read when the call succeeded.
  if (NS_FAILED(rv) || !notifyPending || alternate) {
    return;
  }

  ++mPendingSheetCount;
  mScriptLoader->AddExecuteBlocker();
}
void
nsHtml5TreeOpExecutor::Flush()
{
mNeedsFlush = PR_FALSE;
FillQueue();
MOZ_AUTO_DOC_UPDATE(GetDocument(), UPDATE_CONTENT_MODEL, PR_TRUE);
PRIntervalTime flushStart = 0;
PRUint32 opQueueLength = mOpQueue.Length();
if (opQueueLength > NS_HTML5_TREE_OP_EXECUTOR_MIN_QUEUE_LENGTH) { // avoid computing averages with too few ops
flushStart = PR_IntervalNow();
}
mElementsSeenInThisAppendBatch.SetCapacity(opQueueLength * 2);
// XXX alloc failure
const nsHtml5TreeOperation* start = mOpQueue.Elements();
const nsHtml5TreeOperation* end = start + opQueueLength;
for (nsHtml5TreeOperation* iter = (nsHtml5TreeOperation*)start; iter < end; ++iter) {
iter->Perform(this);
}
FlushPendingAppendNotifications();
#ifdef DEBUG_hsivonen
if (mOpQueue.Length() > sInsertionBatchMaxLength) {
sInsertionBatchMaxLength = opQueueLength;
}
#endif
mOpQueue.Clear();
if (flushStart) {
PRUint32 delta = PR_IntervalToMilliseconds(PR_IntervalNow() - flushStart);
sTreeOpQueueMaxLength = delta ?
(PRUint32)((NS_HTML5_TREE_OP_EXECUTOR_MAX_QUEUE_TIME * (PRUint64)opQueueLength) / delta) :
0;
if (sTreeOpQueueMaxLength < NS_HTML5_TREE_OP_EXECUTOR_MIN_QUEUE_LENGTH) {
sTreeOpQueueMaxLength = NS_HTML5_TREE_OP_EXECUTOR_MIN_QUEUE_LENGTH;
}
#ifdef DEBUG_hsivonen
printf("QUEUE MAX LENGTH: %d\n", sTreeOpQueueMaxLength);
#endif
}
mFlushTimer->InitWithFuncCallback(TimerCallbackFunc, static_cast<void*> (this), NS_HTML5_TREE_OP_EXECUTOR_MAX_TIME_WITHOUT_FLUSH, nsITimer::TYPE_ONE_SHOT);
}
/**
 * Applies the first <base> element seen to the document: its target attribute
 * becomes the document's base target and its href attribute, if it parses as
 * a URI and the document accepts it, becomes the base URI. Later calls are
 * ignored because mHasProcessedBase is already set.
 *
 * @param aContent the base element
 * @return NS_OK when nothing had to be done or everything succeeded, else the
 *         failure from creating or setting the base URI
 */
nsresult
nsHtml5TreeOpExecutor::ProcessBASETag(nsIContent* aContent)
{
  NS_ASSERTION(aContent, "missing base-element");

  if (mHasProcessedBase) {
    return NS_OK;
  }
  mHasProcessedBase = PR_TRUE;

  if (!mDocument) {
    return NS_OK;
  }

  nsAutoString value;
  if (aContent->GetAttr(kNameSpaceID_None, nsHtml5Atoms::target, value)) {
    mDocument->SetBaseTarget(value);
  }

  if (!aContent->GetAttr(kNameSpaceID_None, nsHtml5Atoms::href, value)) {
    return NS_OK;
  }

  nsCOMPtr<nsIURI> baseURI;
  nsresult rv = NS_NewURI(getter_AddRefs(baseURI), value);
  if (NS_FAILED(rv)) {
    return rv;
  }

  // The document checks if it is legal to set this base
  rv = mDocument->SetBaseURI(baseURI);
  if (NS_FAILED(rv)) {
    return rv;
  }

  mDocumentBaseURI = mDocument->GetBaseURI();
  return rv;
}
/**
 * Reports whether scripts may run for the document's principal.
 * (Logic copied from the HTML content sink.)
 *
 * Every lookup failure along the way (no document/docshell, no global object,
 * no script context, no native context) is answered with PR_TRUE.
 *
 * @return the security manager's verdict, or PR_TRUE if it could not be asked
 */
PRBool
nsHtml5TreeOpExecutor::IsScriptEnabled()
{
  NS_ENSURE_TRUE(mDocument && mDocShell, PR_TRUE);

  nsCOMPtr<nsIScriptGlobalObject> globalObject(mDocument->GetScriptGlobalObject());
  if (!globalObject) {
    // The document may not have had its global object set yet; in that case
    // go through the docshell to find it.
    nsCOMPtr<nsIScriptGlobalObjectOwner> owner(do_GetInterface(mDocShell));
    NS_ENSURE_TRUE(owner, PR_TRUE);
    globalObject = owner->GetScriptGlobalObject();
    NS_ENSURE_TRUE(globalObject, PR_TRUE);
  }

  nsIScriptContext* scriptContext = globalObject->GetContext();
  NS_ENSURE_TRUE(scriptContext, PR_TRUE);

  JSContext* cx = static_cast<JSContext*>(scriptContext->GetNativeContext());
  NS_ENSURE_TRUE(cx, PR_TRUE);

  PRBool enabled = PR_TRUE;
  nsIScriptSecurityManager* securityManager = nsContentUtils::GetSecurityManager();
  securityManager->CanExecuteScripts(cx, mDocument->NodePrincipal(), &enabled);
  return enabled;
}
/**
 * Translates the parser's document mode into the matching nsCompatibility
 * value and sets it on the HTML document. Anything other than standards or
 * almost-standards mode maps to quirks mode.
 *
 * @param m the document mode determined by the parser
 */
void
nsHtml5TreeOpExecutor::DocumentMode(nsHtml5DocumentMode m)
{
  nsCompatibility mode;
  if (m == STANDARDS_MODE) {
    mode = eCompatibility_FullStandards;
  } else if (m == ALMOST_STANDARDS_MODE) {
    mode = eCompatibility_AlmostStandards;
  } else {
    // QUIRKS_MODE and any unexpected value
    mode = eCompatibility_NavQuirks;
  }

  nsCOMPtr<nsIHTMLDocument> htmlDocument(do_QueryInterface(mDocument));
  NS_ASSERTION(htmlDocument, "Document didn't QI into HTML document.");
  htmlDocument->SetCompatibilityMode(mode);
}
/**
 * Performs the charset switch requested through NeedsCharsetSwitchTo(), if
 * any, by asking the docshell (as nsIWebShellServices) to stop the current
 * load and reload the document with the pending charset.
 *
 * @return NS_ERROR_HTMLPARSER_STOPPARSING if the reload was accepted and the
 *         current parse should stop; NS_ERROR_HTMLPARSER_CONTINUE if no switch
 *         was pending, the docshell does not offer nsIWebShellServices, or the
 *         reload request was not accepted (the pending request is then
 *         dropped).
 */
nsresult
nsHtml5TreeOpExecutor::MaybePerformCharsetSwitch()
{
  if (!mNeedsCharsetSwitch) {
    return NS_ERROR_HTMLPARSER_CONTINUE;
  }

  // this code comes from nsObserverBase.cpp
  nsresult rv = NS_OK;
  nsCOMPtr<nsIWebShellServices> wss = do_QueryInterface(mDocShell);
  if (!wss) {
    return NS_ERROR_HTMLPARSER_CONTINUE;
  }
#ifndef DONT_INFORM_WEBSHELL
  // ask the webshellservice to load the URL
  if (NS_FAILED(rv = wss->SetRendering(PR_FALSE))) {
    // do nothing and fall thru
  } else if (NS_FAILED(rv = wss->StopDocumentLoad())) {
    rv = wss->SetRendering(PR_TRUE); // turn on the rendering so at least we will see something.
  } else if (NS_FAILED(rv = wss->ReloadDocument(mPendingCharset.get(), kCharsetFromMetaTag))) {
    rv = wss->SetRendering(PR_TRUE); // turn on the rendering so at least we will see something.
  } else {
    rv = NS_ERROR_HTMLPARSER_STOPPARSING; // We're reloading a new document...stop loading the current.
  }
#endif
  // if our reload request is not accepted, we should tell parser to go on.
  // Both statements belong to the condition: without the braces the result
  // was overwritten unconditionally and STOPPARSING could never be returned.
  if (rv != NS_ERROR_HTMLPARSER_STOPPARSING) {
    mNeedsCharsetSwitch = PR_FALSE;
    rv = NS_ERROR_HTMLPARSER_CONTINUE;
  }
  return rv;
}
/**
 * This method executes a script element set by nsHtml5TreeBuilder. The reason
 * why this code is here and not in the tree builder is to allow the control
 * to return from the tokenizer before scripts run. This way, the tokenizer
 * is not invoked re-entrantly although the parser is.
 *
 * Pending tree ops are flushed first. If the script element reports
 * NS_ERROR_HTMLPARSER_BLOCK, the parser is blocked and the element is
 * remembered in mScriptElements; otherwise the document is told right away
 * that the script has executed. mScriptElement is cleared in either case.
 */
void
nsHtml5TreeOpExecutor::ExecuteScript()
{
NS_PRECONDITION(mScriptElement, "Trying to run a script without having one!");
// Execute the queued tree ops before the script gets to run.
Flush();
#ifdef GATHER_DOCWRITE_STATISTICS
if (!mSnapshot) {
mSnapshot = mTreeBuilder->newSnapshot();
}
#endif
nsCOMPtr<nsIScriptElement> sele = do_QueryInterface(mScriptElement);
// Notify our document that we're loading this script.
nsCOMPtr<nsIHTMLDocument> htmlDocument = do_QueryInterface(mDocument);
NS_ASSERTION(htmlDocument, "Document didn't QI into HTML document.");
htmlDocument->ScriptLoading(sele);
// Copied from nsXMLContentSink
// Now tell the script that it's ready to go. This may execute the script
// or return NS_ERROR_HTMLPARSER_BLOCK. Or neither if the script doesn't
// need executing.
nsresult rv = mScriptElement->DoneAddingChildren(PR_TRUE);
// If the act of insertion evaluated the script, we're fine.
// Else, block the parser till the script has loaded.
if (rv == NS_ERROR_HTMLPARSER_BLOCK) {
mScriptElements.AppendObject(sele);
mParser->BlockParser();
} else {
// This may have already happened if the script executed, but in case
// it didn't then remove the element so that it doesn't get stuck forever.
htmlDocument->ScriptExecuted(sele);
}
// The script has been handed off; forget it so it cannot be run twice.
mScriptElement = nsnull;
}
/**
 * Initializes the underlying nsContentSink and, if that succeeds, registers
 * this object as an observer of aDoc.
 *
 * @param aDoc       the document to build into
 * @param aURI       forwarded to nsContentSink::Init
 * @param aContainer forwarded to nsContentSink::Init
 * @param aChannel   forwarded to nsContentSink::Init
 * @return the result of nsContentSink::Init
 */
nsresult
nsHtml5TreeOpExecutor::Init(nsIDocument* aDoc,
nsIURI* aURI,
nsISupports* aContainer,
nsIChannel* aChannel)
{
nsresult rv = nsContentSink::Init(aDoc, aURI, aContainer, aChannel);
// Bail out before touching aDoc if the base class could not initialize.
NS_ENSURE_SUCCESS(rv, rv);
aDoc->AddObserver(this);
return rv;
}
/**
 * Clears the per-parse state: no pending charset switch, no pending flush
 * request and no script element waiting to run.
 */
void
nsHtml5TreeOpExecutor::Start()
{
  mPendingCharset.Truncate();
  mNeedsCharsetSwitch = PR_FALSE;
  mNeedsFlush = PR_FALSE;
  mScriptElement = nsnull;
}
/**
 * Stops the deferred-flush timer.
 *
 * The timer is null-checked, as in the destructor, because its creation in
 * the constructor is not verified and the cycle collector may have unlinked
 * it.
 */
void
nsHtml5TreeOpExecutor::End()
{
  if (mFlushTimer) {
    mFlushTimer->Cancel();
  }
}
/**
 * Records that the document has to be reloaded with a different charset; the
 * switch itself happens later in MaybePerformCharsetSwitch().
 *
 * @param aEncoding the charset to switch to
 */
void
nsHtml5TreeOpExecutor::NeedsCharsetSwitchTo(const nsACString& aEncoding)
{
  mPendingCharset.Assign(aEncoding);
  mNeedsCharsetSwitch = PR_TRUE;
}
/**
 * Returns the tokenizer owned by the parser. mParser is treated as an
 * nsHtml5Parser without a runtime check.
 */
nsHtml5Tokenizer*
nsHtml5TreeOpExecutor::GetTokenizer()
{
  nsHtml5Parser* html5Parser = static_cast<nsHtml5Parser*>(mParser.get());
  return html5Parser->GetTokenizer();
}
void
nsHtml5TreeOpExecutor::MaybeSuspend() {
if (!mNeedsFlush) {
mNeedsFlush = !!(mTreeBuilder->GetOpQueueLength() >= sTreeOpQueueMaxLength);
}
if (DidProcessATokenImpl() == NS_ERROR_HTMLPARSER_INTERRUPTED || mNeedsFlush) {
// We've been in the parser for too long and/or the op queue is becoming too
// long to flush in one go it it grows further.
static_cast<nsHtml5Parser*>(mParser.get())->Suspend();
mTreeBuilder->ReqSuspension();
}
}
/**
 * Runs the pending script element, if there is one, and afterwards suspends
 * the stream parser when one is attached.
 */
void
nsHtml5TreeOpExecutor::MaybeExecuteScript() {
  if (!mScriptElement) {
    return;
  }
  // mUninterruptibleDocWrite = PR_FALSE;
  ExecuteScript();
  if (mStreamParser) {
    mStreamParser->Suspend();
  }
}
/**
 * Flush-timer tick: request a flush if the tree builder has queued any ops.
 * The flush itself is not performed here; only mNeedsFlush is set.
 */
void
nsHtml5TreeOpExecutor::DeferredTimerFlush() {
  if (mTreeBuilder->GetOpQueueLength() <= 0) {
    return;
  }
  mNeedsFlush = PR_TRUE;
}
/**
 * Exchanges this executor's op queue with the tree builder's queue via
 * nsHtml5TreeBuilder::SwapQueue().
 */
void
nsHtml5TreeOpExecutor::FillQueue() {
mTreeBuilder->SwapQueue(mOpQueue);
}
/**
 * Returns the executor to its pre-parse state: life cycle NOT_STARTED, all
 * flags cleared, no pending charset, an empty op queue and no script element.
 */
void
nsHtml5TreeOpExecutor::Reset() {
  mLifeCycle = NOT_STARTED;

  mNeedsCharsetSwitch = PR_FALSE;
  mNeedsFlush = PR_FALSE;
  mHasProcessedBase = PR_FALSE;
  mSuppressEOF = PR_FALSE;

  mPendingCharset.Truncate();
  mOpQueue.Clear();
  mScriptElement = nsnull;
}
// Queue length at which MaybeSuspend() requests a flush; starts at the default
// and is recomputed by Flush() from measured timings.
PRUint32 nsHtml5TreeOpExecutor::sTreeOpQueueMaxLength = NS_HTML5_TREE_OP_EXECUTOR_DEFAULT_QUEUE_LENGTH;
#ifdef DEBUG_hsivonen
// Debug-only statistics counters, printed by DidBuildModel().
PRUint32 nsHtml5TreeOpExecutor::sInsertionBatchMaxLength = 0;
PRUint32 nsHtml5TreeOpExecutor::sAppendBatchMaxSize = 0;
PRUint32 nsHtml5TreeOpExecutor::sAppendBatchSlotsExamined = 0;
PRUint32 nsHtml5TreeOpExecutor::sAppendBatchExaminations = 0;
#endif

View File

@ -0,0 +1,388 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is HTML Parser Gecko integration code.
*
* The Initial Developer of the Original Code is
* Mozilla Foundation.
* Portions created by the Initial Developer are Copyright (C) 2009
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Henri Sivonen <hsivonen@iki.fi>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef nsHtml5TreeOpExecutor_h__
#define nsHtml5TreeOpExecutor_h__
#include "prtypes.h"
#include "nsIAtom.h"
#include "nsINameSpaceManager.h"
#include "nsIContent.h"
#include "nsIDocument.h"
#include "nsTraceRefcnt.h"
#include "nsHtml5TreeOperation.h"
#include "nsHtml5PendingNotification.h"
#include "nsTArray.h"
#include "nsContentSink.h"
#include "nsNodeInfoManager.h"
#include "nsHtml5DocumentMode.h"
#include "nsITimer.h"
#include "nsIScriptElement.h"
#include "nsIParser.h"
class nsHtml5TreeBuilder;
class nsHtml5Tokenizer;
class nsHtml5StreamParser;
/**
 * The stages of the parser life cycle tracked by nsHtml5TreeOpExecutor.
 */
enum eHtml5ParserLifecycle {
/**
 * The parser has not told the tokenizer to start yet.
 */
NOT_STARTED = 0,
/**
 * The parser has started the tokenizer and the stream hasn't ended yet.
 */
PARSING = 1,
/**
 * The parser hasn't told the tokenizer to emit EOF yet, but the network
 * stream has been exhausted or document.close() called.
 */
STREAM_ENDING = 2,
/**
 * The parser has told the tokenizer to emit EOF.
 */
TERMINATED = 3
};
// Raw (non-owning) content pointer; used as the element type of
// nsHtml5TreeOpExecutor::mElementsSeenInThisAppendBatch.
typedef nsIContent* nsIContentPtr;
/**
 * Content sink for the HTML5 parser that executes the tree operations queued
 * by nsHtml5TreeBuilder against the document, batches append notifications,
 * runs parser-inserted scripts and tracks the parser life cycle.
 */
class nsHtml5TreeOpExecutor : public nsIContentSink,
public nsContentSink
{
public:
NS_DECL_AND_IMPL_ZEROING_OPERATOR_NEW
NS_DECL_ISUPPORTS_INHERITED
NS_DECL_CYCLE_COLLECTION_CLASS_INHERITED(nsHtml5TreeOpExecutor, nsContentSink)
private:
#ifdef DEBUG_hsivonen
// Debug-only statistics; printed by DidBuildModel().
static PRUint32 sInsertionBatchMaxLength;
static PRUint32 sAppendBatchMaxSize;
static PRUint32 sAppendBatchSlotsExamined;
static PRUint32 sAppendBatchExaminations;
#endif
/**
 * Tree builder queue length at which MaybeSuspend() requests a flush
 */
static PRUint32 sTreeOpQueueMaxLength;
/**
 * Whether EOF needs to be suppressed
 */
PRBool mSuppressEOF;
/**
 * Whether a base element (or the document's own base URI) has already been
 * taken into account
 */
PRBool mHasProcessedBase;
/**
 * Whether MaybeFlush() should flush
 */
PRBool mNeedsFlush;
/**
 * One-shot timer armed by Flush(); fires DeferredTimerFlush()
 */
nsCOMPtr<nsITimer> mFlushTimer;
/**
 * The tree ops taken over from the tree builder, executed by Flush()
 */
nsTArray<nsHtml5TreeOperation> mOpQueue;
/**
 * Parents and element children seen by PostPendingAppendNotification()
 * since the last FlushPendingAppendNotifications()
 */
nsTArray<nsIContentPtr> mElementsSeenInThisAppendBatch;
/**
 * Append notifications waiting to be fired
 */
nsTArray<nsHtml5PendingNotification> mPendingNotifications;
/**
 * The stream parser, if any (raw pointer; set via SetStreamParser())
 */
nsHtml5StreamParser* mStreamParser;
/**
 * The character encoding to which to switch in a late <meta> renavigation
 */
nsCString mPendingCharset;
/**
 * Call to MaybePerformCharsetSwitch() needed
 */
PRBool mNeedsCharsetSwitch;
/**
 * The current point on parser life cycle
 */
eHtml5ParserLifecycle mLifeCycle;
/**
 * Script to run ASAP
 */
nsCOMPtr<nsIContent> mScriptElement;
/**
 * The tree builder whose op queue this executor drains (raw pointer; set
 * via SetTreeBuilder())
 */
nsHtml5TreeBuilder* mTreeBuilder;
public:
nsHtml5TreeOpExecutor();
virtual ~nsHtml5TreeOpExecutor();
// nsIContentSink
/**
 * Forwards to nsContentSink
 */
virtual PRBool ReadyToCallDidBuildModel(PRBool aTerminated)
{
return ReadyToCallDidBuildModelImpl(aTerminated);
};
/**
 * Unimplemented. For interface compat only.
 */
NS_IMETHOD WillParse();
/**
 * Runs nsContentSink's WillBuildModelImpl() and begins the document load.
 * aDTDMode is ignored.
 */
NS_IMETHOD WillBuildModel(nsDTDMode aDTDMode) {
NS_ASSERTION(GetDocument()->GetScriptGlobalObject(),
"Script global object not ready");
WillBuildModelImpl();
GetDocument()->BeginLoad();
return NS_OK;
}
/**
 * Emits EOF (unless suppressed), ends the tokenizer and tears down the load.
 */
NS_IMETHOD DidBuildModel();
/**
 * Forwards to nsContentSink
 */
NS_IMETHOD WillInterrupt();
/**
 * Forwards to nsContentSink (WillResumeImpl() followed by WillParseImpl()).
 */
NS_IMETHOD WillResume();
/**
 * Sets the parser.
 */
NS_IMETHOD SetParser(nsIParser* aParser);
/**
 * No-op for backwards compat.
 */
virtual void FlushPendingNotifications(mozFlushType aType);
/**
 * Propagates the charset to the markup document viewer (if one is found)
 * and to the document.
 */
NS_IMETHOD SetDocumentCharset(nsACString& aCharset);
/**
 * Returns the document.
 */
virtual nsISupports *GetTarget();
// nsContentSink methods
virtual nsresult ProcessBASETag(nsIContent* aContent);
virtual void UpdateChildCounts();
virtual nsresult FlushTags();
virtual void PostEvaluateScript(nsIScriptElement *aElement);
/**
 * Sets up style sheet load / parse
 */
void UpdateStyleSheet(nsIContent* aElement);
// Getters and setters for fields from nsContentSink
nsIDocument* GetDocument() {
return mDocument;
}
nsNodeInfoManager* GetNodeInfoManager() {
return mNodeInfoManager;
}
nsIDocShell* GetDocShell() {
return mDocShell;
}
PRBool IsScriptExecuting() {
return IsScriptExecutingImpl();
}
void AllowInterrupts() {
mCanInterruptParser = PR_TRUE;
}
void ProhibitInterrupts() {
mCanInterruptParser = PR_FALSE;
}
/**
 * Adopts the document's current base URI and marks the base as processed,
 * so that a later ProcessBASETag() call is ignored.
 */
void SetBaseUriFromDocument() {
mDocumentBaseURI = mDocument->GetBaseURI();
mHasProcessedBase = PR_TRUE;
}
void SetNodeInfoManager(nsNodeInfoManager* aManager) {
mNodeInfoManager = aManager;
}
void SetStreamParser(nsHtml5StreamParser* aStreamParser) {
mStreamParser = aStreamParser;
}
/**
 * Renavigates to the document with a different charset
 */
nsresult MaybePerformCharsetSwitch();
/**
 * Runs mScriptElement
 */
void ExecuteScript();
PRBool IsScriptEnabled();
/**
 * Records that aChild was appended to aParent. A notification for aParent
 * is queued only if aParent has not been seen in the current batch, either
 * as a parent or as an element child.
 */
void PostPendingAppendNotification(nsIContent* aParent, nsIContent* aChild) {
PRBool newParent = PR_TRUE;
const nsIContentPtr* first = mElementsSeenInThisAppendBatch.Elements();
// NOTE(review): for an empty array this points one before `first`, so the
// loop below does not run; confirm that forming that pointer is acceptable.
const nsIContentPtr* last = first + mElementsSeenInThisAppendBatch.Length() - 1;
// Scan backwards: the most recently recorded nodes are checked first.
for (const nsIContentPtr* iter = last; iter >= first; --iter) {
#ifdef DEBUG_hsivonen
sAppendBatchSlotsExamined++;
#endif
if (*iter == aParent) {
newParent = PR_FALSE;
break;
}
}
if (aChild->IsNodeOfType(nsINode::eELEMENT)) {
mElementsSeenInThisAppendBatch.AppendElement(aChild);
}
mElementsSeenInThisAppendBatch.AppendElement(aParent);
if (newParent) {
mPendingNotifications.AppendElement(aParent);
}
#ifdef DEBUG_hsivonen
sAppendBatchExaminations++;
#endif
}
/**
 * Fires every queued append notification and then clears both the
 * notification queue and the list of nodes seen in this batch.
 */
void FlushPendingAppendNotifications() {
const nsHtml5PendingNotification* start = mPendingNotifications.Elements();
const nsHtml5PendingNotification* end = start + mPendingNotifications.Length();
for (nsHtml5PendingNotification* iter = (nsHtml5PendingNotification*)start; iter < end; ++iter) {
iter->Fire();
}
mPendingNotifications.Clear();
#ifdef DEBUG_hsivonen
if (mElementsSeenInThisAppendBatch.Length() > sAppendBatchMaxSize) {
sAppendBatchMaxSize = mElementsSeenInThisAppendBatch.Length();
}
#endif
mElementsSeenInThisAppendBatch.Clear();
}
/**
 * Fires the pending append notifications and then starts layout through
 * nsContentSink; does nothing when there is no document.
 */
void StartLayout() {
nsIDocument* doc = GetDocument();
if (doc) {
FlushPendingAppendNotifications();
nsContentSink::StartLayout(PR_FALSE);
}
}
/**
 * Sets the document's compatibility mode from the parser's document mode
 */
void DocumentMode(nsHtml5DocumentMode m);
nsresult Init(nsIDocument* aDoc, nsIURI* aURI,
nsISupports* aContainer, nsIChannel* aChannel);
/**
 * Takes the tree builder's queued ops and executes them
 */
void Flush();
/**
 * Suspends the parser if it has been interrupted or a flush is needed
 */
void MaybeSuspend();
/**
 * Flushes only if a flush has been requested
 */
void MaybeFlush() {
if (mNeedsFlush) {
Flush();
}
}
/**
 * Flush-timer callback target: requests a flush if ops are queued
 */
void DeferredTimerFlush();
void Start();
void End();
/**
 * Records a pending charset switch to aEncoding
 */
void NeedsCharsetSwitchTo(const nsACString& aEncoding);
/**
 * Drops a pending charset switch request
 */
void IgnoreCharsetSwitch() {
mNeedsCharsetSwitch = PR_FALSE;
}
#ifdef DEBUG
PRBool NeedsCharsetSwitch() {
return mNeedsCharsetSwitch;
}
PRBool HasScriptElement() {
return !!mScriptElement;
}
#endif
PRBool IsComplete() {
return (mLifeCycle == TERMINATED);
}
eHtml5ParserLifecycle GetLifeCycle() {
return mLifeCycle;
}
void SetLifeCycle(eHtml5ParserLifecycle aLifeCycle) {
mLifeCycle = aLifeCycle;
}
/**
 * Runs mScriptElement if there is one, then suspends the stream parser
 */
void MaybeExecuteScript();
/**
 * If a script element is pending, prevents its execution and forgets it
 */
void MaybePreventExecution() {
if (mScriptElement) {
nsCOMPtr<nsIScriptElement> script = do_QueryInterface(mScriptElement);
NS_ASSERTION(script, "mScriptElement didn't QI to nsIScriptElement!");
script->PreventExecution();
mScriptElement = nsnull;
}
}
/**
 * Request execution of the script element when the tokenizer returns
 */
void SetScriptElement(nsIContent* aScript) {
mScriptElement = aScript;
}
void SetTreeBuilder(nsHtml5TreeBuilder* aBuilder) {
mTreeBuilder = aBuilder;
}
/**
 * Returns the executor to its pre-parse state
 */
void Reset();
private:
/**
 * Returns the parser's tokenizer
 */
nsHtml5Tokenizer* GetTokenizer();
/**
 * Swaps mOpQueue with the tree builder's queue
 */
void FillQueue();
};
#endif // nsHtml5TreeOpExecutor_h__

View File

@ -47,6 +47,7 @@
#include "mozAutoDocUpdate.h"
#include "nsBindingManager.h"
#include "nsXBLBinding.h"
#include "nsHtml5DocumentMode.h"
nsHtml5TreeOperation::nsHtml5TreeOperation()
: mOpCode(eTreeOpAppend)
@ -60,7 +61,7 @@ nsHtml5TreeOperation::~nsHtml5TreeOperation()
}
nsresult
nsHtml5TreeOperation::Perform(nsHtml5TreeBuilder* aBuilder)
nsHtml5TreeOperation::Perform(nsHtml5TreeOpExecutor* aBuilder)
{
nsresult rv = NS_OK;
switch(mOpCode) {
@ -181,21 +182,25 @@ nsHtml5TreeOperation::Perform(nsHtml5TreeBuilder* aBuilder)
return rv;
}
case eTreeOpProcessBase: {
rv = aBuilder->ProcessBase(mNode);
rv = aBuilder->ProcessBASETag(mNode);
return rv;
}
case eTreeOpProcessMeta: {
rv = aBuilder->ProcessMeta(mNode);
rv = aBuilder->ProcessMETATag(mNode);
return rv;
}
case eTreeOpProcessOfflineManifest: {
rv = aBuilder->ProcessOfflineManifest(mNode);
aBuilder->ProcessOfflineManifest(mNode);
return rv;
}
case eTreeOpStartLayout: {
aBuilder->StartLayout(); // this causes a flush anyway
return rv;
}
case eTreeOpDocumentMode: {
aBuilder->DocumentMode(mMode);
return rv;
}
default: {
NS_NOTREACHED("Bogus tree op");
}

View File

@ -39,8 +39,9 @@
#define nsHtml5TreeOperation_h__
#include "nsIContent.h"
#include "nsHtml5DocumentMode.h"
class nsHtml5TreeBuilder;
class nsHtml5TreeOpExecutor;
enum eHtml5TreeOperation {
// main HTML5 ops
@ -50,6 +51,7 @@ enum eHtml5TreeOperation {
eTreeOpFosterParent,
eTreeOpAppendToDocument,
eTreeOpAddAttributes,
eTreeOpDocumentMode,
// Gecko-specific on-pop ops
eTreeOpDoneAddingChildren,
eTreeOpDoneCreatingElement,
@ -94,6 +96,12 @@ class nsHtml5TreeOperation {
mParent = aParent;
mTable = aTable;
}
// Turns this operation into an eTreeOpDocumentMode op that carries aMode.
inline void Init(nsHtml5DocumentMode aMode) {
mOpCode = eTreeOpDocumentMode;
mMode = aMode;
}
inline void DoTraverse(nsCycleCollectionTraversalCallback &cb) {
nsHtml5TreeOperation* tmp = this;
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mNode);
@ -101,13 +109,14 @@ class nsHtml5TreeOperation {
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mTable);
}
nsresult Perform(nsHtml5TreeBuilder* aBuilder);
nsresult Perform(nsHtml5TreeOpExecutor* aBuilder);
private:
eHtml5TreeOperation mOpCode;
nsCOMPtr<nsIContent> mNode;
nsCOMPtr<nsIContent> mParent;
nsCOMPtr<nsIContent> mTable;
nsHtml5DocumentMode mMode; // space-wasting temporary solution
};
#endif // nsHtml5TreeOperation_h__

View File

@ -40,6 +40,7 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
#include "nsHtml5Tokenizer.h"
#include "nsHtml5TreeBuilder.h"

View File

@ -41,8 +41,9 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5Atoms.h"
#include "nsHtml5ByteReadable.h"
#include "nsIUnicodeDecoder.h"
class nsHtml5Parser;
class nsHtml5StreamParser;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;

View File

@ -194,6 +194,13 @@ class nsIParser : public nsISupports {
* @return NS_OK if successful, NS_ERROR_FAILURE for runtime error
*/
NS_IMETHOD GetDTD(nsIDTD** aDTD) = 0;
/**
 * Get the nsIStreamListener for this parser
 * @param aListener out param that will contain the result
 * @return NS_OK if successful
 */
NS_IMETHOD GetStreamListener(nsIStreamListener** aListener) = 0;
/**************************************************************************
* Parse methods always begin with an input source, and perform

View File

@ -3162,3 +3162,12 @@ nsParser::GetDTD(nsIDTD** aDTD)
return NS_OK;
}
/**
 * Hands out this parser as its own nsIStreamListener. The returned pointer
 * carries a reference that the caller must release.
 *
 * @param aListener out param that receives the addrefed listener
 * @return always NS_OK
 */
NS_IMETHODIMP
nsParser::GetStreamListener(nsIStreamListener** aListener)
{
  *aListener = this;
  NS_ADDREF(*aListener);
  return NS_OK;
}

View File

@ -311,6 +311,13 @@ class nsParser : public nsIParser,
*/
NS_IMETHOD GetDTD(nsIDTD** aDTD);
/**
 * Get the nsIStreamListener for this parser
 * @param aListener out param that will contain the result
 * @return NS_OK if successful
 */
NS_IMETHOD GetStreamListener(nsIStreamListener** aListener);
/**
* Detects the existence of a META tag with charset information in
* the given buffer.