Bug 489820 part 2 - Grow buffers to the worst-case size before tokenizing; fix comments. r=wchen.

2025-02-11 01:57:00 +00:00 · 2015-08-25 18:05:45 +03:00 · 2015-08-25 18:05:45 +03:00 · 02d517f7ac
commit 02d517f7ac
parent 9cb0eb6ae0
16 changed files with 231 additions and 52 deletions
--- a/parser/html/jArray.h
+++ b/parser/html/jArray.h
@ -53,6 +53,12 @@ struct jArray {
    jArray<T,L> newArray = { new T[len], len };
    return newArray;
  }
+  static jArray<T,L> newFallibleJArray(L const len) {
+    MOZ_ASSERT(len >= 0, "Negative length.");
+    T* a = new (mozilla::fallible) T[len];
+    jArray<T,L> newArray = { a, a ? len : 0 };
+    return newArray;
+  }
  operator T*() { return arr; }
  T& operator[] (L const index) {
    MOZ_ASSERT(index >= 0, "Array access with negative index.");
--- a/parser/html/javasrc/Tokenizer.java
+++ b/parser/html/javasrc/Tokenizer.java
@ -53,7 +53,7 @@ import org.xml.sax.SAXParseException;

 /**
 * An implementation of
- * http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
+ * https://html.spec.whatwg.org/multipage/syntax.html#tokenization
 * 
 * This class implements the <code>Locator</code> interface. This is not an
 * incidental implementation detail: Users of this class are encouraged to make
@ -262,11 +262,6 @@ public class Tokenizer implements Locator {
     */
    private static final @NoLength char[] LF = { '\n' };

-    /**
-     * Buffer growth parameter.
-     */
-    private static final int BUFFER_GROW_BY = 1024;
-
    /**
     * "CDATA[" as <code>char[]</code>
     */
@ -442,7 +437,7 @@ public class Tokenizer implements Locator {
    protected boolean html4;

    /**
-     * Whether the stream is past the first 512 bytes.
+     * Whether the stream is past the first 1024 bytes.
     */
    private boolean metaBoundaryPassed;

@ -842,37 +837,32 @@ public class Tokenizer implements Locator {
    }

    /**
-     * Appends to the smaller buffer.
+     * Appends to the buffer.
     * 
     * @param c
     *            the UTF-16 code unit to append
     */
    private void appendStrBuf(char c) {
-        if (strBufLen == strBuf.length) {
-            char[] newBuf = new char[strBuf.length + Tokenizer.BUFFER_GROW_BY];
-            System.arraycopy(strBuf, 0, newBuf, 0, strBuf.length);
-            strBuf = newBuf;
-        }
        strBuf[strBufLen++] = c;
    }

    /**
-     * The smaller buffer as a String. Currently only used for error reporting.
+     * The buffer as a String. Currently only used for error reporting.
     * 
     * <p>
     * C++ memory note: The return value must be released.
     * 
-     * @return the smaller buffer as a string
+     * @return the buffer as a string
     */
    protected String strBufToString() {
        return Portability.newStringFromBuffer(strBuf, 0, strBufLen);
    }

    /**
-     * Returns the short buffer as a local name. The return value is released in
+     * Returns the buffer as a local name. The return value is released in
     * emitDoctypeToken().
     * 
-     * @return the smaller buffer as local name
+     * @return the buffer as local name
     */
    private void strBufToDoctypeName() {
        doctypeName = Portability.newLocalNameFromBuffer(strBuf, 0, strBufLen,
@ -880,7 +870,7 @@ public class Tokenizer implements Locator {
    }

    /**
-     * Emits the smaller buffer as character tokens.
+     * Emits the buffer as character tokens.
     * 
     * @throws SAXException
     *             if the token handler threw
@ -962,7 +952,7 @@ public class Tokenizer implements Locator {
    }

    /**
-     * Append the contents of the smaller buffer to the larger one.
+     * Append the contents of the char reference buffer to the main one.
     */
    @Inline private void appendCharRefBufToStrBuf() {
        appendStrBuf(charRefBuf, 0, charRefBufLen);
@ -1260,6 +1250,13 @@ public class Tokenizer implements Locator {
        lastCR = false;

        int start = buffer.getStart();
+        int end = buffer.getEnd();
+
+        // In C++, the caller of tokenizeBuffer needs to do this explicitly.
+        // [NOCPP[
+        ensureBufferSpace(end - start);
+        // ]NOCPP]
+
        /**
         * The index of the last <code>char</code> read from <code>buf</code>.
         */
@ -1310,9 +1307,9 @@ public class Tokenizer implements Locator {
        // CPPONLY: }
        // [NOCPP[
        pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState,
-                buffer.getEnd());
+                end);
        // ]NOCPP]
-        if (pos == buffer.getEnd()) {
+        if (pos == end) {
            // exiting due to end of buffer
            buffer.setStart(pos);
        } else {
@ -1321,6 +1318,32 @@ public class Tokenizer implements Locator {
        return lastCR;
    }

+    // [NOCPP[
+    private void ensureBufferSpace(int inputLength) throws SAXException {
+        // Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB.
+        // Adding to the general worst case instead of only the
+        // TreeBuilder-exposed worst case to avoid re-introducing a bug when
+        // unifying the tokenizer and tree builder buffers in the future.
+        int worstCase = strBufLen + inputLength + charRefBufLen + 2;
+        tokenHandler.ensureBufferSpace(worstCase);
+        if (strBuf == null) {
+            // Add an arbitrary small value to avoid immediate reallocation
+            // once there are a few characters in the buffer.
+            strBuf = new char[worstCase + 128];
+        } else if (worstCase > strBuf.length) {
+            // HotSpot reportedly allocates memory with 8-byte accuracy, so
+            // there's no point in trying to do math here to avoid slop.
+            // Maybe we should add some small constant to worstCase here
+            // but not doing that without profiling. In C++ with jemalloc,
+            // the corresponding method should do math to round up here
+            // to avoid slop.
+            char[] newBuf = new char[worstCase];
+            System.arraycopy(strBuf, 0, newBuf, 0, strBufLen);
+            strBuf = newBuf;
+        }
+    }
+    // ]NOCPP]
+
    @SuppressWarnings("unused") private int stateLoop(int state, char c,
            int pos, @NoLength char[] buf, boolean reconsume, int returnState,
            int endPos) throws SAXException {
@ -6710,7 +6733,7 @@ public class Tokenizer implements Locator {

    public void initializeWithoutStarting() throws SAXException {
        confident = false;
-        strBuf = new char[1024];
+        strBuf = null;
        line = 1;
        // [NOCPP[
        html4 = false;
--- a/parser/html/javasrc/TreeBuilder.java
+++ b/parser/html/javasrc/TreeBuilder.java
@ -602,7 +602,7 @@ public abstract class TreeBuilder<T> implements TokenHandler,
        // ]NOCPP]
        start(fragment);
        charBufferLen = 0;
-        charBuffer = new char[1024];
+        charBuffer = null;
        framesetOk = true;
        if (fragment) {
            T elt;
@ -5594,14 +5594,30 @@ public abstract class TreeBuilder<T> implements TokenHandler,

    private final void accumulateCharactersForced(@Const @NoLength char[] buf,
            int start, int length) throws SAXException {
-        int newLen = charBufferLen + length;
-        if (newLen > charBuffer.length) {
-            char[] newBuf = new char[newLen];
+        System.arraycopy(buf, start, charBuffer, charBufferLen, length);
+        charBufferLen += length;
+    }
+
+    @Override public void ensureBufferSpace(int inputLength)
+            throws SAXException {
+        // TODO: Unify Tokenizer.strBuf and TreeBuilder.charBuffer so that
+        // this method becomes unnecessary.
+        int worstCase = charBufferLen + inputLength;
+        if (charBuffer == null) {
+            // Add an arbitrary small value to avoid immediate reallocation
+            // once there are a few characters in the buffer.
+            charBuffer = new char[worstCase + 128];
+        } else if (worstCase > charBuffer.length) {
+            // HotSpot reportedly allocates memory with 8-byte accuracy, so
+            // there's no point in trying to do math here to avoid slop.
+            // Maybe we should add some small constant to worstCase here
+            // but not doing that without profiling. In C++ with jemalloc,
+            // the corresponding method should do math to round up here
+            // to avoid slop.
+            char[] newBuf = new char[worstCase];
            System.arraycopy(charBuffer, 0, newBuf, 0, charBufferLen);
            charBuffer = newBuf;
        }
-        System.arraycopy(buf, start, charBuffer, charBufferLen, length);
-        charBufferLen = newLen;
    }

    // ]NOCPP]
--- a/parser/html/javasrc/UTF16Buffer.java
+++ b/parser/html/javasrc/UTF16Buffer.java
@ -117,6 +117,15 @@ public final class UTF16Buffer {
        return start < end;
    }

+    /**
+     * Returns <code>end - start</code>.
+     *
+     * @return <code>end - start</code>
+     */
+    public int getLength() {
+        return end - start;
+    }
+
    /**
     * Adjusts the start index to skip over the first character if it is a line
     * feed and the previous character was a carriage return.
--- a/parser/html/nsHtml5Parser.cpp
+++ b/parser/html/nsHtml5Parser.cpp
@ -370,6 +370,9 @@ nsHtml5Parser::Parse(const nsAString& aSourceBuffer,
        lineNumberSave = mTokenizer->getLineNumber();
      }

+      if (!mTokenizer->EnsureBufferSpace(stackBuffer.getLength())) {
+        return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
+      }
      mLastWasCR = mTokenizer->tokenizeBuffer(&stackBuffer);

      if (inRootContext) {
@ -476,6 +479,10 @@ nsHtml5Parser::Parse(const nsAString& aSourceBuffer,
    while (stackBuffer.hasMore()) {
      stackBuffer.adjust(mDocWriteSpeculativeLastWasCR);
      if (stackBuffer.hasMore()) {
+        if (!mDocWriteSpeculativeTokenizer->EnsureBufferSpace(
+            stackBuffer.getLength())) {
+          return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
+        }
        mDocWriteSpeculativeLastWasCR =
            mDocWriteSpeculativeTokenizer->tokenizeBuffer(&stackBuffer);
      }
@ -613,16 +620,19 @@ nsHtml5Parser::ParseUntilBlocked()
          return NS_OK;
        }
        if (mDocumentClosed) {
+          nsresult rv;
          NS_ASSERTION(!GetStreamParser(),
                       "This should only happen with script-created parser.");
-          mTokenizer->eof();
-          mTreeBuilder->StreamEnded();
+          if (NS_SUCCEEDED((rv = mExecutor->IsBroken()))) {
+            mTokenizer->eof();
+            mTreeBuilder->StreamEnded();
+          }
          mTreeBuilder->Flush();
          mExecutor->FlushDocumentWrite();
          // The below call does memory cleanup, so call it even if the
          // parser has been marked as broken.
          mTokenizer->end();
-          return NS_OK;
+          return rv;
        }
        // never release the last buffer.
        NS_ASSERTION(!mLastBuffer->getStart() && !mLastBuffer->getEnd(),
@ -662,6 +672,9 @@ nsHtml5Parser::ParseUntilBlocked()
      if (inRootContext) {
        mTokenizer->setLineNumber(mRootContextLineNumber);
      }
+      if (!mTokenizer->EnsureBufferSpace(mFirstBuffer->getLength())) {
+        return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
+      }
      mLastWasCR = mTokenizer->tokenizeBuffer(mFirstBuffer);
      if (inRootContext) {
        mRootContextLineNumber = mTokenizer->getLineNumber();
--- a/parser/html/nsHtml5StreamParser.cpp
+++ b/parser/html/nsHtml5StreamParser.cpp
@ -890,7 +890,10 @@ nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
    mTreeBuilder->StartPlainText();
    mTokenizer->StartPlainText();
  } else if (mMode == VIEW_SOURCE_PLAIN) {
-    mTreeBuilder->StartPlainTextViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle));
+    nsAutoString viewSourceTitle;
+    CopyUTF8toUTF16(mViewSourceTitle, viewSourceTitle);
+    mTreeBuilder->EnsureBufferSpace(viewSourceTitle.Length());
+    mTreeBuilder->StartPlainTextViewSource(viewSourceTitle);
    mTokenizer->StartPlainText();
  }

@ -1364,10 +1367,12 @@ nsHtml5StreamParser::ParseAvailableData()
                                                        0);
              }
            }
-            mTokenizer->eof();
-            mTreeBuilder->StreamEnded();
-            if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
-              mTokenizer->EndViewSource();
+            if (NS_SUCCEEDED(mTreeBuilder->IsBroken())) {
+              mTokenizer->eof();
+              mTreeBuilder->StreamEnded();
+              if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
+                mTokenizer->EndViewSource();
+              }
            }
            FlushTreeOpsAndDisarmTimer();
            return; // no more data and not expecting more
@ -1384,6 +1389,10 @@ nsHtml5StreamParser::ParseAvailableData()
    mFirstBuffer->adjust(mLastWasCR);
    mLastWasCR = false;
    if (mFirstBuffer->hasMore()) {
+      if (!mTokenizer->EnsureBufferSpace(mFirstBuffer->getLength())) {
+        MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
+        return;
+      }
      mLastWasCR = mTokenizer->tokenizeBuffer(mFirstBuffer);
      // At this point, internalEncodingDeclaration() may have called 
      // Terminate, but that never happens together with script.
--- a/parser/html/nsHtml5StringParser.cpp
+++ b/parser/html/nsHtml5StringParser.cpp
@ -109,6 +109,10 @@ nsHtml5StringParser::Tokenize(const nsAString& aSourceBuffer,
      buffer.adjust(lastWasCR);
      lastWasCR = false;
      if (buffer.hasMore()) {
+        if (!mTokenizer->EnsureBufferSpace(buffer.getLength())) {
+          rv = mBuilder->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
+          break;
+        }
        lastWasCR = mTokenizer->tokenizeBuffer(&buffer);
        if (NS_FAILED(rv = mBuilder->IsBroken())) {
          break;
@ -116,7 +120,9 @@ nsHtml5StringParser::Tokenize(const nsAString& aSourceBuffer,
      }
    }
  }
-  mTokenizer->eof();
+  if (NS_SUCCEEDED(rv)) {
+    mTokenizer->eof();
+  }
  mTokenizer->end();
  mBuilder->Finish();
  mAtomTable.Clear();
--- a/parser/html/nsHtml5Tokenizer.cpp
+++ b/parser/html/nsHtml5Tokenizer.cpp
@ -223,11 +223,6 @@ nsHtml5Tokenizer::emitOrAppendCharRefBuf(int32_t returnState)
 void 
 nsHtml5Tokenizer::appendStrBuf(char16_t c)
 {
-  if (strBufLen == strBuf.length) {
-    jArray<char16_t,int32_t> newBuf = jArray<char16_t,int32_t>::newJArray(strBuf.length + NS_HTML5TOKENIZER_BUFFER_GROW_BY);
-    nsHtml5ArrayCopy::arraycopy(strBuf, newBuf, strBuf.length);
-    strBuf = newBuf;
-  }
  strBuf[strBufLen++] = c;
 }

@ -374,6 +369,7 @@ nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer)
  shouldSuspend = false;
  lastCR = false;
  int32_t start = buffer->getStart();
+  int32_t end = buffer->getEnd();
  int32_t pos = start - 1;
  switch(state) {
    case NS_HTML5TOKENIZER_DATA:
@ -408,7 +404,7 @@ nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer)
  } else {
    pos = stateLoop<nsHtml5SilentPolicy>(state, c, pos, buffer->getBuffer(), false, returnState, buffer->getEnd());
  }
-  if (pos == buffer->getEnd()) {
+  if (pos == end) {
    buffer->setStart(pos);
  } else {
    buffer->setStart(pos + 1);
@ -4045,7 +4041,7 @@ void
 nsHtml5Tokenizer::initializeWithoutStarting()
 {
  confident = false;
-  strBuf = jArray<char16_t,int32_t>::newJArray(1024);
+  strBuf = nullptr;
  line = 1;
  resetToDataState();
 }
--- a/parser/html/nsHtml5Tokenizer.h
+++ b/parser/html/nsHtml5Tokenizer.h
@ -367,7 +367,6 @@ class nsHtml5Tokenizer
 #define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73
 #define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74
 #define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10))
-#define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024


 #endif
--- a/parser/html/nsHtml5TokenizerCppSupplement.h
+++ b/parser/html/nsHtml5TokenizerCppSupplement.h
@ -4,6 +4,44 @@

 #include "mozilla/Likely.h"

+bool
+nsHtml5Tokenizer::EnsureBufferSpace(int32_t aLength)
+{
+  MOZ_ASSERT(aLength >= 0, "Negative length.");
+  // Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB.
+  // Adding to the general worst case instead of only the
+  // TreeBuilder-exposed worst case to avoid re-introducing a bug when
+  // unifying the tokenizer and tree builder buffers in the future.
+  size_t worstCase = size_t(strBufLen) +
+                     size_t(aLength) +
+                     size_t(charRefBufLen) +
+                     size_t(2);
+  if (worstCase > INT32_MAX) {
+    // Since we index into the buffer using int32_t due to the Java heritage
+    // of the code, let's treat this as OOM.
+    return false;
+  }
+  // TODO: Unify nsHtml5Tokenizer::strBuf and nsHtml5TreeBuilder::charBuffer
+  // so that the call below becomes unnecessary.
+  tokenHandler->EnsureBufferSpace(worstCase);
+  if (!strBuf) {
+    // Add one to round to the next power of two to avoid immediate
+    // reallocation once there are a few characters in the buffer.
+    strBuf = jArray<char16_t,int32_t>::newFallibleJArray(mozilla::RoundUpPow2(worstCase + 1));
+    if (!strBuf) {
+      return false;
+    }
+  } else if (worstCase > size_t(strBuf.length)) {
+    jArray<char16_t,int32_t> newBuf = jArray<char16_t,int32_t>::newFallibleJArray(mozilla::RoundUpPow2(worstCase));
+    if (!newBuf) {
+      return false;
+    }
+    memcpy(newBuf,strBuf, sizeof(char16_t) * strBufLen);
+    strBuf = newBuf;
+  }
+  return true;
+}
+
 void
 nsHtml5Tokenizer::StartPlainText()
 {
--- a/parser/html/nsHtml5TokenizerHSupplement.h
+++ b/parser/html/nsHtml5TokenizerHSupplement.h
@ -7,6 +7,16 @@ inline nsHtml5HtmlAttributes* GetAttributes()
  return attributes;
 }

+/**
+ * Makes sure the buffers are large enough to be able to tokenize aLength
+ * UTF-16 code units before having to make the buffers larger.
+ *
+ * @param aLength the number of UTF-16 code units to be tokenized before the
+ *                next call to this method.
+ * @return true if successful; false if out of memory
+ */
+bool EnsureBufferSpace(int32_t aLength);
+
 nsAutoPtr<nsHtml5Highlighter> mViewSource;

 /**
--- a/parser/html/nsHtml5TreeBuilder.cpp
+++ b/parser/html/nsHtml5TreeBuilder.cpp
@ -88,7 +88,7 @@ nsHtml5TreeBuilder::startTokenization(nsHtml5Tokenizer* self)
  deepTreeSurrogateParent = nullptr;
  start(fragment);
  charBufferLen = 0;
-  charBuffer = jArray<char16_t,int32_t>::newJArray(1024);
+  charBuffer = nullptr;
  framesetOk = true;
  if (fragment) {
    nsIContentHandle* elt;
--- a/parser/html/nsHtml5TreeBuilderCppSupplement.h
+++ b/parser/html/nsHtml5TreeBuilderCppSupplement.h
@ -24,6 +24,7 @@ nsHtml5TreeBuilder::nsHtml5TreeBuilder(nsHtml5OplessBuilder* aBuilder)
  , mHandles(nullptr)
  , mHandlesUsed(0)
  , mSpeculativeLoadStage(nullptr)
+  , mBroken(NS_OK)
  , mCurrentHtmlScriptIsAsyncOrDefer(false)
  , mPreventScriptExecution(false)
 #ifdef DEBUG
@ -48,6 +49,7 @@ nsHtml5TreeBuilder::nsHtml5TreeBuilder(nsAHtml5TreeOpSink* aOpSink,
  , mHandles(new nsIContent*[NS_HTML5_TREE_BUILDER_HANDLE_ARRAY_LENGTH])
  , mHandlesUsed(0)
  , mSpeculativeLoadStage(aStage)
+  , mBroken(NS_OK)
  , mCurrentHtmlScriptIsAsyncOrDefer(false)
  , mPreventScriptExecution(false)
 #ifdef DEBUG
@ -912,15 +914,39 @@ nsHtml5TreeBuilder::elementPopped(int32_t aNamespace, nsIAtom* aName, nsIContent
 void
 nsHtml5TreeBuilder::accumulateCharacters(const char16_t* aBuf, int32_t aStart, int32_t aLength)
 {
-  int32_t newFillLen = charBufferLen + aLength;
-  if (newFillLen > charBuffer.length) {
-    int32_t newAllocLength = newFillLen + (newFillLen >> 1);
-    jArray<char16_t,int32_t> newBuf = jArray<char16_t,int32_t>::newJArray(newAllocLength);
+  MOZ_ASSERT(charBufferLen + aLength <= charBuffer.length,
+             "About to memcpy past the end of the buffer!");
+  memcpy(charBuffer + charBufferLen, aBuf + aStart, sizeof(char16_t) * aLength);
+  charBufferLen += aLength;
+}
+
+bool
+nsHtml5TreeBuilder::EnsureBufferSpace(size_t aLength)
+{
+  // TODO: Unify nsHtml5Tokenizer::strBuf and nsHtml5TreeBuilder::charBuffer
+  // so that this method becomes unnecessary.
+  size_t worstCase = size_t(charBufferLen) + aLength;
+  if (worstCase > INT32_MAX) {
+    // Since we index into the buffer using int32_t due to the Java heritage
+    // of the code, let's treat this as OOM.
+    return false;
+  }
+  if (!charBuffer) {
+    // Add one to round to the next power of two to avoid immediate
+    // reallocation once there are a few characters in the buffer.
+    charBuffer = jArray<char16_t,int32_t>::newFallibleJArray(mozilla::RoundUpPow2(worstCase + 1));
+    if (!charBuffer) {
+      return false;
+    }
+  } else if (worstCase > size_t(charBuffer.length)) {
+    jArray<char16_t,int32_t> newBuf = jArray<char16_t,int32_t>::newFallibleJArray(mozilla::RoundUpPow2(worstCase));
+    if (!newBuf) {
+      return false;
+    }
    memcpy(newBuf, charBuffer, sizeof(char16_t) * charBufferLen);
    charBuffer = newBuf;
  }
-  memcpy(charBuffer + charBufferLen, aBuf + aStart, sizeof(char16_t) * aLength);
-  charBufferLen = newFillLen;
+  return true;
 }

 nsIContentHandle*
@ -1074,6 +1100,7 @@ nsHtml5TreeBuilder::MarkAsBroken(nsresult aRv)
    MOZ_ASSERT_UNREACHABLE("Must not call this with builder.");
    return;
  }
+  mBroken = aRv;
  mOpQueue.Clear(); // Previous ops don't matter anymore
  mOpQueue.AppendElement()->Init(aRv);
 }
--- a/parser/html/nsHtml5TreeBuilderHSupplement.h
+++ b/parser/html/nsHtml5TreeBuilderHSupplement.h
@ -18,6 +18,7 @@
    int32_t                                mHandlesUsed;
    nsTArray<nsAutoArrayPtr<nsIContent*> > mOldHandles;
    nsHtml5TreeOpStage*                    mSpeculativeLoadStage;
+    nsresult                               mBroken;
    bool                                   mCurrentHtmlScriptIsAsyncOrDefer;
    bool                                   mPreventScriptExecution;
 #ifdef DEBUG
@ -127,6 +128,16 @@
      return mBuilder;
    }

+    /**
+     * Makes sure the buffers are large enough to be able to tokenize aLength
+     * UTF-16 code units before having to make the buffers larger.
+     *
+     * @param aLength the number of UTF-16 code units to be tokenized before the
+     *                next call to this method.
+     * @return true if successful; false if out of memory
+     */
+    bool EnsureBufferSpace(size_t aLength);
+
    void EnableViewSource(nsHtml5Highlighter* aHighlighter);

    void errStrayStartTag(nsIAtom* aName);
@ -224,3 +235,12 @@
    void errEndWithUnclosedElements(nsIAtom* aName);

    void MarkAsBroken(nsresult aRv);
+
+    /**
+     * Checks if this parser is broken. Returns a non-NS_OK (i.e. non-0)
+     * value if broken.
+     */
+    nsresult IsBroken()
+    {
+      return mBroken;
+    }
--- a/parser/html/nsHtml5UTF16Buffer.cpp
+++ b/parser/html/nsHtml5UTF16Buffer.cpp
@ -84,6 +84,12 @@ nsHtml5UTF16Buffer::hasMore()
  return start < end;
 }

+int32_t 
+nsHtml5UTF16Buffer::getLength()
+{
+  return end - start;
+}
+
 void 
 nsHtml5UTF16Buffer::adjust(bool lastWasCR)
 {
--- a/parser/html/nsHtml5UTF16Buffer.h
+++ b/parser/html/nsHtml5UTF16Buffer.h
@ -67,6 +67,7 @@ class nsHtml5UTF16Buffer
    char16_t* getBuffer();
    int32_t getEnd();
    bool hasMore();
+    int32_t getLength();
    void adjust(bool lastWasCR);
    void setEnd(int32_t end);
    static void initializeStatics();