diff --git a/content/html/parser/src/nsHtml5Parser.cpp b/content/html/parser/src/nsHtml5Parser.cpp
index fb0d078d678c..8c993db19a33 100644
--- a/content/html/parser/src/nsHtml5Parser.cpp
+++ b/content/html/parser/src/nsHtml5Parser.cpp
@@ -152,8 +152,9 @@ nsHtml5Parser::nsHtml5Parser()
mFirstBuffer(new nsHtml5UTF16Buffer(NS_HTML5_PARSER_READ_BUFFER_SIZE)), // XXX allocate elsewhere for fragment parser?
mLastBuffer(mFirstBuffer),
mTreeBuilder(new nsHtml5TreeBuilder(this)),
- mTokenizer(new nsHtml5Tokenizer(mTreeBuilder, this))
+ mTokenizer(new nsHtml5Tokenizer(mTreeBuilder))
{
+ mTokenizer->setEncodingDeclarationHandler(this);
// There's a zeroing operator new for everything else
}
diff --git a/content/html/parser/src/nsHtml5Tokenizer.cpp b/content/html/parser/src/nsHtml5Tokenizer.cpp
index bdc06facf9bb..cb0235f61f7e 100644
--- a/content/html/parser/src/nsHtml5Tokenizer.cpp
+++ b/content/html/parser/src/nsHtml5Tokenizer.cpp
@@ -58,9 +58,9 @@
#include "nsHtml5Tokenizer.h"
-nsHtml5Tokenizer::nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, nsHtml5Parser* encodingDeclarationHandler)
+nsHtml5Tokenizer::nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler)
: tokenHandler(tokenHandler),
- encodingDeclarationHandler(encodingDeclarationHandler),
+ encodingDeclarationHandler(nsnull),
bmpChar(jArray(1)),
astralChar(jArray(2))
{
@@ -154,36 +154,10 @@ nsHtml5Tokenizer::contentModelElementToArray()
}
}
-nsString*
-nsHtml5Tokenizer::getPublicId()
-{
- return publicId;
-}
-
-nsString*
-nsHtml5Tokenizer::getSystemId()
-{
- return systemId;
-}
-
PRInt32
nsHtml5Tokenizer::getLineNumber()
{
- if (line > 0) {
- return line;
- } else {
- return -1;
- }
-}
-
-PRInt32
-nsHtml5Tokenizer::getColumnNumber()
-{
- if (col > 0) {
- return col;
- } else {
- return -1;
- }
+ return line;
}
nsHtml5HtmlAttributes*
@@ -192,16 +166,6 @@ nsHtml5Tokenizer::emptyAttributes()
return nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES;
}
-void
-nsHtml5Tokenizer::detachStrBuf()
-{
-}
-
-void
-nsHtml5Tokenizer::detachLongStrBuf()
-{
-}
-
void
nsHtml5Tokenizer::clearStrBufAndAppendCurrentC(PRUnichar c)
{
@@ -252,10 +216,10 @@ nsHtml5Tokenizer::strBufToString()
return nsHtml5Portability::newStringFromBuffer(strBuf, 0, strBufLen);
}
-nsIAtom*
+void
nsHtml5Tokenizer::strBufToDoctypeName()
{
- return nsHtml5Portability::newLocalNameFromBuffer(strBuf, 0, strBufLen);
+ doctypeName = nsHtml5Portability::newLocalNameFromBuffer(strBuf, 0, strBufLen);
}
void
@@ -279,9 +243,9 @@ nsHtml5Tokenizer::clearLongStrBuf()
}
void
-nsHtml5Tokenizer::clearLongStrBufAndAppendCurrentC()
+nsHtml5Tokenizer::clearLongStrBufAndAppendCurrentC(PRUnichar c)
{
- longStrBuf[0] = buf[pos];
+ longStrBuf[0] = c;
longStrBufLen = 1;
}
@@ -313,6 +277,7 @@ nsHtml5Tokenizer::appendSecondHyphenToBogusComment()
void
nsHtml5Tokenizer::adjustDoubleHyphenAndAppendToLongStrBuf(PRUnichar c)
{
+
appendLongStrBuf(c);
}
@@ -349,41 +314,17 @@ nsHtml5Tokenizer::longStrBufToString()
}
void
-nsHtml5Tokenizer::emitComment(PRInt32 provisionalHyphens)
+nsHtml5Tokenizer::emitComment(PRInt32 provisionalHyphens, PRInt32 pos)
{
tokenHandler->comment(longStrBuf, 0, longStrBufLen - provisionalHyphens);
cstart = pos + 1;
}
-PRBool
-nsHtml5Tokenizer::isPrivateUse(PRUnichar c)
-{
- return c >= 0xe000 && c <= 0xf8ff;
-}
-
-PRBool
-nsHtml5Tokenizer::isAstralPrivateUse(PRInt32 c)
-{
- return (c >= 0xF0000 && c <= 0xFFFFD) || (c >= 0x100000 && c <= 0x10FFFD);
-}
-
-PRBool
-nsHtml5Tokenizer::isNonCharacter(PRInt32 c)
-{
- return (c & 0xFFFE) == 0xFFFE;
-}
-
void
-nsHtml5Tokenizer::flushChars()
+nsHtml5Tokenizer::flushChars(PRUnichar* buf, PRInt32 pos)
{
if (pos > cstart) {
- PRInt32 currLine = line;
- PRInt32 currCol = col;
- line = linePrev;
- col = colPrev;
tokenHandler->characters(buf, cstart, pos - cstart);
- line = currLine;
- col = currCol;
}
cstart = 0x7fffffff;
}
@@ -394,25 +335,21 @@ nsHtml5Tokenizer::resetAttributes()
attributes->clear(0);
}
-nsHtml5ElementName*
+void
nsHtml5Tokenizer::strBufToElementNameString()
{
- return nsHtml5ElementName::elementNameByBuffer(strBuf, 0, strBufLen);
+ tagName = nsHtml5ElementName::elementNameByBuffer(strBuf, 0, strBufLen);
}
PRInt32
-nsHtml5Tokenizer::emitCurrentTagToken(PRBool selfClosing)
+nsHtml5Tokenizer::emitCurrentTagToken(PRBool selfClosing, PRInt32 pos)
{
cstart = pos + 1;
- if (selfClosing && endTag) {
- }
stateSave = NS_HTML5TOKENIZER_DATA;
nsHtml5HtmlAttributes* attrs = (!attributes ? nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES : attributes);
if (endTag) {
- if (attrs->getLength()) {
- }
tokenHandler->endTag(tagName);
} else {
tokenHandler->startTag(tagName, attrs, selfClosing);
@@ -449,6 +386,11 @@ nsHtml5Tokenizer::addAttributeWithValue()
}
}
+void
+nsHtml5Tokenizer::startErrorReporting()
+{
+}
+
void
nsHtml5Tokenizer::start()
{
@@ -458,10 +400,8 @@ nsHtml5Tokenizer::start()
longStrBuf = jArray(1024);
longStrBufLen = 0;
stateSave = NS_HTML5TOKENIZER_DATA;
- line = linePrev = 0;
- col = colPrev = 1;
- nextCharOnNewLine = PR_TRUE;
- prev = '\0';
+ line = -1;
+ lastCR = PR_FALSE;
tokenHandler->startTokenization(this);
index = 0;
forceQuirks = PR_FALSE;
@@ -481,13 +421,13 @@ nsHtml5Tokenizer::start()
PRBool
nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer)
{
- buf = buffer->getBuffer();
PRInt32 state = stateSave;
PRInt32 returnState = returnStateSave;
PRUnichar c = '\0';
shouldSuspend = PR_FALSE;
+ lastCR = PR_FALSE;
PRInt32 start = buffer->getStart();
- pos = start - 1;
+ PRInt32 pos = start - 1;
switch(state) {
case NS_HTML5TOKENIZER_DATA:
case NS_HTML5TOKENIZER_RCDATA:
@@ -507,26 +447,17 @@ nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer)
break;
}
}
- endPos = buffer->getEnd();
- PRBool reconsume = PR_FALSE;
- stateLoop(state, c, reconsume, returnState);
- detachStrBuf();
- detachLongStrBuf();
- if (pos == endPos) {
+ pos = stateLoop(state, c, pos, buffer->getBuffer(), PR_FALSE, returnState, buffer->getEnd());
+ if (pos == buffer->getEnd()) {
buffer->setStart(pos);
} else {
buffer->setStart(pos + 1);
}
- if (prev == '\r') {
- prev = ' ';
- return PR_TRUE;
- } else {
- return PR_FALSE;
- }
+ return lastCR;
}
-void
-nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt32 returnState)
+PRInt32
+nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar* buf, PRBool reconsume, PRInt32 returnState, PRInt32 endPos)
{
stateloop: for (; ; ) {
switch(state) {
@@ -535,26 +466,36 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
- }
- switch(c) {
- case '\0': {
+ if (++pos == endPos) {
goto stateloop_end;
}
+ c = checkChar(buf, pos);
+ }
+ switch(c) {
case '&': {
- flushChars();
+ flushChars(buf, pos);
clearStrBufAndAppendCurrentC(c);
- additional = '\0';
- rememberAmpersandLocation();
+ rememberAmpersandLocation('\0');
returnState = state;
state = NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE;
goto stateloop;
}
case '<': {
- flushChars();
+ flushChars(buf, pos);
state = NS_HTML5TOKENIZER_TAG_OPEN;
goto dataloop_end;
}
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn(buf, pos);
+ goto stateloop_end;
+ }
+ case '\n': {
+ silentLineFeed();
+ }
default: {
continue;
}
@@ -564,7 +505,10 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_TAG_OPEN: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
if (c >= 'A' && c <= 'Z') {
endTag = PR_FALSE;
clearStrBufAndAppendForceWrite((PRUnichar) (c + 0x20));
@@ -577,9 +521,6 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
goto tagopenloop_end;
}
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '!': {
state = NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN;
goto stateloop;
@@ -590,7 +531,7 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case '\?': {
- clearLongStrBufAndAppendToComment('\?');
+ clearLongStrBufAndAppendToComment(c);
state = NS_HTML5TOKENIZER_BOGUS_COMMENT;
goto stateloop;
}
@@ -615,38 +556,48 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_TAG_NAME: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
+ case '\r': {
+ silentCarriageReturn();
+ strBufToElementNameString();
+ state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME;
goto stateloop_end;
}
+ case '\n': {
+ silentLineFeed();
+ }
case ' ':
case '\t':
- case '\n':
case '\f': {
- tagName = strBufToElementNameString();
+ strBufToElementNameString();
state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME;
goto tagnameloop_end;
}
case '/': {
- tagName = strBufToElementNameString();
+ strBufToElementNameString();
state = NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG;
goto stateloop;
}
case '>': {
- tagName = strBufToElementNameString();
- state = emitCurrentTagToken(PR_FALSE);
+ strBufToElementNameString();
+ state = emitCurrentTagToken(PR_FALSE, pos);
if (shouldSuspend) {
goto stateloop_end;
}
goto stateloop;
}
+ case '\0': {
+ c = 0xfffd;
+ }
default: {
if (c >= 'A' && c <= 'Z') {
- appendStrBufForceWrite((PRUnichar) (c + 0x20));
- } else {
- appendStrBuf(c);
+ c += 0x20;
}
+ appendStrBuf(c);
continue;
}
}
@@ -658,15 +609,21 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
}
switch(c) {
- case '\0': {
+ case '\r': {
+ silentCarriageReturn();
goto stateloop_end;
}
+ case '\n': {
+ silentLineFeed();
+ }
case ' ':
case '\t':
- case '\n':
case '\f': {
continue;
}
@@ -675,27 +632,23 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
goto stateloop;
}
case '>': {
- state = emitCurrentTagToken(PR_FALSE);
+ state = emitCurrentTagToken(PR_FALSE, pos);
if (shouldSuspend) {
goto stateloop_end;
}
goto stateloop;
}
+ case '\0': {
+ c = 0xfffd;
+ }
case '\"':
case '\'':
- case '=': {
- if (c == '=') {
-
- } else {
-
- }
- }
+ case '=':
default: {
if (c >= 'A' && c <= 'Z') {
- clearStrBufAndAppendForceWrite((PRUnichar) (c + 0x20));
- } else {
- clearStrBufAndAppendCurrentC(c);
+ c += 0x20;
}
+ clearStrBufAndAppendCurrentC(c);
state = NS_HTML5TOKENIZER_ATTRIBUTE_NAME;
goto beforeattributenameloop_end;
}
@@ -705,14 +658,22 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_ATTRIBUTE_NAME: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
+ case '\r': {
+ silentCarriageReturn();
+ attributeNameComplete();
+ state = NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME;
goto stateloop_end;
}
+ case '\n': {
+ silentLineFeed();
+ }
case ' ':
case '\t':
- case '\n':
case '\f': {
attributeNameComplete();
state = NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME;
@@ -732,20 +693,22 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
case '>': {
attributeNameComplete();
addAttributeWithoutValue();
- state = emitCurrentTagToken(PR_FALSE);
+ state = emitCurrentTagToken(PR_FALSE, pos);
if (shouldSuspend) {
goto stateloop_end;
}
goto stateloop;
}
+ case '\0': {
+ c = 0xfffd;
+ }
case '\"':
case '\'':
default: {
if (c >= 'A' && c <= 'Z') {
- appendStrBufForceWrite((PRUnichar) (c + 0x20));
- } else {
- appendStrBuf(c);
+ c += 0x20;
}
+ appendStrBuf(c);
continue;
}
}
@@ -754,14 +717,20 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
+ case '\r': {
+ silentCarriageReturn();
goto stateloop_end;
}
+ case '\n': {
+ silentLineFeed();
+ }
case ' ':
case '\t':
- case '\n':
case '\f': {
continue;
}
@@ -784,15 +753,18 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
case '>': {
addAttributeWithoutValue();
- state = emitCurrentTagToken(PR_FALSE);
+ state = emitCurrentTagToken(PR_FALSE, pos);
if (shouldSuspend) {
goto stateloop_end;
}
goto stateloop;
}
+ case '\0': {
+ c = 0xfffd;
+ }
case '=':
default: {
- clearLongStrBufAndAppendCurrentC();
+ clearLongStrBufAndAppendCurrentC(c);
state = NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED;
goto stateloop;
}
@@ -805,26 +777,35 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
- }
- switch(c) {
- case '\0': {
+ if (++pos == endPos) {
goto stateloop_end;
}
+ c = checkChar(buf, pos);
+ }
+ switch(c) {
case '\"': {
addAttributeWithValue();
state = NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED;
goto attributevaluedoublequotedloop_end;
}
case '&': {
- detachLongStrBuf();
clearStrBufAndAppendCurrentC(c);
- additional = '\"';
- rememberAmpersandLocation();
+ rememberAmpersandLocation('\"');
returnState = state;
state = NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE;
goto stateloop;
}
+ case '\r': {
+ appendLongStrBufCarriageReturn();
+ goto stateloop_end;
+ }
+ case '\n': {
+ appendLongStrBufLineFeed();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ }
default: {
appendLongStrBuf(c);
continue;
@@ -835,14 +816,21 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
+ case '\r': {
+ silentCarriageReturn();
+ state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME;
goto stateloop_end;
}
+ case '\n': {
+ silentLineFeed();
+ }
case ' ':
case '\t':
- case '\n':
case '\f': {
state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME;
goto stateloop;
@@ -852,7 +840,7 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
goto afterattributevaluequotedloop_end;
}
case '>': {
- state = emitCurrentTagToken(PR_FALSE);
+ state = emitCurrentTagToken(PR_FALSE, pos);
if (shouldSuspend) {
goto stateloop_end;
}
@@ -869,13 +857,13 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
afterattributevaluequotedloop_end: ;
}
case NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG: {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '>': {
- state = emitCurrentTagToken(PR_TRUE);
+ state = emitCurrentTagToken(PR_TRUE, pos);
if (shouldSuspend) {
goto stateloop_end;
}
@@ -894,48 +882,52 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
}
switch(c) {
- case '\0': {
+ case '\r': {
+ silentCarriageReturn();
+ addAttributeWithValue();
+ state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME;
goto stateloop_end;
}
+ case '\n': {
+ silentLineFeed();
+ }
case ' ':
case '\t':
- case '\n':
case '\f': {
addAttributeWithValue();
state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME;
goto stateloop;
}
case '&': {
- detachLongStrBuf();
clearStrBufAndAppendCurrentC(c);
- additional = '\0';
- rememberAmpersandLocation();
+ rememberAmpersandLocation('\0');
returnState = state;
state = NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE;
goto stateloop;
}
case '>': {
addAttributeWithValue();
- state = emitCurrentTagToken(PR_FALSE);
+ state = emitCurrentTagToken(PR_FALSE, pos);
if (shouldSuspend) {
goto stateloop_end;
}
goto stateloop;
}
+ case '\0': {
+ c = 0xfffd;
+ }
case '<':
case '\"':
case '\'':
- case '=': {
- if (c == '<') {
-
- } else {
-
- }
- }
+ case '=':
default: {
+
appendLongStrBuf(c);
continue;
}
@@ -944,14 +936,20 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
+ case '\r': {
+ silentCarriageReturn();
goto stateloop_end;
}
+ case '\n': {
+ silentLineFeed();
+ }
case ' ':
case '\t':
- case '\n':
case '\f': {
continue;
}
@@ -966,21 +964,23 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case '>': {
addAttributeWithoutValue();
- state = emitCurrentTagToken(PR_FALSE);
+ state = emitCurrentTagToken(PR_FALSE, pos);
if (shouldSuspend) {
goto stateloop_end;
}
goto stateloop;
}
+ case '\0': {
+ c = 0xfffd;
+ }
case '\"':
case '\'':
default: {
addAttributeWithoutValue();
if (c >= 'A' && c <= 'Z') {
- clearStrBufAndAppendForceWrite((PRUnichar) (c + 0x20));
- } else {
- clearStrBufAndAppendCurrentC(c);
+ c += 0x20;
}
+ clearStrBufAndAppendCurrentC(c);
state = NS_HTML5TOKENIZER_ATTRIBUTE_NAME;
goto stateloop;
}
@@ -992,14 +992,14 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
- }
- switch(c) {
- case '\0': {
+ if (++pos == endPos) {
goto stateloop_end;
}
+ c = checkChar(buf, pos);
+ }
+ switch(c) {
case '>': {
- emitComment(0);
+ emitComment(0, pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
@@ -1008,6 +1008,17 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
state = NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN;
goto boguscommentloop_end;
}
+ case '\r': {
+ appendLongStrBufCarriageReturn();
+ goto stateloop_end;
+ }
+ case '\n': {
+ appendLongStrBufLineFeed();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ }
default: {
appendLongStrBuf(c);
continue;
@@ -1018,13 +1029,13 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN: {
boguscommenthyphenloop: for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '>': {
- emitComment(0);
+ emitComment(0, pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
@@ -1032,6 +1043,19 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
appendSecondHyphenToBogusComment();
goto boguscommenthyphenloop;
}
+ case '\r': {
+ appendLongStrBufCarriageReturn();
+ state = NS_HTML5TOKENIZER_BOGUS_COMMENT;
+ goto stateloop_end;
+ }
+ case '\n': {
+ appendLongStrBufLineFeed();
+ state = NS_HTML5TOKENIZER_BOGUS_COMMENT;
+ goto stateloop;
+ }
+ case '\0': {
+ c = 0xfffd;
+ }
default: {
appendLongStrBuf(c);
state = NS_HTML5TOKENIZER_BOGUS_COMMENT;
@@ -1043,11 +1067,11 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '-': {
clearLongStrBufAndAppendToComment(c);
state = NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN;
@@ -1082,7 +1106,10 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
case '\0': {
goto stateloop_end;
@@ -1104,11 +1131,11 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_COMMENT_START: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '-': {
appendLongStrBuf(c);
state = NS_HTML5TOKENIZER_COMMENT_START_DASH;
@@ -1116,10 +1143,23 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case '>': {
- emitComment(0);
+ emitComment(0, pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
+ case '\r': {
+ appendLongStrBufCarriageReturn();
+ state = NS_HTML5TOKENIZER_COMMENT;
+ goto stateloop_end;
+ }
+ case '\n': {
+ appendLongStrBufLineFeed();
+ state = NS_HTML5TOKENIZER_COMMENT;
+ goto commentstartloop_end;
+ }
+ case '\0': {
+ c = 0xfffd;
+ }
default: {
appendLongStrBuf(c);
state = NS_HTML5TOKENIZER_COMMENT;
@@ -1131,16 +1171,27 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_COMMENT: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '-': {
appendLongStrBuf(c);
state = NS_HTML5TOKENIZER_COMMENT_END_DASH;
goto commentloop_end;
}
+ case '\r': {
+ appendLongStrBufCarriageReturn();
+ goto stateloop_end;
+ }
+ case '\n': {
+ appendLongStrBufLineFeed();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ }
default: {
appendLongStrBuf(c);
continue;
@@ -1151,16 +1202,29 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_COMMENT_END_DASH: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '-': {
appendLongStrBuf(c);
state = NS_HTML5TOKENIZER_COMMENT_END;
goto commentenddashloop_end;
}
+ case '\r': {
+ appendLongStrBufCarriageReturn();
+ state = NS_HTML5TOKENIZER_COMMENT;
+ goto stateloop_end;
+ }
+ case '\n': {
+ appendLongStrBufLineFeed();
+ state = NS_HTML5TOKENIZER_COMMENT;
+ goto stateloop;
+ }
+ case '\0': {
+ c = 0xfffd;
+ }
default: {
appendLongStrBuf(c);
state = NS_HTML5TOKENIZER_COMMENT;
@@ -1172,23 +1236,34 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_COMMENT_END: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '>': {
- emitComment(2);
+ emitComment(2, pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
case '-': {
-
adjustDoubleHyphenAndAppendToLongStrBuf(c);
continue;
}
+ case '\r': {
+ adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn();
+ state = NS_HTML5TOKENIZER_COMMENT;
+ goto stateloop_end;
+ }
+ case '\n': {
+ adjustDoubleHyphenAndAppendToLongStrBufLineFeed();
+ state = NS_HTML5TOKENIZER_COMMENT;
+ goto stateloop;
+ }
+ case '\0': {
+ c = 0xfffd;
+ }
default: {
-
adjustDoubleHyphenAndAppendToLongStrBuf(c);
state = NS_HTML5TOKENIZER_COMMENT;
goto stateloop;
@@ -1197,11 +1272,11 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
}
case NS_HTML5TOKENIZER_COMMENT_START_DASH: {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '-': {
appendLongStrBuf(c);
state = NS_HTML5TOKENIZER_COMMENT_END;
@@ -1209,10 +1284,23 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case '>': {
- emitComment(1);
+ emitComment(1, pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
+ case '\r': {
+ appendLongStrBufCarriageReturn();
+ state = NS_HTML5TOKENIZER_COMMENT;
+ goto stateloop_end;
+ }
+ case '\n': {
+ appendLongStrBufLineFeed();
+ state = NS_HTML5TOKENIZER_COMMENT;
+ goto stateloop;
+ }
+ case '\0': {
+ c = 0xfffd;
+ }
default: {
appendLongStrBuf(c);
state = NS_HTML5TOKENIZER_COMMENT;
@@ -1222,10 +1310,10 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE: {
for (; ; ) {
- c = read();
- if (c == '\0') {
+ if (++pos == endPos) {
goto stateloop_end;
}
+ c = checkChar(buf, pos);
if (index < 6) {
PRUnichar folded = c;
if (c >= 'A' && c <= 'Z') {
@@ -1254,19 +1342,23 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
- }
- doctypeName = nsHtml5Atoms::emptystring;
- systemIdentifier = nsnull;
- publicIdentifier = nsnull;
- forceQuirks = PR_FALSE;
- switch(c) {
- case '\0': {
+ if (++pos == endPos) {
goto stateloop_end;
}
+ c = checkChar(buf, pos);
+ }
+ initDoctypeFields();
+ switch(c) {
+ case '\r': {
+ silentCarriageReturn();
+ state = NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME;
+ goto stateloop_end;
+ }
+ case '\n': {
+ silentLineFeed();
+ }
case ' ':
case '\t':
- case '\n':
case '\f': {
state = NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME;
goto doctypeloop_end;
@@ -1286,32 +1378,39 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
}
switch(c) {
- case '\0': {
+ case '\r': {
+ silentCarriageReturn();
goto stateloop_end;
}
+ case '\n': {
+ silentLineFeed();
+ }
case ' ':
case '\t':
- case '\n':
case '\f': {
continue;
}
case '>': {
forceQuirks = PR_TRUE;
- emitDoctypeToken();
- cstart = pos + 1;
+ emitDoctypeToken(pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
+ case '\0': {
+ c = 0xfffd;
+ }
default: {
if (c >= 'A' && c <= 'Z') {
- clearStrBufAndAppendForceWrite((PRUnichar) (c + 0x20));
- } else {
- clearStrBufAndAppendCurrentC(c);
+ c += 0x20;
}
+ clearStrBufAndAppendCurrentC(c);
state = NS_HTML5TOKENIZER_DOCTYPE_NAME;
goto beforedoctypenameloop_end;
}
@@ -1321,26 +1420,36 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_DOCTYPE_NAME: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
+ case '\r': {
+ silentCarriageReturn();
+ strBufToDoctypeName();
+ state = NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME;
goto stateloop_end;
}
+ case '\n': {
+ silentLineFeed();
+ }
case ' ':
case '\t':
- case '\n':
case '\f': {
- doctypeName = strBufToDoctypeName();
+ strBufToDoctypeName();
state = NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME;
goto doctypenameloop_end;
}
case '>': {
- doctypeName = strBufToDoctypeName();
- emitDoctypeToken();
- cstart = pos + 1;
+ strBufToDoctypeName();
+ emitDoctypeToken(pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
+ case '\0': {
+ c = 0xfffd;
+ }
default: {
if (c >= 'A' && c <= 'Z') {
c += 0x0020;
@@ -1354,20 +1463,25 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
+ case '\r': {
+ silentCarriageReturn();
goto stateloop_end;
}
+ case '\n': {
+ silentLineFeed();
+ }
case ' ':
case '\t':
- case '\n':
case '\f': {
continue;
}
case '>': {
- emitDoctypeToken();
- cstart = pos + 1;
+ emitDoctypeToken(pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
@@ -1394,10 +1508,10 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_DOCTYPE_UBLIC: {
for (; ; ) {
- c = read();
- if (c == '\0') {
+ if (++pos == endPos) {
goto stateloop_end;
}
+ c = checkChar(buf, pos);
if (index < 5) {
PRUnichar folded = c;
if (c >= 'A' && c <= 'Z') {
@@ -1424,15 +1538,21 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
}
switch(c) {
- case '\0': {
+ case '\r': {
+ silentCarriageReturn();
goto stateloop_end;
}
+ case '\n': {
+ silentLineFeed();
+ }
case ' ':
case '\t':
- case '\n':
case '\f': {
continue;
}
@@ -1449,8 +1569,7 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
case '>': {
forceQuirks = PR_TRUE;
- emitDoctypeToken();
- cstart = pos + 1;
+ emitDoctypeToken(pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
@@ -1465,11 +1584,11 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '\"': {
publicIdentifier = longStrBufToString();
state = NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
@@ -1479,11 +1598,21 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
forceQuirks = PR_TRUE;
publicIdentifier = longStrBufToString();
- emitDoctypeToken();
- cstart = pos + 1;
+ emitDoctypeToken(pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
+ case '\r': {
+ appendLongStrBufCarriageReturn();
+ goto stateloop_end;
+ }
+ case '\n': {
+ appendLongStrBufLineFeed();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ }
default: {
appendLongStrBuf(c);
continue;
@@ -1494,14 +1623,20 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
+ case '\r': {
+ silentCarriageReturn();
goto stateloop_end;
}
+ case '\n': {
+ silentLineFeed();
+ }
case ' ':
case '\t':
- case '\n':
case '\f': {
continue;
}
@@ -1516,8 +1651,7 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
goto stateloop;
}
case '>': {
- emitDoctypeToken();
- cstart = pos + 1;
+ emitDoctypeToken(pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
@@ -1532,11 +1666,11 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '\"': {
systemIdentifier = longStrBufToString();
state = NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
@@ -1546,11 +1680,21 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
forceQuirks = PR_TRUE;
systemIdentifier = longStrBufToString();
- emitDoctypeToken();
- cstart = pos + 1;
+ emitDoctypeToken(pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
+ case '\r': {
+ appendLongStrBufCarriageReturn();
+ goto stateloop_end;
+ }
+ case '\n': {
+ appendLongStrBufLineFeed();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ }
default: {
appendLongStrBuf(c);
continue;
@@ -1561,20 +1705,25 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
+ case '\r': {
+ silentCarriageReturn();
goto stateloop_end;
}
+ case '\n': {
+ silentLineFeed();
+ }
case ' ':
case '\t':
- case '\n':
case '\f': {
continue;
}
case '>': {
- emitDoctypeToken();
- cstart = pos + 1;
+ emitDoctypeToken(pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
@@ -1592,18 +1741,24 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
- }
- switch(c) {
- case '\0': {
+ if (++pos == endPos) {
goto stateloop_end;
}
+ c = checkChar(buf, pos);
+ }
+ switch(c) {
case '>': {
- emitDoctypeToken();
- cstart = pos + 1;
+ emitDoctypeToken(pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
+ case '\r': {
+ silentCarriageReturn();
+ goto stateloop_end;
+ }
+ case '\n': {
+ silentLineFeed();
+ }
default: {
continue;
}
@@ -1612,10 +1767,10 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_DOCTYPE_YSTEM: {
for (; ; ) {
- c = read();
- if (c == '\0') {
+ if (++pos == endPos) {
goto stateloop_end;
}
+ c = checkChar(buf, pos);
if (index < 5) {
PRUnichar folded = c;
if (c >= 'A' && c <= 'Z') {
@@ -1642,15 +1797,21 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
}
switch(c) {
- case '\0': {
+ case '\r': {
+ silentCarriageReturn();
goto stateloop_end;
}
+ case '\n': {
+ silentLineFeed();
+ }
case ' ':
case '\t':
- case '\n':
case '\f': {
continue;
}
@@ -1667,8 +1828,7 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
case '>': {
forceQuirks = PR_TRUE;
- emitDoctypeToken();
- cstart = pos + 1;
+ emitDoctypeToken(pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
@@ -1683,11 +1843,11 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '\'': {
systemIdentifier = longStrBufToString();
state = NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
@@ -1697,11 +1857,21 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
forceQuirks = PR_TRUE;
systemIdentifier = longStrBufToString();
- emitDoctypeToken();
- cstart = pos + 1;
+ emitDoctypeToken(pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
+ case '\r': {
+ appendLongStrBufCarriageReturn();
+ goto stateloop_end;
+ }
+ case '\n': {
+ appendLongStrBufLineFeed();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ }
default: {
appendLongStrBuf(c);
continue;
@@ -1711,11 +1881,11 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '\'': {
publicIdentifier = longStrBufToString();
state = NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
@@ -1725,11 +1895,21 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
forceQuirks = PR_TRUE;
publicIdentifier = longStrBufToString();
- emitDoctypeToken();
- cstart = pos + 1;
+ emitDoctypeToken(pos);
state = NS_HTML5TOKENIZER_DATA;
goto stateloop;
}
+ case '\r': {
+ appendLongStrBufCarriageReturn();
+ goto stateloop_end;
+ }
+ case '\n': {
+ appendLongStrBufLineFeed();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ }
default: {
appendLongStrBuf(c);
continue;
@@ -1739,10 +1919,10 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_CDATA_START: {
for (; ; ) {
- c = read();
- if (c == '\0') {
+ if (++pos == endPos) {
goto stateloop_end;
}
+ c = checkChar(buf, pos);
if (index < 6) {
if (c == nsHtml5Tokenizer::CDATA_LSQB[index]) {
appendLongStrBuf(c);
@@ -1767,17 +1947,28 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
- }
- switch(c) {
- case '\0': {
+ if (++pos == endPos) {
goto stateloop_end;
}
+ c = checkChar(buf, pos);
+ }
+ switch(c) {
case ']': {
- flushChars();
+ flushChars(buf, pos);
state = NS_HTML5TOKENIZER_CDATA_RSQB;
goto cdatasectionloop_end;
}
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn(buf, pos);
+ goto stateloop_end;
+ }
+ case '\n': {
+ silentLineFeed();
+ }
default: {
continue;
}
@@ -1787,11 +1978,11 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_CDATA_RSQB: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case ']': {
state = NS_HTML5TOKENIZER_CDATA_RSQB_RSQB;
goto cdatarsqb_end;
@@ -1808,11 +1999,11 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
cdatarsqb_end: ;
}
case NS_HTML5TOKENIZER_CDATA_RSQB_RSQB: {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '>': {
cstart = pos + 1;
state = NS_HTML5TOKENIZER_DATA;
@@ -1832,26 +2023,35 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
- }
- switch(c) {
- case '\0': {
+ if (++pos == endPos) {
goto stateloop_end;
}
+ c = checkChar(buf, pos);
+ }
+ switch(c) {
case '\'': {
addAttributeWithValue();
state = NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED;
goto stateloop;
}
case '&': {
- detachLongStrBuf();
clearStrBufAndAppendCurrentC(c);
- additional = '\'';
- rememberAmpersandLocation();
+ rememberAmpersandLocation('\'');
returnState = state;
state = NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE;
goto attributevaluesinglequotedloop_end;
}
+ case '\r': {
+ appendLongStrBufCarriageReturn();
+ goto stateloop_end;
+ }
+ case '\n': {
+ appendLongStrBufLineFeed();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ }
default: {
appendLongStrBuf(c);
continue;
@@ -1861,7 +2061,10 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
attributevaluesinglequotedloop_end: ;
}
case NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE: {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
if (c == '\0') {
goto stateloop_end;
}
@@ -1869,6 +2072,7 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
case ' ':
case '\t':
case '\n':
+ case '\r':
case '\f':
case '<':
case '&': {
@@ -1907,7 +2111,10 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
}
if (c == '\0') {
goto stateloop_end;
@@ -2006,14 +2213,14 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
}
case NS_HTML5TOKENIZER_CONSUME_NCR: {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
prevValue = -1;
value = 0;
seenDigits = PR_FALSE;
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case 'x':
case 'X': {
appendStrBuf(c);
@@ -2031,10 +2238,10 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
- }
- if (c == '\0') {
- goto stateloop_end;
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
}
if (value < prevValue) {
value = 0x110000;
@@ -2047,10 +2254,10 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
continue;
} else if (c == ';') {
if (seenDigits) {
- state = NS_HTML5TOKENIZER_HANDLE_NCR_VALUE;
if (!(returnState & (~1))) {
cstart = pos + 1;
}
+ state = NS_HTML5TOKENIZER_HANDLE_NCR_VALUE;
goto decimalloop_end;
} else {
@@ -2074,11 +2281,11 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
goto stateloop;
} else {
- state = NS_HTML5TOKENIZER_HANDLE_NCR_VALUE;
- reconsume = PR_TRUE;
if (!(returnState & (~1))) {
cstart = pos;
}
+ state = NS_HTML5TOKENIZER_HANDLE_NCR_VALUE;
+ reconsume = PR_TRUE;
goto decimalloop_end;
}
}
@@ -2092,10 +2299,10 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_HEX_NCR_LOOP: {
for (; ; ) {
- c = read();
- if (c == '\0') {
+ if (++pos == endPos) {
goto stateloop_end;
}
+ c = checkChar(buf, pos);
if (value < prevValue) {
value = 0x110000;
}
@@ -2159,12 +2366,23 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
}
switch(c) {
case '\0': {
+ emitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn(buf, pos);
goto stateloop_end;
}
+ case '\n': {
+ silentLineFeed();
+ }
default: {
continue;
}
@@ -2177,18 +2395,29 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
- }
- switch(c) {
- case '\0': {
+ if (++pos == endPos) {
goto stateloop_end;
}
+ c = checkChar(buf, pos);
+ }
+ switch(c) {
case '<': {
- flushChars();
+ flushChars(buf, pos);
returnState = state;
state = NS_HTML5TOKENIZER_TAG_OPEN_NON_PCDATA;
goto cdataloop_end;
}
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn(buf, pos);
+ goto stateloop_end;
+ }
+ case '\n': {
+ silentLineFeed();
+ }
default: {
continue;
}
@@ -2198,11 +2427,11 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_TAG_OPEN_NON_PCDATA: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '!': {
tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
cstart = pos;
@@ -2230,11 +2459,11 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_ESCAPE_EXCLAMATION: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '-': {
state = NS_HTML5TOKENIZER_ESCAPE_EXCLAMATION_HYPHEN;
goto escapeexclamationloop_end;
@@ -2250,11 +2479,11 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_ESCAPE_EXCLAMATION_HYPHEN: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '-': {
state = NS_HTML5TOKENIZER_ESCAPE_HYPHEN_HYPHEN;
goto escapeexclamationhyphenloop_end;
@@ -2270,11 +2499,11 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_ESCAPE_HYPHEN_HYPHEN: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '-': {
continue;
}
@@ -2282,6 +2511,19 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
state = returnState;
goto stateloop;
}
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ state = NS_HTML5TOKENIZER_ESCAPE;
+ goto escapehyphenhyphenloop_end;
+ }
+ case '\r': {
+ emitCarriageReturn(buf, pos);
+ state = NS_HTML5TOKENIZER_ESCAPE;
+ goto stateloop_end;
+ }
+ case '\n': {
+ silentLineFeed();
+ }
default: {
state = NS_HTML5TOKENIZER_ESCAPE;
goto escapehyphenhyphenloop_end;
@@ -2291,18 +2533,29 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
escapehyphenhyphenloop_end: ;
}
case NS_HTML5TOKENIZER_ESCAPE: {
- escapeloop: for (; ; ) {
- c = read();
+ for (; ; ) {
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '-': {
state = NS_HTML5TOKENIZER_ESCAPE_HYPHEN;
goto escapeloop_end;
}
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn(buf, pos);
+ goto stateloop_end;
+ }
+ case '\n': {
+ silentLineFeed();
+ }
default: {
- goto escapeloop;
+ continue;
}
}
}
@@ -2310,15 +2563,28 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_ESCAPE_HYPHEN: {
for (; ; ) {
- c = read();
+ if (++pos == endPos) {
+ goto stateloop_end;
+ }
+ c = checkChar(buf, pos);
switch(c) {
- case '\0': {
- goto stateloop_end;
- }
case '-': {
state = NS_HTML5TOKENIZER_ESCAPE_HYPHEN_HYPHEN;
goto stateloop;
}
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ state = NS_HTML5TOKENIZER_ESCAPE;
+ goto stateloop;
+ }
+ case '\r': {
+ emitCarriageReturn(buf, pos);
+ state = NS_HTML5TOKENIZER_ESCAPE;
+ goto stateloop_end; // leave the loop like every other '\r' case; continuing lets ESCAPE's '\n' case bump line again for CRLF
+ }
+ case '\n': {
+ silentLineFeed();
+ }
default: {
state = NS_HTML5TOKENIZER_ESCAPE;
goto stateloop;
@@ -2329,11 +2595,10 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
case NS_HTML5TOKENIZER_CLOSE_TAG_OPEN_NOT_PCDATA: {
for (; ; ) {
- c = read();
- if (c == '\0') {
+ if (++pos == endPos) {
goto stateloop_end;
}
-
+ c = checkChar(buf, pos);
if (index < contentModelElementNameAsArray.length) {
PRUnichar e = contentModelElementNameAsArray[index];
PRUnichar folded = c;
@@ -2355,15 +2620,22 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
endTag = PR_TRUE;
tagName = contentModelElement;
switch(c) {
+ case '\r': {
+ silentCarriageReturn();
+ state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME;
+ goto stateloop_end;
+ }
+ case '\n': {
+ silentLineFeed();
+ }
case ' ':
case '\t':
- case '\n':
case '\f': {
state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME;
goto stateloop;
}
case '>': {
- state = emitCurrentTagToken(PR_FALSE);
+ state = emitCurrentTagToken(PR_FALSE, pos);
if (shouldSuspend) {
goto stateloop_end;
}
@@ -2376,7 +2648,11 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
default: {
tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2);
emitStrBuf();
- cstart = pos;
+ if (c == '\0') {
+ emitReplacementCharacter(buf, pos);
+ } else {
+ cstart = pos;
+ }
state = returnState;
goto stateloop;
}
@@ -2385,30 +2661,50 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
}
case NS_HTML5TOKENIZER_CLOSE_TAG_OPEN_PCDATA: {
- c = read();
- if (c == '\0') {
+ if (++pos == endPos) {
goto stateloop_end;
}
- if (c >= 'A' && c <= 'Z') {
- endTag = PR_TRUE;
- clearStrBufAndAppendForceWrite((PRUnichar) (c + 0x20));
- state = NS_HTML5TOKENIZER_TAG_NAME;
- goto stateloop;
- } else if (c >= 'a' && c <= 'z') {
- endTag = PR_TRUE;
- clearStrBufAndAppendCurrentC(c);
- state = NS_HTML5TOKENIZER_TAG_NAME;
- goto stateloop;
- } else if (c == '>') {
+ c = checkChar(buf, pos);
+ switch(c) {
+ case '>': {
- cstart = pos + 1;
- state = NS_HTML5TOKENIZER_DATA;
- goto stateloop;
- } else {
+ cstart = pos + 1;
+ state = NS_HTML5TOKENIZER_DATA;
+ goto stateloop;
+ }
+ case '\r': {
+ silentCarriageReturn();
- clearLongStrBufAndAppendToComment(c);
- state = NS_HTML5TOKENIZER_BOGUS_COMMENT;
- goto stateloop;
+ clearLongStrBufAndAppendToComment('\n');
+ state = NS_HTML5TOKENIZER_BOGUS_COMMENT;
+ goto stateloop_end;
+ }
+ case '\n': {
+ silentLineFeed();
+
+ clearLongStrBufAndAppendToComment('\n');
+ state = NS_HTML5TOKENIZER_BOGUS_COMMENT;
+ goto stateloop;
+ }
+ case '\0': {
+ c = 0xfffd;
+ }
+ default: {
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ if (c >= 'a' && c <= 'z') {
+ endTag = PR_TRUE;
+ clearStrBufAndAppendCurrentC(c);
+ state = NS_HTML5TOKENIZER_TAG_NAME;
+ goto stateloop;
+ } else {
+
+ clearLongStrBufAndAppendToComment(c);
+ state = NS_HTML5TOKENIZER_BOGUS_COMMENT;
+ goto stateloop;
+ }
+ }
}
}
case NS_HTML5TOKENIZER_RCDATA: {
@@ -2416,14 +2712,14 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
if (reconsume) {
reconsume = PR_FALSE;
} else {
- c = read();
- }
- switch(c) {
- case '\0': {
+ if (++pos == endPos) {
goto stateloop_end;
}
+ c = checkChar(buf, pos);
+ }
+ switch(c) {
case '&': {
- flushChars();
+ flushChars(buf, pos);
clearStrBufAndAppendCurrentC(c);
additional = '\0';
returnState = state;
@@ -2431,11 +2727,22 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
goto stateloop;
}
case '<': {
- flushChars();
+ flushChars(buf, pos);
returnState = state;
state = NS_HTML5TOKENIZER_TAG_OPEN_NON_PCDATA;
goto stateloop;
}
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn(buf, pos);
+ goto stateloop_end;
+ }
+ case '\n': {
+ silentLineFeed();
+ }
default: {
continue;
}
@@ -2446,18 +2753,34 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt3
}
}
stateloop_end: ;
- flushChars();
- if (prev == '\r' && pos != endPos) {
- pos--;
- col--;
- }
+ flushChars(buf, pos);
stateSave = state;
returnStateSave = returnState;
+ return pos;
}
void
-nsHtml5Tokenizer::rememberAmpersandLocation()
+nsHtml5Tokenizer::emitCarriageReturn(PRUnichar* buf, PRInt32 pos)
{
+ silentCarriageReturn();
+ flushChars(buf, pos);
+ tokenHandler->characters(nsHtml5Tokenizer::LF, 0, 1);
+ cstart = PR_INT32_MAX;
+}
+
+void
+nsHtml5Tokenizer::emitReplacementCharacter(PRUnichar* buf, PRInt32 pos)
+{
+ // Flushes pending text, then emits U+FFFD for a NUL; NUL is not a line break, so line and lastCR are left untouched.
+ flushChars(buf, pos);
+ tokenHandler->characters(nsHtml5Tokenizer::REPLACEMENT_CHARACTER, 0, 1);
+ cstart = PR_INT32_MAX;
+}
+
+void
+nsHtml5Tokenizer::rememberAmpersandLocation(PRUnichar add)
+{
+ additional = add;
}
void
@@ -2500,7 +2823,7 @@ nsHtml5Tokenizer::handleNcrValue(PRInt32 returnState)
} else if ((value & 0xF800) == 0xD800) {
emitOrAppendOne(nsHtml5Tokenizer::REPLACEMENT_CHARACTER, returnState);
- } else if (isNonCharacter(value)) {
+ } else if ((value & 0xFFFE) == 0xFFFE) {
emitOrAppendOne(nsHtml5Tokenizer::REPLACEMENT_CHARACTER, returnState);
} else if (value >= 0xFDD0 && value <= 0xFDEF) {
@@ -2575,36 +2898,35 @@ nsHtml5Tokenizer::eof()
goto eofloop_end;
}
case NS_HTML5TOKENIZER_BOGUS_COMMENT: {
- emitComment(0);
+ emitComment(0, 0);
goto eofloop_end;
}
case NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN: {
- emitComment(0);
+ emitComment(0, 0);
goto eofloop_end;
}
case NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN: {
clearLongStrBuf();
- emitComment(0);
+ emitComment(0, 0);
goto eofloop_end;
}
case NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN: {
- emitComment(0);
+ emitComment(0, 0);
goto eofloop_end;
}
case NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE: {
if (index < 6) {
- emitComment(0);
+ emitComment(0, 0);
} else {
-
doctypeName = nsHtml5Atoms::emptystring;
publicIdentifier = nsnull;
systemIdentifier = nsnull;
forceQuirks = PR_TRUE;
- emitDoctypeToken();
+ emitDoctypeToken(0);
goto eofloop_end;
}
goto eofloop_end;
@@ -2612,32 +2934,32 @@ nsHtml5Tokenizer::eof()
case NS_HTML5TOKENIZER_COMMENT_START:
case NS_HTML5TOKENIZER_COMMENT: {
- emitComment(0);
+ emitComment(0, 0);
goto eofloop_end;
}
case NS_HTML5TOKENIZER_COMMENT_END: {
- emitComment(2);
+ emitComment(2, 0);
goto eofloop_end;
}
case NS_HTML5TOKENIZER_COMMENT_END_DASH:
case NS_HTML5TOKENIZER_COMMENT_START_DASH: {
- emitComment(1);
+ emitComment(1, 0);
goto eofloop_end;
}
case NS_HTML5TOKENIZER_DOCTYPE:
case NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME: {
forceQuirks = PR_TRUE;
- emitDoctypeToken();
+ emitDoctypeToken(0);
goto eofloop_end;
}
case NS_HTML5TOKENIZER_DOCTYPE_NAME: {
- doctypeName = strBufToDoctypeName();
+ strBufToDoctypeName();
forceQuirks = PR_TRUE;
- emitDoctypeToken();
+ emitDoctypeToken(0);
goto eofloop_end;
}
case NS_HTML5TOKENIZER_DOCTYPE_UBLIC:
@@ -2646,7 +2968,7 @@ nsHtml5Tokenizer::eof()
case NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: {
forceQuirks = PR_TRUE;
- emitDoctypeToken();
+ emitDoctypeToken(0);
goto eofloop_end;
}
case NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
@@ -2654,14 +2976,14 @@ nsHtml5Tokenizer::eof()
forceQuirks = PR_TRUE;
publicIdentifier = longStrBufToString();
- emitDoctypeToken();
+ emitDoctypeToken(0);
goto eofloop_end;
}
case NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
case NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: {
forceQuirks = PR_TRUE;
- emitDoctypeToken();
+ emitDoctypeToken(0);
goto eofloop_end;
}
case NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
@@ -2669,17 +2991,17 @@ nsHtml5Tokenizer::eof()
forceQuirks = PR_TRUE;
systemIdentifier = longStrBufToString();
- emitDoctypeToken();
+ emitDoctypeToken(0);
goto eofloop_end;
}
case NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
forceQuirks = PR_TRUE;
- emitDoctypeToken();
+ emitDoctypeToken(0);
goto eofloop_end;
}
case NS_HTML5TOKENIZER_BOGUS_DOCTYPE: {
- emitDoctypeToken();
+ emitDoctypeToken(0);
goto eofloop_end;
}
case NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE: {
@@ -2800,8 +3122,9 @@ nsHtml5Tokenizer::eof()
}
void
-nsHtml5Tokenizer::emitDoctypeToken()
+nsHtml5Tokenizer::emitDoctypeToken(PRInt32 pos)
{
+ cstart = pos + 1;
tokenHandler->doctype(doctypeName, publicIdentifier, systemIdentifier, forceQuirks);
nsHtml5Portability::releaseLocal(doctypeName);
nsHtml5Portability::releaseString(publicIdentifier);
@@ -2869,25 +3192,25 @@ nsHtml5Tokenizer::becomeConfident()
PRBool
nsHtml5Tokenizer::isNextCharOnNewLine()
{
- return nextCharOnNewLine;
+ return PR_FALSE;
}
PRBool
nsHtml5Tokenizer::isPrevCR()
{
- return prev == '\r';
+ return lastCR;
}
PRInt32
nsHtml5Tokenizer::getLine()
{
- return line;
+ return -1;
}
PRInt32
nsHtml5Tokenizer::getCol()
{
- return col;
+ return -1;
}
PRBool
@@ -2896,6 +3219,12 @@ nsHtml5Tokenizer::isInDataState()
return (stateSave == NS_HTML5TOKENIZER_DATA);
}
+void
+nsHtml5Tokenizer::setEncodingDeclarationHandler(nsHtml5Parser* encodingDeclarationHandler)
+{
+ this->encodingDeclarationHandler = encodingDeclarationHandler;
+}
+
void
nsHtml5Tokenizer::initializeStatics()
{
diff --git a/content/html/parser/src/nsHtml5Tokenizer.h b/content/html/parser/src/nsHtml5Tokenizer.h
index 31d0a7bc2176..e5b0e5750699 100644
--- a/content/html/parser/src/nsHtml5Tokenizer.h
+++ b/content/html/parser/src/nsHtml5Tokenizer.h
@@ -110,17 +110,16 @@ class nsHtml5Tokenizer
static PRUnichar NOFRAMES_ARR_DATA[];
#endif
static jArray NOFRAMES_ARR;
+ protected:
nsHtml5TreeBuilder* tokenHandler;
nsHtml5Parser* encodingDeclarationHandler;
- PRUnichar prev;
- PRInt32 line;
- PRInt32 linePrev;
- PRInt32 col;
- PRInt32 colPrev;
- PRBool nextCharOnNewLine;
+ PRBool lastCR;
PRInt32 stateSave;
+ private:
PRInt32 returnStateSave;
+ protected:
PRInt32 index;
+ private:
PRBool forceQuirks;
PRUnichar additional;
PRInt32 entCol;
@@ -129,12 +128,13 @@ class nsHtml5Tokenizer
PRInt32 candidate;
PRInt32 strBufMark;
PRInt32 prevValue;
+ protected:
PRInt32 value;
+ private:
PRBool seenDigits;
- PRInt32 pos;
- PRInt32 endPos;
- PRUnichar* buf;
+ protected:
PRInt32 cstart;
+ private:
nsString* publicId;
nsString* systemId;
jArray strBuf;
@@ -144,20 +144,28 @@ class nsHtml5Tokenizer
nsHtml5HtmlAttributes* attributes;
jArray bmpChar;
jArray astralChar;
- PRBool alreadyWarnedAboutPrivateUseCharacters;
+ protected:
nsHtml5ElementName* contentModelElement;
+ private:
jArray contentModelElementNameAsArray;
+ protected:
PRBool endTag;
+ private:
nsHtml5ElementName* tagName;
+ protected:
nsHtml5AttributeName* attributeName;
+ private:
nsIAtom* doctypeName;
nsString* publicIdentifier;
nsString* systemIdentifier;
PRInt32 mappingLangToXmlLang;
PRBool shouldSuspend;
+ protected:
PRBool confident;
+ private:
+ PRInt32 line;
public:
- nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, nsHtml5Parser* encodingDeclarationHandler);
+ nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler);
void initLocation(nsString* newPublicId, nsString* newSystemId);
~nsHtml5Tokenizer();
void setContentModelFlag(PRInt32 contentModelFlag, nsIAtom* contentModelElement);
@@ -165,25 +173,22 @@ class nsHtml5Tokenizer
private:
void contentModelElementToArray();
public:
- nsString* getPublicId();
- nsString* getSystemId();
PRInt32 getLineNumber();
- PRInt32 getColumnNumber();
nsHtml5HtmlAttributes* emptyAttributes();
private:
- void detachStrBuf();
- void detachLongStrBuf();
void clearStrBufAndAppendCurrentC(PRUnichar c);
void clearStrBufAndAppendForceWrite(PRUnichar c);
void clearStrBufForNextState();
void appendStrBuf(PRUnichar c);
void appendStrBufForceWrite(PRUnichar c);
+ protected:
nsString* strBufToString();
- nsIAtom* strBufToDoctypeName();
+ private:
+ void strBufToDoctypeName();
void emitStrBuf();
void clearLongStrBufForNextState();
void clearLongStrBuf();
- void clearLongStrBufAndAppendCurrentC();
+ void clearLongStrBufAndAppendCurrentC(PRUnichar c);
void clearLongStrBufAndAppendToComment(PRUnichar c);
void appendLongStrBuf(PRUnichar c);
void appendSecondHyphenToBogusComment();
@@ -192,23 +197,71 @@ class nsHtml5Tokenizer
void appendLongStrBuf(jArray arr);
void appendStrBufToLongStrBuf();
nsString* longStrBufToString();
- void emitComment(PRInt32 provisionalHyphens);
- PRBool isPrivateUse(PRUnichar c);
- PRBool isAstralPrivateUse(PRInt32 c);
- PRBool isNonCharacter(PRInt32 c);
- void flushChars();
+ void emitComment(PRInt32 provisionalHyphens, PRInt32 pos);
+ protected:
+ void flushChars(PRUnichar* buf, PRInt32 pos);
+ private:
void resetAttributes();
- nsHtml5ElementName* strBufToElementNameString();
- PRInt32 emitCurrentTagToken(PRBool selfClosing);
+ void strBufToElementNameString();
+ PRInt32 emitCurrentTagToken(PRBool selfClosing, PRInt32 pos);
void attributeNameComplete();
void addAttributeWithoutValue();
void addAttributeWithValue();
+ protected:
+ void startErrorReporting();
public:
void start();
PRBool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
private:
- void stateLoop(PRInt32 state, PRUnichar c, PRBool reconsume, PRInt32 returnState);
- void rememberAmpersandLocation();
+ PRInt32 stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar* buf, PRBool reconsume, PRInt32 returnState, PRInt32 endPos);
+ inline void initDoctypeFields()
+ {
+ doctypeName = nsHtml5Atoms::emptystring;
+ systemIdentifier = nsnull;
+ publicIdentifier = nsnull;
+ forceQuirks = PR_FALSE;
+ }
+
+ inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn()
+ {
+ silentCarriageReturn();
+ adjustDoubleHyphenAndAppendToLongStrBuf('\n');
+ }
+
+ inline void adjustDoubleHyphenAndAppendToLongStrBufLineFeed()
+ {
+ silentLineFeed();
+ adjustDoubleHyphenAndAppendToLongStrBuf('\n');
+ }
+
+ inline void appendLongStrBufLineFeed()
+ {
+ silentLineFeed();
+ appendLongStrBuf('\n');
+ }
+
+ inline void appendLongStrBufCarriageReturn()
+ {
+ silentCarriageReturn();
+ appendLongStrBuf('\n');
+ }
+
+ protected:
+ inline void silentCarriageReturn()
+ {
+ ++line;
+ lastCR = PR_TRUE;
+ }
+
+ inline void silentLineFeed()
+ {
+ ++line;
+ }
+
+ private:
+ void emitCarriageReturn(PRUnichar* buf, PRInt32 pos);
+ void emitReplacementCharacter(PRUnichar* buf, PRInt32 pos);
+ void rememberAmpersandLocation(PRUnichar add);
void bogusDoctype();
void bogusDoctypeWithoutQuirks();
void emitOrAppendStrBuf(PRInt32 returnState);
@@ -216,45 +269,11 @@ class nsHtml5Tokenizer
public:
void eof();
private:
- void emitDoctypeToken();
- inline PRUnichar read()
+ void emitDoctypeToken(PRInt32 pos);
+ protected:
+ inline PRUnichar checkChar(PRUnichar* buf, PRInt32 pos)
{
- PRUnichar c;
- pos++;
- if (pos == endPos) {
- return '\0';
- }
- linePrev = line;
- colPrev = col;
- if (nextCharOnNewLine) {
- line++;
- col = 1;
- nextCharOnNewLine = PR_FALSE;
- } else {
- col++;
- }
- c = buf[pos];
- switch(c) {
- case '\r': {
- nextCharOnNewLine = PR_TRUE;
- buf[pos] = '\n';
- prev = '\r';
- return '\n';
- }
- case '\n': {
- if (prev == '\r') {
- return '\0';
- }
- nextCharOnNewLine = PR_TRUE;
- break;
- }
- case '\0': {
- c = buf[pos] = 0xfffd;
- break;
- }
- }
- prev = c;
- return c;
+ return buf[pos];
}
public:
@@ -271,6 +290,7 @@ class nsHtml5Tokenizer
PRInt32 getLine();
PRInt32 getCol();
PRBool isInDataState();
+ void setEncodingDeclarationHandler(nsHtml5Parser* encodingDeclarationHandler);
static void initializeStatics();
static void releaseStatics();
};
@@ -366,7 +386,6 @@ jArray nsHtml5Tokenizer::NOFRAMES_ARR = J_ARRAY_STATIC(PRUnic
#define NS_HTML5TOKENIZER_ESCAPE_HYPHEN_HYPHEN 56
#define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 57
#define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10))
-#define NS_HTML5TOKENIZER_SURROGATE_OFFSET (0x10000 - (0xD800 << 10) - 0xDC00)
#define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024
diff --git a/content/html/parser/src/nsHtml5TreeBuilder.cpp b/content/html/parser/src/nsHtml5TreeBuilder.cpp
index 7c337fa2e24c..4ac191c31a22 100644
--- a/content/html/parser/src/nsHtml5TreeBuilder.cpp
+++ b/content/html/parser/src/nsHtml5TreeBuilder.cpp
@@ -2449,7 +2449,9 @@ nsHtml5TreeBuilder::endTag(nsHtml5ElementName* elementName)
case NS_HTML5TREE_BUILDER_BEFORE_HEAD: {
switch(group) {
case NS_HTML5TREE_BUILDER_HEAD:
- case NS_HTML5TREE_BUILDER_BR: {
+ case NS_HTML5TREE_BUILDER_BR:
+ case NS_HTML5TREE_BUILDER_HTML:
+ case NS_HTML5TREE_BUILDER_BODY: {
appendToCurrentNodeAndPushHeadElement(nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES);
mode = NS_HTML5TREE_BUILDER_IN_HEAD;
continue;
@@ -2467,7 +2469,9 @@ nsHtml5TreeBuilder::endTag(nsHtml5ElementName* elementName)
mode = NS_HTML5TREE_BUILDER_AFTER_HEAD;
goto endtagloop_end;
}
- case NS_HTML5TREE_BUILDER_BR: {
+ case NS_HTML5TREE_BUILDER_BR:
+ case NS_HTML5TREE_BUILDER_HTML:
+ case NS_HTML5TREE_BUILDER_BODY: {
pop();
mode = NS_HTML5TREE_BUILDER_AFTER_HEAD;
continue;
@@ -2499,6 +2503,8 @@ nsHtml5TreeBuilder::endTag(nsHtml5ElementName* elementName)
}
case NS_HTML5TREE_BUILDER_AFTER_HEAD: {
switch(group) {
+ case NS_HTML5TREE_BUILDER_HTML:
+ case NS_HTML5TREE_BUILDER_BODY:
case NS_HTML5TREE_BUILDER_BR: {
appendToCurrentNodeAndPushBodyElement();
mode = NS_HTML5TREE_BUILDER_FRAMESET_OK;