Bug 503632 - Script containing <!-- in a string never ends up closed in the HTML5 parser. rs=sicking.

--HG--
extra : rebase_source : 688505ad32f1e88d07ee31bf62f302c131d7f2bb
This commit is contained in:
Henri Sivonen 2009-11-30 17:34:51 +02:00
parent 888d6846dc
commit 5d28b83605
2 changed files with 334 additions and 42 deletions

View File

@ -447,11 +447,18 @@ nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer)
case NS_HTML5TOKENIZER_PLAINTEXT:
case NS_HTML5TOKENIZER_RAWTEXT:
case NS_HTML5TOKENIZER_CDATA_SECTION:
case NS_HTML5TOKENIZER_ESCAPE:
case NS_HTML5TOKENIZER_ESCAPE_EXCLAMATION:
case NS_HTML5TOKENIZER_ESCAPE_EXCLAMATION_HYPHEN:
case NS_HTML5TOKENIZER_ESCAPE_HYPHEN:
case NS_HTML5TOKENIZER_ESCAPE_HYPHEN_HYPHEN: {
case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED:
case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START:
case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH:
case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH:
case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH:
case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN:
case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START:
case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED:
case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN:
case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END: {
cstart = start;
break;
}
@ -2699,7 +2706,7 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar*
case '!': {
tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
cstart = pos;
state = NS_HTML5TOKENIZER_ESCAPE_EXCLAMATION;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START;
goto scriptdatalessthansignloop_end;
}
default: {
@ -2713,7 +2720,7 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar*
}
scriptdatalessthansignloop_end: ;
}
case NS_HTML5TOKENIZER_ESCAPE_EXCLAMATION: {
case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START: {
for (; ; ) {
if (++pos == endPos) {
goto stateloop_end;
@ -2721,19 +2728,19 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar*
c = checkChar(buf, pos);
switch(c) {
case '-': {
state = NS_HTML5TOKENIZER_ESCAPE_EXCLAMATION_HYPHEN;
goto escapeexclamationloop_end;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH;
goto scriptdataescapestartloop_end;
}
default: {
state = returnState;
state = NS_HTML5TOKENIZER_SCRIPT_DATA;
reconsume = PR_TRUE;
goto stateloop;
}
}
}
escapeexclamationloop_end: ;
scriptdataescapestartloop_end: ;
}
case NS_HTML5TOKENIZER_ESCAPE_EXCLAMATION_HYPHEN: {
case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH: {
for (; ; ) {
if (++pos == endPos) {
goto stateloop_end;
@ -2741,19 +2748,19 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar*
c = checkChar(buf, pos);
switch(c) {
case '-': {
state = NS_HTML5TOKENIZER_ESCAPE_HYPHEN_HYPHEN;
goto escapeexclamationhyphenloop_end;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH;
goto scriptdataescapestartdashloop_end;
}
default: {
state = returnState;
state = NS_HTML5TOKENIZER_SCRIPT_DATA;
reconsume = PR_TRUE;
goto stateloop;
}
}
}
escapeexclamationhyphenloop_end: ;
scriptdataescapestartdashloop_end: ;
}
case NS_HTML5TOKENIZER_ESCAPE_HYPHEN_HYPHEN: {
case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH: {
for (; ; ) {
if (++pos == endPos) {
goto stateloop_end;
@ -2763,41 +2770,55 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar*
case '-': {
continue;
}
case '<': {
flushChars(buf, pos);
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN;
goto stateloop;
}
case '>': {
state = returnState;
state = NS_HTML5TOKENIZER_SCRIPT_DATA;
goto stateloop;
}
case '\0': {
emitReplacementCharacter(buf, pos);
state = NS_HTML5TOKENIZER_ESCAPE;
goto escapehyphenhyphenloop_end;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED;
goto scriptdataescapeddashdashloop_end;
}
case '\r': {
emitCarriageReturn(buf, pos);
state = NS_HTML5TOKENIZER_ESCAPE;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED;
goto stateloop_end;
}
case '\n': {
silentLineFeed();
}
default: {
state = NS_HTML5TOKENIZER_ESCAPE;
goto escapehyphenhyphenloop_end;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED;
goto scriptdataescapeddashdashloop_end;
}
}
}
escapehyphenhyphenloop_end: ;
scriptdataescapeddashdashloop_end: ;
}
case NS_HTML5TOKENIZER_ESCAPE: {
case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED: {
for (; ; ) {
if (++pos == endPos) {
goto stateloop_end;
if (reconsume) {
reconsume = PR_FALSE;
} else {
if (++pos == endPos) {
goto stateloop_end;
}
c = checkChar(buf, pos);
}
c = checkChar(buf, pos);
switch(c) {
case '-': {
state = NS_HTML5TOKENIZER_ESCAPE_HYPHEN;
goto escapeloop_end;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH;
goto scriptdataescapedloop_end;
}
case '<': {
flushChars(buf, pos);
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN;
goto stateloop;
}
case '\0': {
emitReplacementCharacter(buf, pos);
@ -2815,9 +2836,9 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar*
}
}
}
escapeloop_end: ;
scriptdataescapedloop_end: ;
}
case NS_HTML5TOKENIZER_ESCAPE_HYPHEN: {
case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH: {
for (; ; ) {
if (++pos == endPos) {
goto stateloop_end;
@ -2825,24 +2846,288 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar*
c = checkChar(buf, pos);
switch(c) {
case '-': {
state = NS_HTML5TOKENIZER_ESCAPE_HYPHEN_HYPHEN;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH;
goto stateloop;
}
case '<': {
flushChars(buf, pos);
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN;
goto scriptdataescapeddashloop_end;
}
case '\0': {
emitReplacementCharacter(buf, pos);
state = NS_HTML5TOKENIZER_ESCAPE;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED;
goto stateloop;
}
case '\r': {
emitCarriageReturn(buf, pos);
state = NS_HTML5TOKENIZER_ESCAPE;
goto stateloop;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED;
goto stateloop_end;
}
case '\n': {
silentLineFeed();
}
default: {
state = NS_HTML5TOKENIZER_ESCAPE;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED;
goto stateloop;
}
}
}
scriptdataescapeddashloop_end: ;
}
case NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN: {
for (; ; ) {
if (++pos == endPos) {
goto stateloop_end;
}
c = checkChar(buf, pos);
switch(c) {
case '/': {
index = 0;
clearStrBufForNextState();
returnState = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED;
state = NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME;
goto stateloop;
}
case 'S':
case 's': {
tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
cstart = pos;
index = 1;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START;
goto scriptdataescapedlessthanloop_end;
}
default: {
tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
cstart = pos;
reconsume = PR_TRUE;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED;
goto stateloop;
}
}
}
scriptdataescapedlessthanloop_end: ;
}
case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START: {
for (; ; ) {
if (++pos == endPos) {
goto stateloop_end;
}
c = checkChar(buf, pos);
if (index < 6) {
PRUnichar folded = c;
if (c >= 'A' && c <= 'Z') {
folded += 0x20;
}
if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) {
reconsume = PR_TRUE;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED;
goto stateloop;
}
index++;
continue;
}
switch(c) {
case '\r': {
emitCarriageReturn(buf, pos);
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED;
goto stateloop_end;
}
case '\n': {
silentLineFeed();
}
case ' ':
case '\t':
case '\f':
case '/':
case '>': {
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED;
goto scriptdatadoubleescapestartloop_end;
}
default: {
reconsume = PR_TRUE;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED;
goto stateloop;
}
}
}
scriptdatadoubleescapestartloop_end: ;
}
case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED: {
for (; ; ) {
if (reconsume) {
reconsume = PR_FALSE;
} else {
if (++pos == endPos) {
goto stateloop_end;
}
c = checkChar(buf, pos);
}
switch(c) {
case '-': {
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
goto scriptdatadoubleescapedloop_end;
}
case '<': {
flushChars(buf, pos);
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN;
goto stateloop;
}
case '\0': {
emitReplacementCharacter(buf, pos);
continue;
}
case '\r': {
emitCarriageReturn(buf, pos);
goto stateloop_end;
}
case '\n': {
silentLineFeed();
}
default: {
continue;
}
}
}
scriptdatadoubleescapedloop_end: ;
}
case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH: {
for (; ; ) {
if (++pos == endPos) {
goto stateloop_end;
}
c = checkChar(buf, pos);
switch(c) {
case '-': {
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
goto scriptdatadoubleescapeddashloop_end;
}
case '<': {
flushChars(buf, pos);
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN;
goto stateloop;
}
case '\0': {
emitReplacementCharacter(buf, pos);
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED;
goto stateloop;
}
case '\r': {
emitCarriageReturn(buf, pos);
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED;
goto stateloop_end;
}
case '\n': {
silentLineFeed();
}
default: {
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED;
goto stateloop;
}
}
}
scriptdatadoubleescapeddashloop_end: ;
}
case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: {
for (; ; ) {
if (++pos == endPos) {
goto stateloop_end;
}
c = checkChar(buf, pos);
switch(c) {
case '-': {
continue;
}
case '<': {
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN;
goto scriptdatadoubleescapeddashdashloop_end;
}
case '>': {
state = NS_HTML5TOKENIZER_SCRIPT_DATA;
goto stateloop;
}
case '\0': {
emitReplacementCharacter(buf, pos);
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED;
goto stateloop;
}
case '\r': {
emitCarriageReturn(buf, pos);
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED;
goto stateloop_end;
}
case '\n': {
silentLineFeed();
}
default: {
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED;
goto stateloop;
}
}
}
scriptdatadoubleescapeddashdashloop_end: ;
}
case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN: {
for (; ; ) {
if (++pos == endPos) {
goto stateloop_end;
}
c = checkChar(buf, pos);
switch(c) {
case '/': {
index = 0;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END;
goto scriptdatadoubleescapedlessthanloop_end;
}
default: {
reconsume = PR_TRUE;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED;
goto stateloop;
}
}
}
scriptdatadoubleescapedlessthanloop_end: ;
}
case NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END: {
for (; ; ) {
if (++pos == endPos) {
goto stateloop_end;
}
c = checkChar(buf, pos);
if (index < 6) {
PRUnichar folded = c;
if (c >= 'A' && c <= 'Z') {
folded += 0x20;
}
if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) {
reconsume = PR_TRUE;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED;
goto stateloop;
}
index++;
continue;
}
switch(c) {
case '\r': {
emitCarriageReturn(buf, pos);
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED;
goto stateloop_end;
}
case '\n': {
silentLineFeed();
}
case ' ':
case '\t':
case '\f':
case '/':
case '>': {
state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED;
goto stateloop;
}
default: {
reconsume = PR_TRUE;
state = NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED;
goto stateloop;
}
}

View File

@ -347,17 +347,24 @@ jArray<PRUnichar,PRInt32> nsHtml5Tokenizer::NOFRAMES_ARR = 0;
// Numeric identifiers for tokenizer states. The tokenizer code assigns these
// to its `state` variable and uses them as `case` labels when dispatching.
#define NS_HTML5TOKENIZER_CDATA_RSQB 51
#define NS_HTML5TOKENIZER_CDATA_RSQB_RSQB 52
#define NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN_STATE 53
// NOTE(review): values 54-58 are each defined under two names below. This text
// looks like a rendered diff hunk, so the ESCAPE_* names are presumably the
// removed lines and the SCRIPT_DATA_* names their replacements -- confirm
// against the real header before relying on either set.
#define NS_HTML5TOKENIZER_ESCAPE_EXCLAMATION 54
#define NS_HTML5TOKENIZER_ESCAPE_EXCLAMATION_HYPHEN 55
#define NS_HTML5TOKENIZER_ESCAPE 56
#define NS_HTML5TOKENIZER_ESCAPE_HYPHEN 57
#define NS_HTML5TOKENIZER_ESCAPE_HYPHEN_HYPHEN 58
// Script data escape states; they reuse the numeric values 54-58.
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START 54
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH 55
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED 56
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH 57
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH 58
#define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 59
#define NS_HTML5TOKENIZER_RAWTEXT 60
#define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN_STATE 61
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD 62
#define NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 63
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD 64
// States 65-71: the "escaped less-than" state and the double-escaped script
// data states, each of which has its own `case` in the tokenizer state loop.
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN 65
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 66
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 67
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN 68
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 69
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 70
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 71
// Evaluates to 0xD7C0 (0xD800 - 0x40).
// NOTE(review): presumably the offset added to (code point >> 10) to form a
// UTF-16 lead surrogate -- the use site is not visible here; confirm.
#define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10))
// NOTE(review): presumably the increment by which an internal buffer is
// enlarged -- the use site is not visible here; confirm.
#define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024