Bug 563526 - Ignore U+0000 in element content when the tree builder is not in the "text" mode or the "in foreign" mode. r=jonas.

--HG--
extra : rebase_source : 97a67dc820c9b8ae1265b0c2c7e39c69f1733cee
This commit is contained in:
Henri Sivonen 2010-06-09 09:45:32 +03:00
parent 00f26a48f7
commit 788d95420b
5 changed files with 36 additions and 3 deletions

View File

@ -5860,7 +5860,7 @@ public class Tokenizer implements Locator {
/**
 * Calls <code>flushChars(buf, pos)</code>, reports a replacement character
 * to the token handler, and then sets <code>cstart</code> to
 * <code>pos + 1</code>.
 *
 * @param buf the buffer passed through to <code>flushChars</code>
 * @param pos the position passed through to <code>flushChars</code>
 * @throws SAXException declared here; presumably propagated from the calls below
 */
private void emitReplacementCharacter(@NoLength char[] buf, int pos)
throws SAXException {
flushChars(buf, pos);
// NOTE(review): this listing looks like a diff hunk shown without +/- markers.
// The characters(...) call is probably the removed line and the
// zeroOriginatingReplacementCharacter() call its replacement; as written here,
// both calls run. Confirm against the actual file.
tokenHandler.characters(Tokenizer.REPLACEMENT_CHARACTER, 0, 1);
tokenHandler.zeroOriginatingReplacementCharacter();
// Presumably makes the next pending run start after the character at pos -- confirm.
cstart = pos + 1;
}

View File

@ -60,6 +60,11 @@ import org.xml.sax.SAXParseException;
public abstract class TreeBuilder<T> implements TokenHandler,
TreeBuilderState<T> {
/**
 * Single-element array holding U+FFFD, so that the character can be passed
 * to <code>characters(buf, start, length)</code> as a buffer with start 0
 * and length 1.
 */
private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
// Start dispatch groups
final static int OTHER = 0;
@ -839,8 +844,11 @@ public abstract class TreeBuilder<T> implements TokenHandler,
needToDropLF = false;
}
if (inForeign) {
accumulateCharacters(buf, start, length);
return;
}
// optimize the most common case
// XXX should there be an IN FOREIGN check here?
switch (mode) {
case IN_BODY:
case IN_CELL:
@ -1206,6 +1214,16 @@ public abstract class TreeBuilder<T> implements TokenHandler,
}
}
/**
 * Handles a replacement character that, going by the method name,
 * originated from a U+0000 in the input. The character is forwarded to
 * <code>characters</code> only while <code>inForeign</code> is set or the
 * mode is <code>TEXT</code>; in every other case nothing is emitted.
 *
 * @throws SAXException declared here; presumably propagated from <code>characters</code>
 * @see nu.validator.htmlparser.common.TokenHandler#zeroOriginatingReplacementCharacter()
 */
@Override public void zeroOriginatingReplacementCharacter()
        throws SAXException {
    // Neither in foreign content nor in text mode: drop the character.
    if (!inForeign && mode != TEXT) {
        return;
    }
    characters(REPLACEMENT_CHARACTER, 0, 1);
}
public final void eof() throws SAXException {
flushCharacters();
if (inForeign) {

View File

@ -3339,7 +3339,7 @@ void
nsHtml5Tokenizer::emitReplacementCharacter(PRUnichar* buf, PRInt32 pos)
{
// Calls flushChars(buf, pos), reports a replacement character to the token
// handler, then sets cstart to pos + 1.
flushChars(buf, pos);
// NOTE(review): this listing looks like a diff hunk shown without +/- markers.
// The characters(...) call is probably the removed line and the
// zeroOriginatingReplacementCharacter() call its replacement; as written here,
// both calls run. Confirm against the actual file.
tokenHandler->characters(nsHtml5Tokenizer::REPLACEMENT_CHARACTER, 0, 1);
tokenHandler->zeroOriginatingReplacementCharacter();
// Presumably makes the next pending run start after the character at pos -- confirm.
cstart = pos + 1;
}

View File

@ -190,6 +190,10 @@ nsHtml5TreeBuilder::characters(const PRUnichar* buf, PRInt32 start, PRInt32 leng
}
needToDropLF = PR_FALSE;
}
if (inForeign) {
accumulateCharacters(buf, start, length);
return;
}
switch(mode) {
case NS_HTML5TREE_BUILDER_IN_BODY:
case NS_HTML5TREE_BUILDER_IN_CELL:
@ -413,6 +417,14 @@ nsHtml5TreeBuilder::characters(const PRUnichar* buf, PRInt32 start, PRInt32 leng
}
}
void
nsHtml5TreeBuilder::zeroOriginatingReplacementCharacter()
{
  // Forward U+FFFD to characters() only while inForeign is set or the mode is
  // NS_HTML5TREE_BUILDER_TEXT; in every other case emit nothing.
  if (!inForeign && mode != NS_HTML5TREE_BUILDER_TEXT) {
    return;
  }
  characters(REPLACEMENT_CHARACTER, 0, 1);
}
void
nsHtml5TreeBuilder::eof()
{

View File

@ -70,6 +70,7 @@ class nsHtml5Portability;
class nsHtml5TreeBuilder : public nsAHtml5TreeBuilderState
{
private:
static PRUnichar REPLACEMENT_CHARACTER[];
static jArray<const char*,PRInt32> QUIRKY_PUBLIC_IDS;
PRInt32 mode;
PRInt32 originalMode;
@ -100,6 +101,7 @@ class nsHtml5TreeBuilder : public nsAHtml5TreeBuilderState
void doctype(nsIAtom* name, nsString* publicIdentifier, nsString* systemIdentifier, PRBool forceQuirks);
void comment(PRUnichar* buf, PRInt32 start, PRInt32 length);
void characters(const PRUnichar* buf, PRInt32 start, PRInt32 length);
void zeroOriginatingReplacementCharacter();
void eof();
void endTokenization();
void startTag(nsHtml5ElementName* elementName, nsHtml5HtmlAttributes* attributes, PRBool selfClosing);
@ -239,6 +241,7 @@ class nsHtml5TreeBuilder : public nsAHtml5TreeBuilderState
};
// Out-of-class definitions for the static members declared in
// nsHtml5TreeBuilder. They are compiled only when nsHtml5TreeBuilder_cpp__ is
// defined (presumably by the matching .cpp file, so that the definitions are
// emitted once -- confirm).
#ifdef nsHtml5TreeBuilder_cpp__
// One-element array containing U+FFFD.
PRUnichar nsHtml5TreeBuilder::REPLACEMENT_CHARACTER[] = { 0xfffd };
// Defined as nsnull here. NOTE(review): presumably populated elsewhere; not visible in this listing.
jArray<const char*,PRInt32> nsHtml5TreeBuilder::QUIRKY_PUBLIC_IDS = nsnull;
#endif