Bug 348447. feed sanitizer whitelist accessibility and i18n.

This commit is contained in:
sayrer%gmail.com 2006-11-08 18:20:55 +00:00
parent e71bbe8d57
commit 8372e94e34
7 changed files with 223 additions and 12 deletions

View File

@ -127,7 +127,11 @@ protected:
nsCString mRef; // ScrollTo #ref
};
//
// these two lists are used by the sanitizing fragment serializers
// Thanks to Mark Pilgrim and Sam Ruby for the initial whitelist
//
static nsIAtom** const kDefaultAllowedTags [] = {
&nsHTMLAtoms::a,
&nsHTMLAtoms::abbr,
@ -135,6 +139,7 @@ static nsIAtom** const kDefaultAllowedTags [] = {
&nsHTMLAtoms::address,
&nsHTMLAtoms::area,
&nsHTMLAtoms::b,
&nsHTMLAtoms::bdo,
&nsHTMLAtoms::big,
&nsHTMLAtoms::blockquote,
&nsHTMLAtoms::br,
@ -171,8 +176,10 @@ static nsIAtom** const kDefaultAllowedTags [] = {
&nsHTMLAtoms::label,
&nsHTMLAtoms::legend,
&nsHTMLAtoms::li,
&nsHTMLAtoms::listing,
&nsHTMLAtoms::map,
&nsHTMLAtoms::menu,
&nsHTMLAtoms::nobr,
&nsHTMLAtoms::ol,
&nsHTMLAtoms::optgroup,
&nsHTMLAtoms::option,
@ -198,17 +205,22 @@ static nsIAtom** const kDefaultAllowedTags [] = {
&nsHTMLAtoms::tr,
&nsHTMLAtoms::tt,
&nsHTMLAtoms::u,
&nsHTMLAtoms::ul
&nsHTMLAtoms::ul,
&nsHTMLAtoms::var
};
static nsIAtom** const kDefaultAllowedAttributes [] = {
&nsHTMLAtoms::abbr,
&nsHTMLAtoms::accept,
&nsHTMLAtoms::acceptcharset,
&nsHTMLAtoms::accesskey,
&nsHTMLAtoms::action,
&nsHTMLAtoms::align,
&nsHTMLAtoms::alt,
&nsHTMLAtoms::autocomplete,
&nsHTMLAtoms::axis,
&nsHTMLAtoms::background,
&nsHTMLAtoms::bgcolor,
&nsHTMLAtoms::border,
&nsHTMLAtoms::cellpadding,
&nsHTMLAtoms::cellspacing,
@ -248,10 +260,12 @@ static nsIAtom** const kDefaultAllowedAttributes [] = {
&nsHTMLAtoms::nohref,
&nsHTMLAtoms::noshade,
&nsHTMLAtoms::nowrap,
&nsHTMLAtoms::pointSize,
&nsHTMLAtoms::prompt,
&nsHTMLAtoms::readonly,
&nsHTMLAtoms::rel,
&nsHTMLAtoms::rev,
&nsHTMLAtoms::role,
&nsHTMLAtoms::rows,
&nsHTMLAtoms::rowspan,
&nsHTMLAtoms::rules,
@ -283,6 +297,7 @@ PRBool IsAttrURI(nsIAtom *aName)
aName == nsHTMLAtoms::src ||
aName == nsHTMLAtoms::longdesc ||
aName == nsHTMLAtoms::usemap ||
aName == nsHTMLAtoms::cite);
aName == nsHTMLAtoms::cite ||
aName == nsHTMLAtoms::background);
}
#endif // _nsContentSink_h_

View File

@ -440,6 +440,7 @@ GK_ATOM(listcols, "listcols")
GK_ATOM(listener, "listener")
GK_ATOM(listhead, "listhead")
GK_ATOM(listheader, "listheader")
GK_ATOM(listing, "listing")
GK_ATOM(listitem, "listitem")
GK_ATOM(listrows, "listrows")
GK_ATOM(load, "load")
@ -500,6 +501,7 @@ GK_ATOM(never, "never")
GK_ATOM(_new, "new")
GK_ATOM(nextBidi, "NextBidi")
GK_ATOM(no, "no")
GK_ATOM(nobr, "nobr")
GK_ATOM(node, "node")
GK_ATOM(noembed, "noembed")
GK_ATOM(noframes, "noframes")
@ -677,6 +679,7 @@ GK_ATOM(reverse, "reverse")
GK_ATOM(right, "right")
GK_ATOM(rightmargin, "rightmargin")
GK_ATOM(rightpadding, "rightpadding")
GK_ATOM(role, "role")
GK_ATOM(round, "round")
GK_ATOM(row, "row")
GK_ATOM(rows, "rows")

View File

@ -663,9 +663,12 @@ nsXHTMLParanoidFragmentSink::HandleStartElement(const PRUnichar *aName,
NS_ENSURE_SUCCESS(rv, rv);
name = nodeInfo->NameAtom();
// Add if it's xmlns, xml:, or on the HTML whitelist
// Add if it's xmlns, xml:, aaa:, xhtml2:role, or on the HTML whitelist
if (nameSpaceID == kNameSpaceID_XMLNS ||
nameSpaceID == kNameSpaceID_XML ||
nameSpaceID == kNameSpaceID_WAIProperties ||
(nameSpaceID == kNameSpaceID_XHTML2_Unofficial &&
name == nsHTMLAtoms::role) ||
sAllowedAttributes && sAllowedAttributes->GetEntry(name)) {
allowedAttrs.AppendElement(aAtts[i]);
allowedAttrs.AppendElement(aAtts[i + 1]);

View File

@ -56,6 +56,7 @@ var gUnescapeHTML = Cc[UNESCAPE_CONTRACTID].
const XMLNS = "http://www.w3.org/XML/1998/namespace";
const RSS090NS = "http://my.netscape.com/rdf/simple/0.9/";
const WAIROLE_NS = "http://www.w3.org/2005/01/wai-rdf/GUIRoleTaxonomy#";
/***** Some general utils *****/
function strToURI(link, base) {
@ -284,6 +285,16 @@ var gNamespaces = {
"http://www.w3.org/XML/1998/namespace":"xml"
}
// Namespaces permitted on attributes of XHTML content (elements are not
// covered by this table). Each allowed namespace URI maps to the fixed
// prefix that is written for it on output.
var gAllowedXHTMLNamespaces = {
  "http://www.w3.org/XML/1998/namespace": "xml",
  "http://www.w3.org/TR/xhtml2": "xhtml2",
  "http://www.w3.org/2005/07/aaa": "aaa",
  // Attributes explicitly qualified with the XHTML namespace get a prefix
  // so they cannot collide with the unqualified attribute of the same name.
  "http://www.w3.org/1999/xhtml": "xhtml"
};
function FeedResult() {}
FeedResult.prototype = {
@ -505,6 +516,7 @@ TextConstruct.prototype = {
docFragment.appendChild(node);
return docFragment;
}
LOG("entry text: " + this.text + "\n");
var isXML;
if (this.type == "xhtml")
isXML = true
@ -774,22 +786,39 @@ function dateParse(dateString) {
const XHTML_NS = "http://www.w3.org/1999/xhtml";
// The XHTMLHandler handles inline XHTML found in things like atom:summary
function XHTMLHandler(processor, isAtom) {
function XHTMLHandler(processor, isAtom, waiPrefixes) {
  // Serialized XHTML gathered so far.
  this._buf = "";

  // The object that created this handler (the visible caller passes itself).
  this._processor = processor;

  // Current element nesting level; bumped on start, dropped on end.
  this._depth = 0;

  // When true, the outermost wrapper div is not treated as content.
  this._isAtom = isAtom;

  // Stack with one list per open element, holding the namespace URIs
  // that element declared on output.
  this._inScopeNS = [];

  // Prefix -> namespace map supplied by the caller; the same object is
  // kept (not copied), so updates are visible to both sides.
  this._waiPrefixes = waiPrefixes;
}
// The fidelity can be improved here, to allow handling of stuff like
// SVG and MathML. XXX
XHTMLHandler.prototype = {
// look back up at the declared namespaces
// we always use the same prefixes for our safe stuff
_isInScope: function XH__isInScope(ns) {
for (var i in this._inScopeNS) {
for (var uri in this._inScopeNS[i]) {
if (this._inScopeNS[i][uri] == ns)
return true;
}
}
return false;
},
// No-op: this handler does nothing when the document starts.
startDocument: function XH_startDocument() {
},
// No-op: this handler does nothing when the document ends.
endDocument: function XH_endDocument() {
},
startElement: function XH_startElement(uri, localName, qName, attributes) {
++this._depth;
this._inScopeNS.push([]);
// RFC4287 requires XHTML to be wrapped in a div that is *not* part of
// the content. This prevents people from screwing up namespaces, but
@ -800,11 +829,65 @@ XHTMLHandler.prototype = {
// If it's an XHTML element, record it. Otherwise, it's ignored.
if (uri == XHTML_NS) {
this._buf += "<" + localName;
var uri;
for (var i=0; i < attributes.length; ++i) {
uri = attributes.getURI(i);
// XHTML attributes aren't in a namespace
if (attributes.getURI(i) == "") {
if (uri == "") {
this._buf += (" " + attributes.getLocalName(i) + "='" +
xmlEscape(attributes.getValue(i)) + "'");
} else {
// write a small set of allowed attribute namespaces
var prefix = gAllowedXHTMLNamespaces[uri];
if (prefix != null) {
// The attribute value we'll attempt to write
var attributeValue = xmlEscape(attributes.getValue(i));
// More QName abuse from W3C
var rolePrefix = "";
if (attributes.getLocalName(i) == "role") {
for (var aPrefix in this._waiPrefixes) {
if (attributeValue.indexOf(aPrefix + ":") == 0) {
// Now, due to the terrible layer mismatch
// that is QNames in content, we have to see
// if the attribute value clashes with our
// namespace declarations.
var isCollision = false;
for (var uriKey in gAllowedXHTMLNamespaces) {
if (gAllowedXHTMLNamespaces[uriKey] == aPrefix)
isCollision = true;
}
if (isCollision) {
rolePrefix = aPrefix + i;
attributeValue =
rolePrefix + ":" +
attributeValue.substring(aPrefix.length + 1);
} else {
rolePrefix = aPrefix;
}
break;
}
}
if (rolePrefix)
this._buf += (" xmlns:" + rolePrefix +
"='" + WAIROLE_NS + "'");
}
// it's an allowed attribute NS.
// write the attribute
this._buf += (" " + prefix + ":" +
attributes.getLocalName(i) +
"='" + attributeValue + "'");
// write an xmlns declaration if necessary
if (prefix != "xml" && !this._isInScope(uri)) {
this._inScopeNS[this._inScopeNS.length - 1].push(uri);
this._buf += " xmlns:" + prefix + "='" + uri + "'";
}
}
}
}
this._buf += ">";
@ -812,7 +895,8 @@ XHTMLHandler.prototype = {
},
endElement: function XH_endElement(uri, localName, qName) {
--this._depth;
this._inScopeNS.pop();
// We need to skip outer divs in Atom. See comment in startElement.
if (this._isAtom && this._depth == 0 && localName == "div")
return;
@ -831,9 +915,13 @@ XHTMLHandler.prototype = {
characters: function XH_characters(data) {
this._buf += xmlEscape(data);
},
startPrefixMapping: function XH_startPrefixMapping() {
startPrefixMapping: function XH_startPrefixMapping(prefix, uri) {
if (prefix && uri == WAIROLE_NS)
this._waiPrefixes[prefix] = WAIROLE_NS;
},
endPrefixMapping: function XH_endPrefixMapping() {
endPrefixMapping: function FP_endPrefixMapping(prefix) {
if (prefix)
delete this._waiPrefixes[prefix];
},
processingInstruction: function XH_processingInstruction() {
},
@ -945,6 +1033,9 @@ function FeedProcessor() {
this._result = null;
this._extensionHandler = null;
this._xhtmlHandler = null;
// http://www.w3.org/WAI/PF/GUI/ uses QNames in content :(
this._waiPrefixes = {};
// The nsIFeedResultListener waiting for the parse results
this.listener = null;
@ -1282,7 +1373,8 @@ FeedProcessor.prototype = {
var type = attributes.getValueFromName("","type");
if (type != null && type.indexOf("xhtml") >= 0) {
this._xhtmlHandler =
new XHTMLHandler(this, (this._result.version == "atom"));
new XHTMLHandler(this, (this._result.version == "atom"),
this._waiPrefixes);
this._reader.contentHandler = this._xhtmlHandler;
return;
}
@ -1354,14 +1446,22 @@ FeedProcessor.prototype = {
characters: function FP_characters(data) {
this._buf += data;
},
// TODO: It would be nice to check new prefixes here, and if they
// don't conflict with the ones we've defined, throw them in a
// dictionary to check.
startPrefixMapping: function FP_startPrefixMapping() {
startPrefixMapping: function FP_startPrefixMapping(prefix, uri) {
// Thanks for QNames in content, W3C
// This will even be a perf hit for every single feed
// http://www.w3.org/WAI/PF/GUI/
if (prefix && uri == WAIROLE_NS)
this._waiPrefixes[prefix] = WAIROLE_NS;
},
endPrefixMapping: function FP_endPrefixMapping() {
endPrefixMapping: function FP_endPrefixMapping(prefix) {
if (prefix)
delete this._waiPrefixes[prefix];
},
processingInstruction: function FP_processingInstruction(target, data) {
if (target == "xml-stylesheet") {
var hrefAttribute = data.match(/href=[\"\'](.*?)[\"\']/);

View File

@ -69,6 +69,7 @@ TestListener.prototype = {
}
ran += 1;
result = null;
}
}

View File

@ -0,0 +1,40 @@
<?xml version="1.0" encoding="iso-8859-1"?>
<!--
Description: atom entry with many funky namespaces
Expect: var content = feed.items.queryElementAt(0, Components.interfaces.nsIFeedEntry).content;((content.text.indexOf("h2 aaa:checked") > -1) && (content.text.indexOf("h4 aaa:checked") > -1) && (content.text.indexOf("h6 xml:base") > -1));
-->
<feed xmlns="http://www.w3.org/2005/Atom"
xmlns:p="http://www.w3.org/2005/07/aaa"
xmlns:r="http://www.w3.org/TR/xhtml2">
<id>tag:example.com,2006:/atom/conformance/linktest/</id>
<title>Atom Link Tests</title>
<updated>2005-06-18T6:23:00Z</updated>
<link href="http://www.example.org" />
<entry xml:base="http://www.example.org">
<id>tag:example.org,2006:/linkreltest/1</id>
<title>Does your reader support xml:base properly? </title>
<updated>2006-06-23T12:12:12Z</updated>
<link href="foo"/>
<content type="xhtml">
<div
xmlns:aaa="http://www.w3.org/2005/07/aaa"
xmlns="http://www.w3.org/1999/xhtml">
<p xmlns:foo="http://www.w3.org/2005/01/wai-rdf/GUIRoleTaxonomy#"
p:checked="true"
aaa:iconed="true" p:disabled="true">
<b r:role="asdf" aaa:iconed="true" p:disabled="true">hmm</b>
<i foo:role="asdf">hmm</i>
<h4 aaa:checked="true"></h4>
</p>
<p xmlns:foo="http://www.w3.org/2005/07/aaa">
<h2 foo:checked="true">hmm</h2>
<h6 xml:base="http://www.google.com">hmm</h6>
</p>
</div>
</content>
</entry>
</feed>

View File

@ -0,0 +1,49 @@
<?xml version="1.0" encoding="iso-8859-1"?>
<!--
Description: atom entry with many funky namespaces
Expect: var content = feed.items.queryElementAt(0, Components.interfaces.nsIFeedEntry).content; ((content.text.indexOf("xhtml2:role='wwwwwww") > -1) && (content.text.indexOf("xmlns:wwwwwww='http://www.w3.org/2005/01/wai-rdf/GUIRoleTaxonomy#'") > -1) && (content.text.indexOf("xmlns:xhtml20='http://www.w3.org/2005/01/wai-rdf/GUIRoleTaxonomy#'") > -1));
-->
<feed xmlns="http://www.w3.org/2005/Atom"
xmlns:p="http://www.w3.org/2005/07/aaa"
xmlns:r="http://www.w3.org/TR/xhtml2"
xmlns:wwwwwww="http://www.w3.org/2005/01/wai-rdf/GUIRoleTaxonomy#">
<id>tag:example.com,2006:/atom/conformance/linktest/</id>
<title>Atom Link Tests</title>
<updated>2005-06-18T6:23:00Z</updated>
<link href="http://www.example.org" />
<entry xml:base="http://www.example.org">
<id>tag:example.org,2006:/linkreltest/1</id>
<title>Does your reader support xml:base properly? </title>
<updated>2006-06-23T12:12:12Z</updated>
<link href="foo"/>
<content type="xhtml">
<div
xmlns:aaa="http://www.w3.org/2005/07/aaa"
xmlns="http://www.w3.org/1999/xhtml">
<p xmlns:foo="http://www.w3.org/2005/01/wai-rdf/GUIRoleTaxonomy#"
p:checked="true"
aaa:iconed="true" p:disabled="true">
<b r:role="asdf" aaa:iconed="true" p:disabled="true">hmm</b>
<i foo:role="asdf">hmm</i>
<h4 aaa:checked="true"></h4>
</p>
<p xmlns:foo="http://www.w3.org/2005/07/aaa">
<h2 foo:checked="true">hmm</h2>
<h6 xml:base="http://www.google.com">hmm</h6>
<h3 r:role="wwwwwww:checkboxtristate">hmm</h3>
<!-- Really abusive namespace clash -->
<!-- Don't mess with this test case -->
<div xmlns:xhtml2="http://www.w3.org/2005/01/wai-rdf/GUIRoleTaxonomy#">
<p r:role="xhtml2:checkboxtristate">hmm</p>
</div>
</p>
</div>
</content>
</entry>
</feed>