Bug 348447. feed sanitizer whitelist accessibility and i18n.

2024-11-26 06:11:37 +00:00 · 2006-11-08 18:20:55 +00:00 · 2006-11-08 18:20:55 +00:00 · 8372e94e34
commit 8372e94e34
parent e71bbe8d57
7 changed files with 223 additions and 12 deletions
--- a/content/base/src/nsContentSink.h
+++ b/content/base/src/nsContentSink.h
@ -127,7 +127,11 @@ protected:
  nsCString mRef; // ScrollTo #ref
 };

+
+//
 // these two lists are used by the sanitizing fragment serializers
+// Thanks to Mark Pilgrim and Sam Ruby for the initial whitelist
+//
 static nsIAtom** const kDefaultAllowedTags [] = {
  &nsHTMLAtoms::a,
  &nsHTMLAtoms::abbr,
@ -135,6 +139,7 @@ static nsIAtom** const kDefaultAllowedTags [] = {
  &nsHTMLAtoms::address,
  &nsHTMLAtoms::area,
  &nsHTMLAtoms::b,
+  &nsHTMLAtoms::bdo,
  &nsHTMLAtoms::big,
  &nsHTMLAtoms::blockquote,
  &nsHTMLAtoms::br,
@ -171,8 +176,10 @@ static nsIAtom** const kDefaultAllowedTags [] = {
  &nsHTMLAtoms::label,
  &nsHTMLAtoms::legend,
  &nsHTMLAtoms::li,
+  &nsHTMLAtoms::listing,
  &nsHTMLAtoms::map,
  &nsHTMLAtoms::menu,
+  &nsHTMLAtoms::nobr,
  &nsHTMLAtoms::ol,
  &nsHTMLAtoms::optgroup,
  &nsHTMLAtoms::option,
@ -198,17 +205,22 @@ static nsIAtom** const kDefaultAllowedTags [] = {
  &nsHTMLAtoms::tr,
  &nsHTMLAtoms::tt,
  &nsHTMLAtoms::u,
-  &nsHTMLAtoms::ul
+  &nsHTMLAtoms::ul,
+  &nsHTMLAtoms::var
 };

 static nsIAtom** const kDefaultAllowedAttributes [] = {
+  &nsHTMLAtoms::abbr,
  &nsHTMLAtoms::accept,
  &nsHTMLAtoms::acceptcharset,
  &nsHTMLAtoms::accesskey,
  &nsHTMLAtoms::action,
  &nsHTMLAtoms::align,
  &nsHTMLAtoms::alt,
+  &nsHTMLAtoms::autocomplete,
  &nsHTMLAtoms::axis,
+  &nsHTMLAtoms::background,
+  &nsHTMLAtoms::bgcolor,
  &nsHTMLAtoms::border,
  &nsHTMLAtoms::cellpadding,
  &nsHTMLAtoms::cellspacing,
@ -248,10 +260,12 @@ static nsIAtom** const kDefaultAllowedAttributes [] = {
  &nsHTMLAtoms::nohref,
  &nsHTMLAtoms::noshade,
  &nsHTMLAtoms::nowrap,
+  &nsHTMLAtoms::pointSize,
  &nsHTMLAtoms::prompt,
  &nsHTMLAtoms::readonly,
  &nsHTMLAtoms::rel,
  &nsHTMLAtoms::rev,
+  &nsHTMLAtoms::role,
  &nsHTMLAtoms::rows,
  &nsHTMLAtoms::rowspan,
  &nsHTMLAtoms::rules,
@ -283,6 +297,7 @@ PRBool IsAttrURI(nsIAtom *aName)
          aName == nsHTMLAtoms::src ||
          aName == nsHTMLAtoms::longdesc ||
          aName == nsHTMLAtoms::usemap ||
-          aName == nsHTMLAtoms::cite);
+          aName == nsHTMLAtoms::cite ||
+          aName == nsHTMLAtoms::background);
 }
 #endif // _nsContentSink_h_
--- a/content/base/src/nsGkAtomList.h
+++ b/content/base/src/nsGkAtomList.h
@ -440,6 +440,7 @@ GK_ATOM(listcols, "listcols")
 GK_ATOM(listener, "listener")
 GK_ATOM(listhead, "listhead")
 GK_ATOM(listheader, "listheader")
+GK_ATOM(listing, "listing")
 GK_ATOM(listitem, "listitem")
 GK_ATOM(listrows, "listrows")
 GK_ATOM(load, "load")
@ -500,6 +501,7 @@ GK_ATOM(never, "never")
 GK_ATOM(_new, "new")
 GK_ATOM(nextBidi, "NextBidi")
 GK_ATOM(no, "no")
+GK_ATOM(nobr, "nobr")
 GK_ATOM(node, "node")
 GK_ATOM(noembed, "noembed")
 GK_ATOM(noframes, "noframes")
@ -677,6 +679,7 @@ GK_ATOM(reverse, "reverse")
 GK_ATOM(right, "right")
 GK_ATOM(rightmargin, "rightmargin")
 GK_ATOM(rightpadding, "rightpadding")
+GK_ATOM(role, "role")
 GK_ATOM(round, "round")
 GK_ATOM(row, "row")
 GK_ATOM(rows, "rows")
--- a/content/xml/document/src/nsXMLFragmentContentSink.cpp
+++ b/content/xml/document/src/nsXMLFragmentContentSink.cpp
@ -663,9 +663,12 @@ nsXHTMLParanoidFragmentSink::HandleStartElement(const PRUnichar *aName,
    NS_ENSURE_SUCCESS(rv, rv);
    
    name = nodeInfo->NameAtom();
-    // Add if it's xmlns, xml:, or on the HTML whitelist
+    // Add if it's xmlns, xml:, aaa:, xhtml2:role, or on the HTML whitelist
    if (nameSpaceID == kNameSpaceID_XMLNS ||
        nameSpaceID == kNameSpaceID_XML ||
+        nameSpaceID == kNameSpaceID_WAIProperties ||
+        (nameSpaceID == kNameSpaceID_XHTML2_Unofficial &&
+         name == nsHTMLAtoms::role) ||
        sAllowedAttributes && sAllowedAttributes->GetEntry(name)) {
      allowedAttrs.AppendElement(aAtts[i]);
      allowedAttrs.AppendElement(aAtts[i + 1]);
--- a/toolkit/components/feeds/src/FeedProcessor.js
+++ b/toolkit/components/feeds/src/FeedProcessor.js
@ -56,6 +56,7 @@ var gUnescapeHTML = Cc[UNESCAPE_CONTRACTID].

 const XMLNS = "http://www.w3.org/XML/1998/namespace";
 const RSS090NS = "http://my.netscape.com/rdf/simple/0.9/";
+const WAIROLE_NS = "http://www.w3.org/2005/01/wai-rdf/GUIRoleTaxonomy#";

 /***** Some general utils *****/
 function strToURI(link, base) {
@ -284,6 +285,16 @@ var gNamespaces = {
  "http://www.w3.org/XML/1998/namespace":"xml"
 }

+// Maps each namespace URI permitted on attributes of inline XHTML content
+// to the fixed prefix XHTMLHandler uses when it re-serializes the attribute.
+var gAllowedXHTMLNamespaces = {
+  "http://www.w3.org/XML/1998/namespace":"xml",
+  "http://www.w3.org/TR/xhtml2":"xhtml2",
+  "http://www.w3.org/2005/07/aaa":"aaa",
+  // Attributes explicitly qualified with the XHTML namespace are written
+  // with a prefix so they cannot collide with unqualified attributes.
+  "http://www.w3.org/1999/xhtml":"xhtml"
+}

 function FeedResult() {}
 FeedResult.prototype = {
@ -505,6 +516,7 @@ TextConstruct.prototype = {
      docFragment.appendChild(node);
      return docFragment;
    }
+    LOG("entry text: " + this.text + "\n");
    var isXML;
    if (this.type == "xhtml")
      isXML = true
@ -774,22 +786,39 @@ function dateParse(dateString) {
 const XHTML_NS = "http://www.w3.org/1999/xhtml";

 // The XHTMLHandler handles inline XHTML found in things like atom:summary
-function XHTMLHandler(processor, isAtom) {
+function XHTMLHandler(processor, isAtom, waiPrefixes) {
  this._buf = "";
  this._processor = processor;
  this._depth = 0;
  this._isAtom = isAtom;
+  // stack: one list per open element of the namespace URIs declared on it
+  this._inScopeNS = [];
+  this._waiPrefixes = waiPrefixes; // prefix -> WAIROLE_NS map shared with the FeedProcessor
 }

 // The fidelity can be improved here, to allow handling of stuff like
 // SVG and MathML. XXX
 XHTMLHandler.prototype = {
+
+  // Returns true if an xmlns declaration for namespace URI |ns| was already
+  // written on the current element or on one of its still-open ancestors.
+  _isInScope: function XH__isInScope(ns) {
+    for (var i = 0; i < this._inScopeNS.length; ++i) {
+      for (var j = 0; j < this._inScopeNS[i].length; ++j) {
+        if (this._inScopeNS[i][j] == ns)
+          return true;
+      }
+    }
+    return false;
+  },
+
  startDocument: function XH_startDocument() {
  },
  endDocument: function XH_endDocument() {
  },
  startElement: function XH_startElement(uri, localName, qName, attributes) {
    ++this._depth;
+    this._inScopeNS.push([]);

    // RFC4287 requires XHTML to be wrapped in a div that is *not* part of 
    // the content. This prevents people from screwing up namespaces, but
@ -800,11 +829,65 @@ XHTMLHandler.prototype = {
    // If it's an XHTML element, record it. Otherwise, it's ignored.
    if (uri == XHTML_NS) {
      this._buf += "<" + localName;
+      var attrURI;
      for (var i=0; i < attributes.length; ++i) {
+        attrURI = attributes.getURI(i);
        // XHTML attributes aren't in a namespace
-        if (attributes.getURI(i) == "") { 
+        if (attrURI == "") {
          this._buf += (" " + attributes.getLocalName(i) + "='" +
                        xmlEscape(attributes.getValue(i)) + "'");
+        } else {
+          // write a small set of allowed attribute namespaces
+          var prefix = gAllowedXHTMLNamespaces[attrURI];
+          if (typeof prefix == "string") { // ignore inherited Object members
+            // The attribute value we'll attempt to write
+            var attributeValue = xmlEscape(attributes.getValue(i));
+
+            // role values can be QNames; find the WAI prefix they use, if any
+            var rolePrefix = "";
+            if (attributes.getLocalName(i) == "role") {
+              for (var aPrefix in this._waiPrefixes) {
+                if (attributeValue.indexOf(aPrefix + ":") == 0) {
+                  // The value's prefix is declared below via xmlns, so it
+                  // must not clash with one of the fixed prefixes we
+                  // emit for allowed attribute namespaces. If it does,
+                  // rename it (suffixing the attribute index).
+                  var isCollision = false;
+                  for (var uriKey in gAllowedXHTMLNamespaces) {
+                    if (gAllowedXHTMLNamespaces[uriKey] == aPrefix)
+                      isCollision = true;
+                  }
+
+                  if (isCollision) {
+                    rolePrefix = aPrefix + i;
+                    attributeValue = 
+                      rolePrefix + ":" + 
+                      attributeValue.substring(aPrefix.length + 1);
+                  } else {
+                    rolePrefix = aPrefix;
+                  }
+
+                  break;
+                }
+              }
+
+              if (rolePrefix)
+                this._buf += (" xmlns:" + rolePrefix + 
+                              "='" + WAIROLE_NS + "'");
+            }
+
+            // it's an allowed attribute NS.
+            // write the attribute
+            this._buf += (" " + prefix + ":" + 
+                          attributes.getLocalName(i) + 
+                          "='" + attributeValue + "'");
+
+            // write an xmlns declaration if necessary
+            if (prefix != "xml" && !this._isInScope(attrURI)) {
+              this._inScopeNS[this._inScopeNS.length - 1].push(attrURI);
+              this._buf += " xmlns:" + prefix + "='" + attrURI + "'";
+            }
+          }
        }
      }
      this._buf += ">";
@ -812,7 +895,8 @@ XHTMLHandler.prototype = {
  },
  endElement: function XH_endElement(uri, localName, qName) {
    --this._depth;
-    
+    this._inScopeNS.pop();
+
    // We need to skip outer divs in Atom. See comment in startElement.
    if (this._isAtom && this._depth == 0 && localName == "div")
      return;
@ -831,9 +915,13 @@ XHTMLHandler.prototype = {
  characters: function XH_characters(data) {
    this._buf += xmlEscape(data);
  },
-  startPrefixMapping: function XH_startPrefixMapping() {
+  startPrefixMapping: function XH_startPrefixMapping(prefix, uri) {
+    if (prefix && uri == WAIROLE_NS) // remember prefixes bound to the WAI role NS
+      this._waiPrefixes[prefix] = WAIROLE_NS;
  },
-  endPrefixMapping: function XH_endPrefixMapping() {
+  endPrefixMapping: function XH_endPrefixMapping(prefix) {
+    if (prefix) // the mapping went out of scope; forget the prefix
+      delete this._waiPrefixes[prefix];
  },
  processingInstruction: function XH_processingInstruction() {
  }, 
@ -945,6 +1033,9 @@ function FeedProcessor() {
  this._result = null;
  this._extensionHandler = null;
  this._xhtmlHandler = null;
+  
+  // http://www.w3.org/WAI/PF/GUI/ uses QNames in content :(
+  this._waiPrefixes = {};

  // The nsIFeedResultListener waiting for the parse results
  this.listener = null;
@ -1282,7 +1373,8 @@ FeedProcessor.prototype = {
      var type = attributes.getValueFromName("","type");
      if (type != null && type.indexOf("xhtml") >= 0) {
        this._xhtmlHandler = 
-          new XHTMLHandler(this, (this._result.version == "atom"));
+          new XHTMLHandler(this, (this._result.version == "atom"), 
+                           this._waiPrefixes);
        this._reader.contentHandler = this._xhtmlHandler;
        return;
      }
@ -1354,14 +1446,22 @@ FeedProcessor.prototype = {
  characters: function FP_characters(data) {
    this._buf += data;
  },
-
  // TODO: It would be nice to check new prefixes here, and if they
  // don't conflict with the ones we've defined, throw them in a 
  // dictionary to check.
-  startPrefixMapping: function FP_startPrefixMapping() {
+  startPrefixMapping: function FP_startPrefixMapping(prefix, uri) {
+    // WAI role values are QNames in attribute content, so remember every
+    // prefix bound to the role taxonomy namespace; XHTMLHandler needs them.
+    // See http://www.w3.org/WAI/PF/GUI/ (note: this runs for every feed).
+    if (prefix && uri == WAIROLE_NS) 
+      this._waiPrefixes[prefix] = WAIROLE_NS;
  },
-  endPrefixMapping: function FP_endPrefixMapping() {
+  
+  endPrefixMapping: function FP_endPrefixMapping(prefix) {
+    if (prefix)
+      delete this._waiPrefixes[prefix];
  },
+  
  processingInstruction: function FP_processingInstruction(target, data) {
    if (target == "xml-stylesheet") {
      var hrefAttribute = data.match(/href=[\"\'](.*?)[\"\']/);
--- a/toolkit/components/feeds/test/test.js
+++ b/toolkit/components/feeds/test/test.js
@ -69,6 +69,7 @@ TestListener.prototype = {
    }

    ran += 1;
+    result = null;
  }
 }

--- a/toolkit/components/feeds/test/xml/rfc4287/feed_accessible.xml
+++ b/toolkit/components/feeds/test/xml/rfc4287/feed_accessible.xml
@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!--
+
+Description: atom entry with many funky namespaces
+Expect: var content = feed.items.queryElementAt(0, Components.interfaces.nsIFeedEntry).content;((content.text.indexOf("h2 aaa:checked") > -1) && (content.text.indexOf("h4 aaa:checked") > -1) && (content.text.indexOf("h6 xml:base") > -1));
+
+-->
+<feed xmlns="http://www.w3.org/2005/Atom"
+      xmlns:p="http://www.w3.org/2005/07/aaa"
+      xmlns:r="http://www.w3.org/TR/xhtml2">
+  <id>tag:example.com,2006:/atom/conformance/linktest/</id>
+  <title>Atom Link Tests</title>
+  <updated>2005-06-18T6:23:00Z</updated>
+  <link href="http://www.example.org" />
+  
+  <entry xml:base="http://www.example.org">
+    <id>tag:example.org,2006:/linkreltest/1</id>
+    <title>Does your reader support xml:base properly? </title>
+    <updated>2006-06-23T12:12:12Z</updated>
+    <link href="foo"/>
+    <content type="xhtml">
+      <div 
+	xmlns:aaa="http://www.w3.org/2005/07/aaa"
+        xmlns="http://www.w3.org/1999/xhtml">
+	<p  xmlns:foo="http://www.w3.org/2005/01/wai-rdf/GUIRoleTaxonomy#"
+            p:checked="true"
+            aaa:iconed="true" p:disabled="true">
+	  <b r:role="asdf" aaa:iconed="true" p:disabled="true">hmm</b>
+	  <i foo:role="asdf">hmm</i>
+	  <h4 aaa:checked="true"></h4>
+        </p>
+	<p  xmlns:foo="http://www.w3.org/2005/07/aaa">
+	  <h2 foo:checked="true">hmm</h2>
+	  <h6 xml:base="http://www.google.com">hmm</h6>
+	</p>
+      </div>
+    </content>
+  </entry>
+  
+</feed>
--- a/toolkit/components/feeds/test/xml/rfc4287/feed_roleatt.xml
+++ b/toolkit/components/feeds/test/xml/rfc4287/feed_roleatt.xml
@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!--
+
+Description: atom entry with many funky namespaces
+Expect: var content = feed.items.queryElementAt(0, Components.interfaces.nsIFeedEntry).content; ((content.text.indexOf("xhtml2:role='wwwwwww") > -1) && (content.text.indexOf("xmlns:wwwwwww='http://www.w3.org/2005/01/wai-rdf/GUIRoleTaxonomy#'") > -1) && (content.text.indexOf("xmlns:xhtml20='http://www.w3.org/2005/01/wai-rdf/GUIRoleTaxonomy#'") > -1));
+
+-->
+<feed xmlns="http://www.w3.org/2005/Atom"
+      xmlns:p="http://www.w3.org/2005/07/aaa"
+      xmlns:r="http://www.w3.org/TR/xhtml2"
+      xmlns:wwwwwww="http://www.w3.org/2005/01/wai-rdf/GUIRoleTaxonomy#">
+  <id>tag:example.com,2006:/atom/conformance/linktest/</id>
+  <title>Atom Link Tests</title>
+  <updated>2005-06-18T6:23:00Z</updated>
+  <link href="http://www.example.org" />
+  
+  <entry xml:base="http://www.example.org">
+    <id>tag:example.org,2006:/linkreltest/1</id>
+    <title>Does your reader support xml:base properly? </title>
+    <updated>2006-06-23T12:12:12Z</updated>
+    <link href="foo"/>
+    <content type="xhtml">
+      <div 
+	xmlns:aaa="http://www.w3.org/2005/07/aaa"
+        xmlns="http://www.w3.org/1999/xhtml">
+	<p  xmlns:foo="http://www.w3.org/2005/01/wai-rdf/GUIRoleTaxonomy#"
+            p:checked="true"
+            aaa:iconed="true" p:disabled="true">
+	  <b r:role="asdf" aaa:iconed="true" p:disabled="true">hmm</b>
+	  <i foo:role="asdf">hmm</i>
+	  <h4 aaa:checked="true"></h4>
+        </p>
+	<p  xmlns:foo="http://www.w3.org/2005/07/aaa">
+	  <h2 foo:checked="true">hmm</h2>
+	  <h6 xml:base="http://www.google.com">hmm</h6>
+	  <h3 r:role="wwwwwww:checkboxtristate">hmm</h3>
+
+	  <!-- Really abusive namespace clash -->
+	  <!-- Don't mess with this test case -->
+	  <div xmlns:xhtml2="http://www.w3.org/2005/01/wai-rdf/GUIRoleTaxonomy#">
+	    <p r:role="xhtml2:checkboxtristate">hmm</p>
+	  </div>
+
+	</p>
+      </div>
+    </content>
+  </entry>
+  
+</feed>