Bug 412457 - should unescape hostname first, then perform IDNA r=mcmanus

2024-10-08 19:04:45 +00:00 · 2015-11-18 15:25:27 +01:00 · 2015-11-18 15:25:27 +01:00 · 67b179e6f9
commit 67b179e6f9
parent 592eed9711
6 changed files with 100 additions and 34 deletions
--- a/netwerk/base/nsStandardURL.cpp
+++ b/netwerk/base/nsStandardURL.cpp
@ -382,7 +382,7 @@ nsStandardURL::InvalidateCache(bool invalidateCachedFile)
    mSpecEncoding = eEncoding_Unknown;
 }

-bool
+nsresult
 nsStandardURL::NormalizeIDN(const nsCSubstring &host, nsCString &result)
 {
    // If host is ACE, then convert to UTF-8.  Else, if host is already UTF-8,
@ -406,16 +406,16 @@ nsStandardURL::NormalizeIDN(const nsCSubstring &host, nsCString &result)
        }
    }

-    if (gIDN &&
-        NS_SUCCEEDED(gIDN->ConvertToDisplayIDN(host, &isASCII, result))) {
-        if (!isASCII)
+    result.Truncate();
+    nsresult rv = NS_ERROR_UNEXPECTED;
+    if (gIDN) {
+        rv = gIDN->ConvertToDisplayIDN(host, &isASCII, result);
+        if (NS_SUCCEEDED(rv) && !isASCII) {
          mHostEncoding = eEncoding_UTF8;
-
-        return true;
+        }
    }

-    result.Truncate();
-    return false;
+    return rv;
 }

 bool
@ -445,7 +445,9 @@ nsStandardURL::ValidIPv6orHostname(const char *host, uint32_t length)

    const char *end = host + length;
    if (end != net_FindCharInSet(host, end, "\t\n\v\f\r #/:?@[\\]")) {
-        // % is allowed because we don't do hostname percent decoding yet.
+        // We still allow % because it appears in the IDs of add-ons.
+        // Percent-encoded ASCII characters that are not allowed in a
+        // hostname are left encoded, so they will be parsed just fine.
        return false;
    }

@ -573,19 +575,22 @@ nsStandardURL::BuildNormalizedSpec(const char *spec)
    mHostEncoding = eEncoding_ASCII;
    // Note that we don't disallow URLs without a host - file:, etc
    if (mHost.mLen > 0) {
-        const nsCSubstring& tempHost =
-            Substring(spec + mHost.mPos, spec + mHost.mPos + mHost.mLen);
+        nsAutoCString tempHost;
+        NS_UnescapeURL(spec + mHost.mPos, mHost.mLen, esc_AlwaysCopy | esc_Host, tempHost);
        if (tempHost.Contains('\0'))
            return NS_ERROR_MALFORMED_URI;  // null embedded in hostname
        if (tempHost.Contains(' '))
            return NS_ERROR_MALFORMED_URI;  // don't allow spaces in the hostname
-        if ((useEncHost = NormalizeIDN(tempHost, encHost)))
-            approxLen += encHost.Length();
-        else
-            approxLen += mHost.mLen;
+        nsresult rv = NormalizeIDN(tempHost, encHost);
+        if (NS_FAILED(rv)) {
+            return rv;
+        }

-        if ((useEncHost && !ValidIPv6orHostname(encHost.BeginReading(), encHost.Length())) ||
-            (!useEncHost && !ValidIPv6orHostname(tempHost.BeginReading(), tempHost.Length()))) {
+        // NormalizeIDN always copies, if the call was successful.
+        useEncHost = true;
+        approxLen += encHost.Length();
+
+        if (!ValidIPv6orHostname(encHost.BeginReading(), encHost.Length())) {
            return NS_ERROR_MALFORMED_URI;
        }
    }
@ -1589,7 +1594,11 @@ nsStandardURL::SetHost(const nsACString &input)

    FindHostLimit(start, end);

-    const nsCString flat(Substring(start, end));
+    const nsCString unescapedHost(Substring(start, end));
+    // Do percent decoding on the input.
+    nsAutoCString flat;
+    NS_UnescapeURL(unescapedHost.BeginReading(), unescapedHost.Length(),
+                   esc_AlwaysCopy | esc_Host, flat);
    const char *host = flat.get();

    LOG(("nsStandardURL::SetHost [host=%s]\n", host));
@ -1620,12 +1629,14 @@ nsStandardURL::SetHost(const nsACString &input)

    uint32_t len;
    nsAutoCString hostBuf;
-    if (NormalizeIDN(flat, hostBuf)) {
-        host = hostBuf.get();
-        len = hostBuf.Length();
+    nsresult rv = NormalizeIDN(flat, hostBuf);
+    if (NS_FAILED(rv)) {
+        return rv;
    }
-    else
-        len = flat.Length();
+
+    // NormalizeIDN always copies if the call was successful
+    host = hostBuf.get();
+    len = hostBuf.Length();

    if (!ValidIPv6orHostname(host, len)) {
        return NS_ERROR_MALFORMED_URI;
--- a/netwerk/base/nsStandardURL.h
+++ b/netwerk/base/nsStandardURL.h
@ -177,7 +177,7 @@ private:
    void     InvalidateCache(bool invalidateCachedFile = true);

    bool     ValidIPv6orHostname(const char *host, uint32_t aLen);
-    bool     NormalizeIDN(const nsCSubstring &host, nsCString &result);
+    nsresult NormalizeIDN(const nsCSubstring &host, nsCString &result);
    void     CoalescePath(netCoalesceFlags coalesceFlag, char *path);

    uint32_t AppendSegmentToBuf(char *, uint32_t, const char *, URLSegment &, const nsCString *esc=nullptr, bool useEsc = false);
--- a/netwerk/test/unit/test_bug412457.js
+++ b/netwerk/test/unit/test_bug412457.js
@ -0,0 +1,44 @@
+function run_test() {
+  var ios = Cc["@mozilla.org/network/io-service;1"].
+    getService(Ci.nsIIOService);
+
+  // check if hostname is unescaped before applying IDNA
+  var newURI = ios.newURI("http://\u5341%2ecom/", null, null);
+  do_check_eq(newURI.asciiHost, "xn--kkr.com");
+
+  // escaped UTF8
+  newURI.spec = "http://%e5%8d%81.com";
+  do_check_eq(newURI.asciiHost, "xn--kkr.com");
+
+  // After unescaping and attempting to apply IDNA, the hostname must
+  // contain only allowed characters. The lone byte 0x80 is invalid
+  // UTF-8, so IDNA fails, and 0x80 is not allowed in DNS either.
+  Assert.throws(() => { newURI.spec = "http://%80.com"; }, "illegal UTF character");
+
+  // test parsing URL with all possible host terminators
+  newURI.spec = "http://example.com?foo";
+  do_check_eq(newURI.asciiHost, "example.com");
+
+  newURI.spec = "http://example.com#foo";
+  do_check_eq(newURI.asciiHost, "example.com");
+
+  newURI.spec = "http://example.com:80";
+  do_check_eq(newURI.asciiHost, "example.com");
+
+  newURI.spec = "http://example.com/foo";
+  do_check_eq(newURI.asciiHost, "example.com");
+
+  // Characters that are invalid in the host shouldn't be decoded.
+  newURI.spec = "http://example.com%3ffoo";
+  do_check_eq(newURI.asciiHost, "example.com%3ffoo");
+  newURI.spec = "http://example.com%23foo";
+  do_check_eq(newURI.asciiHost, "example.com%23foo");
+  newURI.spec = "http://example.com%3bfoo";
+  do_check_eq(newURI.asciiHost, "example.com%3bfoo");
+  newURI.spec = "http://example.com%3a80";
+  do_check_eq(newURI.asciiHost, "example.com%3a80");
+  newURI.spec = "http://example.com%2ffoo";
+  do_check_eq(newURI.asciiHost, "example.com%2ffoo");
+  newURI.spec = "http://example.com%00";
+  do_check_eq(newURI.asciiHost, "example.com%00");
+}
--- a/netwerk/test/unit/test_standardurl.js
+++ b/netwerk/test/unit/test_standardurl.js
@ -263,6 +263,16 @@ function test_accentEncoding()
  do_check_eq(url.query, "hello=%2C");
 }

+function test_percentDecoding()
+{
+  var url = stringToURL("http://%70%61%73%74%65%62%69%6E.com");
+  do_check_eq(url.spec, "http://pastebin.com/");
+
+  // We shouldn't unescape characters that are not allowed in the hostname.
+  url = stringToURL("http://example.com%0a%23.google.com/");
+  do_check_eq(url.spec, "http://example.com%0a%23.google.com/");
+}
+
 function run_test()
 {
  test_setEmptyPath();
@ -274,4 +284,5 @@ function run_test()
  test_escapeBrackets();
  test_apostropheEncoding();
  test_accentEncoding();
+  test_percentDecoding();
 }
--- a/netwerk/test/unit/xpcshell.ini
+++ b/netwerk/test/unit/xpcshell.ini
@ -339,3 +339,5 @@ firefox-appdir = browser
 [test_bug1195415.js]
 [test_cookie_blacklist.js]
 [test_packaged_app_bug1214079.js]
+[test_bug412457.js]
+
--- a/xpcom/io/nsEscape.cpp
+++ b/xpcom/io/nsEscape.cpp
@ -548,28 +548,26 @@ NS_UnescapeURL(const char* aStr, int32_t aLen, uint32_t aFlags,
  bool ignoreAscii = !!(aFlags & esc_OnlyNonASCII);
  bool writing = !!(aFlags & esc_AlwaysCopy);
  bool skipControl = !!(aFlags & esc_SkipControl);
+  bool skipInvalidHostChar = !!(aFlags & esc_Host);

  const char* last = aStr;
  const char* p = aStr;

  for (int i = 0; i < aLen; ++i, ++p) {
-    //printf("%c [i=%d of aLen=%d]\n", *p, i, aLen);
    if (*p == HEX_ESCAPE && i < aLen - 2) {
-      unsigned char* p1 = (unsigned char*)p + 1;
-      unsigned char* p2 = (unsigned char*)p + 2;
-      if (ISHEX(*p1) && ISHEX(*p2) &&
-          ((*p1 < '8' && !ignoreAscii) || (*p1 >= '8' && !ignoreNonAscii)) &&
+      unsigned char c1 = *((unsigned char*)p + 1);
+      unsigned char c2 = *((unsigned char*)p + 2);
+      unsigned char u = (UNHEX(c1) << 4) + UNHEX(c2);
+      if (ISHEX(c1) && ISHEX(c2) &&
+          (!skipInvalidHostChar || dontNeedEscape(u, aFlags) || c1 >= '8') &&
+          ((c1 < '8' && !ignoreAscii) || (c1 >= '8' && !ignoreNonAscii)) &&
          !(skipControl &&
-            (*p1 < '2' || (*p1 == '7' && (*p2 == 'f' || *p2 == 'F'))))) {
-        //printf("- p1=%c p2=%c\n", *p1, *p2);
+            (c1 < '2' || (c1 == '7' && (c2 == 'f' || c2 == 'F'))))) {
        writing = true;
        if (p > last) {
-          //printf("- p=%p, last=%p\n", p, last);
          aResult.Append(last, p - last);
          last = p;
        }
-        char u = (UNHEX(*p1) << 4) + UNHEX(*p2);
-        //printf("- u=%c\n", u);
        aResult.Append(u);
        i += 2;
        p += 2;