Display URLs containing unassigned Unicode code points in punycode. Bug 479413, r=jduell, sr=jst

This commit is contained in:
Simon Montagu 2009-03-29 00:49:31 -07:00
parent 53571b6974
commit e4d0ea112b
3 changed files with 132 additions and 25 deletions

View File

@ -1,4 +1,4 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
@ -159,6 +159,11 @@ nsIDNService::~nsIDNService()
/* ACString ConvertUTF8toACE (in AUTF8String input); */
NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString & input, nsACString & ace)
{
// Public interface entry point: forwards to UTF8toACE() with
// allowUnassigned = PR_TRUE, i.e. without requesting the
// unassigned-code-point check.
return UTF8toACE(input, ace, PR_TRUE);
}
nsresult nsIDNService::UTF8toACE(const nsACString & input, nsACString & ace, PRBool allowUnassigned)
{
nsresult rv;
NS_ConvertUTF8toUTF16 ustr(input);
@ -181,7 +186,8 @@ NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString & input, nsACStrin
while (start != end) {
len++;
if (*start++ == (PRUnichar)'.') {
rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf);
rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf,
allowUnassigned);
NS_ENSURE_SUCCESS(rv, rv);
ace.Append(encodedBuf);
@ -196,7 +202,8 @@ NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString & input, nsACStrin
ace.AppendLiteral("mltbd.");
// encode the last node if non ASCII
if (len) {
rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf);
rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf,
allowUnassigned);
NS_ENSURE_SUCCESS(rv, rv);
ace.Append(encodedBuf);
@ -207,6 +214,12 @@ NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString & input, nsACStrin
/* AUTF8String convertACEtoUTF8(in ACString input); */
NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString & input, nsACString & _retval)
{
// Public interface entry point: forwards to ACEtoUTF8() with
// allowUnassigned = PR_TRUE, i.e. without requesting the
// unassigned-code-point check.
return ACEtoUTF8(input, _retval, PR_TRUE);
}
nsresult nsIDNService::ACEtoUTF8(const nsACString & input, nsACString & _retval,
PRBool allowUnassigned)
{
// RFC 3490 - 4.2 ToUnicode
// ToUnicode never fails. If any step fails, then the original input
@ -229,7 +242,8 @@ NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString & input, nsACStrin
while (start != end) {
len++;
if (*start++ == '.') {
if (NS_FAILED(decodeACE(Substring(input, offset, len - 1), decodedBuf))) {
if (NS_FAILED(decodeACE(Substring(input, offset, len - 1), decodedBuf,
allowUnassigned))) {
_retval.Assign(input);
return NS_OK;
}
@ -242,7 +256,8 @@ NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString & input, nsACStrin
}
// decode the last node
if (len) {
if (NS_FAILED(decodeACE(Substring(input, offset, len), decodedBuf)))
if (NS_FAILED(decodeACE(Substring(input, offset, len), decodedBuf,
allowUnassigned)))
_retval.Assign(input);
else
_retval.Append(decodedBuf);
@ -291,7 +306,7 @@ NS_IMETHODIMP nsIDNService::Normalize(const nsACString & input, nsACString & out
while (start != end) {
len++;
if (*start++ == PRUnichar('.')) {
rv = stringPrep(Substring(inUTF16, offset, len - 1), outLabel);
rv = stringPrep(Substring(inUTF16, offset, len - 1), outLabel, PR_TRUE);
NS_ENSURE_SUCCESS(rv, rv);
outUTF16.Append(outLabel);
@ -301,7 +316,7 @@ NS_IMETHODIMP nsIDNService::Normalize(const nsACString & input, nsACString & out
}
}
if (len) {
rv = stringPrep(Substring(inUTF16, offset, len), outLabel);
rv = stringPrep(Substring(inUTF16, offset, len), outLabel, PR_TRUE);
NS_ENSURE_SUCCESS(rv, rv);
outUTF16.Append(outLabel);
@ -330,9 +345,9 @@ NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString & input, PRBool
IsACE(_retval, &isACE);
if (isACE && !mShowPunycode && isInWhitelist(_retval)) {
// ConvertACEtoUTF8() can't fail, but might return the original ACE string
// ACEtoUTF8() can't fail, but might return the original ACE string
nsCAutoString temp(_retval);
ConvertACEtoUTF8(temp, _retval);
ACEtoUTF8(temp, _retval, PR_FALSE);
*_isASCII = IsASCII(_retval);
} else {
*_isASCII = PR_TRUE;
@ -488,7 +503,12 @@ static nsresult encodeToRACE(const char* prefix, const nsAString& in, nsACString
// for bidirectional strings. If the string does not satisfy the requirements
// for bidirectional strings, return an error. This is described in section 6.
//
nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out)
// 5) Check unassigned code points -- If allowUnassigned is false, check for
// any unassigned Unicode points and if any are found return an error.
// This is described in section 7.
//
nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out,
PRBool allowUnassigned)
{
if (!mNamePrepHandle || !mNormalizer)
return NS_ERROR_FAILURE;
@ -530,6 +550,14 @@ nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out)
if (idn_err != idn_success || found)
return NS_ERROR_FAILURE;
if (!allowUnassigned) {
// check unassigned code points
idn_err = idn_nameprep_isunassigned(mNamePrepHandle,
(const PRUint32 *) ucs4Buf, &found);
if (idn_err != idn_success || found)
return NS_ERROR_FAILURE;
}
// set the result string
out.Assign(normlizedStr);
@ -546,7 +574,8 @@ nsresult nsIDNService::encodeToACE(const nsAString& in, nsACString& out)
return punycode(mACEPrefix, in, out);
}
nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out)
nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
PRBool allowUnassigned)
{
nsresult rv = NS_OK;
@ -561,7 +590,7 @@ nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out)
LossyCopyUTF16toASCII(in, out);
else {
nsAutoString strPrep;
rv = stringPrep(in, strPrep);
rv = stringPrep(in, strPrep, allowUnassigned);
if (NS_SUCCEEDED(rv)) {
if (IsASCII(strPrep))
LossyCopyUTF16toASCII(strPrep, out);
@ -606,7 +635,8 @@ void nsIDNService::normalizeFullStops(nsAString& s)
}
}
nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out)
nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out,
PRBool allowUnassigned)
{
PRBool isAce;
IsACE(in, &isAce);
@ -642,7 +672,7 @@ nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out)
// Validation: encode back to ACE and compare the strings
nsCAutoString ace;
nsresult rv = ConvertUTF8toACE(out, ace);
nsresult rv = UTF8toACE(out, ace, allowUnassigned);
NS_ENSURE_SUCCESS(rv, rv);
if (!ace.Equals(in, nsCaseInsensitiveCStringComparator()))
@ -654,8 +684,14 @@ nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out)
PRBool nsIDNService::isInWhitelist(const nsACString &host)
{
if (mIDNWhitelistPrefBranch) {
// truncate trailing dots first
nsCAutoString tld(host);
// make sure the host is ACE for lookup and check that there are no
// unassigned codepoints
if (!IsASCII(tld) && NS_FAILED(UTF8toACE(tld, tld, PR_FALSE))) {
return PR_FALSE;
}
// truncate trailing dots first
tld.Trim(".");
PRInt32 pos = tld.RFind(".");
if (pos == kNotFound)
@ -663,11 +699,6 @@ PRBool nsIDNService::isInWhitelist(const nsACString &host)
tld.Cut(0, pos + 1);
// make sure the TLD is ACE for lookup.
if (!IsASCII(tld) &&
NS_FAILED(ConvertUTF8toACE(tld, tld)))
return PR_FALSE;
PRBool safe;
if (NS_SUCCEEDED(mIDNWhitelistPrefBranch->GetBoolPref(tld.get(), &safe)))
return safe;

View File

@ -1,4 +1,4 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
@ -71,14 +71,21 @@ public:
private:
void normalizeFullStops(nsAString& s);
nsresult stringPrepAndACE(const nsAString& in, nsACString& out);
nsresult stringPrepAndACE(const nsAString& in, nsACString& out,
PRBool allowUnassigned);
nsresult encodeToACE(const nsAString& in, nsACString& out);
nsresult stringPrep(const nsAString& in, nsAString& out);
nsresult decodeACE(const nsACString& in, nsACString& out);
nsresult stringPrep(const nsAString& in, nsAString& out,
PRBool allowUnassigned);
nsresult decodeACE(const nsACString& in, nsACString& out,
PRBool allowUnassigned);
nsresult UTF8toACE(const nsACString& in, nsACString& out,
PRBool allowUnassigned);
nsresult ACEtoUTF8(const nsACString& in, nsACString& out,
PRBool allowUnassigned);
PRBool isInWhitelist(const nsACString &host);
void prefsChanged(nsIPrefBranch *prefBranch, const PRUnichar *pref);
PRBool mMultilingualTestBed; // if true generates extra node for mulitlingual testbed
PRBool mMultilingualTestBed; // if true generates extra node for multilingual testbed
idn_nameprep_t mNamePrepHandle;
nsCOMPtr<nsIUnicodeNormalizer> mNormalizer;
char mACEPrefix[kACEPrefixLen+1];

View File

@ -0,0 +1,69 @@
/**
* Test for unassigned code points in IDNs (RFC 3454 section 7)
*/
// Shorthand aliases for the XPCOM class and interface registries.
const Cc = Components.classes;
const Ci = Components.interfaces;
// The nsIIDNService instance under test; assigned in run_test() and used by
// the expected_pass()/expected_fail() helpers.
var idnService;
/**
 * Check that the IDN service displays inputIDN exactly as given.
 *
 * @param inputIDN  host name passed to convertToDisplayIDN()
 */
function expected_pass(inputIDN)
{
  // Out-parameter object required by convertToDisplayIDN(); its value is
  // not inspected by this check.
  var asciiFlag = {};
  do_check_eq(idnService.convertToDisplayIDN(inputIDN, asciiFlag), inputIDN);
}
/**
 * Check that the IDN service does NOT display inputIDN as given: either the
 * displayed form differs from the input, or the conversion throws.
 *
 * @param inputIDN  host name passed to convertToDisplayIDN()
 */
function expected_fail(inputIDN)
{
  // Out-parameter object required by convertToDisplayIDN(); its value is
  // not inspected by this check.
  var asciiFlag = {};
  var shown;
  try {
    shown = idnService.convertToDisplayIDN(inputIDN, asciiFlag);
  } catch (ex) {
    // A thrown conversion counts as "not displayed unchanged": compare
    // against the empty string.
    shown = "";
  }
  do_check_neq(shown, inputIDN);
}
/**
 * Test entry point. Temporarily sets the "network.IDN.whitelist.com" pref to
 * true, runs the display-IDN checks, and then puts the pref back the way it
 * was found.
 */
function run_test() {
  // add an IDN whitelist pref
  var pbi = Cc["@mozilla.org/preferences-service;1"]
              .getService(Ci.nsIPrefBranch2);
  var whitelistPref = "network.IDN.whitelist.com";
  var savedPrefValue = false;
  var prefExists = false;
  try {
    savedPrefValue = pbi.getBoolPref(whitelistPref);
    prefExists = true;
  } catch(e) { }

  pbi.setBoolPref(whitelistPref, true);

  // Everything after the pref has been modified runs inside try/finally so
  // that the original pref state is restored even when a check (or the
  // service lookup) throws; otherwise the modified pref would remain set.
  try {
    idnService = Cc["@mozilla.org/network/idn-service;1"]
                   .getService(Ci.nsIIDNService);

    // assigned code point
    expected_pass("foo\u0101bar.com");

    // assigned code point in punycode. Should *fail* because the URL will be
    // converted to Unicode for display
    expected_fail("xn--foobar-5za.com");

    // unassigned code point
    expected_fail("foo\u3040bar.com");

    // unassigned code point in punycode. Should *pass* because the URL will not
    // be converted to Unicode
    expected_pass("xn--foobar-533e.com");

    // code point assigned since Unicode 3.0
    // XXX This test will unexpectedly pass when we update to IDNAbis
    expected_fail("foo\u0370bar.com");
  } finally {
    // reset the pref
    if (prefExists)
      pbi.setBoolPref(whitelistPref, savedPrefValue);
    else
      pbi.clearUserPref(whitelistPref);
  }
}