Bug 1502097 - (Part 2) Define IDN blocklist as ranges of characters [ {firstChar, lastChar}* ] r=jfkthame,dragana

* Changes the format of the blocklist from a list of characters to a list of
  character ranges. Binary search still works, and it is easier to include
  large ranges of characters in the blocklist.
* Moves logic for handling the blocklist to IDNBlocklistUtils.h/.cpp
* Changes NS_EscapeURL to take a function that determines if a character
  is blocked. This way the type of the array doesn't matter.

Differential Revision: https://phabricator.services.mozilla.com/D12210

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Valentin Gosu 2018-11-24 12:04:34 +00:00
parent 5f1a383a19
commit 66d5164c37
10 changed files with 257 additions and 132 deletions

View File

@ -14,10 +14,6 @@
using namespace mozilla;
static const char16_t sNetworkIDNBlocklistChars[] = {
#include "../../netwerk/dns/IDNCharacterBlocklist.inc"
};
nsTextToSubURI::~nsTextToSubURI()
{
}
@ -117,7 +113,6 @@ NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset,
const nsACString &aURIFragment,
nsAString &_retval)
{
nsresult rv;
nsAutoCString unescapedSpec;
// skip control octets (0x00 - 0x1f and 0x7f) when unescaping
NS_UnescapeURL(PromiseFlatCString(aURIFragment),
@ -134,33 +129,22 @@ NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset,
}
// If there are any characters that are unsafe for URIs, reescape those.
if (mUnsafeChars.IsEmpty()) {
mUnsafeChars.AppendElements(
sNetworkIDNBlocklistChars, ArrayLength(sNetworkIDNBlocklistChars));
nsAutoString extraAllowed;
Preferences::GetString("network.IDN.extra_allowed_chars",
extraAllowed);
if (mIDNBlocklist.IsEmpty()) {
mozilla::net::InitializeBlocklist(mIDNBlocklist);
// we allow SPACE and IDEOGRAPHIC SPACE in this method
extraAllowed.Append(u' ');
extraAllowed.Append(0x3000);
mUnsafeChars.RemoveElementsBy([&](char16_t c) {
return extraAllowed.FindChar(c, 0) != -1;
});
nsAutoString extraBlocked;
rv = Preferences::GetString("network.IDN.extra_blocked_chars",
extraBlocked);
if (NS_SUCCEEDED(rv) && !extraBlocked.IsEmpty()) {
mUnsafeChars.AppendElements(
static_cast<const char16_t*>(extraBlocked.Data()),
extraBlocked.Length());
mUnsafeChars.Sort();
}
mozilla::net::RemoveCharFromBlocklist(u' ', mIDNBlocklist);
mozilla::net::RemoveCharFromBlocklist(0x3000, mIDNBlocklist);
}
MOZ_ASSERT(!mIDNBlocklist.IsEmpty());
const nsPromiseFlatString& unescapedResult = PromiseFlatString(_retval);
nsString reescapedSpec;
_retval = NS_EscapeURL(unescapedResult, mUnsafeChars, reescapedSpec);
_retval =
NS_EscapeURL(unescapedResult,
[&](char16_t aChar) -> bool {
return mozilla::net::CharInBlocklist(aChar, mIDNBlocklist);
},
reescapedSpec);
return NS_OK;
}

View File

@ -9,6 +9,7 @@
#include "nsITextToSubURI.h"
#include "nsString.h"
#include "nsTArray.h"
#include "mozilla/net/IDNBlocklistUtils.h"
class nsTextToSubURI: public nsITextToSubURI
{
@ -25,7 +26,7 @@ private:
// Characters defined in netwerk/dns/IDNCharacterBlocklist.inc or via the
// network.IDN.extra_allowed_chars and network.IDN.extra_blocked_chars prefs.
nsTArray<char16_t> mUnsafeChars;
nsTArray<mozilla::net::BlocklistRange> mIDNBlocklist;
};
#endif // nsTextToSubURI_h__

View File

@ -0,0 +1,86 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "IDNBlocklistUtils.h"
namespace mozilla {
namespace net {
// Built-in IDN blocklist. Every row pulled in from IDNCharacterBlocklist.inc
// is a { firstCharacterInRange, lastCharacterInRange } pair, and the rows are
// expected to be listed in ascending order.
static constexpr char16_t sBlocklistPairs[][2] = {
#include "IDNCharacterBlocklist.inc"
};
void
RemoveCharFromBlocklist(char16_t aChar, nsTArray<BlocklistRange>& aBlocklist)
{
auto pos = aBlocklist.BinaryIndexOf(aChar, BlocklistPairToCharComparator());
if (pos == nsTArray<BlocklistRange>::NoIndex) {
return;
}
auto& pair = aBlocklist[pos];
// If the matched range has a length of one, we can just remove it
if (pair.second() == pair.first()) {
aBlocklist.RemoveElementAt(pos);
return;
}
// If the character matches the first element in the range, just update
// the range.
if (aChar == pair.first()) {
pair.first() = pair.first() + 1;
return;
}
// Also if it matches the last character in the range, we just update it.
if (aChar == pair.second()) {
pair.second() = pair.second() - 1;
return;
}
// Our character is in the middle of the range, splitting it in two.
// We update the matched range to reflect the values before the character,
// and insert a new range that represents the values after.
char16_t lastElement = pair.second();
pair.second() = aChar - 1;
aBlocklist.InsertElementAt(
pos + 1, mozilla::MakePair(char16_t(aChar + 1), lastElement));
}
// Rebuilds aBlocklist from scratch:
//  1. starts from the built-in ranges in sBlocklistPairs,
//  2. removes every character listed in network.IDN.extra_allowed_chars,
//  3. adds every character listed in network.IDN.extra_blocked_chars.
// On return aBlocklist is sorted in ascending order and its ranges do not
// overlap, which is what the binary search in CharInBlocklist and
// RemoveCharFromBlocklist relies on.
void
InitializeBlocklist(nsTArray<BlocklistRange>& aBlocklist)
{
  aBlocklist.Clear();
  for (auto const& arr : sBlocklistPairs) {
    // The hardcoded pairs are already sorted.
    aBlocklist.AppendElement(mozilla::MakePair(arr[0], arr[1]));
  }

  nsAutoString extraAllowed;
  nsresult rv =
    Preferences::GetString("network.IDN.extra_allowed_chars", extraAllowed);
  if (NS_SUCCEEDED(rv) && !extraAllowed.IsEmpty()) {
    const char16_t* cur = extraAllowed.BeginReading();
    const char16_t* end = extraAllowed.EndReading();
    // Characters in the allowed list are removed from the blocklist.
    for (; cur < end; ++cur) {
      RemoveCharFromBlocklist(*cur, aBlocklist);
    }
  }

  nsAutoString extraBlocked;
  rv = Preferences::GetString("network.IDN.extra_blocked_chars", extraBlocked);
  if (NS_SUCCEEDED(rv) && !extraBlocked.IsEmpty()) {
    for (size_t i = 0; i < extraBlocked.Length(); ++i) {
      char16_t blocked = extraBlocked[i];
      // Skip characters that an existing range already covers (this also
      // handles characters repeated in the pref). Adding them again would
      // create overlapping ranges; the list is ordered by the first character
      // of each range only, so with overlaps the range ends are no longer
      // ascending and the binary search can miss blocked characters.
      if (CharInBlocklist(blocked, aBlocklist)) {
        continue;
      }
      // Each extra blocked character becomes its own one-character range,
      // inserted at its sorted position so the list stays searchable.
      aBlocklist.InsertElementSorted(mozilla::MakePair(blocked, blocked),
                                     BlocklistEntryComparator());
    }
  }
}
} // namespace net
} // namespace mozilla

View File

@ -0,0 +1,73 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef IDNBlocklistUtils_h__
#define IDNBlocklistUtils_h__
#include "mozilla/Pair.h"
#include "nsTArray.h"
namespace mozilla {
namespace net {
// A blocklist range is defined as all of the characters between:
// { firstCharacterInRange, lastCharacterInRange }
typedef mozilla::Pair<char16_t, char16_t> BlocklistRange;
// Comparator for binary-searching a sorted array of ranges for one character
// (the needle). A range compares equal to the needle when it contains it.
class BlocklistPairToCharComparator
{
public:
  bool Equals(const BlocklistRange& pair, char16_t needle) const
  {
    // The needle matches when it lies within [pair.first(), pair.second()].
    if (needle < pair.first()) {
      return false;
    }
    return needle <= pair.second();
  }

  bool LessThan(const BlocklistRange& pair, char16_t needle) const
  {
    // The range sorts before the needle only when the needle is past the
    // end of the range; otherwise the needle may be inside it.
    return needle > pair.second();
  }
};
// Comparator for ordering ranges relative to each other, e.g. when sorting
// the array of ranges.
class BlocklistEntryComparator
{
public:
  bool Equals(const BlocklistRange& a, const BlocklistRange& b) const
  {
    // Two ranges are equal only when both of their endpoints agree.
    if (a.first() != b.first()) {
      return false;
    }
    return a.second() == b.second();
  }

  bool LessThan(const BlocklistRange& a, const BlocklistRange& b) const
  {
    // Ranges are ordered by their first character alone.
    return b.first() > a.first();
  }
};
// Returns true if the char can be found in the blocklist
inline bool
CharInBlocklist(char16_t aChar, const nsTArray<BlocklistRange>& aBlocklist)
{
return aBlocklist.ContainsSorted(aChar, BlocklistPairToCharComparator());
}
// Initializes the blocklist based on the statically defined list and the
// values of the following preferences:
// - network.IDN.extra_allowed_chars
// - network.IDN.extra_blocked_chars
void
InitializeBlocklist(nsTArray<BlocklistRange>& aBlocklist);
// Removes aChar from aBlocklist if one of its ranges contains it; otherwise
// the blocklist is left untouched. The containing range is dropped, shrunk,
// or split in two as needed. aBlocklist must be sorted, since the range is
// located with a binary search.
void
RemoveCharFromBlocklist(char16_t aChar, nsTArray<BlocklistRange>& aBlocklist);
} // namespace net
} // namespace mozilla
#endif // IDNBlocklistUtils_h__

View File

@ -1,55 +1,62 @@
// This file contains the IDN character blocklist.
// Each entry represents a range of blocked characters.
// Ranges are defined as:
// { firstCharacterInRange, lastCharacterInRange }
// IMPORTANT: Make sure this list is sorted in ascending order and that the
// ranges do not overlap; lookups use a binary search over the ranges.
// ASCII Space
0x0020,
0x00A0,
0x00BC, 0x00BD, 0x00BE,
0x01C3,
0x02D0,
0x0337, 0x0338,
0x0589, 0x058A,
0x05C3,
0x05F4,
0x0609, 0x060A,
0x066A,
0x06D4,
0x0701, 0x0702, 0x0703, 0x0704,
0x115F, 0x1160,
0x1735,
0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B,
0x200E, 0x200F, 0x2010,
0x2019,
0x2024,
0x2027, 0x2028, 0x2029, 0x202A, 0x202B, 0x202C, 0x202D, 0x202E, 0x202F,
0x2039, 0x203A,
0x2041,
0x2044,
0x2052,
0x205F,
0x2153, 0x2154, 0x2155, 0x2156, 0x2157, 0x2158, 0x2159, 0x215A, 0x215B, 0x215C, 0x215D, 0x215E, 0x215F,
0x2215,
0x2236,
0x23AE,
0x2571,
0x29F6,
0x29F8,
0x2AFB,
0x2AFD,
0x2FF0, 0x2FF1, 0x2FF2, 0x2FF3, 0x2FF4, 0x2FF5, 0x2FF6, 0x2FF7, 0x2FF8, 0x2FF9, 0x2FFA, 0x2FFB,
{ 0x0020, 0x0020 },
{ 0x00A0, 0x00A0 },
{ 0x00BC, 0x00BE },
{ 0x01C3, 0x01C3 },
{ 0x02D0, 0x02D0 },
{ 0x0337, 0x0338 },
{ 0x0589, 0x058A },
{ 0x05C3, 0x05C3 },
{ 0x05F4, 0x05F4 },
{ 0x0609, 0x060A },
{ 0x066A, 0x066A },
{ 0x06D4, 0x06D4 },
{ 0x0701, 0x0704 },
{ 0x115F, 0x1160 },
{ 0x1735, 0x1735 },
{ 0x2000, 0x200B },
{ 0x200E, 0x2010 },
{ 0x2019, 0x2019 },
{ 0x2024, 0x2024 },
{ 0x2027, 0x202F },
{ 0x2039, 0x203A },
{ 0x2041, 0x2041 },
{ 0x2044, 0x2044 },
{ 0x2052, 0x2052 },
{ 0x205F, 0x205F },
{ 0x2153, 0x215F },
{ 0x2215, 0x2215 },
{ 0x2236, 0x2236 },
{ 0x23AE, 0x23AE },
{ 0x2571, 0x2571 },
{ 0x29F6, 0x29F6 },
{ 0x29F8, 0x29F8 },
{ 0x2AFB, 0x2AFB },
{ 0x2AFD, 0x2AFD },
{ 0x2FF0, 0x2FFB },
// Ideographic Space
0x3000,
0x3002,
0x3014, 0x3015,
0x3033,
0x30A0,
0x3164,
0x321D, 0x321E,
0x33AE, 0x33AF,
0x33C6,
0x33DF,
0xFE14, 0xFE15,
0xFE3F,
0xFE5D, 0xFE5E,
0xFEFF,
0xFF0E, 0xFF0F,
0xFF61,
0xFFA0,
0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD
{ 0x3000, 0x3000 },
{ 0x3002, 0x3002 },
{ 0x3014, 0x3015 },
{ 0x3033, 0x3033 },
{ 0x30A0, 0x30A0 },
{ 0x3164, 0x3164 },
{ 0x321D, 0x321E },
{ 0x33AE, 0x33AF },
{ 0x33C6, 0x33C6 },
{ 0x33DF, 0x33DF },
{ 0xFE14, 0xFE15 },
{ 0xFE3F, 0xFE3F },
{ 0xFE5D, 0xFE5E },
{ 0xFEFF, 0xFEFF },
{ 0xFF0E, 0xFF0F },
{ 0xFF61, 0xFF61 },
{ 0xFFA0, 0xFFA0 },
{ 0xFFF9, 0xFFFD },

View File

@ -29,6 +29,7 @@ EXPORTS.mozilla.net += [
'DNSListenerProxy.h',
'DNSRequestChild.h',
'DNSRequestParent.h',
'IDNBlocklistUtils.h',
'PDNSParams.h',
'TRRService.h',
]
@ -45,6 +46,7 @@ UNIFIED_SOURCES += [
'DNSRequestChild.cpp',
'DNSRequestParent.cpp',
'GetAddrInfo.cpp',
'IDNBlocklistUtils.cpp',
'nsDNSService2.cpp',
'nsIDNService.cpp',
'punycode.c',

View File

@ -26,11 +26,8 @@ const bool kIDNA2008_TransitionalProcessing = false;
#include "ICUUtils.h"
#include "unicode/uscript.h"
static const char16_t sBlocklistChars[] = {
#include "IDNCharacterBlocklist.inc"
};
using namespace mozilla::unicode;
using namespace mozilla::net;
using mozilla::Preferences;
//-----------------------------------------------------------------------------
@ -50,18 +47,16 @@ static const char kACEPrefix[] = "xn--";
#define NS_NET_PREF_IDNRESTRICTION "network.IDN.restriction_profile"
static inline bool
isOnlySafeChars(const nsString& in, const nsTArray<char16_t>& aBlockList)
isOnlySafeChars(const nsString& in, const nsTArray<BlocklistRange>& aBlocklist)
{
if (aBlockList.IsEmpty()) {
if (aBlocklist.IsEmpty()) {
return true;
}
const char16_t* cur = in.BeginReading();
const char16_t* end = in.EndReading();
for (; cur < end; ++cur) {
size_t unused;
if (mozilla::BinarySearch(aBlockList, 0, aBlockList.Length(), *cur,
&unused)) {
if (CharInBlocklist(*cur, aBlocklist)) {
return false;
}
}
@ -97,45 +92,21 @@ nsresult nsIDNService::Init()
Preferences::RegisterPrefixCallbacks(PrefChanged, gCallbackPrefs, this);
prefsChanged(nullptr);
InitializeBlocklist();
InitializeBlocklist(mIDNBlocklist);
return NS_OK;
}
void
nsIDNService::InitializeBlocklist()
{
mIDNBlocklist.Clear();
mIDNBlocklist.AppendElements(sBlocklistChars,
mozilla::ArrayLength(sBlocklistChars));
nsAutoString extraAllowed;
nsresult rv = Preferences::GetString(NS_NET_PREF_EXTRAALLOWED, extraAllowed);
if (NS_SUCCEEDED(rv) && !extraAllowed.IsEmpty()) {
mIDNBlocklist.RemoveElementsBy([&](char16_t c) {
return extraAllowed.FindChar(c, 0) != -1;
});
}
nsAutoString extraBlocked;
rv = Preferences::GetString(NS_NET_PREF_EXTRABLOCKED, extraBlocked);
if (NS_SUCCEEDED(rv) && !extraBlocked.IsEmpty()) {
mIDNBlocklist.AppendElements(
static_cast<const char16_t*>(extraBlocked.Data()), extraBlocked.Length());
mIDNBlocklist.Sort();
}
}
void nsIDNService::prefsChanged(const char *pref)
{
MOZ_ASSERT(NS_IsMainThread());
mLock.AssertCurrentThreadOwns();
if (pref && NS_LITERAL_CSTRING(NS_NET_PREF_EXTRAALLOWED).Equals(pref)) {
InitializeBlocklist();
InitializeBlocklist(mIDNBlocklist);
}
if (pref && NS_LITERAL_CSTRING(NS_NET_PREF_EXTRABLOCKED).Equals(pref)) {
InitializeBlocklist();
InitializeBlocklist(mIDNBlocklist);
}
if (!pref || NS_LITERAL_CSTRING(NS_NET_PREF_SHOWPUNYCODE).Equals(pref)) {
bool val;

View File

@ -13,6 +13,7 @@
#include "nsWeakReference.h"
#include "unicode/uidna.h"
#include "mozilla/net/IDNBlocklistUtils.h"
#include "nsString.h"
@ -95,8 +96,6 @@ private:
nsresult ACEtoUTF8(const nsACString& input, nsACString& _retval,
stringPrepFlag flag);
void InitializeBlocklist();
bool isInWhitelist(const nsACString &host);
void prefsChanged(const char *pref);
@ -175,7 +174,7 @@ private:
mozilla::Mutex mLock;
// guarded by mLock
nsTArray<char16_t> mIDNBlocklist;
nsTArray<mozilla::net::BlocklistRange> mIDNBlocklist;
/**
* Flag set by the pref network.IDN_show_punycode. When it is true,

View File

@ -480,15 +480,15 @@ NS_EscapeURL(const nsAString& aStr, uint32_t aFlags, nsAString& aResult)
}
// Starting at aStr[aStart] find the first index in aStr that matches any
// character in aForbidden. Return false if not found.
// character that is forbidden by aFunction. Return false if not found.
static bool
FindFirstMatchFrom(const nsString& aStr, size_t aStart,
const nsTArray<char16_t>& aForbidden, size_t* aIndex)
FindFirstMatchFrom(const nsString& aStr,
size_t aStart,
const std::function<bool(char16_t)>& aFunction,
size_t* aIndex)
{
const size_t len = aForbidden.Length();
for (size_t j = aStart, l = aStr.Length(); j < l; ++j) {
size_t unused;
if (mozilla::BinarySearch(aForbidden, 0, len, aStr[j], &unused)) {
if (aFunction(aStr[j])) {
*aIndex = j;
return true;
}
@ -497,13 +497,14 @@ FindFirstMatchFrom(const nsString& aStr, size_t aStart,
}
const nsAString&
NS_EscapeURL(const nsString& aStr, const nsTArray<char16_t>& aForbidden,
NS_EscapeURL(const nsString& aStr,
const std::function<bool(char16_t)>& aFunction,
nsAString& aResult)
{
bool didEscape = false;
for (size_t i = 0, strLen = aStr.Length(); i < strLen; ) {
size_t j;
if (MOZ_UNLIKELY(FindFirstMatchFrom(aStr, i, aForbidden, &j))) {
if (MOZ_UNLIKELY(FindFirstMatchFrom(aStr, i, aFunction, &j))) {
if (i == 0) {
didEscape = true;
aResult.Truncate();

View File

@ -12,6 +12,7 @@
#include "nscore.h"
#include "nsError.h"
#include "nsString.h"
#include <functional>
/**
* Valid mask values for nsEscape
@ -211,14 +212,14 @@ NS_EscapeURL(const nsAString& aStr, uint32_t aFlags, nsAString& aResult);
/**
* Percent-escapes all characters in aStr for which aFunction returns true.
* @param aStr the input URL string
* @param aForbidden the characters that should be escaped if found in aStr
* @note that aForbidden MUST be sorted (low to high)
* @param aFunction returns true for characters that should be escaped
* @param aResult the result if some characters were escaped
* @return aResult if some characters were escaped, or aStr otherwise (aResult
* is unmodified in that case)
*/
const nsAString&
NS_EscapeURL(const nsString& aStr, const nsTArray<char16_t>& aForbidden,
NS_EscapeURL(const nsString& aStr,
const std::function<bool(char16_t)>& aFunction,
nsAString& aResult);
/**