Bug 1369317 - Filter and escape URI string in only one pass r=mcmanus

MozReview-Commit-ID: KU4C4cS3jZC

--HG--
extra : rebase_source : 4995b4d8a133b8568af5b130be2077ee90f8b4e4
This commit is contained in:
Valentin Gosu 2017-08-13 10:03:34 +02:00
parent c499c0ebcb
commit 97a44a506c
5 changed files with 86 additions and 11 deletions

View File

@ -297,15 +297,10 @@ nsSimpleURI::SetSpec(const nsACString &aSpec)
}
ToLowerCase(mScheme);
// filter out unexpected chars "\r\n\t" if necessary
nsAutoCString filteredSpec;
net_FilterURIString(aSpec, filteredSpec);
// nsSimpleURI currently restricts the charset to US-ASCII
nsAutoCString spec;
rv = NS_EscapeURL(filteredSpec, esc_OnlyNonASCII, spec, fallible);
rv = net_FilterAndEscapeURI(aSpec, esc_OnlyNonASCII, spec);
if (NS_FAILED(rv)) {
return rv;
return rv;
}
int32_t colonPos = spec.FindChar(':');

View File

@ -19,6 +19,7 @@
#include "mozilla/Preferences.h"
#include "prnetdb.h"
#include "mozilla/Tokenizer.h"
#include "nsEscape.h"
using namespace mozilla;
@ -628,6 +629,26 @@ net_FilterURIString(const nsACString& input, nsACString& result)
}
}
// Trims leading and trailing bytes that are <= 0x20 from aInput, then hands
// the remaining range to NS_EscapeAndFilterURL together with the CR/LF/Tab
// mask, so that filtering and escaping are done by that single call.
// aResult is truncated up front; the nsresult of the fallible escape call is
// returned unchanged.
nsresult
net_FilterAndEscapeURI(const nsACString& aInput, uint32_t aFlags, nsACString& aResult)
{
  aResult.Truncate();

  auto first = aInput.BeginReading();
  auto last = aInput.EndReading();

  // Advance past every leading byte that is a control character or a space.
  while (first != last && static_cast<uint8_t>(*first) <= 0x20) {
    ++first;
  }

  // Walk backwards over trailing control characters and spaces; |last| ends
  // up one past the final byte that is kept.
  while (last != first) {
    auto candidate = last;
    --candidate;
    if (static_cast<uint8_t>(*candidate) > 0x20) {
      break;
    }
    last = candidate;
  }

  const ASCIIMaskArray& crlfTabMask = ASCIIMask::MaskCRLFTab();
  return NS_EscapeAndFilterURL(Substring(first, last), aFlags,
                               &crlfTabMask, aResult, fallible);
}
#if defined(XP_WIN)
bool

View File

@ -115,6 +115,18 @@ inline bool net_IsValidScheme(const nsCString& scheme)
*/
void net_FilterURIString(const nsACString& input, nsACString& result);
/**
* This function performs character stripping just like net_FilterURIString,
* with the added benefit of also performing percent escaping of disallowed
* characters, all in one pass. Saving one pass is very important when operating
* on really large strings.
*
* @param aInput the URL spec we want to filter
* @param aFlags the flags which control which characters we escape
* @param aResult the out param for the filtered and escaped spec; it is
*                truncated before processing starts
* @return the nsresult of the underlying (fallible) escape-and-filter call
*/
nsresult net_FilterAndEscapeURI(const nsACString& aInput, uint32_t aFlags, nsACString& aResult);
#if defined(XP_WIN)
/**
* On Win32 and OS/2 systems a back-slash in a file:// URL is equivalent to a

View File

@ -11,6 +11,7 @@
#include "nsTArray.h"
#include "nsCRT.h"
#include "plstr.h"
#include "nsASCIIMask.h"
static const char hexCharsUpper[] = "0123456789ABCDEF";
static const char hexCharsUpperLower[] = "0123456789ABCDEFabcdef";
@ -382,7 +383,8 @@ static uint16_t dontNeedEscape(uint16_t aChar, uint32_t aFlags)
template<class T>
static nsresult
T_EscapeURL(const typename T::char_type* aPart, size_t aPartLen,
uint32_t aFlags, T& aResult, bool& aDidAppend)
uint32_t aFlags, const ASCIIMaskArray* aFilterMask,
T& aResult, bool& aDidAppend)
{
typedef nsCharTraits<typename T::char_type> traits;
typedef typename traits::unsigned_char_type unsigned_char_type;
@ -409,6 +411,19 @@ T_EscapeURL(const typename T::char_type* aPart, size_t aPartLen,
for (size_t i = 0; i < aPartLen; ++i) {
unsigned_char_type c = *src++;
// If there is a filter, we wish to skip any characters which match it.
// This is needed so we don't perform an extra pass just to extract the
// filtered characters.
if (aFilterMask && ASCIIMask::IsMasked(*aFilterMask, c)) {
if (!writing) {
if (!aResult.Append(aPart, i, fallible)) {
return NS_ERROR_OUT_OF_MEMORY;
}
writing = true;
}
continue;
}
// if the char has not to be escaped or whatever follows % is
// a valid escaped string, just copy the char.
//
@ -474,7 +489,7 @@ NS_EscapeURL(const char* aPart, int32_t aPartLen, uint32_t aFlags,
}
bool result = false;
nsresult rv = T_EscapeURL(aPart, aPartLen, aFlags, aResult, result);
nsresult rv = T_EscapeURL(aPart, aPartLen, aFlags, nullptr, aResult, result);
if (NS_FAILED(rv)) {
::NS_ABORT_OOM(aResult.Length() * sizeof(nsACString::char_type));
}
@ -487,7 +502,26 @@ NS_EscapeURL(const nsACString& aStr, uint32_t aFlags, nsACString& aResult,
const mozilla::fallible_t&)
{
bool appended = false;
nsresult rv = T_EscapeURL(aStr.Data(), aStr.Length(), aFlags, aResult, appended);
nsresult rv = T_EscapeURL(aStr.Data(), aStr.Length(), aFlags, nullptr, aResult, appended);
if (NS_FAILED(rv)) {
aResult.Truncate();
return rv;
}
if (!appended) {
aResult = aStr;
}
return rv;
}
nsresult
NS_EscapeAndFilterURL(const nsACString& aStr, uint32_t aFlags,
const ASCIIMaskArray* aFilterMask,
nsACString& aResult, const mozilla::fallible_t&)
{
bool appended = false;
nsresult rv = T_EscapeURL(aStr.Data(), aStr.Length(), aFlags, aFilterMask, aResult, appended);
if (NS_FAILED(rv)) {
aResult.Truncate();
return rv;
@ -504,7 +538,7 @@ const nsAString&
NS_EscapeURL(const nsAString& aStr, uint32_t aFlags, nsAString& aResult)
{
bool result = false;
nsresult rv = T_EscapeURL<nsAString>(aStr.Data(), aStr.Length(), aFlags, aResult, result);
nsresult rv = T_EscapeURL<nsAString>(aStr.Data(), aStr.Length(), aFlags, nullptr, aResult, result);
if (NS_FAILED(rv)) {
::NS_ABORT_OOM(aResult.Length() * sizeof(nsAString::char_type));

View File

@ -169,6 +169,19 @@ nsresult
NS_EscapeURL(const nsACString& aStr, uint32_t aFlags, nsACString& aResult,
const mozilla::fallible_t&);
// Forward declaration for nsASCIIMask.h
typedef std::array<bool, 128> ASCIIMaskArray;
/**
* The same as NS_EscapeURL, except it also filters out characters that match
* aFilterMask.
*
* @param aStr the string to escape and filter
* @param aFlags the escaping flags, forwarded unchanged to the shared
*               escaping routine
* @param aFilterMask mask of ASCII characters to drop from the output; the
*                    escaping routine checks it for null and performs no
*                    filtering in that case
* @param aResult the out param for the escaped and filtered string; it is
*                truncated if the escaping routine reports a failure
* @return the failure code of the escaping routine if it fails
*/
nsresult
NS_EscapeAndFilterURL(const nsACString& aStr, uint32_t aFlags,
const ASCIIMaskArray* aFilterMask,
nsACString& aResult, const mozilla::fallible_t&);
inline const nsACString&
NS_UnescapeURL(const nsACString& aStr, uint32_t aFlags, nsACString& aResult)
{