Bug 415491, part 2 - Implement a char16_t version of NS_EscapeURL. r=bzbarsky

This commit is contained in:
Mats Palmgren 2014-11-08 02:42:04 +00:00
parent decd558ea2
commit e7f3e5289f
2 changed files with 128 additions and 77 deletions

View File

@ -4,14 +4,15 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c.
#include "nsEscape.h"
#include "nsMemory.h"
#include "nsCRT.h"
#include "nsReadableUtils.h"
const int netCharType[256] =
#include "mozilla/ArrayUtils.h"
#include "nsCRT.h"
#include "plstr.h"
static const char hexChars[] = "0123456789ABCDEF";
static const int netCharType[256] =
/* Bit 0 xalpha -- the alphas
** Bit 1 xpalpha -- as xalpha but
** converts spaces to plus and plus to %2B
@ -41,6 +42,31 @@ const int netCharType[256] =
#define IS_OK(C) (netCharType[((unsigned int)(C))] & (aFlags))
#define HEX_ESCAPE '%'
static uint32_t
AppendPercentHex(char* aBuffer, unsigned char aChar)
{
uint32_t i = 0;
aBuffer[i++] = '%';
aBuffer[i++] = hexChars[aChar >> 4]; // high nibble
aBuffer[i++] = hexChars[aChar & 0xF]; // low nibble
return i;
}
static uint32_t
AppendPercentHex(char16_t* aBuffer, char16_t aChar)
{
uint32_t i = 0;
aBuffer[i++] = '%';
if (aChar & 0xff00) {
aBuffer[i++] = 'u';
aBuffer[i++] = hexChars[aChar >> 12]; // high-byte high nibble
aBuffer[i++] = hexChars[(aChar >> 8) & 0xF]; // high-byte low nibble
}
aBuffer[i++] = hexChars[(aChar >> 4) & 0xF]; // low-byte high nibble
aBuffer[i++] = hexChars[aChar & 0xF]; // low-byte low nibble
return i;
}
//----------------------------------------------------------------------------------------
static char*
nsEscapeCount(const char* aStr, nsEscapeMask aFlags, size_t* aOutLen)
@ -52,7 +78,6 @@ nsEscapeCount(const char* aStr, nsEscapeMask aFlags, size_t* aOutLen)
size_t len = 0;
size_t charsToEscape = 0;
static const char hexChars[] = "0123456789ABCDEF";
const unsigned char* src = (const unsigned char*)aStr;
while (*src) {
@ -310,78 +335,78 @@ nsEscapeHTML2(const char16_t* aSourceBuffer, int32_t aSourceBufferLen)
}
//----------------------------------------------------------------------------------------
//
// The following table encodes which characters needs to be escaped for which
// parts of an URL. The bits are the "url components" in the enum EscapeMask,
// see nsEscape.h.
//
// esc_Scheme = 1
// esc_Username = 2
// esc_Password = 4
// esc_Host = 8
// esc_Directory = 16
// esc_FileBaseName = 32
// esc_FileExtension = 64
// esc_Param = 128
// esc_Query = 256
// esc_Ref = 512
const int EscapeChars[256] =
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
static const uint32_t EscapeChars[256] =
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
0,1023, 0, 512,1023, 0,1023, 0,1023,1023,1023,1023,1023,1023, 953, 784, /* 2x !"#$%&'()*+,-./ */
1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008,1008, 0,1008, 0, 768, /* 3x 0123456789:;<=>? */
1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, /* 4x @ABCDEFGHIJKLMNO */
1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 0, 896, 0, 896,1023, /* 5x PQRSTUVWXYZ[\]^_ */
0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, /* 6x `abcdefghijklmno */
1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023, 0, /* 7x pqrstuvwxyz{|}~ */
0 /* 8x DEL */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
0,1023, 0, 512,1023, 0,1023, 0,1023,1023,1023,1023,1023,1023, 953, 784, // 2x !"#$%&'()*+,-./
1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008,1008, 0,1008, 0, 768, // 3x 0123456789:;<=>?
1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, // 4x @ABCDEFGHIJKLMNO
1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 0, 896, 0, 896,1023, // 5x PQRSTUVWXYZ[\]^_
0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, // 6x `abcdefghijklmno
1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023, 0, // 7x pqrstuvwxyz{|}~ DEL
0 // 80 to FF are zero
};
#define NO_NEED_ESC(C) (EscapeChars[((unsigned int)(C))] & (aFlags))
static uint16_t dontNeedEscape(unsigned char aChar, uint32_t aFlags)
{
return EscapeChars[(uint32_t)aChar] & aFlags;
}
static uint16_t dontNeedEscape(uint16_t aChar, uint32_t aFlags)
{
return aChar < mozilla::ArrayLength(EscapeChars) ?
(EscapeChars[(uint32_t)aChar] & aFlags) : 0;
}
//----------------------------------------------------------------------------------------
/* returns an escaped string */
/* use the following flags to specify which
part of an URL you want to escape:
esc_Scheme = 1
esc_Username = 2
esc_Password = 4
esc_Host = 8
esc_Directory = 16
esc_FileBaseName = 32
esc_FileExtension = 64
esc_Param = 128
esc_Query = 256
esc_Ref = 512
*/
/* by default this function will not escape parts of a string
that already look escaped, which means it already includes
a valid hexcode. This is done to avoid multiple escapes of
a string. Use the following flags to force escaping of a
string:
esc_Forced = 1024
*/
bool
NS_EscapeURL(const char* aPart, int32_t aPartLen, uint32_t aFlags,
nsACString& aResult)
template<class T>
static bool
T_EscapeURL(const typename T::char_type* aPart, size_t aPartLen,
uint32_t aFlags, T& aResult)
{
typedef nsCharTraits<typename T::char_type> traits;
typedef typename traits::unsigned_char_type unsigned_char_type;
static_assert(sizeof(*aPart) == 1 || sizeof(*aPart) == 2,
"unexpected char type");
if (!aPart) {
NS_NOTREACHED("null pointer");
return false;
}
static const char hexChars[] = "0123456789ABCDEF";
if (aPartLen < 0) {
aPartLen = strlen(aPart);
}
const uint32_t ENCODE_MAX_LEN = 6; // %uABCD
bool forced = !!(aFlags & esc_Forced);
bool ignoreNonAscii = !!(aFlags & esc_OnlyASCII);
bool ignoreAscii = !!(aFlags & esc_OnlyNonASCII);
bool writing = !!(aFlags & esc_AlwaysCopy);
bool colon = !!(aFlags & esc_Colon);
const unsigned char* src = (const unsigned char*)aPart;
auto src = reinterpret_cast<const unsigned_char_type*>(aPart);
char tempBuffer[100];
typename T::char_type tempBuffer[100];
unsigned int tempBufferPos = 0;
bool previousIsNonASCII = false;
for (int i = 0; i < aPartLen; ++i) {
unsigned char c = *src++;
for (size_t i = 0; i < aPartLen; ++i) {
unsigned_char_type c = *src++;
// if the char has not to be escaped or whatever follows % is
// a valid escaped string, just copy the char.
@ -399,7 +424,7 @@ NS_EscapeURL(const char* aPart, int32_t aPartLen, uint32_t aFlags,
//
// 0x20..0x7e are the valid ASCII characters. We also escape spaces
// (0x20) since they are not legal in URLs.
if ((NO_NEED_ESC(c) || (c == HEX_ESCAPE && !forced)
if ((dontNeedEscape(c, aFlags) || (c == HEX_ESCAPE && !forced)
|| (c > 0x7f && ignoreNonAscii)
|| (c > 0x20 && c < 0x7f && ignoreAscii))
&& !(c == ':' && colon)
@ -412,27 +437,45 @@ NS_EscapeURL(const char* aPart, int32_t aPartLen, uint32_t aFlags,
aResult.Append(aPart, i);
writing = true;
}
tempBuffer[tempBufferPos++] = HEX_ESCAPE;
tempBuffer[tempBufferPos++] = hexChars[c >> 4]; /* high nibble */
tempBuffer[tempBufferPos++] = hexChars[c & 0x0f]; /* low nibble */
uint32_t len = ::AppendPercentHex(tempBuffer + tempBufferPos, c);
tempBufferPos += len;
MOZ_ASSERT(len <= ENCODE_MAX_LEN, "potential buffer overflow");
}
if (tempBufferPos >= sizeof(tempBuffer) - 4) {
// Flush the temp buffer if it doesnt't have room for another encoded char.
if (tempBufferPos >= mozilla::ArrayLength(tempBuffer) - ENCODE_MAX_LEN) {
NS_ASSERTION(writing, "should be writing");
tempBuffer[tempBufferPos] = '\0';
aResult += tempBuffer;
aResult.Append(tempBuffer, tempBufferPos);
tempBufferPos = 0;
}
previousIsNonASCII = (c > 0x7f);
}
if (writing) {
tempBuffer[tempBufferPos] = '\0';
aResult += tempBuffer;
aResult.Append(tempBuffer, tempBufferPos);
}
return writing;
}
bool
NS_EscapeURL(const char* aPart, int32_t aPartLen, uint32_t aFlags,
nsACString& aResult)
{
if (aPartLen < 0) {
aPartLen = strlen(aPart);
}
return T_EscapeURL(aPart, aPartLen, aFlags, aResult);
}
const nsSubstring&
NS_EscapeURL(const nsSubstring& aStr, uint32_t aFlags, nsSubstring& aResult)
{
if (T_EscapeURL(aStr.Data(), aStr.Length(), aFlags, aResult)) {
return aResult;
}
return aStr;
}
#define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1)
bool

View File

@ -100,15 +100,19 @@ enum EscapeMask {
* NS_EscapeURL
*
* Escapes invalid char's in an URL segment. Has no side-effect if the URL
* segment is already escaped. Otherwise, the escaped URL segment is appended
* to |result|.
* segment is already escaped, unless aFlags has the esc_Forced bit in which
* case % will also be escaped. Iff some part of aStr is escaped is the
* final result appended to aResult. You can also request that aStr is
* always appended to aResult with esc_AlwaysCopy.
*
* @param str url segment string
* @param len url segment string length (-1 if unknown)
* @param flags url segment type flag
* @param result result buffer, untouched if part is already escaped
* @param aStr url segment string
* @param aLen url segment string length (-1 if unknown)
* @param aFlags url segment type flag (see EscapeMask above)
* @param aResult result buffer, untouched if aStr is already escaped unless
* aFlags has esc_AlwaysCopy
*
* @return TRUE if escaping was performed, FALSE otherwise.
* @return true if aResult was written to (i.e. at least one character was
* escaped or esc_AlwaysCopy was requested), false otherwise.
*/
bool NS_EscapeURL(const char* aStr,
int32_t aLen,
@ -118,13 +122,15 @@ bool NS_EscapeURL(const char* aStr,
/**
* Expands URL escape sequences... beware embedded null bytes!
*
* @param str url string to unescape
* @param len length of |str|
* @param flags only esc_OnlyNonASCII, esc_SkipControl and esc_AlwaysCopy
* @param aStr url string to unescape
* @param aLen length of aStr
* @param aFlags only esc_OnlyNonASCII, esc_SkipControl and esc_AlwaysCopy
* are recognized
* @param result result buffer, untouched if |str| is already unescaped
* @param aResult result buffer, untouched if aStr is already unescaped unless
* aFlags has esc_AlwaysCopy
*
* @return TRUE if unescaping was performed, FALSE otherwise.
* @return true if aResult was written to (i.e. at least one character was
* unescaped or esc_AlwaysCopy was requested), false otherwise.
*/
bool NS_UnescapeURL(const char* aStr,
int32_t aLen,
@ -157,6 +163,8 @@ NS_UnescapeURL(const nsCSubstring& aStr, uint32_t aFlags, nsCSubstring& aResult)
}
return aStr;
}
const nsSubstring&
NS_EscapeURL(const nsSubstring& aStr, uint32_t aFlags, nsSubstring& aResult);
/**
* CString version of nsEscape. Returns true on success, false