mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-26 22:32:46 +00:00
Bug 415491, part 2 - Implement a char16_t version of NS_EscapeURL. r=bzbarsky
This commit is contained in:
parent
decd558ea2
commit
e7f3e5289f
@ -4,14 +4,15 @@
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
// First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c.
|
||||
|
||||
#include "nsEscape.h"
|
||||
#include "nsMemory.h"
|
||||
#include "nsCRT.h"
|
||||
#include "nsReadableUtils.h"
|
||||
|
||||
const int netCharType[256] =
|
||||
#include "mozilla/ArrayUtils.h"
|
||||
#include "nsCRT.h"
|
||||
#include "plstr.h"
|
||||
|
||||
static const char hexChars[] = "0123456789ABCDEF";
|
||||
|
||||
static const int netCharType[256] =
|
||||
/* Bit 0 xalpha -- the alphas
|
||||
** Bit 1 xpalpha -- as xalpha but
|
||||
** converts spaces to plus and plus to %2B
|
||||
@ -41,6 +42,31 @@ const int netCharType[256] =
|
||||
#define IS_OK(C) (netCharType[((unsigned int)(C))] & (aFlags))
|
||||
#define HEX_ESCAPE '%'
|
||||
|
||||
static uint32_t
|
||||
AppendPercentHex(char* aBuffer, unsigned char aChar)
|
||||
{
|
||||
uint32_t i = 0;
|
||||
aBuffer[i++] = '%';
|
||||
aBuffer[i++] = hexChars[aChar >> 4]; // high nibble
|
||||
aBuffer[i++] = hexChars[aChar & 0xF]; // low nibble
|
||||
return i;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
AppendPercentHex(char16_t* aBuffer, char16_t aChar)
|
||||
{
|
||||
uint32_t i = 0;
|
||||
aBuffer[i++] = '%';
|
||||
if (aChar & 0xff00) {
|
||||
aBuffer[i++] = 'u';
|
||||
aBuffer[i++] = hexChars[aChar >> 12]; // high-byte high nibble
|
||||
aBuffer[i++] = hexChars[(aChar >> 8) & 0xF]; // high-byte low nibble
|
||||
}
|
||||
aBuffer[i++] = hexChars[(aChar >> 4) & 0xF]; // low-byte high nibble
|
||||
aBuffer[i++] = hexChars[aChar & 0xF]; // low-byte low nibble
|
||||
return i;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
static char*
|
||||
nsEscapeCount(const char* aStr, nsEscapeMask aFlags, size_t* aOutLen)
|
||||
@ -52,7 +78,6 @@ nsEscapeCount(const char* aStr, nsEscapeMask aFlags, size_t* aOutLen)
|
||||
|
||||
size_t len = 0;
|
||||
size_t charsToEscape = 0;
|
||||
static const char hexChars[] = "0123456789ABCDEF";
|
||||
|
||||
const unsigned char* src = (const unsigned char*)aStr;
|
||||
while (*src) {
|
||||
@ -310,78 +335,78 @@ nsEscapeHTML2(const char16_t* aSourceBuffer, int32_t aSourceBufferLen)
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
//
|
||||
// The following table encodes which characters need to be escaped for which
|
||||
// parts of an URL. The bits are the "url components" in the enum EscapeMask,
|
||||
// see nsEscape.h.
|
||||
//
|
||||
// esc_Scheme = 1
|
||||
// esc_Username = 2
|
||||
// esc_Password = 4
|
||||
// esc_Host = 8
|
||||
// esc_Directory = 16
|
||||
// esc_FileBaseName = 32
|
||||
// esc_FileExtension = 64
|
||||
// esc_Param = 128
|
||||
// esc_Query = 256
|
||||
// esc_Ref = 512
|
||||
|
||||
const int EscapeChars[256] =
|
||||
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
|
||||
static const uint32_t EscapeChars[256] =
|
||||
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
|
||||
0,1023, 0, 512,1023, 0,1023, 0,1023,1023,1023,1023,1023,1023, 953, 784, /* 2x !"#$%&'()*+,-./ */
|
||||
1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008,1008, 0,1008, 0, 768, /* 3x 0123456789:;<=>? */
|
||||
1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, /* 4x @ABCDEFGHIJKLMNO */
|
||||
1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 0, 896, 0, 896,1023, /* 5x PQRSTUVWXYZ[\]^_ */
|
||||
0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, /* 6x `abcdefghijklmno */
|
||||
1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023, 0, /* 7x pqrstuvwxyz{|}~ */
|
||||
0 /* 8x DEL */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
||||
0,1023, 0, 512,1023, 0,1023, 0,1023,1023,1023,1023,1023,1023, 953, 784, // 2x !"#$%&'()*+,-./
|
||||
1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008,1008, 0,1008, 0, 768, // 3x 0123456789:;<=>?
|
||||
1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, // 4x @ABCDEFGHIJKLMNO
|
||||
1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 0, 896, 0, 896,1023, // 5x PQRSTUVWXYZ[\]^_
|
||||
0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, // 6x `abcdefghijklmno
|
||||
1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023, 0, // 7x pqrstuvwxyz{|}~ DEL
|
||||
0 // 80 to FF are zero
|
||||
};
|
||||
|
||||
#define NO_NEED_ESC(C) (EscapeChars[((unsigned int)(C))] & (aFlags))
|
||||
static uint16_t dontNeedEscape(unsigned char aChar, uint32_t aFlags)
|
||||
{
|
||||
return EscapeChars[(uint32_t)aChar] & aFlags;
|
||||
}
|
||||
static uint16_t dontNeedEscape(uint16_t aChar, uint32_t aFlags)
|
||||
{
|
||||
return aChar < mozilla::ArrayLength(EscapeChars) ?
|
||||
(EscapeChars[(uint32_t)aChar] & aFlags) : 0;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
|
||||
/* returns an escaped string */
|
||||
|
||||
/* use the following flags to specify which
|
||||
part of an URL you want to escape:
|
||||
|
||||
esc_Scheme = 1
|
||||
esc_Username = 2
|
||||
esc_Password = 4
|
||||
esc_Host = 8
|
||||
esc_Directory = 16
|
||||
esc_FileBaseName = 32
|
||||
esc_FileExtension = 64
|
||||
esc_Param = 128
|
||||
esc_Query = 256
|
||||
esc_Ref = 512
|
||||
*/
|
||||
|
||||
/* by default this function will not escape parts of a string
|
||||
that already look escaped, which means it already includes
|
||||
a valid hexcode. This is done to avoid multiple escapes of
|
||||
a string. Use the following flags to force escaping of a
|
||||
string:
|
||||
|
||||
esc_Forced = 1024
|
||||
*/
|
||||
|
||||
bool
|
||||
NS_EscapeURL(const char* aPart, int32_t aPartLen, uint32_t aFlags,
|
||||
nsACString& aResult)
|
||||
template<class T>
|
||||
static bool
|
||||
T_EscapeURL(const typename T::char_type* aPart, size_t aPartLen,
|
||||
uint32_t aFlags, T& aResult)
|
||||
{
|
||||
typedef nsCharTraits<typename T::char_type> traits;
|
||||
typedef typename traits::unsigned_char_type unsigned_char_type;
|
||||
static_assert(sizeof(*aPart) == 1 || sizeof(*aPart) == 2,
|
||||
"unexpected char type");
|
||||
|
||||
if (!aPart) {
|
||||
NS_NOTREACHED("null pointer");
|
||||
return false;
|
||||
}
|
||||
|
||||
static const char hexChars[] = "0123456789ABCDEF";
|
||||
if (aPartLen < 0) {
|
||||
aPartLen = strlen(aPart);
|
||||
}
|
||||
const uint32_t ENCODE_MAX_LEN = 6; // %uABCD
|
||||
bool forced = !!(aFlags & esc_Forced);
|
||||
bool ignoreNonAscii = !!(aFlags & esc_OnlyASCII);
|
||||
bool ignoreAscii = !!(aFlags & esc_OnlyNonASCII);
|
||||
bool writing = !!(aFlags & esc_AlwaysCopy);
|
||||
bool colon = !!(aFlags & esc_Colon);
|
||||
|
||||
const unsigned char* src = (const unsigned char*)aPart;
|
||||
auto src = reinterpret_cast<const unsigned_char_type*>(aPart);
|
||||
|
||||
char tempBuffer[100];
|
||||
typename T::char_type tempBuffer[100];
|
||||
unsigned int tempBufferPos = 0;
|
||||
|
||||
bool previousIsNonASCII = false;
|
||||
for (int i = 0; i < aPartLen; ++i) {
|
||||
unsigned char c = *src++;
|
||||
for (size_t i = 0; i < aPartLen; ++i) {
|
||||
unsigned_char_type c = *src++;
|
||||
|
||||
// if the char has not to be escaped or whatever follows % is
|
||||
// a valid escaped string, just copy the char.
|
||||
@ -399,7 +424,7 @@ NS_EscapeURL(const char* aPart, int32_t aPartLen, uint32_t aFlags,
|
||||
//
|
||||
// 0x20..0x7e are the valid ASCII characters. We also escape spaces
|
||||
// (0x20) since they are not legal in URLs.
|
||||
if ((NO_NEED_ESC(c) || (c == HEX_ESCAPE && !forced)
|
||||
if ((dontNeedEscape(c, aFlags) || (c == HEX_ESCAPE && !forced)
|
||||
|| (c > 0x7f && ignoreNonAscii)
|
||||
|| (c > 0x20 && c < 0x7f && ignoreAscii))
|
||||
&& !(c == ':' && colon)
|
||||
@ -412,27 +437,45 @@ NS_EscapeURL(const char* aPart, int32_t aPartLen, uint32_t aFlags,
|
||||
aResult.Append(aPart, i);
|
||||
writing = true;
|
||||
}
|
||||
tempBuffer[tempBufferPos++] = HEX_ESCAPE;
|
||||
tempBuffer[tempBufferPos++] = hexChars[c >> 4]; /* high nibble */
|
||||
tempBuffer[tempBufferPos++] = hexChars[c & 0x0f]; /* low nibble */
|
||||
uint32_t len = ::AppendPercentHex(tempBuffer + tempBufferPos, c);
|
||||
tempBufferPos += len;
|
||||
MOZ_ASSERT(len <= ENCODE_MAX_LEN, "potential buffer overflow");
|
||||
}
|
||||
|
||||
if (tempBufferPos >= sizeof(tempBuffer) - 4) {
|
||||
// Flush the temp buffer if it doesn't have room for another encoded char.
|
||||
if (tempBufferPos >= mozilla::ArrayLength(tempBuffer) - ENCODE_MAX_LEN) {
|
||||
NS_ASSERTION(writing, "should be writing");
|
||||
tempBuffer[tempBufferPos] = '\0';
|
||||
aResult += tempBuffer;
|
||||
aResult.Append(tempBuffer, tempBufferPos);
|
||||
tempBufferPos = 0;
|
||||
}
|
||||
|
||||
previousIsNonASCII = (c > 0x7f);
|
||||
}
|
||||
if (writing) {
|
||||
tempBuffer[tempBufferPos] = '\0';
|
||||
aResult += tempBuffer;
|
||||
aResult.Append(tempBuffer, tempBufferPos);
|
||||
}
|
||||
return writing;
|
||||
}
|
||||
|
||||
bool
|
||||
NS_EscapeURL(const char* aPart, int32_t aPartLen, uint32_t aFlags,
|
||||
nsACString& aResult)
|
||||
{
|
||||
if (aPartLen < 0) {
|
||||
aPartLen = strlen(aPart);
|
||||
}
|
||||
return T_EscapeURL(aPart, aPartLen, aFlags, aResult);
|
||||
}
|
||||
|
||||
// Wide-string entry point for URL escaping; forwards aStr's data and length
// to T_EscapeURL. Returns aResult when T_EscapeURL reports (by returning
// true) that it wrote to aResult; otherwise returns aStr itself.
const nsSubstring&
NS_EscapeURL(const nsSubstring& aStr, uint32_t aFlags, nsSubstring& aResult)
{
  const bool wroteResult =
    T_EscapeURL(aStr.Data(), aStr.Length(), aFlags, aResult);
  if (!wroteResult) {
    return aStr;
  }
  return aResult;
}
|
||||
|
||||
#define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1)
|
||||
|
||||
bool
|
||||
|
@ -100,15 +100,19 @@ enum EscapeMask {
|
||||
* NS_EscapeURL
|
||||
*
|
||||
* Escapes invalid char's in an URL segment. Has no side-effect if the URL
|
||||
* segment is already escaped. Otherwise, the escaped URL segment is appended
|
||||
* to |result|.
|
||||
* segment is already escaped, unless aFlags has the esc_Forced bit in which
|
||||
* case % will also be escaped. Iff some part of aStr is escaped is the
|
||||
* final result appended to aResult. You can also request that aStr is
|
||||
* always appended to aResult with esc_AlwaysCopy.
|
||||
*
|
||||
* @param str url segment string
|
||||
* @param len url segment string length (-1 if unknown)
|
||||
* @param flags url segment type flag
|
||||
* @param result result buffer, untouched if part is already escaped
|
||||
* @param aStr url segment string
|
||||
* @param aLen url segment string length (-1 if unknown)
|
||||
* @param aFlags url segment type flag (see EscapeMask above)
|
||||
* @param aResult result buffer, untouched if aStr is already escaped unless
|
||||
* aFlags has esc_AlwaysCopy
|
||||
*
|
||||
* @return TRUE if escaping was performed, FALSE otherwise.
|
||||
* @return true if aResult was written to (i.e. at least one character was
|
||||
* escaped or esc_AlwaysCopy was requested), false otherwise.
|
||||
*/
|
||||
bool NS_EscapeURL(const char* aStr,
|
||||
int32_t aLen,
|
||||
@ -118,13 +122,15 @@ bool NS_EscapeURL(const char* aStr,
|
||||
/**
|
||||
* Expands URL escape sequences... beware embedded null bytes!
|
||||
*
|
||||
* @param str url string to unescape
|
||||
* @param len length of |str|
|
||||
* @param flags only esc_OnlyNonASCII, esc_SkipControl and esc_AlwaysCopy
|
||||
* @param aStr url string to unescape
|
||||
* @param aLen length of aStr
|
||||
* @param aFlags only esc_OnlyNonASCII, esc_SkipControl and esc_AlwaysCopy
|
||||
* are recognized
|
||||
* @param result result buffer, untouched if |str| is already unescaped
|
||||
* @param aResult result buffer, untouched if aStr is already unescaped unless
|
||||
* aFlags has esc_AlwaysCopy
|
||||
*
|
||||
* @return TRUE if unescaping was performed, FALSE otherwise.
|
||||
* @return true if aResult was written to (i.e. at least one character was
|
||||
* unescaped or esc_AlwaysCopy was requested), false otherwise.
|
||||
*/
|
||||
bool NS_UnescapeURL(const char* aStr,
|
||||
int32_t aLen,
|
||||
@ -157,6 +163,8 @@ NS_UnescapeURL(const nsCSubstring& aStr, uint32_t aFlags, nsCSubstring& aResult)
|
||||
}
|
||||
return aStr;
|
||||
}
|
||||
const nsSubstring&
|
||||
NS_EscapeURL(const nsSubstring& aStr, uint32_t aFlags, nsSubstring& aResult);
|
||||
|
||||
/**
|
||||
* CString version of nsEscape. Returns true on success, false
|
||||
|
Loading…
Reference in New Issue
Block a user