/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsEscape.h"

#include "mozilla/ArrayUtils.h"
#include "mozilla/BinarySearch.h"
#include "nsTArray.h"
#include "nsCRT.h"
#include "plstr.h"

// Upper-case hex digits used when emitting %XX / %uXXXX escapes.
static const char hexChars[] = "0123456789ABCDEF";

// Per-byte classification used by nsEscape(): a byte is copied verbatim when
// its table entry has the bit of the requested mask set (see IS_OK below).
// Only bits 0..2 are used, so every entry is in the range 0..7; entries past
// the explicit initializers (0x80..0xFF) are zero.
static const int netCharType[256] =
/*  Bit 0       xalpha      -- the alphas
**  Bit 1       xpalpha     -- as xalpha but
**                             converts spaces to plus and plus to %2B
**  Bit 2       path        -- as xalphas but doesn't escape '/'
*/
  /*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
  {  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  /* 0x */
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  /* 1x */
     0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4,  /* 2x   !"#$%&'()*+,-./  */
     7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0,  /* 3x  0123456789:;<=>?  */
     0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  /* 4x  @ABCDEFGHIJKLMNO  */
     /* bits for '@' changed from 7 to 0 so '@' can be escaped   */
     /* in usernames and passwords in publishing.                */
     7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7,  /* 5X  PQRSTUVWXYZ[\]^_  */
     0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  /* 6x  `abcdefghijklmno  */
     7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,  /* 7X  pqrstuvwxyz{\}~  DEL */
     0,
  };

/* decode % escaped hex codes into character values */
// Yields 0 for anything that is not a hex digit. The argument is evaluated
// several times, so it must be free of side effects.
#define UNHEX(C) \
    (((C) >= '0' && (C) <= '9') ? (C) - '0' : \
     (((C) >= 'A' && (C) <= 'F') ? (C) - 'A' + 10 : \
     (((C) >= 'a' && (C) <= 'f') ? (C) - 'a' + 10 : 0)))

// Non-zero when byte C may be copied unescaped for the mask `aFlags`, which
// must be a variable in scope at the point of use.
#define IS_OK(C) (netCharType[((unsigned int)(C))] & (aFlags))
#define HEX_ESCAPE '%'

// Longest sequence AppendPercentHex can emit.
static const uint32_t ENCODE_MAX_LEN = 6; // %uABCD

// Writes "%XY" (X, Y = upper-case hex nibbles of aChar) to aBuffer and
// returns the number of characters written (always 3). No terminator is
// written.
static uint32_t
AppendPercentHex(char* aBuffer, unsigned char aChar)
{
  uint32_t i = 0;
  aBuffer[i++] = '%';
  aBuffer[i++] = hexChars[aChar >> 4]; // high nibble
  aBuffer[i++] = hexChars[aChar & 0xF]; // low nibble
  return i;
}

// UTF-16 flavour: writes "%XY" when the high byte of aChar is zero, otherwise
// "%uWXYZ". Returns the number of code units written (3 or 6, i.e. never
// more than ENCODE_MAX_LEN). No terminator is written.
static uint32_t
AppendPercentHex(char16_t* aBuffer, char16_t aChar)
{
  uint32_t i = 0;
  aBuffer[i++] = '%';
  if (aChar & 0xff00) {
    aBuffer[i++] = 'u';
    aBuffer[i++] = hexChars[aChar >> 12]; // high-byte high nibble
    aBuffer[i++] = hexChars[(aChar >> 8) & 0xF]; // high-byte low nibble
  }
  aBuffer[i++] = hexChars[(aChar >> 4) & 0xF]; // low-byte high nibble
  aBuffer[i++] = hexChars[aChar & 0xF]; // low-byte low nibble
  return i;
}

//----------------------------------------------------------------------------------------
// Escapes the NUL-terminated string aStr according to aFlags and returns a
// buffer obtained from moz_xmalloc holding the NUL-terminated result. Bytes
// whose netCharType entry has the aFlags bit set are copied; every other byte
// becomes "%XX", except that under url_XPAlphas a space becomes '+'.
// If aOutLen is non-null it receives the result length without the
// terminator. Returns 0 when aStr is null, when the size computation
// overflows, or when more than UINT32_MAX bytes would be needed.
static char*
nsEscapeCount(const char* aStr, nsEscapeMask aFlags, size_t* aOutLen)
//----------------------------------------------------------------------------------------
{
  if (!aStr) {
    return 0;
  }

  // First pass: measure the input and count the bytes that need escaping.
  size_t len = 0;
  size_t charsToEscape = 0;
  const unsigned char* src = (const unsigned char*)aStr;
  while (*src) {
    len++;
    if (!IS_OK(*src++)) {
      charsToEscape++;
    }
  }

  // calculate how much memory should be allocated
  // original length + 2 bytes for each escaped character + terminating '\0'
  // do the sum in steps to check for overflow
  size_t dstSize = len + 1 + charsToEscape;
  if (dstSize <= len) {
    return 0;
  }
  dstSize += charsToEscape;
  if (dstSize < len) {
    return 0;
  }

  // fail if we need more than 4GB
  if (dstSize > UINT32_MAX) {
    return 0;
  }

  char* result = (char*)moz_xmalloc(dstSize);
  if (!result) {
    return 0;
  }

  // Second pass: emit the escaped text.
  unsigned char* dst = (unsigned char*)result;
  src = (const unsigned char*)aStr;
  if (aFlags == url_XPAlphas) {
    for (size_t i = 0; i < len; ++i) {
      unsigned char c = *src++;
      if (IS_OK(c)) {
        *dst++ = c;
      } else if (c == ' ') {
        *dst++ = '+'; /* convert spaces to pluses */
      } else {
        *dst++ = HEX_ESCAPE;
        *dst++ = hexChars[c >> 4];  /* high nibble */
        *dst++ = hexChars[c & 0x0f];  /* low nibble */
      }
    }
  } else {
    for (size_t i = 0; i < len; ++i) {
      unsigned char c = *src++;
      if (IS_OK(c)) {
        *dst++ = c;
      } else {
        *dst++ = HEX_ESCAPE;
        *dst++ = hexChars[c >> 4];  /* high nibble */
        *dst++ = hexChars[c & 0x0f];  /* low nibble */
      }
    }
  }

  *dst = '\0';     /* tack on eos */
  if (aOutLen) {
    *aOutLen = dst - (unsigned char*)result;
  }
  return result;
}

//----------------------------------------------------------------------------------------
// Public entry point: escapes aStr with nsEscapeCount, discarding the length.
// Returns nullptr when aStr is null or the escaped copy cannot be produced.
char*
nsEscape(const char* aStr, nsEscapeMask aFlags)
//----------------------------------------------------------------------------------------
{
  if (!aStr) {
    return nullptr;
  }
  return nsEscapeCount(aStr, aFlags, nullptr);
}

//----------------------------------------------------------------------------------------
// Decodes %XX sequences in aStr in place (see nsUnescapeCount) and returns
// aStr itself.
char*
nsUnescape(char* aStr)
//----------------------------------------------------------------------------------------
{
  nsUnescapeCount(aStr);
  return aStr;
}

//----------------------------------------------------------------------------------------
// Decodes %XX sequences in the NUL-terminated string aStr in place and
// returns the length of the decoded string. A '%' that is not followed by two
// hex digits is copied unchanged. aStr is dereferenced unconditionally, so it
// must not be null.
int32_t
nsUnescapeCount(char* aStr)
//----------------------------------------------------------------------------------------
{
  char* src = aStr;
  char* dst = aStr;
  // Accepts both cases; shadows the file-level upper-case-only table.
  static const char hexChars[] = "0123456789ABCDEFabcdef";

  // One-character scratch strings holding the two characters after *src, so
  // that PL_strpbrk can test each of them against hexChars.
  char c1[] = " ";
  char c2[] = " ";
  char* const pc1 = c1;
  char* const pc2 = c2;

  if (!*src) {
    // A null string was passed in.  Nothing to escape.
    // Returns early as the string might not actually be mutable with
    // length 0.
    return 0;
  }

  while (*src) {
    c1[0] = *(src + 1);
    if (*(src + 1) == '\0') {
      // Do not read past the terminator.
      c2[0] = '\0';
    } else {
      c2[0] = *(src + 2);
    }

    if (*src != HEX_ESCAPE || PL_strpbrk(pc1, hexChars) == 0 ||
        PL_strpbrk(pc2, hexChars) == 0) {
      *dst++ = *src++;
    } else {
      src++; /* walk over escape */
      if (*src) {
        *dst = UNHEX(*src) << 4;
        src++;
      }
      if (*src) {
        *dst = (*dst + UNHEX(*src));
        src++;
      }
      dst++;
    }
  }

  *dst = 0;
  return (int)(dst - aStr);

} /* NET_UnEscapeCnt */

// Returns a moz_xmalloc'ed, NUL-terminated copy of aString in which
// < > & " ' are replaced by &lt; &gt; &amp; &quot; &#39;.
// Returns nullptr when aString is null or when its length is at least
// UINT32_MAX / 6.
char*
nsEscapeHTML(const char* aString)
{
  if (!aString) {
    return nullptr;
  }

  char* rv = nullptr;
  /* XXX Hardcoded max entity len. The +1 is for the trailing null. */
  // Keep the length in a size_t: narrowing it to 32 bits before the range
  // check would let a >4GB input slip through and under-allocate the buffer.
  const size_t len = strlen(aString);
  if (len >= (UINT32_MAX / 6)) {
    return nullptr;
  }

  rv = (char*)moz_xmalloc((6 * len) + 1);
  char* ptr = rv;

  if (rv) {
    for (; *aString != '\0'; ++aString) {
      if (*aString == '<') {
        *ptr++ = '&';
        *ptr++ = 'l';
        *ptr++ = 't';
        *ptr++ = ';';
      } else if (*aString == '>') {
        *ptr++ = '&';
        *ptr++ = 'g';
        *ptr++ = 't';
        *ptr++ = ';';
      } else if (*aString == '&') {
        *ptr++ = '&';
        *ptr++ = 'a';
        *ptr++ = 'm';
        *ptr++ = 'p';
        *ptr++ = ';';
      } else if (*aString == '"') {
        *ptr++ = '&';
        *ptr++ = 'q';
        *ptr++ = 'u';
        *ptr++ = 'o';
        *ptr++ = 't';
        *ptr++ = ';';
      } else if (*aString == '\'') {
        *ptr++ = '&';
        *ptr++ = '#';
        *ptr++ = '3';
        *ptr++ = '9';
        *ptr++ = ';';
      } else {
        *ptr++ = *aString;
      }
    }
    *ptr = '\0';
  }

  return rv;
}

// UTF-16 counterpart of nsEscapeHTML. aSourceBufferLen is the number of code
// units to process; a negative value means "compute it with NS_strlen".
// Returns a moz_xmalloc'ed, zero-terminated buffer, or nullptr when the
// worst-case output size would not fit the 32-bit limit checked below.
char16_t*
nsEscapeHTML2(const char16_t* aSourceBuffer, int32_t aSourceBufferLen)
{
  // Calculate the length, if the caller didn't.
  if (aSourceBufferLen < 0) {
    aSourceBufferLen = NS_strlen(aSourceBuffer);
  }

  /* XXX Hardcoded max entity len. */
  if (uint32_t(aSourceBufferLen) >=
      ((UINT32_MAX - sizeof(char16_t)) / (6 * sizeof(char16_t)))) {
    return nullptr;
  }

  char16_t* resultBuffer = (char16_t*)moz_xmalloc(
    aSourceBufferLen * 6 * sizeof(char16_t) + sizeof(char16_t('\0')));
  char16_t* ptr = resultBuffer;

  if (resultBuffer) {
    int32_t i;

    for (i = 0; i < aSourceBufferLen; ++i) {
      if (aSourceBuffer[i] == '<') {
        *ptr++ = '&';
        *ptr++ = 'l';
        *ptr++ = 't';
        *ptr++ = ';';
      } else if (aSourceBuffer[i] == '>') {
        *ptr++ = '&';
        *ptr++ = 'g';
        *ptr++ = 't';
        *ptr++ = ';';
      } else if (aSourceBuffer[i] == '&') {
        *ptr++ = '&';
        *ptr++ = 'a';
        *ptr++ = 'm';
        *ptr++ = 'p';
        *ptr++ = ';';
      } else if (aSourceBuffer[i] == '"') {
        *ptr++ = '&';
        *ptr++ = 'q';
        *ptr++ = 'u';
        *ptr++ = 'o';
        *ptr++ = 't';
        *ptr++ = ';';
      } else if (aSourceBuffer[i] == '\'') {
        *ptr++ = '&';
        *ptr++ = '#';
        *ptr++ = '3';
        *ptr++ = '9';
        *ptr++ = ';';
      } else {
        *ptr++ = aSourceBuffer[i];
      }
    }
    *ptr = 0;
  }

  return resultBuffer;
}

//----------------------------------------------------------------------------------------
//
// The following table encodes which characters needs to be escaped for which
// parts of an URL.  The bits are the "url components" in the enum EscapeMask,
// see nsEscape.h.
//
// esc_Scheme        =     1
// esc_Username      =     2
// esc_Password      =     4
// esc_Host          =     8
// esc_Directory     =    16
// esc_FileBaseName  =    32
// esc_FileExtension =    64
// esc_Param         =   128
// esc_Query         =   256
// esc_Ref           =   512

static const uint32_t EscapeChars[256] =
//   0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
{
     0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  // 0x
     0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  // 1x
     0,1023,   0, 512,1023,   0,1023, 112,1023,1023,1023,1023,1023,1023, 953, 784,  // 2x   !"#$%&'()*+,-./
  1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008,1008,   0,1008,   0, 768,  // 3x  0123456789:;<=>?
  1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,  // 4x  @ABCDEFGHIJKLMNO
  1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896, 896, 896, 896,1023,  // 5x  PQRSTUVWXYZ[\]^_
     0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,  // 6x  `abcdefghijklmno
  1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023,   0,  // 7x  pqrstuvwxyz{|}~ DEL
     0                                                                              // 80 to FF are zero
};

// Non-zero when the 8-bit character aChar is allowed unescaped in any of the
// URL components selected by aFlags.
static uint16_t
dontNeedEscape(unsigned char aChar, uint32_t aFlags)
{
  return EscapeChars[(uint32_t)aChar] & aFlags;
}

// 16-bit flavour: code units outside the table's range always need escaping.
static uint16_t
dontNeedEscape(uint16_t aChar, uint32_t aFlags)
{
  return aChar < mozilla::ArrayLength(EscapeChars) ?
    (EscapeChars[(uint32_t)aChar] & aFlags) : 0;
}

//----------------------------------------------------------------------------------------

// Shared implementation of NS_EscapeURL for 8-bit and 16-bit strings.
// Scans aPartLen characters of aPart and appends the escaped form to aResult.
// Nothing is appended until the first character that needs escaping is seen,
// unless esc_AlwaysCopy is set. Returns true when something was written to
// aResult, false when the input needed no escaping (or aPart was null).
template<class T>
static bool
T_EscapeURL(const typename T::char_type* aPart, size_t aPartLen,
            uint32_t aFlags, T& aResult)
{
  typedef nsCharTraits<typename T::char_type> traits;
  typedef typename traits::unsigned_char_type unsigned_char_type;
  static_assert(sizeof(*aPart) == 1 || sizeof(*aPart) == 2,
                "unexpected char type");

  if (!aPart) {
    NS_NOTREACHED("null pointer");
    return false;
  }

  bool forced = !!(aFlags & esc_Forced);
  bool ignoreNonAscii = !!(aFlags & esc_OnlyASCII);
  bool ignoreAscii = !!(aFlags & esc_OnlyNonASCII);
  bool writing = !!(aFlags & esc_AlwaysCopy);
  bool colon = !!(aFlags & esc_Colon);

  auto src = reinterpret_cast<const unsigned_char_type*>(aPart);

  // Output is staged here and flushed to aResult in chunks.
  typename T::char_type tempBuffer[100];
  unsigned int tempBufferPos = 0;

  bool previousIsNonASCII = false;
  for (size_t i = 0; i < aPartLen; ++i) {
    unsigned_char_type c = *src++;

    // if the char has not to be escaped or whatever follows % is
    // a valid escaped string, just copy the char.
    //
    // Also the % will not be escaped until forced
    // See bugzilla bug 61269 for details why we changed this
    //
    // And, we will not escape non-ascii characters if requested.
    // On special request we will also escape the colon even when
    // not covered by the matrix.
    // ignoreAscii is not honored for control characters (C0 and DEL)
    //
    // And, we should escape the '|' character when it occurs after any
    // non-ASCII character as it may be part of a multi-byte character.
    //
    // 0x20..0x7e are the valid ASCII characters. We also escape spaces
    // (0x20) since they are not legal in URLs.
    if ((dontNeedEscape(c, aFlags) || (c == HEX_ESCAPE && !forced)
         || (c > 0x7f && ignoreNonAscii)
         || (c > 0x20 && c < 0x7f && ignoreAscii))
        && !(c == ':' && colon)
        && !(previousIsNonASCII && c == '|' && !ignoreNonAscii)) {
      if (writing) {
        tempBuffer[tempBufferPos++] = c;
      }
    } else { /* do the escape magic */
      if (!writing) {
        // First character that needs escaping: copy the untouched prefix.
        aResult.Append(aPart, i);
        writing = true;
      }
      uint32_t len = ::AppendPercentHex(tempBuffer + tempBufferPos, c);
      tempBufferPos += len;
      MOZ_ASSERT(len <= ENCODE_MAX_LEN, "potential buffer overflow");
    }

    // Flush the temp buffer if it doesn't have room for another encoded char.
    if (tempBufferPos >= mozilla::ArrayLength(tempBuffer) - ENCODE_MAX_LEN) {
      NS_ASSERTION(writing, "should be writing");
      aResult.Append(tempBuffer, tempBufferPos);
      tempBufferPos = 0;
    }

    previousIsNonASCII = (c > 0x7f);
  }
  if (writing) {
    aResult.Append(tempBuffer, tempBufferPos);
  }
  return writing;
}

// Escapes aPartLen bytes of aPart (or the whole NUL-terminated string when
// aPartLen is negative) for the URL component(s) in aFlags, appending to
// aResult. Returns true when aResult was written to.
bool
NS_EscapeURL(const char* aPart, int32_t aPartLen, uint32_t aFlags,
             nsACString& aResult)
{
  if (aPartLen < 0) {
    // Do not hand a null pointer to strlen; T_EscapeURL rejects null itself.
    aPartLen = aPart ? strlen(aPart) : 0;
  }
  return T_EscapeURL(aPart, aPartLen, aFlags, aResult);
}

// UTF-16 overload: returns aResult when escaping produced output, otherwise
// the original aStr.
const nsSubstring&
NS_EscapeURL(const nsSubstring& aStr, uint32_t aFlags, nsSubstring& aResult)
{
  if (T_EscapeURL(aStr.Data(), aStr.Length(), aFlags, aResult)) {
    return aResult;
  }
  return aStr;
}

// Starting at aStr[aStart] find the first index in aStr that matches any
// character in aForbidden. Return false if not found.
static bool FindFirstMatchFrom(const nsAFlatString& aStr, size_t aStart, const nsTArray& aForbidden, size_t* aIndex) { const size_t len = aForbidden.Length(); for (size_t j = aStart, l = aStr.Length(); j < l; ++j) { size_t unused; if (mozilla::BinarySearch(aForbidden, 0, len, aStr[j], &unused)) { *aIndex = j; return true; } } return false; } const nsSubstring& NS_EscapeURL(const nsAFlatString& aStr, const nsTArray& aForbidden, nsSubstring& aResult) { bool didEscape = false; for (size_t i = 0, len = aStr.Length(); i < len; ) { size_t j; if (MOZ_UNLIKELY(FindFirstMatchFrom(aStr, i, aForbidden, &j))) { if (i == 0) { didEscape = true; aResult.Truncate(); aResult.SetCapacity(aStr.Length()); } if (j != i) { // The substring from 'i' up to 'j' that needs no escaping. aResult.Append(nsDependentSubstring(aStr, i, j - i)); } char16_t buffer[ENCODE_MAX_LEN]; uint32_t len = ::AppendPercentHex(buffer, aStr[j]); MOZ_ASSERT(len <= ENCODE_MAX_LEN, "buffer overflow"); aResult.Append(buffer, len); i = j + 1; } else { if (MOZ_UNLIKELY(didEscape)) { // The tail of the string that needs no escaping. 
aResult.Append(nsDependentSubstring(aStr, i, len - i)); } break; } } if (MOZ_UNLIKELY(didEscape)) { return aResult; } return aStr; } #define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1) bool NS_UnescapeURL(const char* aStr, int32_t aLen, uint32_t aFlags, nsACString& aResult) { if (!aStr) { NS_NOTREACHED("null pointer"); return false; } if (aLen < 0) { aLen = strlen(aStr); } bool ignoreNonAscii = !!(aFlags & esc_OnlyASCII); bool ignoreAscii = !!(aFlags & esc_OnlyNonASCII); bool writing = !!(aFlags & esc_AlwaysCopy); bool skipControl = !!(aFlags & esc_SkipControl); static const char hexChars[] = "0123456789ABCDEFabcdef"; const char* last = aStr; const char* p = aStr; for (int i = 0; i < aLen; ++i, ++p) { //printf("%c [i=%d of aLen=%d]\n", *p, i, aLen); if (*p == HEX_ESCAPE && i < aLen - 2) { unsigned char* p1 = (unsigned char*)p + 1; unsigned char* p2 = (unsigned char*)p + 2; if (ISHEX(*p1) && ISHEX(*p2) && ((*p1 < '8' && !ignoreAscii) || (*p1 >= '8' && !ignoreNonAscii)) && !(skipControl && (*p1 < '2' || (*p1 == '7' && (*p2 == 'f' || *p2 == 'F'))))) { //printf("- p1=%c p2=%c\n", *p1, *p2); writing = true; if (p > last) { //printf("- p=%p, last=%p\n", p, last); aResult.Append(last, p - last); last = p; } char u = (UNHEX(*p1) << 4) + UNHEX(*p2); //printf("- u=%c\n", u); aResult.Append(u); i += 2; p += 2; last += 3; } } } if (writing && last < aStr + aLen) { aResult.Append(last, aStr + aLen - last); } return writing; }