mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-12-08 22:08:16 +00:00
c193518677
Differential Revision: https://phabricator.services.mozilla.com/D43957 --HG-- extra : moz-landing-system : lando
1319 lines
41 KiB
C++
1319 lines
41 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* vim: set sw=2 ts=8 et tw=80 : */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#include <string.h>
|
|
#include "prprf.h"
|
|
#include "plstr.h"
|
|
#include "prmem.h"
|
|
#include "plbase64.h"
|
|
#include "nsCRT.h"
|
|
#include "nsMemory.h"
|
|
#include "nsTArray.h"
|
|
#include "nsCOMPtr.h"
|
|
#include "nsEscape.h"
|
|
#include "nsIServiceManager.h"
|
|
#include "nsMIMEHeaderParamImpl.h"
|
|
#include "nsReadableUtils.h"
|
|
#include "nsNativeCharsetUtils.h"
|
|
#include "nsError.h"
|
|
#include "mozilla/Encoding.h"
|
|
#include "mozilla/TextUtils.h"
|
|
#include "mozilla/Utf8.h"
|
|
|
|
using mozilla::Encoding;
|
|
using mozilla::IsAscii;
|
|
using mozilla::IsUtf8;
|
|
|
|
// static functions declared below are moved from mailnews/mime/src/comi18n.cpp
|
|
|
|
static char* DecodeQ(const char*, uint32_t);
|
|
static bool Is7bitNonAsciiString(const char*, uint32_t);
|
|
static void CopyRawHeader(const char*, uint32_t, const nsACString&,
|
|
nsACString&);
|
|
static nsresult DecodeRFC2047Str(const char*, const nsACString&, bool,
|
|
nsACString&);
|
|
static nsresult internalDecodeParameter(const nsACString&, const nsACString&,
|
|
const nsACString&, bool, bool,
|
|
nsACString&);
|
|
|
|
// Decodes aString from the charset labelled aCharset into UTF-8 and stores
// the output in aResult.
//
// Returns NS_ERROR_INVALID_ARG when aCharset is empty and
// NS_ERROR_UCONV_NOCONV when Encoding::ForLabelNoReplacement() yields no
// encoding for the label. With aAllowSubstitution set, the replacing decoder
// is used and any success code it reports is normalised to NS_OK; otherwise
// the result of the non-replacing decoder is passed through unchanged.
static nsresult ToUTF8(const nsACString& aString, const nsACString& aCharset,
                       bool aAllowSubstitution, nsACString& aResult) {
  if (aCharset.IsEmpty()) {
    return NS_ERROR_INVALID_ARG;
  }

  auto encoding = Encoding::ForLabelNoReplacement(aCharset);
  if (!encoding) {
    return NS_ERROR_UCONV_NOCONV;
  }

  if (!aAllowSubstitution) {
    // Strict mode: the decoder's own status is the answer.
    return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aString,
                                                                   aResult);
  }

  // Lenient mode: collapse every success code into plain NS_OK.
  const nsresult status = encoding->DecodeWithoutBOMHandling(aString, aResult);
  return NS_SUCCEEDED(status) ? NS_OK : status;
}
|
|
|
|
// Converts aString (assumed to be in aCharset) to UTF-8 in aUTF8String.
//
// aSkipCheck          - when false, input that is already pure ASCII or valid
//                       UTF-8 is copied through untouched. Callers pass true
//                       when they suspect a 7-bit non-ASCII charset
//                       (ISO-2022-xx, HZ) or a charset whose bytes could be
//                       mistaken for UTF-8.
// aAllowSubstitution  - forwarded to ToUTF8(); selects the replacing decoder.
//
// Returns NS_OK on success, otherwise whatever ToUTF8() reported.
static nsresult ConvertStringToUTF8(const nsACString& aString,
                                    const nsACString& aCharset, bool aSkipCheck,
                                    bool aAllowSubstitution,
                                    nsACString& aUTF8String) {
  if (!aSkipCheck) {
    if (IsAscii(aString) || IsUtf8(aString)) {
      // Nothing to convert.
      aUTF8String = aString;
      return NS_OK;
    }
  }

  aUTF8String.Truncate();

  const nsresult status =
      ToUTF8(aString, aCharset, aAllowSubstitution, aUTF8String);

  // The remaining code is a safety net for the "check skipped, conversion
  // failed" case only: the caller's hunch about the charset may have been
  // wrong and the input may really be UTF-8. ASCII-ness is not re-checked
  // because no supported charset is ASCII-incompatible (no EBCDIC).
  if (NS_SUCCEEDED(status) || !aSkipCheck || !IsUtf8(aString)) {
    return status;
  }

  aUTF8String = aString;
  return NS_OK;
}
|
|
|
|
// True when the C string |cset| starts (case-insensitively) with "ISO-2022",
// "HZ-GB" or "UTF-7", i.e. names a charset whose encoded form is 7-bit but is
// not plain ASCII. |cset| is evaluated up to three times.
// XXX The chance of UTF-7 being used in a message header is really low, but
// in theory it's possible.
#define IS_7BIT_NON_ASCII_CHARSET(cset)              \
  (nsCRT::strncasecmp((cset), "ISO-2022", 8) == 0 || \
   nsCRT::strncasecmp((cset), "HZ-GB", 5) == 0 ||    \
   nsCRT::strncasecmp((cset), "UTF-7", 5) == 0)
|
|
|
|
// XPCOM boilerplate for nsMIMEHeaderParamImpl, naming nsIMIMEHeaderParam as
// the interface it implements.
NS_IMPL_ISUPPORTS(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam)
|
|
|
|
// Extracts parameter aParamName from aHeaderVal and returns it as UTF-16 in
// aResult, decoding with the MIME_FIELD_ENCODING rules. All arguments are
// forwarded unchanged to DoGetParameter(), which does the actual work.
NS_IMETHODIMP
nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal,
                                    const char* aParamName,
                                    const nsACString& aFallbackCharset,
                                    bool aTryLocaleCharset, char** aLang,
                                    nsAString& aResult) {
  const ParamDecoding decoding = MIME_FIELD_ENCODING;
  return DoGetParameter(aHeaderVal, aParamName, decoding, aFallbackCharset,
                        aTryLocaleCharset, aLang, aResult);
}
|
|
|
|
// Same as GetParameter(), but decodes with the HTTP_FIELD_ENCODING rules.
// All arguments are forwarded unchanged to DoGetParameter().
NS_IMETHODIMP
nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal,
                                        const char* aParamName,
                                        const nsACString& aFallbackCharset,
                                        bool aTryLocaleCharset, char** aLang,
                                        nsAString& aResult) {
  const ParamDecoding decoding = HTTP_FIELD_ENCODING;
  return DoGetParameter(aHeaderVal, aParamName, decoding, aFallbackCharset,
                        aTryLocaleCharset, aLang, aResult);
}
|
|
|
|
/* static */
|
|
nsresult nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal,
|
|
const char* aParamName,
|
|
nsAString& aResult) {
|
|
return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING,
|
|
EmptyCString(), false, nullptr, aResult);
|
|
}
|
|
|
|
// XXX : aTryLocaleCharset is not yet effective.
|
|
/* static */
|
|
nsresult nsMIMEHeaderParamImpl::DoGetParameter(
|
|
const nsACString& aHeaderVal, const char* aParamName,
|
|
ParamDecoding aDecoding, const nsACString& aFallbackCharset,
|
|
bool aTryLocaleCharset, char** aLang, nsAString& aResult) {
|
|
aResult.Truncate();
|
|
nsresult rv;
|
|
|
|
// get parameter (decode RFC 2231/5987 when applicable, as specified by
|
|
// aDecoding (5987 being a subset of 2231) and return charset.)
|
|
nsCString med;
|
|
nsCString charset;
|
|
rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName,
|
|
aDecoding, getter_Copies(charset), aLang,
|
|
getter_Copies(med));
|
|
if (NS_FAILED(rv)) return rv;
|
|
|
|
// convert to UTF-8 after charset conversion and RFC 2047 decoding
|
|
// if necessary.
|
|
|
|
nsAutoCString str1;
|
|
rv = internalDecodeParameter(med, charset, EmptyCString(), false,
|
|
// was aDecoding == MIME_FIELD_ENCODING
|
|
// see bug 875615
|
|
true, str1);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
if (!aFallbackCharset.IsEmpty()) {
|
|
const Encoding* encoding = Encoding::ForLabel(aFallbackCharset);
|
|
nsAutoCString str2;
|
|
if (NS_SUCCEEDED(ConvertStringToUTF8(str1, aFallbackCharset, false,
|
|
encoding != UTF_8_ENCODING, str2))) {
|
|
CopyUTF8toUTF16(str2, aResult);
|
|
return NS_OK;
|
|
}
|
|
}
|
|
|
|
if (IsUtf8(str1)) {
|
|
CopyUTF8toUTF16(str1, aResult);
|
|
return NS_OK;
|
|
}
|
|
|
|
if (aTryLocaleCharset && !NS_IsNativeUTF8())
|
|
return NS_CopyNativeToUnicode(str1, aResult);
|
|
|
|
CopyASCIItoUTF16(str1, aResult);
|
|
return NS_OK;
|
|
}
|
|
|
|
// Strips quoted-string backslash escapes from the NUL-terminated string |src|
// in place: every backslash that is followed by another character is dropped
// and that following character is kept verbatim. A backslash that is the very
// last character is kept. The string can only get shorter.
void RemoveQuotedStringEscapes(char* src) {
  const char* reader = src;
  char* writer = src;

  while (*reader) {
    if (*reader == '\\' && reader[1] != '\0') {
      // Drop the backslash; the escaped character is copied below.
      ++reader;
    }
    // |writer| never runs ahead of |reader|, so this cannot clobber input
    // that has not been read yet.
    *writer++ = *reader++;
  }

  *writer = '\0';
}
|
|
|
|
// Returns true if aChar is an ASCII hexadecimal digit (0-9, a-f, A-F).
bool IsHexDigit(char aChar) {
  const bool isDecimal = aChar >= '0' && aChar <= '9';
  const bool isLowerHex = aChar >= 'a' && aChar <= 'f';
  const bool isUpperHex = aChar >= 'A' && aChar <= 'F';
  return isDecimal || isLowerHex || isUpperHex;
}

// Checks that the first |len| bytes of aValue are syntactically valid
// percent-encoded text: every '%' must be followed by two hexadecimal digits
// that also lie inside those |len| bytes.
//
// An escape cut off by the end of the buffer ("...%4" or "...%") is rejected
// without looking at aValue[len] or beyond, so the function is safe for
// buffers that are not NUL-terminated and for sub-ranges of a longer string.
//
// Returns true for an empty range (len <= 0).
bool IsValidPercentEscaped(const char* aValue, int32_t len) {
  for (int32_t i = 0; i < len; i++) {
    if (aValue[i] != '%') {
      continue;
    }

    // Both hex digits must be within [0, len).
    if (len - i < 3) {
      return false;
    }

    if (!IsHexDigit(aValue[i + 1]) || !IsHexDigit(aValue[i + 2])) {
      return false;
    }
  }
  return true;
}
|
|
|
|
// Support for continuations (RFC 2231, Section 3)
|
|
|
|
// only a sane number supported
|
|
// Largest continuation segment index accepted by parseSegmentNumber() and
// addContinuation(); anything above it is rejected.
#define MAX_CONTINUATIONS 999
|
|
|
|
// One segment of an RFC 2231 parameter continuation.
//
// |value| points at |length| bytes of segment text; the class never frees it
// (the destructor is defaulted). The two flags record which post-processing
// the segment needs when the pieces are joined: percent-decoding and/or
// removal of quoted-string backslash escapes.
class Continuation {
 public:
  Continuation(const char* aValue, uint32_t aLength, bool aNeedsPercentDecoding,
               bool aWasQuotedString)
      : value(aValue),
        length(aLength),
        needsPercentDecoding(aNeedsPercentDecoding),
        wasQuotedString(aWasQuotedString) {}

  // Default constructor, needed for nsTArray. Produces an unused slot
  // (null value, zero length, both flags off).
  Continuation()
      : value(nullptr),
        length(0),
        needsPercentDecoding(false),
        wasQuotedString(false) {}

  ~Continuation() = default;

  const char* value;
  uint32_t length;
  bool needsPercentDecoding;
  bool wasQuotedString;
};
|
|
|
|
// combine segments into a single string, returning the allocated string
|
|
// (or nullptr) while emptying the list
|
|
char* combineContinuations(nsTArray<Continuation>& aArray) {
|
|
// Sanity check
|
|
if (aArray.Length() == 0) return nullptr;
|
|
|
|
// Get an upper bound for the length
|
|
uint32_t length = 0;
|
|
for (uint32_t i = 0; i < aArray.Length(); i++) {
|
|
length += aArray[i].length;
|
|
}
|
|
|
|
// Allocate
|
|
char* result = (char*)moz_xmalloc(length + 1);
|
|
|
|
// Concatenate
|
|
*result = '\0';
|
|
|
|
for (uint32_t i = 0; i < aArray.Length(); i++) {
|
|
Continuation cont = aArray[i];
|
|
if (!cont.value) break;
|
|
|
|
char* c = result + strlen(result);
|
|
strncat(result, cont.value, cont.length);
|
|
if (cont.needsPercentDecoding) {
|
|
nsUnescape(c);
|
|
}
|
|
if (cont.wasQuotedString) {
|
|
RemoveQuotedStringEscapes(c);
|
|
}
|
|
}
|
|
|
|
// return null if empty value
|
|
if (*result == '\0') {
|
|
free(result);
|
|
result = nullptr;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
// add a continuation, return false on error if segment already has been seen
|
|
bool addContinuation(nsTArray<Continuation>& aArray, uint32_t aIndex,
|
|
const char* aValue, uint32_t aLength,
|
|
bool aNeedsPercentDecoding, bool aWasQuotedString) {
|
|
if (aIndex < aArray.Length() && aArray[aIndex].value) {
|
|
NS_WARNING("duplicate RC2231 continuation segment #\n");
|
|
return false;
|
|
}
|
|
|
|
if (aIndex > MAX_CONTINUATIONS) {
|
|
NS_WARNING("RC2231 continuation segment # exceeds limit\n");
|
|
return false;
|
|
}
|
|
|
|
if (aNeedsPercentDecoding && aWasQuotedString) {
|
|
NS_WARNING(
|
|
"RC2231 continuation segment can't use percent encoding and quoted "
|
|
"string form at the same time\n");
|
|
return false;
|
|
}
|
|
|
|
Continuation cont(aValue, aLength, aNeedsPercentDecoding, aWasQuotedString);
|
|
|
|
if (aArray.Length() <= aIndex) {
|
|
aArray.SetLength(aIndex + 1);
|
|
}
|
|
aArray[aIndex] = cont;
|
|
|
|
return true;
|
|
}
|
|
|
|
// parse a segment number; return -1 on error
|
|
int32_t parseSegmentNumber(const char* aValue, int32_t aLen) {
|
|
if (aLen < 1) {
|
|
NS_WARNING("segment number missing\n");
|
|
return -1;
|
|
}
|
|
|
|
if (aLen > 1 && aValue[0] == '0') {
|
|
NS_WARNING("leading '0' not allowed in segment number\n");
|
|
return -1;
|
|
}
|
|
|
|
int32_t segmentNumber = 0;
|
|
|
|
for (int32_t i = 0; i < aLen; i++) {
|
|
if (!(aValue[i] >= '0' && aValue[i] <= '9')) {
|
|
NS_WARNING("invalid characters in segment number\n");
|
|
return -1;
|
|
}
|
|
|
|
segmentNumber *= 10;
|
|
segmentNumber += aValue[i] - '0';
|
|
if (segmentNumber > MAX_CONTINUATIONS) {
|
|
NS_WARNING("Segment number exceeds sane size\n");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
return segmentNumber;
|
|
}
|
|
|
|
// validate a given octet sequence for compliance with the specified
|
|
// encoding
|
|
bool IsValidOctetSequenceForCharset(const nsACString& aCharset,
|
|
const char* aOctets) {
|
|
nsAutoCString tmpRaw;
|
|
tmpRaw.Assign(aOctets);
|
|
nsAutoCString tmpDecoded;
|
|
|
|
nsresult rv = ConvertStringToUTF8(tmpRaw, aCharset, false, false, tmpDecoded);
|
|
|
|
if (rv != NS_OK) {
|
|
// we can't decode; charset may be unsupported, or the octet sequence
|
|
// is broken (illegal or incomplete octet sequence contained)
|
|
NS_WARNING(
|
|
"RFC2231/5987 parameter value does not decode according to specified "
|
|
"charset\n");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// moved almost verbatim from mimehdrs.cpp
|
|
// char *
|
|
// MimeHeaders_get_parameter (const char *header_value, const char *parm_name,
|
|
// char **charset, char **language)
|
|
//
|
|
// The format of these header lines is
|
|
// <token> [ ';' <token> '=' <token-or-quoted-string> ]*
|
|
NS_IMETHODIMP
|
|
nsMIMEHeaderParamImpl::GetParameterInternal(const char* aHeaderValue,
|
|
const char* aParamName,
|
|
char** aCharset, char** aLang,
|
|
char** aResult) {
|
|
return DoParameterInternal(aHeaderValue, aParamName, MIME_FIELD_ENCODING,
|
|
aCharset, aLang, aResult);
|
|
}
|
|
|
|
/* static */
|
|
nsresult nsMIMEHeaderParamImpl::DoParameterInternal(
|
|
const char* aHeaderValue, const char* aParamName, ParamDecoding aDecoding,
|
|
char** aCharset, char** aLang, char** aResult) {
|
|
if (!aHeaderValue || !*aHeaderValue || !aResult) return NS_ERROR_INVALID_ARG;
|
|
|
|
*aResult = nullptr;
|
|
|
|
if (aCharset) *aCharset = nullptr;
|
|
if (aLang) *aLang = nullptr;
|
|
|
|
nsAutoCString charset;
|
|
|
|
// change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable
|
|
// them for HTTP header fields later on, see bug 776324
|
|
bool acceptContinuations = true;
|
|
|
|
const char* str = aHeaderValue;
|
|
|
|
// skip leading white space.
|
|
for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
|
|
;
|
|
const char* start = str;
|
|
|
|
// aParamName is empty. return the first (possibly) _unnamed_ 'parameter'
|
|
// For instance, return 'inline' in the following case:
|
|
// Content-Disposition: inline; filename=.....
|
|
if (!aParamName || !*aParamName) {
|
|
for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str)
|
|
;
|
|
if (str == start) return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY;
|
|
|
|
*aResult = (char*)moz_xmemdup(start, (str - start) + 1);
|
|
(*aResult)[str - start] = '\0'; // null-terminate
|
|
return NS_OK;
|
|
}
|
|
|
|
/* Skip forward to first ';' */
|
|
for (; *str && *str != ';' && *str != ','; ++str)
|
|
;
|
|
if (*str) str++;
|
|
/* Skip over following whitespace */
|
|
for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
|
|
;
|
|
|
|
// Some broken http servers just specify parameters
|
|
// like 'filename' without specifying disposition
|
|
// method. Rewind to the first non-white-space
|
|
// character.
|
|
|
|
if (!*str) str = start;
|
|
|
|
// RFC2231 - The legitimate parm format can be:
|
|
// A. title=ThisIsTitle
|
|
// B. title*=us-ascii'en-us'This%20is%20wierd.
|
|
// C. title*0*=us-ascii'en'This%20is%20wierd.%20We
|
|
// title*1*=have%20to%20support%20this.
|
|
// title*2="Else..."
|
|
// D. title*0="Hey, what you think you are doing?"
|
|
// title*1="There is no charset and lang info."
|
|
// RFC5987: only A and B
|
|
|
|
// collect results for the different algorithms (plain filename,
|
|
// RFC5987/2231-encoded filename, + continuations) separately and decide
|
|
// which to use at the end
|
|
char* caseAResult = nullptr;
|
|
char* caseBResult = nullptr;
|
|
char* caseCDResult = nullptr;
|
|
|
|
// collect continuation segments
|
|
nsTArray<Continuation> segments;
|
|
|
|
// our copies of the charset parameter, kept separately as they might
|
|
// differ for the two formats
|
|
nsDependentCSubstring charsetB, charsetCD;
|
|
|
|
nsDependentCSubstring lang;
|
|
|
|
int32_t paramLen = strlen(aParamName);
|
|
|
|
while (*str) {
|
|
// find name/value
|
|
|
|
const char* nameStart = str;
|
|
const char* nameEnd = nullptr;
|
|
const char* valueStart = nullptr;
|
|
const char* valueEnd = nullptr;
|
|
bool isQuotedString = false;
|
|
|
|
NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace.");
|
|
|
|
// Skip forward to the end of this token.
|
|
for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';';
|
|
str++)
|
|
;
|
|
nameEnd = str;
|
|
|
|
int32_t nameLen = nameEnd - nameStart;
|
|
|
|
// Skip over whitespace, '=', and whitespace
|
|
while (nsCRT::IsAsciiSpace(*str)) ++str;
|
|
if (!*str) {
|
|
break;
|
|
}
|
|
if (*str != '=') {
|
|
// don't accept parameters without "="
|
|
goto increment_str;
|
|
}
|
|
// Skip over '=' only if it was actually there
|
|
str++;
|
|
while (nsCRT::IsAsciiSpace(*str)) ++str;
|
|
|
|
if (*str != '"') {
|
|
// The value is a token, not a quoted string.
|
|
valueStart = str;
|
|
for (valueEnd = str;
|
|
*valueEnd && !nsCRT::IsAsciiSpace(*valueEnd) && *valueEnd != ';';
|
|
valueEnd++)
|
|
;
|
|
str = valueEnd;
|
|
} else {
|
|
isQuotedString = true;
|
|
|
|
++str;
|
|
valueStart = str;
|
|
for (valueEnd = str; *valueEnd; ++valueEnd) {
|
|
if (*valueEnd == '\\' && *(valueEnd + 1))
|
|
++valueEnd;
|
|
else if (*valueEnd == '"')
|
|
break;
|
|
}
|
|
str = valueEnd;
|
|
// *valueEnd != null means that *valueEnd is quote character.
|
|
if (*valueEnd) str++;
|
|
}
|
|
|
|
// See if this is the simplest case (case A above),
|
|
// a 'single' line value with no charset and lang.
|
|
// If so, copy it and return.
|
|
if (nameLen == paramLen &&
|
|
!nsCRT::strncasecmp(nameStart, aParamName, paramLen)) {
|
|
if (caseAResult) {
|
|
// we already have one caseA result, ignore subsequent ones
|
|
goto increment_str;
|
|
}
|
|
|
|
// if the parameter spans across multiple lines we have to strip out the
|
|
// line continuation -- jht 4/29/98
|
|
nsAutoCString tempStr(valueStart, valueEnd - valueStart);
|
|
tempStr.StripCRLF();
|
|
char* res = ToNewCString(tempStr);
|
|
NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY);
|
|
|
|
if (isQuotedString) RemoveQuotedStringEscapes(res);
|
|
|
|
caseAResult = res;
|
|
// keep going, we may find a RFC 2231/5987 encoded alternative
|
|
}
|
|
// case B, C, and D
|
|
else if (nameLen > paramLen &&
|
|
!nsCRT::strncasecmp(nameStart, aParamName, paramLen) &&
|
|
*(nameStart + paramLen) == '*') {
|
|
// 1st char past '*'
|
|
const char* cp = nameStart + paramLen + 1;
|
|
|
|
// if param name ends in "*" we need do to RFC5987 "ext-value" decoding
|
|
bool needExtDecoding = *(nameEnd - 1) == '*';
|
|
|
|
bool caseB = nameLen == paramLen + 1;
|
|
bool caseCStart = (*cp == '0') && needExtDecoding;
|
|
|
|
// parse the segment number
|
|
int32_t segmentNumber = -1;
|
|
if (!caseB) {
|
|
int32_t segLen = (nameEnd - cp) - (needExtDecoding ? 1 : 0);
|
|
segmentNumber = parseSegmentNumber(cp, segLen);
|
|
|
|
if (segmentNumber == -1) {
|
|
acceptContinuations = false;
|
|
goto increment_str;
|
|
}
|
|
}
|
|
|
|
// CaseB and start of CaseC: requires charset and optional language
|
|
// in quotes (quotes required even if lang is blank)
|
|
if (caseB || (caseCStart && acceptContinuations)) {
|
|
// look for single quotation mark(')
|
|
const char* sQuote1 = PL_strchr(valueStart, 0x27);
|
|
const char* sQuote2 = sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nullptr;
|
|
|
|
// Two single quotation marks must be present even in
|
|
// absence of charset and lang.
|
|
if (!sQuote1 || !sQuote2) {
|
|
NS_WARNING(
|
|
"Mandatory two single quotes are missing in header parameter\n");
|
|
}
|
|
|
|
const char* charsetStart = nullptr;
|
|
int32_t charsetLength = 0;
|
|
const char* langStart = nullptr;
|
|
int32_t langLength = 0;
|
|
const char* rawValStart = nullptr;
|
|
int32_t rawValLength = 0;
|
|
|
|
if (sQuote2 && sQuote1) {
|
|
// both delimiters present: charSet'lang'rawVal
|
|
rawValStart = sQuote2 + 1;
|
|
rawValLength = valueEnd - rawValStart;
|
|
|
|
langStart = sQuote1 + 1;
|
|
langLength = sQuote2 - langStart;
|
|
|
|
charsetStart = valueStart;
|
|
charsetLength = sQuote1 - charsetStart;
|
|
} else if (sQuote1) {
|
|
// one delimiter; assume charset'rawVal
|
|
rawValStart = sQuote1 + 1;
|
|
rawValLength = valueEnd - rawValStart;
|
|
|
|
charsetStart = valueStart;
|
|
charsetLength = sQuote1 - valueStart;
|
|
} else {
|
|
// no delimiter: just rawVal
|
|
rawValStart = valueStart;
|
|
rawValLength = valueEnd - valueStart;
|
|
}
|
|
|
|
if (langLength != 0) {
|
|
lang.Assign(langStart, langLength);
|
|
}
|
|
|
|
// keep the charset for later
|
|
if (caseB) {
|
|
charsetB.Assign(charsetStart, charsetLength);
|
|
} else {
|
|
// if caseCorD
|
|
charsetCD.Assign(charsetStart, charsetLength);
|
|
}
|
|
|
|
// non-empty value part
|
|
if (rawValLength > 0) {
|
|
if (!caseBResult && caseB) {
|
|
if (!IsValidPercentEscaped(rawValStart, rawValLength)) {
|
|
goto increment_str;
|
|
}
|
|
|
|
// allocate buffer for the raw value
|
|
char* tmpResult = (char*)moz_xmemdup(rawValStart, rawValLength + 1);
|
|
*(tmpResult + rawValLength) = 0;
|
|
|
|
nsUnescape(tmpResult);
|
|
caseBResult = tmpResult;
|
|
} else {
|
|
// caseC
|
|
bool added = addContinuation(segments, 0, rawValStart, rawValLength,
|
|
needExtDecoding, isQuotedString);
|
|
|
|
if (!added) {
|
|
// continuation not added, stop processing them
|
|
acceptContinuations = false;
|
|
}
|
|
}
|
|
}
|
|
} // end of if-block : title*0*= or title*=
|
|
// caseD: a line of multiline param with no need for unescaping :
|
|
// title*[0-9]= or 2nd or later lines of a caseC param : title*[1-9]*=
|
|
else if (acceptContinuations && segmentNumber != -1) {
|
|
uint32_t valueLength = valueEnd - valueStart;
|
|
|
|
bool added =
|
|
addContinuation(segments, segmentNumber, valueStart, valueLength,
|
|
needExtDecoding, isQuotedString);
|
|
|
|
if (!added) {
|
|
// continuation not added, stop processing them
|
|
acceptContinuations = false;
|
|
}
|
|
} // end of if-block : title*[0-9]= or title*[1-9]*=
|
|
}
|
|
|
|
// str now points after the end of the value.
|
|
// skip over whitespace, ';', whitespace.
|
|
increment_str:
|
|
while (nsCRT::IsAsciiSpace(*str)) ++str;
|
|
if (*str == ';') {
|
|
++str;
|
|
} else {
|
|
// stop processing the header field; either we are done or the
|
|
// separator was missing
|
|
break;
|
|
}
|
|
while (nsCRT::IsAsciiSpace(*str)) ++str;
|
|
}
|
|
|
|
caseCDResult = combineContinuations(segments);
|
|
|
|
if (caseBResult && !charsetB.IsEmpty()) {
|
|
// check that the 2231/5987 result decodes properly given the
|
|
// specified character set
|
|
if (!IsValidOctetSequenceForCharset(charsetB, caseBResult))
|
|
caseBResult = nullptr;
|
|
}
|
|
|
|
if (caseCDResult && !charsetCD.IsEmpty()) {
|
|
// check that the 2231/5987 result decodes properly given the
|
|
// specified character set
|
|
if (!IsValidOctetSequenceForCharset(charsetCD, caseCDResult))
|
|
caseCDResult = nullptr;
|
|
}
|
|
|
|
if (caseBResult) {
|
|
// prefer simple 5987 format over 2231 with continuations
|
|
*aResult = caseBResult;
|
|
caseBResult = nullptr;
|
|
charset.Assign(charsetB);
|
|
} else if (caseCDResult) {
|
|
// prefer 2231/5987 with or without continuations over plain format
|
|
*aResult = caseCDResult;
|
|
caseCDResult = nullptr;
|
|
charset.Assign(charsetCD);
|
|
} else if (caseAResult) {
|
|
*aResult = caseAResult;
|
|
caseAResult = nullptr;
|
|
}
|
|
|
|
// free unused stuff
|
|
free(caseAResult);
|
|
free(caseBResult);
|
|
free(caseCDResult);
|
|
|
|
// if we have a result
|
|
if (*aResult) {
|
|
// then return charset and lang as well
|
|
if (aLang && !lang.IsEmpty()) {
|
|
uint32_t len = lang.Length();
|
|
*aLang = (char*)moz_xmemdup(lang.BeginReading(), len + 1);
|
|
*(*aLang + len) = 0;
|
|
}
|
|
if (aCharset && !charset.IsEmpty()) {
|
|
uint32_t len = charset.Length();
|
|
*aCharset = (char*)moz_xmemdup(charset.BeginReading(), len + 1);
|
|
*(*aCharset + len) = 0;
|
|
}
|
|
}
|
|
|
|
return *aResult ? NS_OK : NS_ERROR_INVALID_ARG;
|
|
}
|
|
|
|
// Decodes a header value that may contain RFC 2047 encoded-words and stores
// the outcome in aResult (truncated first).
//
// RFC 2047 decoding (DecodeRFC2047Str) runs when the value contains "=?", or
// when aDefaultCharset is given and the value is either not valid UTF-8 or
// looks like a 7-bit non-ASCII (ISO-2022 / HZ) string. Otherwise the value is
// copied as is.
//
// When aEatContinuations is set, "\n\t" and "\r\t" are replaced by a single
// space and remaining CR/LF characters are stripped; for a plain copy this is
// only done if the value actually contains a CR or LF.
//
// Returns NS_ERROR_INVALID_ARG for a null aHeaderVal, NS_OK otherwise. Note
// that the status of DecodeRFC2047Str() is not inspected.
nsresult internalDecodeRFC2047Header(const char* aHeaderVal,
                                     const nsACString& aDefaultCharset,
                                     bool aOverrideCharset,
                                     bool aEatContinuations,
                                     nsACString& aResult) {
  aResult.Truncate();
  if (!aHeaderVal) {
    return NS_ERROR_INVALID_ARG;
  }
  if (!*aHeaderVal) {
    return NS_OK;
  }

  const bool needsDecoding =
      PL_strstr(aHeaderVal, "=?") ||
      (!aDefaultCharset.IsEmpty() &&
       (!IsUtf8(nsDependentCString(aHeaderVal)) ||
        Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal))));

  if (needsDecoding) {
    DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
  } else {
    aResult = aHeaderVal;

    // Unfolding a plain copy is pointless unless there is a line break.
    const bool hasLineBreak =
        PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r');
    if (!hasLineBreak) {
      aEatContinuations = false;
    }
  }

  if (aEatContinuations) {
    nsAutoCString unfolded(aResult);
    unfolded.ReplaceSubstring("\n\t", " ");
    unfolded.ReplaceSubstring("\r\t", " ");
    unfolded.StripCRLF();
    aResult = unfolded;
  }

  return NS_OK;
}
|
|
|
|
// XPCOM wrapper around internalDecodeRFC2047Header(): wraps the C-string
// default charset in an nsCString and forwards everything else unchanged.
NS_IMETHODIMP
nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal,
                                           const char* aDefaultCharset,
                                           bool aOverrideCharset,
                                           bool aEatContinuations,
                                           nsACString& aResult) {
  const nsCString defaultCharset(aDefaultCharset);
  return internalDecodeRFC2047Header(aHeaderVal, defaultCharset,
                                     aOverrideCharset, aEatContinuations,
                                     aResult);
}
|
|
|
|
// Returns true if aChar may appear unescaped in an RFC 5987 value, i.e. it
// is an "attr-char" (RFC 5987, Section 3.2.1): an ASCII letter, an ASCII
// digit, or one of  ! # $ & + - . ^ _ ` | ~
bool IsRFC5987AttrChar(char aChar) {
  if (aChar >= 'a' && aChar <= 'z') {
    return true;
  }
  if (aChar >= 'A' && aChar <= 'Z') {
    return true;
  }
  if (aChar >= '0' && aChar <= '9') {
    return true;
  }

  switch (aChar) {
    case '!':
    case '#':
    case '$':
    case '&':
    case '+':
    case '-':
    case '.':
    case '^':
    case '_':
    case '`':
    case '|':
    case '~':
      return true;
    default:
      return false;
  }
}
|
|
|
|
// percent-decode a value
|
|
// returns false on failure
|
|
bool PercentDecode(nsACString& aValue) {
|
|
char* c = (char*)moz_xmalloc(aValue.Length() + 1);
|
|
|
|
strcpy(c, PromiseFlatCString(aValue).get());
|
|
nsUnescape(c);
|
|
aValue.Assign(c);
|
|
free(c);
|
|
|
|
return true;
|
|
}
|
|
|
|
// Decode a parameter value using the encoding defined in RFC 5987
|
|
//
|
|
// charset "'" [ language ] "'" value-chars
|
|
NS_IMETHODIMP
|
|
nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString& aParamVal,
|
|
nsACString& aLang,
|
|
nsAString& aResult) {
|
|
nsAutoCString charset;
|
|
nsAutoCString language;
|
|
nsAutoCString value;
|
|
|
|
uint32_t delimiters = 0;
|
|
const nsCString& encoded = PromiseFlatCString(aParamVal);
|
|
const char* c = encoded.get();
|
|
|
|
while (*c) {
|
|
char tc = *c++;
|
|
|
|
if (tc == '\'') {
|
|
// single quote
|
|
delimiters++;
|
|
} else if (((unsigned char)tc) >= 128) {
|
|
// fail early, not ASCII
|
|
NS_WARNING("non-US-ASCII character in RFC5987-encoded param");
|
|
return NS_ERROR_INVALID_ARG;
|
|
} else {
|
|
if (delimiters == 0) {
|
|
// valid characters are checked later implicitly
|
|
charset.Append(tc);
|
|
} else if (delimiters == 1) {
|
|
// no value checking for now
|
|
language.Append(tc);
|
|
} else if (delimiters == 2) {
|
|
if (IsRFC5987AttrChar(tc)) {
|
|
value.Append(tc);
|
|
} else if (tc == '%') {
|
|
if (!IsHexDigit(c[0]) || !IsHexDigit(c[1])) {
|
|
// we expect two more characters
|
|
NS_WARNING("broken %-escape in RFC5987-encoded param");
|
|
return NS_ERROR_INVALID_ARG;
|
|
}
|
|
value.Append(tc);
|
|
// we consume two more
|
|
value.Append(*c++);
|
|
value.Append(*c++);
|
|
} else {
|
|
// character not allowed here
|
|
NS_WARNING("invalid character in RFC5987-encoded param");
|
|
return NS_ERROR_INVALID_ARG;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (delimiters != 2) {
|
|
NS_WARNING("missing delimiters in RFC5987-encoded param");
|
|
return NS_ERROR_INVALID_ARG;
|
|
}
|
|
|
|
// abort early for unsupported encodings
|
|
if (!charset.LowerCaseEqualsLiteral("utf-8")) {
|
|
NS_WARNING("unsupported charset in RFC5987-encoded param");
|
|
return NS_ERROR_INVALID_ARG;
|
|
}
|
|
|
|
// percent-decode
|
|
if (!PercentDecode(value)) {
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
// return the encoding
|
|
aLang.Assign(language);
|
|
|
|
// finally convert octet sequence to UTF-8 and be done
|
|
nsAutoCString utf8;
|
|
nsresult rv = ConvertStringToUTF8(value, charset, true, false, utf8);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
CopyUTF8toUTF16(utf8, aResult);
|
|
return NS_OK;
|
|
}
|
|
|
|
// Converts a raw parameter value to UTF-8 in aResult (truncated first).
//
// If aCharset is non-empty, the value came from an RFC 2231/5987 extended
// parameter and is simply converted from that charset (with substitution
// allowed and the ASCII/UTF-8 shortcut skipped).
//
// Otherwise backslashes that quote CR, LF, '"' or '\' are removed and, when
// aDecode2047 is set, RFC 2047 decoding is attempted on the outcome using
// aDefaultCharset / aOverrideCharset; a non-empty decoded string replaces the
// unquoted one. Returns the status of whichever step ran last.
nsresult internalDecodeParameter(const nsACString& aParamValue,
                                 const nsACString& aCharset,
                                 const nsACString& aDefaultCharset,
                                 bool aOverrideCharset, bool aDecode2047,
                                 nsACString& aResult) {
  aResult.Truncate();

  // If aCharset is given, aParamValue was obtained from RFC2231/5987
  // encoding and we're pretty sure that it's in aCharset.
  if (!aCharset.IsEmpty()) {
    return ConvertStringToUTF8(aParamValue, aCharset, true, true, aResult);
  }

  const nsCString& flat = PromiseFlatCString(aParamValue);
  nsACString::const_iterator cur, end;
  flat.BeginReading(cur);
  flat.EndReading(end);

  // Strip '\' when it quotes CR, LF, '"' or '\'. Any other backslash,
  // including one at the very end, is kept.
  nsAutoCString unQuoted;
  for (; cur != end; ++cur) {
    if (*cur == '\\') {
      nsACString::const_iterator next = cur;
      ++next;
      if (next != end && (*next == nsCRT::CR || *next == nsCRT::LF ||
                          *next == '"' || *next == '\\')) {
        // Skip the backslash; the quoted character is appended below.
        cur = next;
      }
    }
    unQuoted.Append(*cur);
  }

  aResult = unQuoted;

  if (!aDecode2047) {
    return NS_OK;
  }

  // Try RFC 2047 decoding on top of the unquoted value.
  nsAutoCString decoded;
  const nsresult rv = internalDecodeRFC2047Header(
      unQuoted.get(), aDefaultCharset, aOverrideCharset, true, decoded);

  if (NS_SUCCEEDED(rv) && !decoded.IsEmpty()) {
    aResult = decoded;
  }

  return rv;
}
|
|
|
|
// XPCOM wrapper around internalDecodeParameter(): wraps the two C-string
// charset arguments in nsCStrings and always enables RFC 2047 decoding.
NS_IMETHODIMP
nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
                                       const char* aCharset,
                                       const char* aDefaultCharset,
                                       bool aOverrideCharset,
                                       nsACString& aResult) {
  const nsCString charset(aCharset);
  const nsCString defaultCharset(aDefaultCharset);
  const bool decode2047 = true;
  return internalDecodeParameter(aParamValue, charset, defaultCharset,
                                 aOverrideCharset, decode2047, aResult);
}
|
|
|
|
// True if |c| is an ASCII hexadecimal digit (0-9, A-F, a-f). The argument is
// converted to uint8_t first and is evaluated more than once.
#define ISHEXCHAR(c)                            \
  ((uint8_t(c) >= '0' && uint8_t(c) <= '9') ||  \
   (uint8_t(c) >= 'A' && uint8_t(c) <= 'F') ||  \
   (uint8_t(c) >= 'a' && uint8_t(c) <= 'f'))
|
|
|
|
// Decode Q encoding (RFC 2047).
|
|
// static
|
|
char* DecodeQ(const char* in, uint32_t length) {
|
|
char *out, *dest = nullptr;
|
|
|
|
out = dest = (char*)calloc(length + 1, sizeof(char));
|
|
if (dest == nullptr) return nullptr;
|
|
while (length > 0) {
|
|
unsigned c = 0;
|
|
switch (*in) {
|
|
case '=':
|
|
// check if |in| in the form of '=hh' where h is [0-9a-fA-F].
|
|
if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2]))
|
|
goto badsyntax;
|
|
PR_sscanf(in + 1, "%2X", &c);
|
|
*out++ = (char)c;
|
|
in += 3;
|
|
length -= 3;
|
|
break;
|
|
|
|
case '_':
|
|
*out++ = ' ';
|
|
in++;
|
|
length--;
|
|
break;
|
|
|
|
default:
|
|
if (*in & 0x80) goto badsyntax;
|
|
*out++ = *in++;
|
|
length--;
|
|
}
|
|
}
|
|
*out++ = '\0';
|
|
|
|
for (out = dest; *out; ++out) {
|
|
if (*out == '\t') *out = ' ';
|
|
}
|
|
|
|
return dest;
|
|
|
|
badsyntax:
|
|
free(dest);
|
|
return nullptr;
|
|
}
|
|
|
|
// Heuristically checks whether the first |len| bytes of |input| are in a
// 7-bit, non-ASCII encoding: HZ (a 7-bit encoding for simplified Chinese, see
// RFC 1842) or one of the ISO 2022 7-bit encodings (e.g. ISO-2022-JP(-2)/CN,
// see RFC 1468, 1922, 1554), which are recognised by the presence of ESC.
//
// Returns
//   false  as soon as a byte with the high bit set is found,
//   true   as soon as an ESC (0x1B) is found,
//   otherwise true only if at least one complete HZ sequence ("~{ ... ~}" or
//   "~~") was seen and nothing illegal for HZ preceded it.
//
// Only the |len| bytes given are examined: when a '~' is the last byte, the
// byte after it is treated as NUL rather than being read. For a
// NUL-terminated string passed with its strlen() this gives the same answer
// as reading the terminator would.
// static
bool Is7bitNonAsciiString(const char* input, uint32_t len) {
  enum {
    hz_initial,    // No HZ seen yet
    hz_escaped,    // Inside an HZ ~{ escape sequence
    hz_seen,       // Have seen at least one complete HZ sequence
    hz_notpresent  // Have seen something that is not legal HZ
  } hz_state = hz_initial;

  while (len) {
    const int32_t c = uint8_t(*input++);
    len--;

    if (c & 0x80) return false;
    if (c == 0x1B) return true;
    if (c != '~') continue;

    // Peek at the byte following the '~' without leaving the buffer.
    const char next = len ? *input : '\0';

    switch (hz_state) {
      case hz_initial:
      case hz_seen:
        if (next == '{') {
          hz_state = hz_escaped;
        } else if (next == '~') {
          // ~~ is the HZ encoding of ~. Skip over second ~ as well.
          // |next| came from inside the buffer, so len is at least 1 here
          // and the decrement cannot wrap around.
          hz_state = hz_seen;
          input++;
          len--;
        } else {
          hz_state = hz_notpresent;
        }
        break;

      case hz_escaped:
        if (next == '}') hz_state = hz_seen;
        break;

      default:
        break;
    }
  }
  return hz_state == hz_seen;
}
|
|
|
|
#define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD)
|
|
|
|
// copy 'raw' sequences of octets in aInput to aOutput.
|
|
// If aDefaultCharset is specified, the input is assumed to be in the
|
|
// charset and converted to UTF-8. Otherwise, a blind copy is made.
|
|
// If aDefaultCharset is specified, but the conversion to UTF-8
|
|
// is not successful, each octet is replaced by Unicode replacement
|
|
// chars. *aOutput is advanced by the number of output octets.
|
|
// static
|
|
void CopyRawHeader(const char* aInput, uint32_t aLen,
|
|
const nsACString& aDefaultCharset, nsACString& aOutput) {
|
|
int32_t c;
|
|
|
|
// If aDefaultCharset is not specified, make a blind copy.
|
|
if (aDefaultCharset.IsEmpty()) {
|
|
aOutput.Append(aInput, aLen);
|
|
return;
|
|
}
|
|
|
|
// Copy as long as it's US-ASCII. An ESC may indicate ISO 2022
|
|
// A ~ may indicate it is HZ
|
|
while (aLen && (c = uint8_t(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) {
|
|
aOutput.Append(char(c));
|
|
aLen--;
|
|
}
|
|
if (!aLen) {
|
|
return;
|
|
}
|
|
aInput--;
|
|
|
|
// skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii
|
|
// string and aDefaultCharset is a 7bit non-ascii charset.
|
|
bool skipCheck =
|
|
(c == 0x1B || c == '~') &&
|
|
IS_7BIT_NON_ASCII_CHARSET(PromiseFlatCString(aDefaultCharset).get());
|
|
|
|
// If not UTF-8, treat as default charset
|
|
nsAutoCString utf8Text;
|
|
if (NS_SUCCEEDED(ConvertStringToUTF8(Substring(aInput, aInput + aLen),
|
|
PromiseFlatCString(aDefaultCharset),
|
|
skipCheck, true, utf8Text))) {
|
|
aOutput.Append(utf8Text);
|
|
} else { // replace each octet with Unicode replacement char in UTF-8.
|
|
for (uint32_t i = 0; i < aLen; i++) {
|
|
c = uint8_t(*aInput++);
|
|
if (c & 0x80)
|
|
aOutput.Append(REPLACEMENT_CHAR);
|
|
else
|
|
aOutput.Append(char(c));
|
|
}
|
|
}
|
|
}
|
|
|
|
// Decode aLen octets of Q- or Base64-encoded text at aEncoded, convert the
// decoded octets from aCharset to UTF-8 and append them to aResult.
//
// aQOrBase64 selects the decoder and must be 'Q' or 'B'.
//
// Returns NS_ERROR_INVALID_ARG if aQOrBase64 is neither 'Q' nor 'B' or if the
// decoder returns null, the failure code of ConvertStringToUTF8() if the
// charset conversion fails, and NS_OK otherwise. aResult is only appended to
// on success.
nsresult DecodeQOrBase64Str(const char* aEncoded, size_t aLen, char aQOrBase64,
                            const nsACString& aCharset, nsACString& aResult) {
  NS_ASSERTION(aQOrBase64 == 'Q' || aQOrBase64 == 'B', "Should be 'Q' or 'B'");

  char* decoded = nullptr;
  // Remembers which decoder produced |decoded|, because the two buffers are
  // released with different functions below.
  bool fromBase64 = false;
  switch (aQOrBase64) {
    case 'Q':
      decoded = DecodeQ(aEncoded, aLen);
      break;
    case 'B':
      decoded = PL_Base64Decode(aEncoded, aLen, nullptr);
      fromBase64 = true;
      break;
    default:
      return NS_ERROR_INVALID_ARG;
  }
  if (!decoded) {
    return NS_ERROR_INVALID_ARG;
  }

  // Skip the ASCIIness/UTF8ness test if aCharset is a 7bit non-ascii charset.
  const bool skipCheck =
      IS_7BIT_NON_ASCII_CHARSET(PromiseFlatCString(aCharset).get());

  // nsDependentCString(decoded) takes its length from the terminating NUL of
  // the decoded buffer.
  nsAutoCString utf8Text;
  const nsresult rv = ConvertStringToUTF8(nsDependentCString(decoded), aCharset,
                                          skipCheck, true, utf8Text);

  // The decoded buffer is no longer needed, whether or not conversion worked.
  if (fromBase64) {
    PR_Free(decoded);
  } else {
    free(decoded);
  }

  if (NS_FAILED(rv)) {
    return rv;
  }

  aResult.Append(utf8Text);
  return NS_OK;
}
|
|
|
|
static const char especials[] = R"(()<>@,;:\"/[]?.=)";
|
|
|
|
// |decode_mime_part2_str| taken from comi18n.c
|
|
// Decode RFC2047-encoded words in the input and convert the result to UTF-8.
|
|
// If aOverrideCharset is true, charset in RFC2047-encoded words is
|
|
// ignored and aDefaultCharset is assumed, instead. aDefaultCharset
|
|
// is also used to convert raw octets (without RFC 2047 encoding) to UTF-8.
|
|
// static
|
|
nsresult DecodeRFC2047Str(const char* aHeader,
|
|
const nsACString& aDefaultCharset,
|
|
bool aOverrideCharset, nsACString& aResult) {
|
|
const char *p, *q = nullptr, *r;
|
|
const char* begin; // tracking pointer for where we are in the input buffer
|
|
int32_t isLastEncodedWord = 0;
|
|
const char *charsetStart, *charsetEnd;
|
|
nsAutoCString prevCharset, curCharset;
|
|
nsAutoCString encodedText;
|
|
char prevEncoding = '\0', curEncoding;
|
|
nsresult rv;
|
|
|
|
begin = aHeader;
|
|
|
|
// To avoid buffer realloc, if possible, set capacity in advance. No
|
|
// matter what, more than 3x expansion can never happen for all charsets
|
|
// supported by Mozilla. SCSU/BCSU with the sliding window set to a
|
|
// non-BMP block may be exceptions, but Mozilla does not support them.
|
|
// Neither any known mail/news program use them. Even if there's, we're
|
|
// safe because we don't use a raw *char any more.
|
|
aResult.SetCapacity(3 * strlen(aHeader));
|
|
|
|
while ((p = PL_strstr(begin, "=?")) != nullptr) {
|
|
if (isLastEncodedWord) {
|
|
// See if it's all whitespace.
|
|
for (q = begin; q < p; ++q) {
|
|
if (!PL_strchr(" \t\r\n", *q)) break;
|
|
}
|
|
}
|
|
|
|
if (!isLastEncodedWord || q < p) {
|
|
if (!encodedText.IsEmpty()) {
|
|
rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
|
|
prevEncoding, prevCharset, aResult);
|
|
if (NS_FAILED(rv)) {
|
|
aResult.Append(encodedText);
|
|
}
|
|
encodedText.Truncate();
|
|
prevCharset.Truncate();
|
|
prevEncoding = '\0';
|
|
}
|
|
// copy the part before the encoded-word
|
|
CopyRawHeader(begin, p - begin, aDefaultCharset, aResult);
|
|
begin = p;
|
|
}
|
|
|
|
p += 2;
|
|
|
|
// Get charset info
|
|
charsetStart = p;
|
|
charsetEnd = nullptr;
|
|
for (q = p; *q != '?'; q++) {
|
|
if (*q <= ' ' || PL_strchr(especials, *q)) {
|
|
goto badsyntax;
|
|
}
|
|
|
|
// RFC 2231 section 5
|
|
if (!charsetEnd && *q == '*') {
|
|
charsetEnd = q;
|
|
}
|
|
}
|
|
if (!charsetEnd) {
|
|
charsetEnd = q;
|
|
}
|
|
|
|
q++;
|
|
curEncoding = nsCRT::ToUpper(*q);
|
|
if (curEncoding != 'Q' && curEncoding != 'B') goto badsyntax;
|
|
|
|
if (q[1] != '?') goto badsyntax;
|
|
|
|
// loop-wise, keep going until we hit "?=". the inner check handles the
|
|
// nul terminator should the string terminate before we hit the right
|
|
// marker. (And the r[1] will never reach beyond the end of the string
|
|
// because *r != '?' is true if r is the nul character.)
|
|
for (r = q + 2; *r != '?' || r[1] != '='; r++) {
|
|
if (*r < ' ') goto badsyntax;
|
|
}
|
|
if (r == q + 2) {
|
|
// it's empty, skip
|
|
begin = r + 2;
|
|
isLastEncodedWord = 1;
|
|
continue;
|
|
}
|
|
|
|
curCharset.Assign(charsetStart, charsetEnd - charsetStart);
|
|
// Override charset if requested. Never override labeled UTF-8.
|
|
// Use default charset instead of UNKNOWN-8BIT
|
|
if ((aOverrideCharset &&
|
|
0 != nsCRT::strcasecmp(curCharset.get(), "UTF-8")) ||
|
|
(!aDefaultCharset.IsEmpty() &&
|
|
0 == nsCRT::strcasecmp(curCharset.get(), "UNKNOWN-8BIT"))) {
|
|
curCharset = aDefaultCharset;
|
|
}
|
|
|
|
const char* R;
|
|
R = r;
|
|
if (curEncoding == 'B') {
|
|
// bug 227290. ignore an extraneous '=' at the end.
|
|
// (# of characters in B-encoded part has to be a multiple of 4)
|
|
int32_t n = r - (q + 2);
|
|
R -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0;
|
|
}
|
|
// Bug 493544. Don't decode the encoded text until it ends
|
|
if (R[-1] != '=' &&
|
|
(prevCharset.IsEmpty() ||
|
|
(curCharset == prevCharset && curEncoding == prevEncoding))) {
|
|
encodedText.Append(q + 2, R - (q + 2));
|
|
prevCharset = curCharset;
|
|
prevEncoding = curEncoding;
|
|
|
|
begin = r + 2;
|
|
isLastEncodedWord = 1;
|
|
continue;
|
|
}
|
|
|
|
bool bDecoded; // If the current line has been decoded.
|
|
bDecoded = false;
|
|
if (!encodedText.IsEmpty()) {
|
|
if (curCharset == prevCharset && curEncoding == prevEncoding) {
|
|
encodedText.Append(q + 2, R - (q + 2));
|
|
bDecoded = true;
|
|
}
|
|
rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
|
|
prevEncoding, prevCharset, aResult);
|
|
if (NS_FAILED(rv)) {
|
|
aResult.Append(encodedText);
|
|
}
|
|
encodedText.Truncate();
|
|
prevCharset.Truncate();
|
|
prevEncoding = '\0';
|
|
}
|
|
if (!bDecoded) {
|
|
rv = DecodeQOrBase64Str(q + 2, R - (q + 2), curEncoding, curCharset,
|
|
aResult);
|
|
if (NS_FAILED(rv)) {
|
|
aResult.Append(encodedText);
|
|
}
|
|
}
|
|
|
|
begin = r + 2;
|
|
isLastEncodedWord = 1;
|
|
continue;
|
|
|
|
badsyntax:
|
|
if (!encodedText.IsEmpty()) {
|
|
rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
|
|
prevEncoding, prevCharset, aResult);
|
|
if (NS_FAILED(rv)) {
|
|
aResult.Append(encodedText);
|
|
}
|
|
encodedText.Truncate();
|
|
prevCharset.Truncate();
|
|
}
|
|
// copy the part before the encoded-word
|
|
aResult.Append(begin, p - begin);
|
|
begin = p;
|
|
isLastEncodedWord = 0;
|
|
}
|
|
|
|
if (!encodedText.IsEmpty()) {
|
|
rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
|
|
prevEncoding, prevCharset, aResult);
|
|
if (NS_FAILED(rv)) {
|
|
aResult.Append(encodedText);
|
|
}
|
|
}
|
|
|
|
// put the tail back
|
|
CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult);
|
|
|
|
nsAutoCString tempStr(aResult);
|
|
tempStr.ReplaceChar('\t', ' ');
|
|
aResult = tempStr;
|
|
|
|
return NS_OK;
|
|
}
|