gecko-dev/netwerk/mime/nsMIMEHeaderParamImpl.cpp
Gabriele Svelto 5dc21d568c Bug 1600545 - Remove useless inclusions of header files generated from IDL files in modules/, netwerk/, parser/, security/, startupcache/, storage/, toolkit/, tools/, uriloader/, widget/, xpcom/ and xpfe/ r=Ehsan
The inclusions were removed with the following very crude script and the
resulting breakage was fixed up by hand. The manual fixups did either
revert the changes done by the script, replace a generic header with a more
specific one or replace a header with a forward declaration.

find . -name "*.idl" | grep -v web-platform | grep -v third_party | while read path; do
    interfaces=$(grep "^\(class\|interface\).*:.*" "$path" | cut -d' ' -f2)
    if [ -n "$interfaces" ]; then
        if [[ "$interfaces" == *$'\n'* ]]; then
          regexp="\("
          for i in $interfaces; do regexp="$regexp$i\|"; done
          regexp="${regexp%%\\\|}\)"
        else
          regexp="$interfaces"
        fi
        interface=$(basename "$path")
        rg -l "#include.*${interface%%.idl}.h" . | while read path2; do
            hits=$(grep -v "#include.*${interface%%.idl}.h" "$path2" | grep -c "$regexp" )
            if [ $hits -eq 0 ]; then
                echo "Removing ${interface} from ${path2}"
                grep -v "#include.*${interface%%.idl}.h" "$path2" > "$path2".tmp
                mv -f "$path2".tmp "$path2"
            fi
        done
    fi
done

Differential Revision: https://phabricator.services.mozilla.com/D55444

--HG--
extra : moz-landing-system : lando
2019-12-06 09:17:57 +00:00

1318 lines
41 KiB
C++

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set sw=2 ts=8 et tw=80 : */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include <string.h>
#include "prprf.h"
#include "plstr.h"
#include "prmem.h"
#include "plbase64.h"
#include "nsCRT.h"
#include "nsMemory.h"
#include "nsTArray.h"
#include "nsCOMPtr.h"
#include "nsEscape.h"
#include "nsMIMEHeaderParamImpl.h"
#include "nsReadableUtils.h"
#include "nsNativeCharsetUtils.h"
#include "nsError.h"
#include "mozilla/Encoding.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Utf8.h"
using mozilla::Encoding;
using mozilla::IsAscii;
using mozilla::IsUtf8;
// static functions declared below are moved from mailnews/mime/src/comi18n.cpp
//
// Forward declarations of file-local helpers whose definitions appear later
// in this file. They are declared here so that functions defined earlier can
// call them.
static char* DecodeQ(const char*, uint32_t);
static bool Is7bitNonAsciiString(const char*, uint32_t);
static void CopyRawHeader(const char*, uint32_t, const nsACString&,
nsACString&);
static nsresult DecodeRFC2047Str(const char*, const nsACString&, bool,
nsACString&);
static nsresult internalDecodeParameter(const nsACString&, const nsACString&,
const nsACString&, bool, bool,
nsACString&);
// Decode aString, interpreted in the encoding labelled aCharset, into aResult.
//
// @param aString             raw octets to decode
// @param aCharset            encoding label; must not be empty
// @param aAllowSubstitution  selects DecodeWithoutBOMHandling (true) or
//                            DecodeWithoutBOMHandlingAndWithoutReplacement
//                            (false)
// @param aResult             receives the decoder output
// @return NS_ERROR_INVALID_ARG for an empty label, NS_ERROR_UCONV_NOCONV when
//         no encoding is found for the label, otherwise the decoder's status.
static nsresult ToUTF8(const nsACString& aString, const nsACString& aCharset,
                       bool aAllowSubstitution, nsACString& aResult) {
  if (aCharset.IsEmpty()) {
    return NS_ERROR_INVALID_ARG;
  }

  auto encoding = Encoding::ForLabelNoReplacement(aCharset);
  if (!encoding) {
    return NS_ERROR_UCONV_NOCONV;
  }

  if (!aAllowSubstitution) {
    return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aString,
                                                                   aResult);
  }

  // With substitution allowed, every success code is collapsed to plain
  // NS_OK; failure codes are handed back unchanged.
  nsresult rv = encoding->DecodeWithoutBOMHandling(aString, aResult);
  return NS_SUCCEEDED(rv) ? NS_OK : rv;
}
// Convert aString (in aCharset) to UTF-8, storing the result in aUTF8String.
//
// Unless aSkipCheck is set, input that is already pure ASCII or valid UTF-8
// is copied through untouched. Callers set aSkipCheck when they suspect a
// 7bit non-ASCII charset (ISO-2022-xx, HZ) or some other charset whose octets
// can be mistaken for UTF-8.
//
// @param aAllowSubstitution  forwarded to ToUTF8
// @return NS_OK on success, otherwise the error reported by ToUTF8.
static nsresult ConvertStringToUTF8(const nsACString& aString,
                                    const nsACString& aCharset, bool aSkipCheck,
                                    bool aAllowSubstitution,
                                    nsACString& aUTF8String) {
  if (!aSkipCheck) {
    if (IsAscii(aString) || IsUtf8(aString)) {
      aUTF8String = aString;
      return NS_OK;
    }
  }

  aUTF8String.Truncate();
  nsresult rv = ToUTF8(aString, aCharset, aAllowSubstitution, aUTF8String);

  // Safety net for the skipped-check case: if the conversion failed but the
  // input turns out to be valid UTF-8 after all (the caller's hunch was
  // wrong), pass it through. ASCII is not re-tested because no supported
  // charset is incompatible with ASCII (EBCDIC is not supported).
  const bool conversionFailed = NS_FAILED(rv);
  if (aSkipCheck && conversionFailed && IsUtf8(aString)) {
    aUTF8String = aString;
    return NS_OK;
  }

  return rv;
}
// XXX The chance of UTF-7 being used in the message header is really
// low, but in theory it's possible.
//
// Evaluates to true when the C string |cset| starts (compared
// case-insensitively via nsCRT::strncasecmp) with "ISO-2022", "HZ-GB" or
// "UTF-7". Note that |cset| is expanded up to three times, so the argument
// expression may be evaluated more than once.
#define IS_7BIT_NON_ASCII_CHARSET(cset) \
(!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \
!nsCRT::strncasecmp((cset), "HZ-GB", 5) || \
!nsCRT::strncasecmp((cset), "UTF-7", 5))
// Interface boilerplate for nsMIMEHeaderParamImpl, listing nsIMIMEHeaderParam
// as the interface it exposes.
NS_IMPL_ISUPPORTS(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam)
// Looks up parameter aParamName in aHeaderVal and returns its decoded value
// in aResult. This is a thin wrapper: it forwards every argument to
// DoGetParameter, selecting the MIME_FIELD_ENCODING decoding mode.
NS_IMETHODIMP
nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal,
const char* aParamName,
const nsACString& aFallbackCharset,
bool aTryLocaleCharset, char** aLang,
nsAString& aResult) {
return DoGetParameter(aHeaderVal, aParamName, MIME_FIELD_ENCODING,
aFallbackCharset, aTryLocaleCharset, aLang, aResult);
}
// Same as GetParameter, except that DoGetParameter is invoked with the
// HTTP_FIELD_ENCODING decoding mode.
NS_IMETHODIMP
nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal,
const char* aParamName,
const nsACString& aFallbackCharset,
bool aTryLocaleCharset, char** aLang,
nsAString& aResult) {
return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING,
aFallbackCharset, aTryLocaleCharset, aLang, aResult);
}
// Convenience overload: HTTP_FIELD_ENCODING lookup with no fallback charset
// (EmptyCString()), aTryLocaleCharset == false and no language out-parameter
// (nullptr).
/* static */
nsresult nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal,
const char* aParamName,
nsAString& aResult) {
return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING,
EmptyCString(), false, nullptr, aResult);
}
// Extract parameter aParamName from aHeaderVal and return it as UTF-16.
//
// Steps:
//   1. DoParameterInternal pulls out the raw value together with the charset
//      and language announced by an RFC 2231/5987 encoded parameter.
//   2. internalDecodeParameter applies charset conversion and RFC 2047
//      decoding, yielding (normally) UTF-8.
//   3. The result is converted to UTF-16, trying aFallbackCharset first, then
//      plain UTF-8, then optionally the native charset, and finally ASCII.
//
// NOTE(review): the historical comment here read "XXX : aTryLocaleCharset is
// not yet effective", yet the flag is consulted in step 3 below -- confirm
// which is accurate.
/* static */
nsresult nsMIMEHeaderParamImpl::DoGetParameter(
    const nsACString& aHeaderVal, const char* aParamName,
    ParamDecoding aDecoding, const nsACString& aFallbackCharset,
    bool aTryLocaleCharset, char** aLang, nsAString& aResult) {
  aResult.Truncate();

  // Step 1: get parameter (decode RFC 2231/5987 when applicable, as specified
  // by aDecoding, 5987 being a subset of 2231) and its charset.
  nsCString rawValue;
  nsCString declaredCharset;
  nsresult rv = DoParameterInternal(
      PromiseFlatCString(aHeaderVal).get(), aParamName, aDecoding,
      getter_Copies(declaredCharset), aLang, getter_Copies(rawValue));
  if (NS_FAILED(rv)) {
    return rv;
  }

  // Step 2: charset conversion plus RFC 2047 decoding. The last flag used to
  // be (aDecoding == MIME_FIELD_ENCODING); see bug 875615.
  nsAutoCString decoded;
  rv = internalDecodeParameter(rawValue, declaredCharset, EmptyCString(), false,
                               true, decoded);
  NS_ENSURE_SUCCESS(rv, rv);

  // Step 3a: give the caller-supplied fallback charset a chance.
  if (!aFallbackCharset.IsEmpty()) {
    const Encoding* fallbackEncoding = Encoding::ForLabel(aFallbackCharset);
    const bool allowSubstitution = fallbackEncoding != UTF_8_ENCODING;
    nsAutoCString converted;
    rv = ConvertStringToUTF8(decoded, aFallbackCharset, false,
                             allowSubstitution, converted);
    if (NS_SUCCEEDED(rv)) {
      CopyUTF8toUTF16(converted, aResult);
      return NS_OK;
    }
  }

  // Step 3b: already valid UTF-8.
  if (IsUtf8(decoded)) {
    CopyUTF8toUTF16(decoded, aResult);
    return NS_OK;
  }

  // Step 3c: native charset, when requested and not itself UTF-8.
  if (aTryLocaleCharset && !NS_IsNativeUTF8()) {
    return NS_CopyNativeToUnicode(decoded, aResult);
  }

  // Step 3d: last resort.
  CopyASCIItoUTF16(decoded, aResult);
  return NS_OK;
}
// Strip quoted-string backslash escapes from the NUL-terminated string |src|.
//
// The string is rewritten in place and may become shorter. Each backslash
// that is followed by another character is dropped and that following
// character is kept literally; a backslash that is the very last character
// is kept as-is.
void RemoveQuotedStringEscapes(char* src) {
  const char* reader = src;
  char* writer = src;

  while (*reader) {
    if (*reader == '\\' && reader[1] != '\0') {
      // step over the backslash; the escaped character is copied below
      ++reader;
    }
    *writer++ = *reader++;
  }

  *writer = '\0';
}
// True if aChar is an ASCII hexadecimal digit (0-9, a-f, A-F).
bool IsHexDigit(char aChar) {
  char c = aChar;
  return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') ||
         (c >= '0' && c <= '9');
}

// Validate that the first |len| bytes of |aValue| are syntactically valid
// with respect to %-escapes: every '%' must be followed by two hexadecimal
// digits, and both digits must themselves lie inside the range.
//
// @param aValue  start of the octets to check (need not be NUL-terminated)
// @param len     number of octets to check
// @return true if every '%' in the range starts a complete "%hh" escape.
bool IsValidPercentEscaped(const char* aValue, int32_t len) {
  for (int32_t i = 0; i < len; i++) {
    if (aValue[i] != '%') {
      continue;
    }
    // Fewer than two bytes remain after the '%': the escape is truncated.
    // Checking this first also guarantees we never read beyond |len|, so the
    // verdict cannot depend on whatever happens to follow the range.
    if (len - i < 3) {
      return false;
    }
    if (!IsHexDigit(aValue[i + 1]) || !IsHexDigit(aValue[i + 2])) {
      return false;
    }
  }
  return true;
}
// Support for continuations (RFC 2231, Section 3)
// only a sane number supported
//
// Largest segment index accepted for a parameter continuation; both
// parseSegmentNumber and addContinuation reject values above it.
#define MAX_CONTINUATIONS 999
// One segment of a parameter value that was split into continuations.
//
// The object merely records where the segment's text lives (|value| and
// |length|) and how it has to be post-processed; the pointer is stored as
// given and the destructor does not release it.
class Continuation {
 public:
  // Creates an empty segment (null value). nsTArray requires a default
  // constructor.
  Continuation()
      : value(nullptr),
        length(0),
        needsPercentDecoding(false),
        wasQuotedString(false) {}

  Continuation(const char* aValue, uint32_t aLength, bool aNeedsPercentDecoding,
               bool aWasQuotedString)
      : value(aValue),
        length(aLength),
        needsPercentDecoding(aNeedsPercentDecoding),
        wasQuotedString(aWasQuotedString) {}

  ~Continuation() = default;

  const char* value;          // start of the segment text; null if unset
  uint32_t length;            // number of chars of |value| in the segment
  bool needsPercentDecoding;  // segment text carries %-escapes
  bool wasQuotedString;       // segment text came from a quoted-string
};
// Concatenate the continuation segments in aArray into one freshly allocated
// C string.
//
// Segments are appended in index order; the first segment whose value is
// null ends the concatenation. Percent-decoding and quoted-string unescaping
// are applied per segment, right after it has been appended. aArray itself
// is only read.
//
// @return the combined string (to be released with free()), or nullptr when
//         aArray is empty or the combined value is the empty string.
char* combineContinuations(nsTArray<Continuation>& aArray) {
  const uint32_t segmentCount = aArray.Length();
  if (segmentCount == 0) {
    return nullptr;
  }

  // The sum of the raw segment lengths is an upper bound for the result:
  // the post-processing steps can only shorten a segment.
  uint32_t capacity = 0;
  for (uint32_t i = 0; i < segmentCount; ++i) {
    capacity += aArray[i].length;
  }

  char* result = (char*)moz_xmalloc(capacity + 1);
  *result = '\0';

  // |tail| always points at the terminating NUL of what was built so far.
  char* tail = result;
  for (uint32_t i = 0; i < segmentCount; ++i) {
    const Continuation& segment = aArray[i];
    if (!segment.value) {
      break;
    }

    strncat(tail, segment.value, segment.length);
    if (segment.needsPercentDecoding) {
      nsUnescape(tail);
    }
    if (segment.wasQuotedString) {
      RemoveQuotedStringEscapes(tail);
    }
    tail += strlen(tail);
  }

  // An empty value is reported as "no value".
  if (*result == '\0') {
    free(result);
    result = nullptr;
  }

  return result;
}
// Record a continuation segment at position aIndex of aArray, growing the
// array when necessary.
//
// @return true if the segment was stored; false if the slot already holds a
//         segment, if aIndex exceeds MAX_CONTINUATIONS, or if the segment
//         claims to be both percent-encoded and a quoted-string.
bool addContinuation(nsTArray<Continuation>& aArray, uint32_t aIndex,
                     const char* aValue, uint32_t aLength,
                     bool aNeedsPercentDecoding, bool aWasQuotedString) {
  const bool slotAlreadyFilled =
      aIndex < aArray.Length() && aArray[aIndex].value;
  if (slotAlreadyFilled) {
    NS_WARNING("duplicate RC2231 continuation segment #\n");
    return false;
  }

  if (aIndex > MAX_CONTINUATIONS) {
    NS_WARNING("RC2231 continuation segment # exceeds limit\n");
    return false;
  }

  if (aNeedsPercentDecoding && aWasQuotedString) {
    NS_WARNING(
        "RC2231 continuation segment can't use percent encoding and quoted "
        "string form at the same time\n");
    return false;
  }

  // Make room for the slot, then fill it.
  if (aIndex >= aArray.Length()) {
    aArray.SetLength(aIndex + 1);
  }
  aArray[aIndex] =
      Continuation(aValue, aLength, aNeedsPercentDecoding, aWasQuotedString);

  return true;
}
// Parse the decimal segment number held in the first aLen chars of aValue.
//
// The number must be non-empty, consist of ASCII digits only, carry no
// leading '0' (a lone "0" is fine) and not exceed MAX_CONTINUATIONS.
//
// @return the parsed number, or -1 on any violation.
int32_t parseSegmentNumber(const char* aValue, int32_t aLen) {
  if (aLen < 1) {
    NS_WARNING("segment number missing\n");
    return -1;
  }

  if (aLen > 1 && aValue[0] == '0') {
    NS_WARNING("leading '0' not allowed in segment number\n");
    return -1;
  }

  int32_t parsed = 0;
  const char* const end = aValue + aLen;
  for (const char* p = aValue; p != end; ++p) {
    const char digit = *p;
    if (digit < '0' || digit > '9') {
      NS_WARNING("invalid characters in segment number\n");
      return -1;
    }

    parsed = parsed * 10 + (digit - '0');

    // Checked on every digit, so the accumulator stays small.
    if (parsed > MAX_CONTINUATIONS) {
      NS_WARNING("Segment number exceeds sane size\n");
      return -1;
    }
  }

  return parsed;
}
// validate a given octet sequence for compliance with the specified
// encoding
bool IsValidOctetSequenceForCharset(const nsACString& aCharset,
const char* aOctets) {
nsAutoCString tmpRaw;
tmpRaw.Assign(aOctets);
nsAutoCString tmpDecoded;
nsresult rv = ConvertStringToUTF8(tmpRaw, aCharset, false, false, tmpDecoded);
if (rv != NS_OK) {
// we can't decode; charset may be unsupported, or the octet sequence
// is broken (illegal or incomplete octet sequence contained)
NS_WARNING(
"RFC2231/5987 parameter value does not decode according to specified "
"charset\n");
return false;
}
return true;
}
// moved almost verbatim from mimehdrs.cpp
// char *
// MimeHeaders_get_parameter (const char *header_value, const char *parm_name,
// char **charset, char **language)
//
// The format of these header lines is
// <token> [ ';' <token> '=' <token-or-quoted-string> ]*
//
// Thin wrapper: forwards to DoParameterInternal with the MIME_FIELD_ENCODING
// decoding mode and returns its result unchanged.
NS_IMETHODIMP
nsMIMEHeaderParamImpl::GetParameterInternal(const char* aHeaderValue,
const char* aParamName,
char** aCharset, char** aLang,
char** aResult) {
return DoParameterInternal(aHeaderValue, aParamName, MIME_FIELD_ENCODING,
aCharset, aLang, aResult);
}
// Locate parameter aParamName in the header value aHeaderValue and return
// its raw (not yet charset-converted) value.
//
// Three candidate results are collected while scanning the header:
//   - case A:   plain        name=value
//   - case B:   RFC 5987     name*=charset'lang'value
//   - case C/D: RFC 2231 continuations  name*0*=..., name*1=..., ...
// Preference order at the end is B, then C/D, then A.
//
// @param aHeaderValue  NUL-terminated header value; must be non-empty
// @param aParamName    parameter to look for; when null or empty, the first
//                      (unnamed) component of the header is returned instead
// @param aDecoding     currently unused by the body (see the comment at
//                      acceptContinuations)
// @param aCharset      optional out: charset announced by the chosen B or
//                      C/D result, left null otherwise
// @param aLang         optional out: language tag seen, left null if none
// @param aResult       out: the value; must not be null
//
// All returned strings are heap-allocated and ownership passes to the caller
// (DoGetParameter adopts them via getter_Copies).
//
// @return NS_OK when a value was found; NS_ERROR_INVALID_ARG for bad
//         arguments or when nothing was found;
//         NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY when the unnamed first
//         component was requested but is empty.
/* static */
nsresult nsMIMEHeaderParamImpl::DoParameterInternal(
    const char* aHeaderValue, const char* aParamName, ParamDecoding aDecoding,
    char** aCharset, char** aLang, char** aResult) {
  if (!aHeaderValue || !*aHeaderValue || !aResult) return NS_ERROR_INVALID_ARG;

  *aResult = nullptr;

  if (aCharset) *aCharset = nullptr;
  if (aLang) *aLang = nullptr;

  nsAutoCString charset;

  // change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable
  // them for HTTP header fields later on, see bug 776324
  bool acceptContinuations = true;

  const char* str = aHeaderValue;

  // skip leading white space.
  for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
    ;
  const char* start = str;

  // aParamName is empty. return the first (possibly) _unnamed_ 'parameter'
  // For instance, return 'inline' in the following case:
  // Content-Disposition: inline; filename=.....
  if (!aParamName || !*aParamName) {
    for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str)
      ;
    if (str == start) return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY;

    // One extra byte is duplicated to make room for the terminator that is
    // written on the next line.
    *aResult = (char*)moz_xmemdup(start, (str - start) + 1);
    (*aResult)[str - start] = '\0';  // null-terminate
    return NS_OK;
  }

  /* Skip forward to first ';' */
  for (; *str && *str != ';' && *str != ','; ++str)
    ;
  if (*str) str++;
  /* Skip over following whitespace */
  for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
    ;

  // Some broken http servers just specify parameters
  // like 'filename' without specifying disposition
  // method. Rewind to the first non-white-space
  // character.
  if (!*str) str = start;

  // RFC2231 - The legitimate parm format can be:
  // A. title=ThisIsTitle
  // B. title*=us-ascii'en-us'This%20is%20wierd.
  // C. title*0*=us-ascii'en'This%20is%20wierd.%20We
  //    title*1*=have%20to%20support%20this.
  //    title*2="Else..."
  // D. title*0="Hey, what you think you are doing?"
  //    title*1="There is no charset and lang info."
  // RFC5987: only A and B

  // collect results for the different algorithms (plain filename,
  // RFC5987/2231-encoded filename, + continuations) separately and decide
  // which to use at the end
  char* caseAResult = nullptr;
  char* caseBResult = nullptr;
  char* caseCDResult = nullptr;

  // collect continuation segments
  nsTArray<Continuation> segments;

  // our copies of the charset parameter, kept separately as they might
  // differ for the two formats
  nsDependentCSubstring charsetB, charsetCD;

  nsDependentCSubstring lang;

  int32_t paramLen = strlen(aParamName);

  while (*str) {
    // find name/value

    const char* nameStart = str;
    const char* nameEnd = nullptr;
    const char* valueStart = nullptr;
    const char* valueEnd = nullptr;
    bool isQuotedString = false;

    NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace.");

    // Skip forward to the end of this token.
    for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';';
         str++)
      ;
    nameEnd = str;

    int32_t nameLen = nameEnd - nameStart;

    // Skip over whitespace, '=', and whitespace
    while (nsCRT::IsAsciiSpace(*str)) ++str;
    if (!*str) {
      break;
    }
    if (*str != '=') {
      // don't accept parameters without "="
      goto increment_str;
    }
    // Skip over '=' only if it was actually there
    str++;
    while (nsCRT::IsAsciiSpace(*str)) ++str;

    if (*str != '"') {
      // The value is a token, not a quoted string.
      valueStart = str;
      for (valueEnd = str;
           *valueEnd && !nsCRT::IsAsciiSpace(*valueEnd) && *valueEnd != ';';
           valueEnd++)
        ;
      str = valueEnd;
    } else {
      isQuotedString = true;

      ++str;
      valueStart = str;
      for (valueEnd = str; *valueEnd; ++valueEnd) {
        if (*valueEnd == '\\' && *(valueEnd + 1))
          ++valueEnd;
        else if (*valueEnd == '"')
          break;
      }
      str = valueEnd;
      // *valueEnd != null means that *valueEnd is quote character.
      if (*valueEnd) str++;
    }

    // See if this is the simplest case (case A above),
    // a 'single' line value with no charset and lang.
    // If so, copy it and return.
    if (nameLen == paramLen &&
        !nsCRT::strncasecmp(nameStart, aParamName, paramLen)) {
      if (caseAResult) {
        // we already have one caseA result, ignore subsequent ones
        goto increment_str;
      }

      // if the parameter spans across multiple lines we have to strip out the
      // line continuation -- jht 4/29/98
      nsAutoCString tempStr(valueStart, valueEnd - valueStart);
      tempStr.StripCRLF();
      char* res = ToNewCString(tempStr);
      NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY);

      if (isQuotedString) RemoveQuotedStringEscapes(res);

      caseAResult = res;
      // keep going, we may find a RFC 2231/5987 encoded alternative
    }
    // case B, C, and D
    else if (nameLen > paramLen &&
             !nsCRT::strncasecmp(nameStart, aParamName, paramLen) &&
             *(nameStart + paramLen) == '*') {
      // 1st char past '*'
      const char* cp = nameStart + paramLen + 1;

      // if param name ends in "*" we need do to RFC5987 "ext-value" decoding
      bool needExtDecoding = *(nameEnd - 1) == '*';

      bool caseB = nameLen == paramLen + 1;
      bool caseCStart = (*cp == '0') && needExtDecoding;

      // parse the segment number
      int32_t segmentNumber = -1;
      if (!caseB) {
        int32_t segLen = (nameEnd - cp) - (needExtDecoding ? 1 : 0);
        segmentNumber = parseSegmentNumber(cp, segLen);

        if (segmentNumber == -1) {
          acceptContinuations = false;
          goto increment_str;
        }
      }

      // CaseB and start of CaseC: requires charset and optional language
      // in quotes (quotes required even if lang is blank)
      if (caseB || (caseCStart && acceptContinuations)) {
        // look for single quotation mark(')
        // NOTE(review): both searches run to the end of the header rather
        // than stopping at valueEnd, so a quote belonging to a later
        // parameter can be picked up here -- confirm whether that is
        // intended.
        const char* sQuote1 = PL_strchr(valueStart, 0x27);
        const char* sQuote2 = sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nullptr;

        // Two single quotation marks must be present even in
        // absence of charset and lang.
        if (!sQuote1 || !sQuote2) {
          NS_WARNING(
              "Mandatory two single quotes are missing in header parameter\n");
        }

        const char* charsetStart = nullptr;
        int32_t charsetLength = 0;
        const char* langStart = nullptr;
        int32_t langLength = 0;
        const char* rawValStart = nullptr;
        int32_t rawValLength = 0;

        if (sQuote2 && sQuote1) {
          // both delimiters present: charSet'lang'rawVal
          rawValStart = sQuote2 + 1;
          rawValLength = valueEnd - rawValStart;

          langStart = sQuote1 + 1;
          langLength = sQuote2 - langStart;

          charsetStart = valueStart;
          charsetLength = sQuote1 - charsetStart;
        } else if (sQuote1) {
          // one delimiter; assume charset'rawVal
          rawValStart = sQuote1 + 1;
          rawValLength = valueEnd - rawValStart;

          charsetStart = valueStart;
          charsetLength = sQuote1 - valueStart;
        } else {
          // no delimiter: just rawVal
          rawValStart = valueStart;
          rawValLength = valueEnd - valueStart;
        }

        if (langLength != 0) {
          lang.Assign(langStart, langLength);
        }

        // keep the charset for later
        if (caseB) {
          charsetB.Assign(charsetStart, charsetLength);
        } else {
          // if caseCorD
          charsetCD.Assign(charsetStart, charsetLength);
        }

        // non-empty value part
        if (rawValLength > 0) {
          if (!caseBResult && caseB) {
            if (!IsValidPercentEscaped(rawValStart, rawValLength)) {
              goto increment_str;
            }

            // allocate buffer for the raw value
            char* tmpResult = (char*)moz_xmemdup(rawValStart, rawValLength + 1);
            *(tmpResult + rawValLength) = 0;

            nsUnescape(tmpResult);
            caseBResult = tmpResult;
          } else {
            // caseC
            bool added = addContinuation(segments, 0, rawValStart, rawValLength,
                                         needExtDecoding, isQuotedString);

            if (!added) {
              // continuation not added, stop processing them
              acceptContinuations = false;
            }
          }
        }
      }  // end of if-block :  title*0*=  or  title*=
      // caseD: a line of multiline param with no need for unescaping :
      // title*[0-9]=   or 2nd or later lines of a caseC param : title*[1-9]*=
      else if (acceptContinuations && segmentNumber != -1) {
        uint32_t valueLength = valueEnd - valueStart;

        bool added =
            addContinuation(segments, segmentNumber, valueStart, valueLength,
                            needExtDecoding, isQuotedString);

        if (!added) {
          // continuation not added, stop processing them
          acceptContinuations = false;
        }
      }  // end of if-block :  title*[0-9]= or title*[1-9]*=
    }

    // str now points after the end of the value.
    //   skip over whitespace, ';', whitespace.
  increment_str:
    while (nsCRT::IsAsciiSpace(*str)) ++str;
    if (*str == ';') {
      ++str;
    } else {
      // stop processing the header field; either we are done or the
      // separator was missing
      break;
    }
    while (nsCRT::IsAsciiSpace(*str)) ++str;
  }

  caseCDResult = combineContinuations(segments);

  if (caseBResult && !charsetB.IsEmpty()) {
    // check that the 2231/5987 result decodes properly given the
    // specified character set
    if (!IsValidOctetSequenceForCharset(charsetB, caseBResult)) {
      // Release the rejected buffer; merely nulling the pointer would leak
      // it.
      free(caseBResult);
      caseBResult = nullptr;
    }
  }

  if (caseCDResult && !charsetCD.IsEmpty()) {
    // check that the 2231/5987 result decodes properly given the
    // specified character set
    if (!IsValidOctetSequenceForCharset(charsetCD, caseCDResult)) {
      // Same as above: free before discarding.
      free(caseCDResult);
      caseCDResult = nullptr;
    }
  }

  if (caseBResult) {
    // prefer simple 5987 format over 2231 with continuations
    *aResult = caseBResult;
    caseBResult = nullptr;
    charset.Assign(charsetB);
  } else if (caseCDResult) {
    // prefer 2231/5987 with or without continuations over plain format
    *aResult = caseCDResult;
    caseCDResult = nullptr;
    charset.Assign(charsetCD);
  } else if (caseAResult) {
    *aResult = caseAResult;
    caseAResult = nullptr;
  }

  // free unused stuff (the chosen candidate was nulled out above, and
  // free(nullptr) is a no-op)
  free(caseAResult);
  free(caseBResult);
  free(caseCDResult);

  // if we have a result
  if (*aResult) {
    // then return charset and lang as well
    if (aLang && !lang.IsEmpty()) {
      uint32_t len = lang.Length();
      *aLang = (char*)moz_xmemdup(lang.BeginReading(), len + 1);
      *(*aLang + len) = 0;
    }
    if (aCharset && !charset.IsEmpty()) {
      uint32_t len = charset.Length();
      *aCharset = (char*)moz_xmemdup(charset.BeginReading(), len + 1);
      *(*aCharset + len) = 0;
    }
  }

  return *aResult ? NS_OK : NS_ERROR_INVALID_ARG;
}
// Decode a header value that may contain RFC 2047 encoded-words and store
// the result in aResult.
//
// DecodeRFC2047Str is invoked when the value contains "=?", or when a default
// charset is given and the value either is not valid UTF-8 or looks like a
// 7bit non-ASCII string. Otherwise the value is copied as-is.
//
// When aEatContinuations is set, "\n\t" and "\r\t" are replaced by a single
// space and all remaining CR/LF characters are stripped. For a value that
// was copied as-is this happens only if it contains a CR or LF at all.
//
// @return NS_ERROR_INVALID_ARG for a null aHeaderVal, NS_OK otherwise (the
//         status of DecodeRFC2047Str is not propagated).
nsresult internalDecodeRFC2047Header(const char* aHeaderVal,
                                     const nsACString& aDefaultCharset,
                                     bool aOverrideCharset,
                                     bool aEatContinuations,
                                     nsACString& aResult) {
  aResult.Truncate();

  if (!aHeaderVal) {
    return NS_ERROR_INVALID_ARG;
  }
  if (!*aHeaderVal) {
    return NS_OK;
  }

  bool needsDecoding = PL_strstr(aHeaderVal, "=?") != nullptr;
  if (!needsDecoding && !aDefaultCharset.IsEmpty()) {
    needsDecoding = !IsUtf8(nsDependentCString(aHeaderVal)) ||
                    Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal));
  }

  if (needsDecoding) {
    DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
  } else {
    aResult = aHeaderVal;

    // Nothing to unfold when the raw value has no line breaks at all.
    const bool hasLineBreak =
        PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r');
    if (!hasLineBreak) {
      aEatContinuations = false;
    }
  }

  if (aEatContinuations) {
    nsAutoCString unfolded(aResult);
    unfolded.ReplaceSubstring("\n\t", " ");
    unfolded.ReplaceSubstring("\r\t", " ");
    unfolded.StripCRLF();
    aResult = unfolded;
  }

  return NS_OK;
}
// Interface entry point for RFC 2047 header decoding. Wraps the C-string
// aDefaultCharset in an nsCString and forwards everything to
// internalDecodeRFC2047Header, returning its result unchanged.
NS_IMETHODIMP
nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal,
const char* aDefaultCharset,
bool aOverrideCharset,
bool aEatContinuations,
nsACString& aResult) {
return internalDecodeRFC2047Header(aHeaderVal, nsCString(aDefaultCharset),
aOverrideCharset, aEatContinuations,
aResult);
}
// True if aChar may appear unescaped in an RFC 5987 value, i.e. it is an
// "attr-char" (RFC 5987, Section 3.2.1): an ASCII letter, an ASCII digit, or
// one of  ! # $ & + - . ^ _ ` | ~
bool IsRFC5987AttrChar(char aChar) {
  const bool isLetter =
      (aChar >= 'a' && aChar <= 'z') || (aChar >= 'A' && aChar <= 'Z');
  const bool isDigit = aChar >= '0' && aChar <= '9';
  if (isLetter || isDigit) {
    return true;
  }

  switch (aChar) {
    case '!':
    case '#':
    case '$':
    case '&':
    case '+':
    case '-':
    case '.':
    case '^':
    case '_':
    case '`':
    case '|':
    case '~':
      return true;
    default:
      return false;
  }
}
// Percent-decode aValue in place.
//
// The value is copied into a scratch C buffer, run through nsUnescape and
// assigned back. As written the function always returns true.
bool PercentDecode(nsACString& aValue) {
  char* scratch = static_cast<char*>(moz_xmalloc(aValue.Length() + 1));

  strcpy(scratch, PromiseFlatCString(aValue).get());
  nsUnescape(scratch);
  aValue.Assign(scratch);

  free(scratch);
  return true;
}
// Decode a parameter value using the encoding defined in RFC 5987
//
//   charset  "'" [ language ] "'" value-chars
//
// @param aParamVal  the encoded parameter value
// @param aLang      receives the language part (may be empty)
// @param aResult    receives the decoded value as UTF-16
// @return NS_ERROR_INVALID_ARG when the input contains non-ASCII octets, a
//         broken %-escape, a character that is not allowed in the value
//         part, not exactly two single quotes, or a charset other than
//         UTF-8 (compared case-insensitively); otherwise the status of the
//         final conversion.
NS_IMETHODIMP
nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString& aParamVal,
                                          nsACString& aLang,
                                          nsAString& aResult) {
  nsAutoCString charset;
  nsAutoCString language;
  nsAutoCString value;

  // Number of single quotes seen so far; selects the part being collected.
  uint32_t delimiters = 0;
  const nsCString& encoded = PromiseFlatCString(aParamVal);

  for (const char* c = encoded.get(); *c;) {
    const char tc = *c++;

    if (tc == '\'') {
      // single quote
      delimiters++;
      continue;
    }

    if (((unsigned char)tc) >= 128) {
      // fail early, not ASCII
      NS_WARNING("non-US-ASCII character in RFC5987-encoded param");
      return NS_ERROR_INVALID_ARG;
    }

    switch (delimiters) {
      case 0:
        // valid characters are checked later implicitly
        charset.Append(tc);
        break;

      case 1:
        // no value checking for now
        language.Append(tc);
        break;

      case 2:
        if (IsRFC5987AttrChar(tc)) {
          value.Append(tc);
        } else if (tc == '%') {
          if (!IsHexDigit(c[0]) || !IsHexDigit(c[1])) {
            // we expect two more characters
            NS_WARNING("broken %-escape in RFC5987-encoded param");
            return NS_ERROR_INVALID_ARG;
          }
          value.Append(tc);
          // we consume two more
          value.Append(*c++);
          value.Append(*c++);
        } else {
          // character not allowed here
          NS_WARNING("invalid character in RFC5987-encoded param");
          return NS_ERROR_INVALID_ARG;
        }
        break;

      default:
        // Text after a third quote is not collected; the delimiter count
        // check below rejects such input.
        break;
    }
  }

  if (delimiters != 2) {
    NS_WARNING("missing delimiters in RFC5987-encoded param");
    return NS_ERROR_INVALID_ARG;
  }

  // abort early for unsupported encodings
  if (!charset.LowerCaseEqualsLiteral("utf-8")) {
    NS_WARNING("unsupported charset in RFC5987-encoded param");
    return NS_ERROR_INVALID_ARG;
  }

  // percent-decode
  if (!PercentDecode(value)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  // hand back the language part
  aLang.Assign(language);

  // finally convert octet sequence to UTF-8 and be done
  nsAutoCString utf8;
  nsresult rv = ConvertStringToUTF8(value, charset, true, false, utf8);
  NS_ENSURE_SUCCESS(rv, rv);

  CopyUTF8toUTF16(utf8, aResult);
  return NS_OK;
}
// Turn a raw parameter value into its decoded form in aResult.
//
// - With a non-empty aCharset the value is taken to come from RFC 2231/5987
//   encoding and is simply converted from that charset.
// - Otherwise backslash escapes of CR, LF, '"' and '\' are removed and, if
//   aDecode2047 is set, RFC 2047 decoding is attempted on the outcome; a
//   non-empty decoding result replaces the unescaped text.
//
// @return the conversion status in the first case; in the second case NS_OK,
//         or the status of internalDecodeRFC2047Header when it was called.
nsresult internalDecodeParameter(const nsACString& aParamValue,
                                 const nsACString& aCharset,
                                 const nsACString& aDefaultCharset,
                                 bool aOverrideCharset, bool aDecode2047,
                                 nsACString& aResult) {
  aResult.Truncate();

  // If aCharset is given, aParamValue was obtained from RFC2231/5987
  // encoding and we're pretty sure that it's in aCharset.
  if (!aCharset.IsEmpty()) {
    return ConvertStringToUTF8(aParamValue, aCharset, true, true, aResult);
  }

  const nsCString& param = PromiseFlatCString(aParamValue);
  nsAutoCString unQuoted;
  nsACString::const_iterator s, e;
  param.BeginReading(s);
  param.EndReading(e);

  // strip '\' when used to quote CR, LF, '"' and '\'
  for (; s != e; ++s) {
    if (*s == '\\') {
      nsACString::const_iterator next = s;
      ++next;
      const bool escapesSpecial =
          next != e && (*next == nsCRT::CR || *next == nsCRT::LF ||
                        *next == '"' || *next == '\\');
      if (escapesSpecial) {
        // drop the '\' and emit the quoted character instead
        s = next;
      }
      // otherwise ('\' at the very end, or followed by an ordinary
      // character) the '\' itself is emitted
    }
    unQuoted.Append(*s);
  }

  aResult = unQuoted;

  if (!aDecode2047) {
    return NS_OK;
  }

  // Try RFC 2047 encoding, instead.
  nsAutoCString decoded;
  nsresult rv = internalDecodeRFC2047Header(unQuoted.get(), aDefaultCharset,
                                            aOverrideCharset, true, decoded);
  if (NS_SUCCEEDED(rv) && !decoded.IsEmpty()) {
    aResult = decoded;
  }
  return rv;
}
// Interface entry point for parameter decoding. Wraps the C-string charset
// arguments in nsCString objects and forwards to internalDecodeParameter
// with RFC 2047 decoding enabled (the literal |true|).
NS_IMETHODIMP
nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
const char* aCharset,
const char* aDefaultCharset,
bool aOverrideCharset,
nsACString& aResult) {
return internalDecodeParameter(aParamValue, nsCString(aCharset),
nsCString(aDefaultCharset), aOverrideCharset,
true, aResult);
}
// True if |c|, viewed as an unsigned octet, is an ASCII hexadecimal digit:
// 0x30-0x39 ('0'-'9'), 0x41-0x46 ('A'-'F') or 0x61-0x66 ('a'-'f').
// |c| is expanded several times, so pass an expression without side effects.
#define ISHEXCHAR(c) \
((0x30 <= uint8_t(c) && uint8_t(c) <= 0x39) || \
(0x41 <= uint8_t(c) && uint8_t(c) <= 0x46) || \
(0x61 <= uint8_t(c) && uint8_t(c) <= 0x66))
// Decode Q encoding (RFC 2047).
//
// Decodes the first |length| chars of |in|: "=hh" becomes the octet with hex
// value hh, '_' becomes a space, everything else is copied. Afterwards every
// TAB in the output (up to its first NUL) is turned into a space.
//
// @return a calloc()ed, NUL-terminated buffer that the caller must free(),
//         or nullptr if allocation fails, an '=' is not followed by two hex
//         digits within the input, or an octet with the high bit set occurs.
// static
char* DecodeQ(const char* in, uint32_t length) {
  char* const dest = (char*)calloc(length + 1, sizeof(char));
  if (!dest) {
    return nullptr;
  }

  char* out = dest;
  while (length > 0) {
    if (*in == '=') {
      // check if |in| in the form of '=hh'  where h is [0-9a-fA-F].
      if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2])) {
        free(dest);
        return nullptr;
      }
      unsigned c = 0;
      PR_sscanf(in + 1, "%2X", &c);
      *out++ = (char)c;
      in += 3;
      length -= 3;
    } else if (*in == '_') {
      *out++ = ' ';
      in++;
      length--;
    } else {
      // raw 8bit octets are not allowed inside Q-encoded text
      if (*in & 0x80) {
        free(dest);
        return nullptr;
      }
      *out++ = *in++;
      length--;
    }
  }
  *out++ = '\0';

  for (out = dest; *out; ++out) {
    if (*out == '\t') {
      *out = ' ';
    }
  }

  return dest;
}
// check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842))
// or has ESC which may be an indication that it's in one of many ISO
// 2022 7bit encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554).
//
// @param input  octets to inspect (need not be NUL-terminated)
// @param len    number of octets to inspect
// @return false as soon as an octet with the high bit set is met; true as
//         soon as an ESC (0x1B) is met; otherwise true only if the scan ends
//         in the "HZ sequence seen" state.
//
// Only the first |len| octets are ever examined: the look-ahead after a '~'
// is performed solely when another octet remains inside the range, and a
// '~' that is the last octet is treated like a '~' followed by a
// non-HZ character.
// static
bool Is7bitNonAsciiString(const char* input, uint32_t len) {
  enum {
    hz_initial,    // No HZ seen yet
    hz_escaped,    // Inside an HZ ~{ escape sequence
    hz_seen,       // Have seen at least one complete HZ sequence
    hz_notpresent  // Have seen something that is not legal HZ
  } hz_state = hz_initial;

  while (len) {
    const int32_t c = uint8_t(*input++);
    len--;

    if (c & 0x80) return false;
    if (c == 0x1B) return true;
    if (c != '~') continue;

    // |*input| may be consulted only if it is still part of the range.
    const bool hasNext = len > 0;

    switch (hz_state) {
      case hz_initial:
      case hz_seen:
        if (hasNext && *input == '{') {
          hz_state = hz_escaped;
        } else if (hasNext && *input == '~') {
          // ~~ is the HZ encoding of ~.  Skip over second ~ as well
          hz_state = hz_seen;
          input++;
          len--;  // safe: hasNext guarantees len > 0
        } else {
          hz_state = hz_notpresent;
        }
        break;

      case hz_escaped:
        if (hasNext && *input == '}') hz_state = hz_seen;
        break;

      default:
        break;
    }
  }

  return hz_state == hz_seen;
}
#define REPLACEMENT_CHAR "\357\277\275"  // EF BF BD (UTF-8 encoding of U+FFFD)

// Append the 'raw' octets aInput[0..aLen) to aOutput.
//
// Without a default charset the octets are appended blindly. Otherwise the
// leading run of plain US-ASCII is appended directly; the scan stops at the
// first ESC (possible ISO 2022), '~' (possible HZ) or 8bit octet, and
// everything from there on is converted from aDefaultCharset to UTF-8. If
// that conversion fails, the remaining octets are appended one by one, with
// each 8bit octet replaced by REPLACEMENT_CHAR.
// static
void CopyRawHeader(const char* aInput, uint32_t aLen,
                   const nsACString& aDefaultCharset, nsACString& aOutput) {
  // If aDefaultCharset is not specified, make a blind copy.
  if (aDefaultCharset.IsEmpty()) {
    aOutput.Append(aInput, aLen);
    return;
  }

  // Copy as long as it's US-ASCII.
  int32_t c;
  while (aLen) {
    c = uint8_t(*aInput++);
    if (c == 0x1B || c == '~' || (c & 0x80)) {
      break;
    }
    aOutput.Append(char(c));
    aLen--;
  }
  if (!aLen) {
    return;
  }
  // Step back onto the octet that ended the ASCII run.
  aInput--;

  // skip ASCIIness/UTF8ness test if aInput is suspected to be a 7bit
  // non-ascii string and aDefaultCharset is a 7bit non-ascii charset.
  bool skipCheck =
      (c == 0x1B || c == '~') &&
      IS_7BIT_NON_ASCII_CHARSET(PromiseFlatCString(aDefaultCharset).get());

  // If not UTF-8, treat as default charset
  nsAutoCString utf8Text;
  if (NS_SUCCEEDED(ConvertStringToUTF8(Substring(aInput, aInput + aLen),
                                       PromiseFlatCString(aDefaultCharset),
                                       skipCheck, true, utf8Text))) {
    aOutput.Append(utf8Text);
    return;
  }

  // Conversion failed: keep 7bit octets, substitute 8bit ones.
  for (const char* end = aInput + aLen; aInput != end; ++aInput) {
    c = uint8_t(*aInput);
    if (c & 0x80) {
      aOutput.Append(REPLACEMENT_CHAR);
    } else {
      aOutput.Append(char(c));
    }
  }
}
// Decode the payload of one encoded-word and append it, converted from
// aCharset to UTF-8, to aResult.
//
// @param aEncoded    encoded text
// @param aLen        length of aEncoded
// @param aQOrBase64  'Q' selects DecodeQ, 'B' selects PL_Base64Decode
// @param aCharset    charset of the decoded octets
// @return NS_ERROR_INVALID_ARG for an unknown encoding letter or when the
//         decoder yields no buffer; the conversion error if the charset
//         conversion fails (aResult is then left untouched); NS_OK otherwise.
nsresult DecodeQOrBase64Str(const char* aEncoded, size_t aLen, char aQOrBase64,
                            const nsACString& aCharset, nsACString& aResult) {
  char* decodedText;
  bool b64alloc = false;
  NS_ASSERTION(aQOrBase64 == 'Q' || aQOrBase64 == 'B', "Should be 'Q' or 'B'");

  switch (aQOrBase64) {
    case 'Q':
      decodedText = DecodeQ(aEncoded, aLen);
      break;
    case 'B':
      decodedText = PL_Base64Decode(aEncoded, aLen, nullptr);
      b64alloc = true;
      break;
    default:
      return NS_ERROR_INVALID_ARG;
  }

  if (!decodedText) {
    return NS_ERROR_INVALID_ARG;
  }

  nsAutoCString utf8Text;
  // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
  nsresult rv = ConvertStringToUTF8(
      nsDependentCString(decodedText), aCharset,
      IS_7BIT_NON_ASCII_CHARSET(PromiseFlatCString(aCharset).get()), true,
      utf8Text);

  // The two decoders hand out their buffers differently, so each buffer is
  // released with the matching deallocator.
  if (b64alloc) {
    PR_Free(decodedText);
  } else {
    free(decodedText);
  }

  if (NS_FAILED(rv)) {
    return rv;
  }

  aResult.Append(utf8Text);
  return NS_OK;
}
// Characters that are not permitted in the charset part of an encoded-word;
// DecodeRFC2047Str treats any of them as a syntax error while scanning for
// the '?' that ends the charset.
static const char especials[] = R"(()<>@,;:\"/[]?.=)";
// |decode_mime_part2_str| taken from comi18n.c
// Decode RFC2047-encoded words in the input and convert the result to UTF-8.
// If aOverrideCharset is true, charset in RFC2047-encoded words is
// ignored and aDefaultCharset is assumed, instead. aDefaultCharset
// is also used to convert raw octets (without RFC 2047 encoding) to UTF-8.
//
// aHeader must be a nul-terminated string. The decoded text is appended to
// aResult, and afterwards every tab in aResult is replaced with a space.
// Text that does not parse as an encoded-word is passed through instead of
// being decoded, and a payload that fails to decode is emitted undecoded, so
// the function always returns NS_OK.
// static
nsresult DecodeRFC2047Str(const char* aHeader,
                          const nsACString& aDefaultCharset,
                          bool aOverrideCharset, nsACString& aResult) {
  const char *p, *q = nullptr, *r;
  const char* begin;  // tracking pointer for where we are in the input buffer
  int32_t isLastEncodedWord = 0;
  const char *charsetStart, *charsetEnd;
  nsAutoCString prevCharset, curCharset;
  // Payloads of consecutive encoded-words that share charset and encoding are
  // accumulated here and decoded in one go (see bug 493544 below).
  nsAutoCString encodedText;
  char prevEncoding = '\0', curEncoding;
  nsresult rv;

  begin = aHeader;

  // To avoid buffer realloc, if possible, set capacity in advance. No
  // matter what, more than 3x expansion can never happen for all charsets
  // supported by Mozilla. SCSU/BCSU with the sliding window set to a
  // non-BMP block may be exceptions, but Mozilla does not support them.
  // Nor does any known mail/news program use them. Even if one did, we
  // would be safe because we don't use a raw char* any more.
  aResult.SetCapacity(3 * strlen(aHeader));

  while ((p = PL_strstr(begin, "=?")) != nullptr) {
    if (isLastEncodedWord) {
      // See if it's all whitespace.
      for (q = begin; q < p; ++q) {
        if (!PL_strchr(" \t\r\n", *q)) break;
      }
    }

    // The gap before this "=?" is copied unless it is pure whitespace
    // sitting between two encoded-words, in which case it is dropped.
    if (!isLastEncodedWord || q < p) {
      if (!encodedText.IsEmpty()) {
        rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
                                prevEncoding, prevCharset, aResult);
        if (NS_FAILED(rv)) {
          aResult.Append(encodedText);
        }
        encodedText.Truncate();
        prevCharset.Truncate();
        prevEncoding = '\0';
      }
      // copy the part before the encoded-word
      CopyRawHeader(begin, p - begin, aDefaultCharset, aResult);
      begin = p;
    }

    p += 2;

    // Get charset info
    charsetStart = p;
    charsetEnd = nullptr;
    for (q = p; *q != '?'; q++) {
      // The "<= ' '" test also stops the scan at the nul terminator.
      if (*q <= ' ' || PL_strchr(especials, *q)) {
        goto badsyntax;
      }

      // RFC 2231 section 5
      if (!charsetEnd && *q == '*') {
        charsetEnd = q;
      }
    }
    if (!charsetEnd) {
      charsetEnd = q;
    }

    q++;
    curEncoding = nsCRT::ToUpper(*q);
    if (curEncoding != 'Q' && curEncoding != 'B') goto badsyntax;

    if (q[1] != '?') goto badsyntax;

    // loop-wise, keep going until we hit "?=".  the inner check handles the
    // nul terminator should the string terminate before we hit the right
    // marker.  (And the r[1] will never reach beyond the end of the string
    // because *r != '?' is true if r is the nul character.)
    for (r = q + 2; *r != '?' || r[1] != '='; r++) {
      if (*r < ' ') goto badsyntax;
    }
    if (r == q + 2) {
      // it's empty, skip
      begin = r + 2;
      isLastEncodedWord = 1;
      continue;
    }

    curCharset.Assign(charsetStart, charsetEnd - charsetStart);
    // Override charset if requested.  Never override labeled UTF-8.
    // Use default charset instead of UNKNOWN-8BIT
    if ((aOverrideCharset &&
         0 != nsCRT::strcasecmp(curCharset.get(), "UTF-8")) ||
        (!aDefaultCharset.IsEmpty() &&
         0 == nsCRT::strcasecmp(curCharset.get(), "UNKNOWN-8BIT"))) {
      curCharset = aDefaultCharset;
    }

    // Declared without an initializer because "goto badsyntax" above jumps
    // over this declaration into the same scope.
    const char* R;
    R = r;
    if (curEncoding == 'B') {
      // bug 227290. ignore an extraneous '=' at the end.
      // (# of characters in B-encoded part has to be a multiple of 4)
      int32_t n = r - (q + 2);
      R -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0;
    }
    // Bug 493544. Don't decode the encoded text until it ends
    if (R[-1] != '=' &&
        (prevCharset.IsEmpty() ||
         (curCharset == prevCharset && curEncoding == prevEncoding))) {
      encodedText.Append(q + 2, R - (q + 2));
      prevCharset = curCharset;
      prevEncoding = curEncoding;

      begin = r + 2;
      isLastEncodedWord = 1;
      continue;
    }

    // Declared without an initializer for the same goto-related reason as R.
    bool bDecoded;  // If the current line has been decoded.
    bDecoded = false;
    if (!encodedText.IsEmpty()) {
      if (curCharset == prevCharset && curEncoding == prevEncoding) {
        encodedText.Append(q + 2, R - (q + 2));
        bDecoded = true;
      }
      rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
                              prevEncoding, prevCharset, aResult);
      if (NS_FAILED(rv)) {
        aResult.Append(encodedText);
      }
      encodedText.Truncate();
      prevCharset.Truncate();
      prevEncoding = '\0';
    }
    if (!bDecoded) {
      rv = DecodeQOrBase64Str(q + 2, R - (q + 2), curEncoding, curCharset,
                              aResult);
      if (NS_FAILED(rv)) {
        // encodedText is always empty here (never filled, or just flushed
        // above), so fall back to the current word's own undecoded payload,
        // mirroring what the other failure paths do.
        aResult.Append(q + 2, R - (q + 2));
      }
    }

    begin = r + 2;
    isLastEncodedWord = 1;
    continue;

  badsyntax:
    if (!encodedText.IsEmpty()) {
      rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
                              prevEncoding, prevCharset, aResult);
      if (NS_FAILED(rv)) {
        aResult.Append(encodedText);
      }
      encodedText.Truncate();
      prevCharset.Truncate();
      // Reset alongside prevCharset, as every other flush site does.
      prevEncoding = '\0';
    }
    // copy the part before the encoded-word
    // (p already points past the "=?", so the marker itself is copied too
    // and the scan resumes right after it.)
    aResult.Append(begin, p - begin);
    begin = p;
    isLastEncodedWord = 0;
  }

  // Flush whatever is still pending once no further "=?" is found.
  if (!encodedText.IsEmpty()) {
    rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
                            prevEncoding, prevCharset, aResult);
    if (NS_FAILED(rv)) {
      aResult.Append(encodedText);
    }
  }

  // put the tail back
  CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult);

  nsAutoCString tempStr(aResult);
  tempStr.ReplaceChar('\t', ' ');
  aResult = tempStr;

  return NS_OK;
}