mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-05 00:25:27 +00:00
70553064dd
--HG-- rename : intl/uconv/src/8859-1.uf => intl/uconv/8859-1.uf rename : intl/uconv/src/8859-1.ut => intl/uconv/8859-1.ut rename : intl/uconv/src/cp1252.uf => intl/uconv/cp1252.uf rename : intl/uconv/src/cp1252.ut => intl/uconv/cp1252.ut rename : intl/uconv/src/macroman.uf => intl/uconv/macroman.uf rename : intl/uconv/src/macroman.ut => intl/uconv/macroman.ut rename : intl/uconv/src/nsCP1252ToUnicode.cpp => intl/uconv/nsCP1252ToUnicode.cpp rename : intl/uconv/src/nsCP1252ToUnicode.h => intl/uconv/nsCP1252ToUnicode.h rename : intl/uconv/src/nsConverterInputStream.cpp => intl/uconv/nsConverterInputStream.cpp rename : intl/uconv/src/nsConverterInputStream.h => intl/uconv/nsConverterInputStream.h rename : intl/uconv/src/nsConverterOutputStream.cpp => intl/uconv/nsConverterOutputStream.cpp rename : intl/uconv/src/nsConverterOutputStream.h => intl/uconv/nsConverterOutputStream.h rename : intl/uconv/src/nsISO88591ToUnicode.cpp => intl/uconv/nsISO88591ToUnicode.cpp rename : intl/uconv/src/nsISO88591ToUnicode.h => intl/uconv/nsISO88591ToUnicode.h rename : intl/uconv/src/nsMacRomanToUnicode.cpp => intl/uconv/nsMacRomanToUnicode.cpp rename : intl/uconv/src/nsMacRomanToUnicode.h => intl/uconv/nsMacRomanToUnicode.h rename : intl/uconv/src/nsReplacementToUnicode.cpp => intl/uconv/nsReplacementToUnicode.cpp rename : intl/uconv/src/nsReplacementToUnicode.h => intl/uconv/nsReplacementToUnicode.h rename : intl/uconv/src/nsScriptableUConv.cpp => intl/uconv/nsScriptableUConv.cpp rename : intl/uconv/src/nsScriptableUConv.h => intl/uconv/nsScriptableUConv.h rename : intl/uconv/src/nsTextToSubURI.cpp => intl/uconv/nsTextToSubURI.cpp rename : intl/uconv/src/nsTextToSubURI.h => intl/uconv/nsTextToSubURI.h rename : intl/uconv/src/nsUConvModule.cpp => intl/uconv/nsUConvModule.cpp rename : intl/uconv/src/nsUTF8ConverterService.cpp => intl/uconv/nsUTF8ConverterService.cpp rename : intl/uconv/src/nsUTF8ConverterService.h => intl/uconv/nsUTF8ConverterService.h rename : 
intl/uconv/src/nsUTF8ToUnicode.cpp => intl/uconv/nsUTF8ToUnicode.cpp rename : intl/uconv/src/nsUTF8ToUnicode.h => intl/uconv/nsUTF8ToUnicode.h rename : intl/uconv/src/nsUTF8ToUnicodeSSE2.cpp => intl/uconv/nsUTF8ToUnicodeSSE2.cpp rename : intl/uconv/src/nsUnicodeToCP1252.cpp => intl/uconv/nsUnicodeToCP1252.cpp rename : intl/uconv/src/nsUnicodeToCP1252.h => intl/uconv/nsUnicodeToCP1252.h rename : intl/uconv/src/nsUnicodeToISO88591.cpp => intl/uconv/nsUnicodeToISO88591.cpp rename : intl/uconv/src/nsUnicodeToISO88591.h => intl/uconv/nsUnicodeToISO88591.h rename : intl/uconv/src/nsUnicodeToMacRoman.cpp => intl/uconv/nsUnicodeToMacRoman.cpp rename : intl/uconv/src/nsUnicodeToMacRoman.h => intl/uconv/nsUnicodeToMacRoman.h rename : intl/uconv/src/nsUnicodeToUTF8.cpp => intl/uconv/nsUnicodeToUTF8.cpp rename : intl/uconv/src/nsUnicodeToUTF8.h => intl/uconv/nsUnicodeToUTF8.h
122 lines
4.1 KiB
C++
122 lines
4.1 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* vim:expandtab:shiftwidth=2:tabstop=4:
|
|
*/
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
#include "nsString.h"
|
|
#include "nsUTF8ConverterService.h"
|
|
#include "nsEscape.h"
|
|
#include "nsAutoPtr.h"
|
|
#include "nsIUnicodeDecoder.h"
|
|
#include "mozilla/dom/EncodingUtils.h"
|
|
|
|
using mozilla::dom::EncodingUtils;
|
|
|
|
// XPCOM boilerplate macro. NOTE(review): presumably expands to the nsISupports
// methods for nsUTF8ConverterService with nsIUTF8ConverterService as the
// interface it answers to -- confirm against nsISupportsImpl.h.
NS_IMPL_ISUPPORTS(nsUTF8ConverterService, nsIUTF8ConverterService)
|
|
|
|
// Decodes aString from the encoding labelled aCharset into UTF-16 and then
// writes the UTF-8 form of that text into aResult.
//
// aString            - bytes to convert.
// aCharset           - encoding label; must be non-null and non-empty.
// aAllowSubstitution - when false, the decoder's input-error behaviour is
//                      switched to nsIUnicodeDecoder::kOnError_Signal.
// aResult            - receives the UTF-8 text; written only if the decoder's
//                      Convert() call succeeds.
//
// Returns NS_ERROR_INVALID_ARG for a null/empty label, NS_ERROR_UCONV_NOCONV
// when the label does not resolve to an encoding, NS_ERROR_OUT_OF_MEMORY if
// the intermediate buffer is null, and otherwise whatever the decoder's
// GetMaxLength()/Convert() calls return.
static nsresult
ToUTF8(const nsACString &aString, const char *aCharset,
       bool aAllowSubstitution, nsACString &aResult)
{
  if (!aCharset || !*aCharset) {
    return NS_ERROR_INVALID_ARG;
  }

  // Map the caller-supplied label onto an encoding name.
  nsDependentCString charsetLabel(aCharset);
  nsAutoCString encodingName;
  if (!EncodingUtils::FindEncodingForLabelNoReplacement(charsetLabel,
                                                        encodingName)) {
    return NS_ERROR_UCONV_NOCONV;
  }

  nsCOMPtr<nsIUnicodeDecoder> decoder =
    EncodingUtils::DecoderForEncoding(encodingName);
  if (!aAllowSubstitution) {
    decoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
  }

  // The decoder works on a flat buffer; ask it how much UTF-16 room it needs.
  const nsAFlatCString& flatInput = PromiseFlatCString(aString);
  int32_t inputLen = aString.Length();
  int32_t utf16Len;
  nsresult rv = decoder->GetMaxLength(flatInput.get(), inputLen, &utf16Len);
  NS_ENSURE_SUCCESS(rv, rv);

  nsAutoArrayPtr<char16_t> utf16Buf(new char16_t[utf16Len]);
  NS_ENSURE_TRUE(utf16Buf, NS_ERROR_OUT_OF_MEMORY);

  // Convert() updates inputLen/utf16Len in place; utf16Len is then used as
  // the number of UTF-16 units to copy out.
  rv = decoder->Convert(flatInput.get(), &inputLen, utf16Buf.get(), &utf16Len);
  if (NS_FAILED(rv)) {
    return rv;
  }

  CopyUTF16toUTF8(Substring(utf16Buf.get(), utf16Buf.get() + utf16Len),
                  aResult);
  // Hand back the decoder's own (successful) status rather than a fixed NS_OK.
  return rv;
}
|
|
|
|
// Converts aString from aCharset to UTF-8, storing the result in aUTF8String.
//
// aSkipCheck         - when false, input that is already ASCII or valid UTF-8
//                      is copied through unchanged without consulting
//                      aCharset.
// aAllowSubstitution - only honoured when aOptionalArgc is 1; in every other
//                      case substitution is treated as allowed.
//
// Returns NS_OK on the pass-through paths, otherwise the result of ToUTF8().
NS_IMETHODIMP
nsUTF8ConverterService::ConvertStringToUTF8(const nsACString &aString,
                                            const char *aCharset,
                                            bool aSkipCheck,
                                            bool aAllowSubstitution,
                                            uint8_t aOptionalArgc,
                                            nsACString &aUTF8String)
{
  const bool substitutionAllowed = (aOptionalArgc != 1) || aAllowSubstitution;

  // Unless the caller opted out, short-circuit for ASCII / valid UTF-8 input.
  // A caller may opt out when it suspects a non-ASCII 7-bit charset
  // (ISO-2022-xx, HZ) or some other charset whose bytes could be mistaken
  // for UTF-8.
  if (!aSkipCheck) {
    if (IsASCII(aString) || IsUTF8(aString)) {
      aUTF8String = aString;
      return NS_OK;
    }
  }

  aUTF8String.Truncate();
  const nsresult rv =
    ToUTF8(aString, aCharset, substitutionAllowed, aUTF8String);

  // Normal exit: the check was not skipped, the conversion worked, or the
  // input is not UTF-8 either -- report the conversion status as-is.
  if (!aSkipCheck || NS_SUCCEEDED(rv) || !IsUTF8(aString)) {
    return rv;
  }

  // Safety net: the check was skipped, the conversion failed, and the input
  // is in fact valid UTF-8 (the caller's guess about the charset was wrong).
  // ASCII-ness is not tested here on the assumption that no supported charset
  // is ASCII-incompatible (EBCDIC is not supported).
  aUTF8String = aString;
  return NS_OK;
}
|
|
|
|
// Produces a UTF-8 version of the URI spec aSpec in aUTF8Spec, using aCharset
// to interpret escaped non-ASCII bytes that are not already UTF-8.
//
// Returns NS_OK on every pass-through path, otherwise the result of ToUTF8()
// (called with substitution allowed).
NS_IMETHODIMP
nsUTF8ConverterService::ConvertURISpecToUTF8(const nsACString &aSpec,
                                             const char *aCharset,
                                             nsACString &aUTF8Spec)
{
  // A spec carrying raw (unescaped) non-ASCII bytes is taken to be UTF-8
  // already; no valid spec in Mozilla is expected to violate that.
  if (!IsASCII(aSpec)) {
    aUTF8Spec = aSpec;
    return NS_OK;
  }

  aUTF8Spec.Truncate();

  // NS_UnescapeURL leaves its output string untouched (and returns false)
  // unless at least one character actually needed unescaping.
  nsAutoCString unescaped;
  const bool didUnescape =
    NS_UnescapeURL(PromiseFlatCString(aSpec).get(), aSpec.Length(),
                   esc_OnlyNonASCII, unescaped);

  if (!didUnescape) {
    // Nothing was escaped: the original spec is the answer.
    aUTF8Spec = aSpec;
  } else if (IsASCII(unescaped) || IsUTF8(unescaped)) {
    // The escapes decoded to plain ASCII or to valid UTF-8.
    aUTF8Spec = unescaped;
  } else {
    // The escapes hold bytes in some other encoding; decode via aCharset.
    return ToUTF8(unescaped, aCharset, true, aUTF8Spec);
  }

  return NS_OK;
}
|
|
|