mirror of
https://github.com/mozilla/gecko-dev.git
synced 2025-02-10 01:08:21 +00:00
Bug 1072187 - Remove HZ-GB-2312. r=emk.
This commit is contained in:
parent
c02f4630b6
commit
eac446fa48
@ -56,6 +56,8 @@ EncodingUtils::FindEncodingForLabelNoReplacement(const nsACString& aLabel,
|
||||
bool
|
||||
EncodingUtils::IsAsciiCompatible(const nsACString& aPreferredName)
|
||||
{
|
||||
// HZ and UTF-7 are no longer in mozilla-central, but keeping them here
|
||||
// just in case for the benefit of comm-central.
|
||||
return !(aPreferredName.LowerCaseEqualsLiteral("utf-16") ||
|
||||
aPreferredName.LowerCaseEqualsLiteral("utf-16be") ||
|
||||
aPreferredName.LowerCaseEqualsLiteral("utf-16le") ||
|
||||
|
@ -10,7 +10,6 @@ EUC-JP=ja
|
||||
EUC-KR=ko
|
||||
gb18030=zh-CN
|
||||
gbk=zh-CN
|
||||
HZ-GB-2312=zh-CN
|
||||
IBM866=x-cyrillic
|
||||
ISO-2022-JP=ja
|
||||
ISO-8859-3=x-western
|
||||
|
@ -50,9 +50,7 @@ UNIFIED_SOURCES += [
|
||||
UNIFIED_SOURCES += [
|
||||
'ucvcn/nsGBKConvUtil.cpp',
|
||||
'ucvcn/nsGBKToUnicode.cpp',
|
||||
'ucvcn/nsHZToUnicode.cpp',
|
||||
'ucvcn/nsUnicodeToGBK.cpp',
|
||||
'ucvcn/nsUnicodeToHZ.cpp',
|
||||
]
|
||||
|
||||
UNIFIED_SOURCES += [
|
||||
|
@ -135,6 +135,9 @@ NS_IMETHODIMP nsTextToSubURI::UnEscapeAndConvert(
|
||||
|
||||
static bool statefulCharset(const char *charset)
|
||||
{
|
||||
// HZ, UTF-7 and the CN and KR ISO-2022 variants are no longer in
|
||||
// mozilla-central but keeping them here just in case for the benefit of
|
||||
// comm-central.
|
||||
if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) ||
|
||||
!nsCRT::strcasecmp(charset, "UTF-7") ||
|
||||
!nsCRT::strcasecmp(charset, "HZ-GB-2312"))
|
||||
|
@ -122,8 +122,6 @@
|
||||
|
||||
// ucvcn
|
||||
#include "nsUCvCnCID.h"
|
||||
#include "nsHZToUnicode.h"
|
||||
#include "nsUnicodeToHZ.h"
|
||||
#include "nsGBKToUnicode.h"
|
||||
#include "nsUnicodeToGBK.h"
|
||||
#include "gbku.h"
|
||||
@ -193,7 +191,6 @@ NS_UCONV_REG_UNREG("EUC-KR", NS_EUCKRTOUNICODE_CID, NS_UNICODETOEUCKR_CID)
|
||||
|
||||
// ucvcn
|
||||
NS_UCONV_REG_UNREG("gbk", NS_GBKTOUNICODE_CID, NS_UNICODETOGBK_CID)
|
||||
NS_UCONV_REG_UNREG("HZ-GB-2312", NS_HZTOUNICODE_CID, NS_UNICODETOHZ_CID)
|
||||
NS_UCONV_REG_UNREG("gb18030", NS_GB18030TOUNICODE_CID, NS_UNICODETOGB18030_CID)
|
||||
|
||||
NS_CONVERTER_REGISTRY_END
|
||||
@ -222,8 +219,6 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToISO2022JP)
|
||||
|
||||
// ucvcn
|
||||
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToGBK)
|
||||
NS_GENERIC_FACTORY_CONSTRUCTOR(nsHZToUnicode)
|
||||
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToHZ)
|
||||
NS_GENERIC_FACTORY_CONSTRUCTOR(nsGB18030ToUnicode)
|
||||
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToGB18030)
|
||||
|
||||
@ -388,8 +383,6 @@ NS_DEFINE_NAMED_CID(NS_EUCKRTOUNICODE_CID);
|
||||
NS_DEFINE_NAMED_CID(NS_UNICODETOEUCKR_CID);
|
||||
NS_DEFINE_NAMED_CID(NS_GBKTOUNICODE_CID);
|
||||
NS_DEFINE_NAMED_CID(NS_UNICODETOGBK_CID);
|
||||
NS_DEFINE_NAMED_CID(NS_HZTOUNICODE_CID);
|
||||
NS_DEFINE_NAMED_CID(NS_UNICODETOHZ_CID);
|
||||
NS_DEFINE_NAMED_CID(NS_GB18030TOUNICODE_CID);
|
||||
NS_DEFINE_NAMED_CID(NS_UNICODETOGB18030_CID);
|
||||
|
||||
@ -494,8 +487,6 @@ static const mozilla::Module::CIDEntry kUConvCIDs[] = {
|
||||
{ &kNS_UNICODETOEUCKR_CID, false, nullptr, nsUnicodeToCP949Constructor },
|
||||
{ &kNS_GBKTOUNICODE_CID, false, nullptr, nsGB18030ToUnicodeConstructor },
|
||||
{ &kNS_UNICODETOGBK_CID, false, nullptr, nsUnicodeToGBKConstructor },
|
||||
{ &kNS_HZTOUNICODE_CID, false, nullptr, nsHZToUnicodeConstructor },
|
||||
{ &kNS_UNICODETOHZ_CID, false, nullptr, nsUnicodeToHZConstructor },
|
||||
{ &kNS_GB18030TOUNICODE_CID, false, nullptr, nsGB18030ToUnicodeConstructor },
|
||||
{ &kNS_UNICODETOGB18030_CID, false, nullptr, nsUnicodeToGB18030Constructor },
|
||||
{ nullptr },
|
||||
@ -602,8 +593,6 @@ static const mozilla::Module::ContractIDEntry kUConvContracts[] = {
|
||||
{ NS_UNICODEENCODER_CONTRACTID_BASE "EUC-KR", &kNS_UNICODETOEUCKR_CID },
|
||||
{ NS_UNICODEDECODER_CONTRACTID_BASE "gbk", &kNS_GBKTOUNICODE_CID },
|
||||
{ NS_UNICODEENCODER_CONTRACTID_BASE "gbk", &kNS_UNICODETOGBK_CID },
|
||||
{ NS_UNICODEDECODER_CONTRACTID_BASE "HZ-GB-2312", &kNS_HZTOUNICODE_CID },
|
||||
{ NS_UNICODEENCODER_CONTRACTID_BASE "HZ-GB-2312", &kNS_UNICODETOHZ_CID },
|
||||
{ NS_UNICODEDECODER_CONTRACTID_BASE "gb18030", &kNS_GB18030TOUNICODE_CID },
|
||||
{ NS_UNICODEENCODER_CONTRACTID_BASE "gb18030", &kNS_UNICODETOGB18030_CID },
|
||||
{ nullptr }
|
||||
|
@ -1,30 +0,0 @@
|
||||
/* Tests conversion from Unicode to HZ-GB-2312 (bug 367026)
|
||||
*
|
||||
* Notes:
|
||||
* HZ-GB-2312 is a 7-bit encoding of the GB2312 simplified Chinese character
|
||||
* set. It uses the escape sequences "~{" to mark the start of GB encoded text
|
||||
* and "~}" to mark the end.
|
||||
*
|
||||
* See http://www.ietf.org/rfc/rfc1843.txt
|
||||
*/
|
||||
|
||||
load('CharsetConversionTests.js');
|
||||
|
||||
const inASCII = "Hello World";
|
||||
const inHanzi = "\u4E00";
|
||||
const inMixed = "Hello \u4E00 World";
|
||||
|
||||
const expectedASCII = "Hello World";
|
||||
const expectedHanzi = "~{R;~}";
|
||||
const expectedMixed = "Hello ~{R;~} World";
|
||||
|
||||
const charset = "HZ-GB-2312";
|
||||
|
||||
function run_test() {
|
||||
var converter = CreateScriptableConverter();
|
||||
converter.isInternal = true;
|
||||
|
||||
checkEncode(converter, charset, inASCII, expectedASCII);
|
||||
checkEncode(converter, charset, inMixed, expectedMixed);
|
||||
checkEncode(converter, charset, inHanzi, expectedHanzi);
|
||||
}
|
@ -1,74 +0,0 @@
|
||||
const charset = "HZ-GB-2312";
|
||||
|
||||
function dumpStrings(inString, outString) {
|
||||
var dispIn = "";
|
||||
var dispOut = "";
|
||||
var i;
|
||||
for (i = 0; i < inString.length; ++i) {
|
||||
dispIn += " x" + inString.charCodeAt(i).toString(16);
|
||||
}
|
||||
if (outString.length == 0) {
|
||||
dispOut = "<empty>";
|
||||
} else {
|
||||
for (i = 0; i < outString.length; ++i) {
|
||||
dispOut += " x" + outString.charCodeAt(i).toString(16);
|
||||
}
|
||||
}
|
||||
dump("\"" + dispIn + "\" ==> \"" + dispOut + "\"\n");
|
||||
}
|
||||
|
||||
function error(inString, outString, msg){
|
||||
dumpStrings(inString, outString);
|
||||
do_throw("security risk: " + msg);
|
||||
}
|
||||
|
||||
function run_test() {
|
||||
var ScriptableUnicodeConverter =
|
||||
Components.Constructor("@mozilla.org/intl/scriptableunicodeconverter",
|
||||
"nsIScriptableUnicodeConverter");
|
||||
|
||||
var converter = new ScriptableUnicodeConverter();
|
||||
converter.isInternal = true;
|
||||
converter.charset = charset;
|
||||
|
||||
var leadByte, trailByte;
|
||||
var inString;
|
||||
for (leadByte = 1; leadByte < 0x100; ++leadByte) {
|
||||
for (trailByte = 1; trailByte < 0x100; ++trailByte) {
|
||||
if (leadByte == 0x7e) {
|
||||
if (trailByte == 0x7b ||
|
||||
trailByte == 0xa ||
|
||||
trailByte == 0x7e) {
|
||||
// ignore escape sequences:
|
||||
// ~{ (sets HZ-GB mode)
|
||||
// ~\n ( ==> \n)
|
||||
// ~~ ( ==> ~)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
inString = String.fromCharCode(leadByte, trailByte, 65);
|
||||
var outString = converter.ConvertToUnicode(inString) +
|
||||
converter.Finish();
|
||||
switch (outString.length) {
|
||||
case 1:
|
||||
error(inString, outString, "2 byte sequence eaten");
|
||||
break;
|
||||
case 2:
|
||||
if (outString.charCodeAt(0) < 0x80 &&
|
||||
outString.charCodeAt(1) < 0x80) {
|
||||
error(inString, outString,
|
||||
"2 byte sequence converted to 1 ASCII");
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
if (outString != inString &&
|
||||
outString.charCodeAt(0) < 0x80 &&
|
||||
outString.charCodeAt(1) < 0x80) {
|
||||
error(inString, outString,
|
||||
"2 byte sequence converted to 2 ASCII");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -14,13 +14,11 @@ support-files =
|
||||
[test_bug321379.js]
|
||||
[test_bug340714.js]
|
||||
[test_bug365345.js]
|
||||
[test_bug367026.js]
|
||||
[test_bug381412.Big5-HKSCS.js]
|
||||
[test_bug381412.Big5.js]
|
||||
[test_bug381412.euc-kr.js]
|
||||
[test_bug381412.euc_jp.js]
|
||||
[test_bug381412.gb2312.js]
|
||||
[test_bug381412.hk.gb2312.js]
|
||||
[test_bug381412.js]
|
||||
[test_bug396637.js]
|
||||
[test_bug399257.js]
|
||||
|
@ -1,176 +0,0 @@
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
/**
|
||||
* A character set converter from HZ to Unicode.
|
||||
*
|
||||
*
|
||||
* @created 08/Sept/1999
|
||||
* @author Yueheng Xu, Yueheng.Xu@intel.com
|
||||
*
|
||||
* Note: in this HZ-GB-2312 converter, we accept a string composed of 7-bit HZ
|
||||
* encoded Chinese chars,as it is defined in RFC1843 available at
|
||||
* http://www.cis.ohio-state.edu/htbin/rfc/rfc1843.html
|
||||
* and RFC1842 available at http://www.cis.ohio-state.edu/htbin/rfc/rfc1842.html.
|
||||
*
|
||||
* Earlier versions of the converter said:
|
||||
* "In an effort to match the similar extended capability of Microsoft
|
||||
* Internet Explorer 5.0. We also accept the 8-bit GB encoded chars
|
||||
* mixed in a HZ string.
|
||||
* But this should not be a recommended practice for HTML authors."
|
||||
* However, testing in current versions of IE shows that it only accepts
|
||||
* 8-bit characters when the converter is in GB state, and when in ASCII
|
||||
* state each single 8-bit character is converted to U+FFFD
|
||||
*
|
||||
* The priority of converting are as follows: first convert 8-bit GB code; then,
|
||||
* consume HZ ESC sequences such as '~{', '~}', '~~'; then, depending on the current
|
||||
* state ( default to ASCII state ) of the string, each 7-bit char is converted as an
|
||||
* ASCII, or two 7-bit chars are converted into a Chinese character.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "nsHZToUnicode.h"
|
||||
#include "gbku.h"
|
||||
#include "mozilla/Telemetry.h"
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Class nsHZToUnicode [implementation]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Subclassing of nsTablesDecoderSupport class [implementation]
|
||||
|
||||
#define HZ_STATE_GB 1
|
||||
#define HZ_STATE_ASCII 2
|
||||
#define HZ_STATE_ODD_BYTE_FLAG 0x80
|
||||
#define HZLEAD1 '~'
|
||||
#define HZLEAD2 '{'
|
||||
#define HZLEAD3 '}'
|
||||
#define HZ_ODD_BYTE_STATE (mHZState & (HZ_STATE_ODD_BYTE_FLAG))
|
||||
#define HZ_ENCODING_STATE (mHZState & ~(HZ_STATE_ODD_BYTE_FLAG))
|
||||
|
||||
using namespace mozilla;
|
||||
|
||||
nsHZToUnicode::nsHZToUnicode() : nsBufferDecoderSupport(1)
|
||||
{
|
||||
mHZState = HZ_STATE_ASCII; // per HZ spec, default to ASCII state
|
||||
mRunLength = 0;
|
||||
mOddByte = 0;
|
||||
Telemetry::Accumulate(Telemetry::DECODER_INSTANTIATED_HZ, true);
|
||||
}
|
||||
|
||||
//Overwriting the ConvertNoBuff() in nsUCvCnSupport.cpp.
|
||||
NS_IMETHODIMP nsHZToUnicode::ConvertNoBuff(
|
||||
const char* aSrc,
|
||||
int32_t * aSrcLength,
|
||||
char16_t *aDest,
|
||||
int32_t * aDestLength)
|
||||
{
|
||||
int32_t i=0;
|
||||
int32_t iSrcLength = *aSrcLength;
|
||||
int32_t iDestlen = 0;
|
||||
*aSrcLength=0;
|
||||
nsresult res = NS_OK;
|
||||
char oddByte = mOddByte;
|
||||
|
||||
for (i=0; i<iSrcLength; i++) {
|
||||
if (iDestlen >= (*aDestLength)) {
|
||||
res = NS_OK_UDEC_MOREOUTPUT;
|
||||
break;
|
||||
}
|
||||
|
||||
char srcByte = *aSrc++;
|
||||
(*aSrcLength)++;
|
||||
|
||||
if (!HZ_ODD_BYTE_STATE) {
|
||||
if (srcByte == HZLEAD1 ||
|
||||
(HZ_ENCODING_STATE == HZ_STATE_GB &&
|
||||
(UINT8_IN_RANGE(0x21, srcByte, 0x7E) ||
|
||||
UINT8_IN_RANGE(0x81, srcByte, 0xFE)))) {
|
||||
oddByte = srcByte;
|
||||
mHZState |= HZ_STATE_ODD_BYTE_FLAG;
|
||||
} else {
|
||||
*aDest++ = (srcByte & 0x80) ? UCS2_NO_MAPPING :
|
||||
CAST_CHAR_TO_UNICHAR(srcByte);
|
||||
iDestlen++;
|
||||
}
|
||||
} else {
|
||||
if (oddByte & 0x80) {
|
||||
// Accept legal 8-bit GB 2312-80 sequences in GB mode only
|
||||
NS_ASSERTION(HZ_ENCODING_STATE == HZ_STATE_GB,
|
||||
"Invalid lead byte in ASCII mode");
|
||||
*aDest++ = (UINT8_IN_RANGE(0x81, oddByte, 0xFE) &&
|
||||
UINT8_IN_RANGE(0x40, srcByte, 0xFE)) ?
|
||||
mUtil.GBKCharToUnicode(oddByte, srcByte) : UCS2_NO_MAPPING;
|
||||
mRunLength++;
|
||||
iDestlen++;
|
||||
// otherwise, it is a 7-bit byte
|
||||
// The source will be an ASCII or a 7-bit HZ code depending on oddByte
|
||||
} else if (oddByte == HZLEAD1) { // if it is lead by '~'
|
||||
switch (srcByte) {
|
||||
case HZLEAD2:
|
||||
// we got a '~{'
|
||||
// we are switching to HZ state
|
||||
mHZState = HZ_STATE_GB;
|
||||
mRunLength = 0;
|
||||
break;
|
||||
|
||||
case HZLEAD3:
|
||||
// we got a '~}'
|
||||
// we are switching to ASCII state
|
||||
mHZState = HZ_STATE_ASCII;
|
||||
if (mRunLength == 0) {
|
||||
*aDest++ = UCS2_NO_MAPPING;
|
||||
iDestlen++;
|
||||
}
|
||||
mRunLength = 0;
|
||||
break;
|
||||
|
||||
case HZLEAD1:
|
||||
// we got a '~~', process like an ASCII, but no state change
|
||||
*aDest++ = CAST_CHAR_TO_UNICHAR(srcByte);
|
||||
iDestlen++;
|
||||
mRunLength++;
|
||||
break;
|
||||
|
||||
default:
|
||||
// Undefined ESC sequence '~X': treat as an error if X is a
|
||||
// printable character or we are in ASCII mode, and resynchronize
|
||||
// on the second character.
|
||||
//
|
||||
// N.B. For compatibility with other implementations, we treat '~\n'
|
||||
// as an illegal sequence even though RFC1843 permits it, and for
|
||||
// the same reason we pass through control characters including '\n'
|
||||
// and ' ' even in GB mode.
|
||||
if (srcByte > 0x20 || HZ_ENCODING_STATE == HZ_STATE_ASCII) {
|
||||
*aDest++ = UCS2_NO_MAPPING;
|
||||
iDestlen++;
|
||||
}
|
||||
aSrc--;
|
||||
(*aSrcLength)--;
|
||||
i--;
|
||||
break;
|
||||
}
|
||||
} else if (HZ_ENCODING_STATE == HZ_STATE_GB) {
|
||||
*aDest++ = (UINT8_IN_RANGE(0x21, oddByte, 0x7E) &&
|
||||
UINT8_IN_RANGE(0x21, srcByte, 0x7E)) ?
|
||||
mUtil.GBKCharToUnicode(oddByte|0x80, srcByte|0x80) :
|
||||
UCS2_NO_MAPPING;
|
||||
mRunLength++;
|
||||
iDestlen++;
|
||||
} else {
|
||||
NS_NOTREACHED("2-byte sequence that we don't know how to handle");
|
||||
*aDest++ = UCS2_NO_MAPPING;
|
||||
iDestlen++;
|
||||
}
|
||||
oddByte = 0;
|
||||
mHZState &= ~HZ_STATE_ODD_BYTE_FLAG;
|
||||
}
|
||||
} // for loop
|
||||
mOddByte = HZ_ODD_BYTE_STATE ? oddByte : 0;
|
||||
*aDestLength = iDestlen;
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -1,46 +0,0 @@
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef nsHZToUnicode_h___
|
||||
#define nsHZToUnicode_h___
|
||||
|
||||
#include "nsUCSupport.h"
|
||||
#include "nsGBKConvUtil.h"
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Class nsHZToUnicode [declaration]
|
||||
|
||||
/**
|
||||
* A character set converter from HZ to Unicode.
|
||||
*
|
||||
*
|
||||
* @created 08/Sept/1999
|
||||
* @author Yueheng Xu, Yueheng.Xu@intel.com
|
||||
*/
|
||||
class nsHZToUnicode : public nsBufferDecoderSupport
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Class constructor.
|
||||
*/
|
||||
nsHZToUnicode();
|
||||
|
||||
protected:
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
// Subclassing of nsDecoderSupport class [declaration]
|
||||
NS_IMETHOD ConvertNoBuff(const char* aSrc, int32_t * aSrcLength,
|
||||
char16_t *aDest, int32_t * aDestLength);
|
||||
nsGBKConvUtil mUtil;
|
||||
|
||||
private:
|
||||
int16_t mHZState;
|
||||
uint32_t mRunLength; // length of a run of 8-bit GB-encoded characters
|
||||
char mOddByte; // first byte of a multi-byte sequence from a previous buffer
|
||||
|
||||
};
|
||||
|
||||
#endif /* nsHZToUnicode_h___ */
|
@ -8,11 +8,6 @@
|
||||
|
||||
#include "nsISupports.h"
|
||||
|
||||
// Class ID for our HZToUnicode charset converter
|
||||
// {BA61519A-1DFA-11d3-B3BF-00805F8A6670}
|
||||
#define NS_HZTOUNICODE_CID \
|
||||
{ 0xba61519a, 0x1dfa, 0x11d3, {0xb3, 0xbf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70}}
|
||||
|
||||
// Class ID for our GBKToUnicode charset converter
|
||||
// {BA61519E-1DFA-11d3-B3BF-00805F8A6670}
|
||||
#define NS_GBKTOUNICODE_CID \
|
||||
@ -23,11 +18,6 @@
|
||||
#define NS_UNICODETOGBK_CID \
|
||||
{ 0xba61519b, 0x1dfa, 0x11d3, {0xb3, 0xbf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70}}
|
||||
|
||||
// Class ID for our UnicodeToHZ charset converter
|
||||
// {BA61519D-1DFA-11d3-B3BF-00805F8A6670}
|
||||
#define NS_UNICODETOHZ_CID \
|
||||
{ 0xba61519d, 0x1dfa, 0x11d3, {0xb3, 0xbf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70}}
|
||||
|
||||
// Class ID for our UnicodeToGB18030 charset converter
|
||||
// {A59DA932-4091-11d5-A145-005004832142}
|
||||
#define NS_UNICODETOGB18030_CID \
|
||||
|
@ -1,116 +0,0 @@
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
/**
|
||||
* A character set converter from Unicode to HZ.
|
||||
*
|
||||
*
|
||||
* @created 08/Sept/1999
|
||||
* @author Yueheng Xu, Yueheng.Xu@intel.com
|
||||
* Revision History
|
||||
* 04/Oct/1999. Yueheng Xu: Fixed line continuation problem when line
|
||||
* ended by '~';
|
||||
* Used table UnicodeToGBK[] to speed up the mapping.
|
||||
*/
|
||||
#include "nsUnicodeToHZ.h"
|
||||
#include "gbku.h"
|
||||
//----------------------------------------------------------------------
|
||||
// Class nsUnicodeToHZ [implementation]
|
||||
#define HZ_STATE_GB 1
|
||||
#define HZ_STATE_ASCII 2
|
||||
#define HZ_STATE_TILD 3
|
||||
#define HZLEAD1 '~'
|
||||
#define HZLEAD2 '{'
|
||||
#define HZLEAD3 '}'
|
||||
#define UNICODE_TILD 0x007E
|
||||
nsUnicodeToHZ::nsUnicodeToHZ() : nsEncoderSupport(6)
|
||||
{
|
||||
mHZState = HZ_STATE_ASCII; // per HZ spec, default to HZ mode
|
||||
}
|
||||
NS_IMETHODIMP nsUnicodeToHZ::ConvertNoBuff(
|
||||
const char16_t * aSrc,
|
||||
int32_t * aSrcLength,
|
||||
char * aDest,
|
||||
int32_t * aDestLength)
|
||||
{
|
||||
int32_t i=0;
|
||||
int32_t iSrcLength = *aSrcLength;
|
||||
int32_t iDestLength = 0;
|
||||
|
||||
for (i=0;i< iSrcLength;i++)
|
||||
{
|
||||
if(! IS_ASCII(*aSrc))
|
||||
{
|
||||
// hi byte has something, it is not ASCII, process as a GB
|
||||
if ( mHZState != HZ_STATE_GB )
|
||||
{
|
||||
// we are adding a '~{' ESC sequence to star a HZ string
|
||||
mHZState = HZ_STATE_GB;
|
||||
aDest[0] = '~';
|
||||
aDest[1] = '{';
|
||||
aDest += 2; // increment 2 bytes
|
||||
iDestLength +=2;
|
||||
}
|
||||
if(mUtil.UnicodeToGBKChar(*aSrc, true, &aDest[0], &aDest[1])) {
|
||||
aDest += 2; // increment 2 bytes
|
||||
iDestLength +=2;
|
||||
} else {
|
||||
// some thing that we cannot convert
|
||||
// xxx fix me ftang
|
||||
// error handling here
|
||||
}
|
||||
} else {
|
||||
// this is an ASCII
|
||||
|
||||
// if we are in HZ mode, end it by adding a '~}' ESC sequence
|
||||
if ( mHZState == HZ_STATE_GB )
|
||||
{
|
||||
mHZState = HZ_STATE_ASCII;
|
||||
aDest[0] = '~';
|
||||
aDest[1] = '}';
|
||||
aDest += 2; // increment 2 bytes
|
||||
iDestLength +=2;
|
||||
}
|
||||
|
||||
// if this is a regular char '~' , convert it to two '~'
|
||||
if ( *aSrc == UNICODE_TILD )
|
||||
{
|
||||
aDest[0] = '~';
|
||||
aDest[1] = '~';
|
||||
aDest += 2; // increment 2 bytes
|
||||
iDestLength +=2;
|
||||
} else {
|
||||
// other regular ASCII chars convert by normal ways
|
||||
|
||||
// Is this works for both little endian and big endian machines ?
|
||||
*aDest = (char) ( (char16_t)(*aSrc) );
|
||||
aDest++; // increment 1 byte
|
||||
iDestLength +=1;
|
||||
}
|
||||
}
|
||||
aSrc++; // increment 2 bytes
|
||||
if ( iDestLength >= (*aDestLength) )
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
*aDestLength = iDestLength;
|
||||
*aSrcLength = i;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
NS_IMETHODIMP nsUnicodeToHZ::FinishNoBuff(char * aDest, int32_t * aDestLength)
|
||||
{
|
||||
if ( mHZState == HZ_STATE_GB )
|
||||
{
|
||||
// if we are in HZ mode, end it by adding a '~}' ESC sequence
|
||||
mHZState = HZ_STATE_ASCII;
|
||||
aDest[0] = '~';
|
||||
aDest[1] = '}';
|
||||
*aDestLength = 2;
|
||||
} else {
|
||||
*aDestLength = 0;
|
||||
}
|
||||
return NS_OK;
|
||||
}
|
@ -1,55 +0,0 @@
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/**
|
||||
* A character set converter from Unicode to HZ.
|
||||
*
|
||||
*
|
||||
* @created 08/Sept/1999
|
||||
* @author Yueheng Xu, Yueheng.Xu@intel.com
|
||||
*/
|
||||
|
||||
#ifndef nsUnicodeToHZ_h___
|
||||
#define nsUnicodeToHZ_h___
|
||||
|
||||
#include "nsUCSupport.h"
|
||||
#include "nsGBKConvUtil.h"
|
||||
//----------------------------------------------------------------------
|
||||
// Class nsUnicodeToHZ [declaration]
|
||||
|
||||
class nsUnicodeToHZ: public nsEncoderSupport
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Class constructor.
|
||||
*/
|
||||
nsUnicodeToHZ();
|
||||
|
||||
protected:
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
// Subclassing of nsEncoderSupport class [declaration]
|
||||
NS_IMETHOD ConvertNoBuff(const char16_t * aSrc,
|
||||
int32_t * aSrcLength,
|
||||
char * aDest,
|
||||
int32_t * aDestLength);
|
||||
|
||||
NS_IMETHOD FinishNoBuff(char * aDest, int32_t * aDestLength);
|
||||
|
||||
NS_IMETHOD ConvertNoBuffNoErr(const char16_t * aSrc, int32_t * aSrcLength,
|
||||
char * aDest, int32_t * aDestLength)
|
||||
{
|
||||
return NS_OK;
|
||||
} // just make it not abstract;
|
||||
|
||||
uint16_t mHZState;
|
||||
protected:
|
||||
nsGBKConvUtil mUtil;
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif /* nsUnicodeToHZ_h___ */
|
@ -1,13 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="hz-gb-2312">
|
||||
<style type="text/css">
|
||||
p#p { color: green; }
|
||||
</style>
|
||||
<title>Bug 801681</title>
|
||||
</head>
|
||||
<body>
|
||||
<p id="p">� This line should be green</p>
|
||||
</body>
|
||||
</html>
|
@ -1,17 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<!--
|
||||
please do NOT add a newline at the end of this file. The buggy behaviour
|
||||
that it exhibits depends on the ">" of "</script>" being the last character
|
||||
in the file.
|
||||
-->
|
||||
<meta charset="hz-gb-2312">
|
||||
<style type="text/css">
|
||||
p#p { color: red; }
|
||||
</style>
|
||||
<title>Bug 801681</title>
|
||||
</head>
|
||||
<body>
|
||||
<p id="p">~ This line should be green</p>
|
||||
<script>document.getElementById("p").style.color="green";</script>
|
@ -1,10 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="hz-gb-2312">
|
||||
<title>Bug 801681</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>火狐</p>
|
||||
</body>
|
||||
</html>
|
@ -1,10 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="hz-gb-2312">
|
||||
<title>Bug 801681</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>~{;p:|~ ~ ~ ~ ~ ~}</p>
|
||||
</body>
|
||||
</html>
|
@ -1736,8 +1736,6 @@ random-if(Android&&AndroidVersion<15) == 776265-1a.html 776265-1-ref.html
|
||||
== 796847-1.svg 796847-1-ref.svg
|
||||
fuzzy(40,850) fuzzy-if(azureQuartz,73,542) == 797797-1.html 797797-1-ref.html # 'opacity:N' and rgba(,,,N) text don't match precisely
|
||||
fuzzy(40,850) fuzzy-if(azureQuartz,68,586) == 797797-2.html 797797-2-ref.html # 'opacity:N' and rgba(,,,N) text don't match precisely
|
||||
== 801681-1.html 801681-1-ref.html
|
||||
== 801681-2.html 801681-2-ref.html
|
||||
== 801994-1.html 801994-1-ref.html
|
||||
== 804323-1.html 804323-1-ref.html
|
||||
fuzzy-if(Android,8,608) == 811301-1.html 811301-1-ref.html
|
||||
|
Loading…
x
Reference in New Issue
Block a user