diff --git a/dom/encoding/domainsfallbacks.properties b/dom/encoding/domainsfallbacks.properties index b4911955d04b..4189b8f89242 100644 --- a/dom/encoding/domainsfallbacks.properties +++ b/dom/encoding/domainsfallbacks.properties @@ -54,8 +54,8 @@ xn--wgbh1c=windows-1256 gr=ISO-8859-7 -hk=Big5 -xn--j6w193g=Big5 +hk=Big5-HKSCS +xn--j6w193g=Big5-HKSCS hr=windows-1250 diff --git a/dom/encoding/encodingsgroups.properties b/dom/encoding/encodingsgroups.properties index 79f8fe5d0ddb..17f876f4a6b0 100644 --- a/dom/encoding/encodingsgroups.properties +++ b/dom/encoding/encodingsgroups.properties @@ -5,6 +5,7 @@ # x-unicode is assumed for encodings not listed here Big5=zh-TW +Big5-HKSCS=zh-HK EUC-JP=ja EUC-KR=ko gb18030=zh-CN diff --git a/dom/encoding/labelsencodings.properties b/dom/encoding/labelsencodings.properties index 42816c26d245..bf20fc0c966e 100644 --- a/dom/encoding/labelsencodings.properties +++ b/dom/encoding/labelsencodings.properties @@ -189,7 +189,7 @@ x-gbk=gbk gb18030=gb18030 hz-gb-2312=replacement big5=Big5 -big5-hkscs=Big5 +big5-hkscs=Big5-HKSCS cn-big5=Big5 csbig5=Big5 x-x-big5=Big5 diff --git a/dom/encoding/test/test_TextDecoder.js b/dom/encoding/test/test_TextDecoder.js index f6f726f18b9f..8d883605747d 100644 --- a/dom/encoding/test/test_TextDecoder.js +++ b/dom/encoding/test/test_TextDecoder.js @@ -44,7 +44,6 @@ function runTextDecoderOptions() }, "testDecodeABVOption"); test(testDecoderForThaiEncoding, "testDecoderForThaiEncoding"); test(testInvalid2022JP, "testInvalid2022JP"); - test(testDecoderForBig5, "testDecoderForBig5"); } /* @@ -356,7 +355,8 @@ function testDecoderGetEncoding() {encoding: "x-mac-cyrillic", labels: ["x-mac-cyrillic", "x-mac-ukrainian"]}, {encoding: "gbk", labels: ["chinese", "csgb2312", "csiso58gb231280", "gb2312", "gb_2312", "gb_2312-80", "gbk", "iso-ir-58", "x-gbk"]}, {encoding: "gb18030", labels: ["gb18030"]}, - {encoding: "big5", labels: ["big5", "cn-big5", "csbig5", "x-x-big5", "big5-hkscs"]}, + {encoding: "big5", 
labels: ["big5", "cn-big5", "csbig5", "x-x-big5"]}, + {encoding: "big5-hkscs", labels: ["big5-hkscs"]}, {encoding: "euc-jp", labels: ["cseucpkdfmtjapanese", "euc-jp", "x-euc-jp"]}, {encoding: "iso-2022-jp", labels: ["csiso2022jp", "iso-2022-jp"]}, {encoding: "shift_jis", labels: ["csshiftjis", "ms_kanji", "shift-jis", "shift_jis", "sjis", "windows-31j", "x-sjis"]}, @@ -463,78 +463,3 @@ function testInvalid2022JP() }); assert_equals(failureCount, 0, failureCount + " of " + inputs.length + " tests failed"); } - -function testDecoderForBig5() -{ - const inputs = [ - [ 0x61, 0x62 ], - [ 0x87, 0x40 ], - [ 0xFE, 0xFE ], - [ 0xFE, 0xFD ], - [ 0x88, 0x62 ], - [ 0x88, 0x64 ], - [ 0x88, 0x66 ], - [ 0x88, 0xA3 ], - [ 0x88, 0xA5 ], - [ 0x88, 0xA7 ], - [ 0x99, 0xD4 ], - [ 0x99, 0xD5 ], - [ 0x99, 0xD6 ], - [ 0x61, 0x87, 0x40, 0x62 ], - [ 0x61, 0xFE, 0xFE, 0x62 ], - [ 0x61, 0xFE, 0xFD, 0x62 ], - [ 0x61, 0x88, 0x62, 0x62 ], - [ 0x61, 0x88, 0x64, 0x62 ], - [ 0x61, 0x88, 0x66, 0x62 ], - [ 0x61, 0x88, 0xA3, 0x62 ], - [ 0x61, 0x88, 0xA5, 0x62 ], - [ 0x61, 0x88, 0xA7, 0x62 ], - [ 0x61, 0x99, 0xD4, 0x62 ], - [ 0x61, 0x99, 0xD5, 0x62 ], - [ 0x61, 0x99, 0xD6, 0x62 ], - [ 0x80, 0x61 ], - [ 0xFF, 0x61 ], - [ 0xFE, 0x39 ], - [ 0x87, 0x66 ], - [ 0x81, 0x40 ], - [ 0x61, 0x81 ], - ]; - const expectations = [ - "\u0061\u0062", - "\u43F0", - "\u79D4", - "\uD864\uDD0D", - "\u00CA\u0304", - "\u00CA\u030C", - "\u00CA", - "\u00EA\u0304", - "\u00EA\u030C", - "\u00EA", - "\u8991", - "\uD85E\uDD67", - "\u8A29", - "\u0061\u43F0\u0062", - "\u0061\u79D4\u0062", - "\u0061\uD864\uDD0D\u0062", - "\u0061\u00CA\u0304\u0062", - "\u0061\u00CA\u030C\u0062", - "\u0061\u00CA\u0062", - "\u0061\u00EA\u0304\u0062", - "\u0061\u00EA\u030C\u0062", - "\u0061\u00EA\u0062", - "\u0061\u8991\u0062", - "\u0061\uD85E\uDD67\u0062", - "\u0061\u8A29\u0062", - "\uFFFD\u0061", - "\uFFFD\u0061", - "\uFFFD\u0039", - "\uFFFD\u0066", - "\uFFFD\u0040", - "\u0061\uFFFD", - ]; - - for (var i = 0; i < inputs.length; i++) { - 
testCharset({encoding: "big5", input: inputs[i], expected: expectations[i], - msg: "decoder test #" + i + " for big5."}); - } -} diff --git a/intl/locale/windows/wincharset.properties b/intl/locale/windows/wincharset.properties index 79a802492a32..6824cdc5bfcb 100644 --- a/intl/locale/windows/wincharset.properties +++ b/intl/locale/windows/wincharset.properties @@ -11,7 +11,7 @@ acp.932=Shift_JIS acp.936=gb18030 acp.949=EUC-KR acp.950=Big5 -acp.951=Big5 +acp.951=Big5-HKSCS acp.1250=windows-1250 acp.1251=windows-1251 acp.1252=windows-1252 diff --git a/intl/uconv/moz.build b/intl/uconv/moz.build index c2bc8d249473..4d525c48d632 100644 --- a/intl/uconv/moz.build +++ b/intl/uconv/moz.build @@ -27,6 +27,7 @@ EXPORTS += [ 'ucvja/nsUCVJACID.h', 'ucvko/nsUCvKOCID.h', 'ucvlatin/nsUCvLatinCID.h', + 'ucvtw/nsUCvTWCID.h', ] UNIFIED_SOURCES += [ @@ -136,9 +137,10 @@ UNIFIED_SOURCES += [ ] UNIFIED_SOURCES += [ - 'ucvtw/nsBIG5Data.cpp', + 'ucvtw/nsBIG5HKSCSToUnicode.cpp', 'ucvtw/nsBIG5ToUnicode.cpp', 'ucvtw/nsUnicodeToBIG5.cpp', + 'ucvtw/nsUnicodeToBIG5HKSCS.cpp', ] UNIFIED_SOURCES += [ diff --git a/intl/uconv/nsIUnicodeDecoder.h b/intl/uconv/nsIUnicodeDecoder.h index 319b60990b45..113eeb425b0c 100644 --- a/intl/uconv/nsIUnicodeDecoder.h +++ b/intl/uconv/nsIUnicodeDecoder.h @@ -82,18 +82,14 @@ public: * @param aDestLength [IN/OUT] the length of the destination data buffer; * after conversion will contain the number of Unicode * characters written - * @return NS_ERROR_UDEC_ILLEGALINPUT if an illegal input sequence + * @return NS_PARTIAL_MORE_INPUT if only a partial conversion was + * done; more input is needed to continue + * NS_PARTIAL_MORE_OUTPUT if only a partial conversion + * was done; more output space is needed to continue + * NS_ERROR_ILLEGAL_INPUT if an illegal input sequence * was encountered and the behavior was set to "signal"; * the caller must skip over one byte, reset the decoder * and retry. 
- * NS_OK_UDEC_MOREOUTPUT if only a partial conversion - * was done; more output space is needed to continue - * NS_OK_UDEC_MOREINPUT if the input ended in the middle - * of an input code unit sequence. If this is the last - * result the caller has at the end of the stream, the - * caller must append one U+FFFD to the output. - * NS_OK if the input ended after a complete input code - * unit sequence. */ NS_IMETHOD Convert(const char * aSrc, int32_t * aSrcLength, char16_t * aDest, int32_t * aDestLength) = 0; diff --git a/intl/uconv/nsIUnicodeEncoder.h b/intl/uconv/nsIUnicodeEncoder.h index 9e35cdf53712..cc3acfcd3ab5 100644 --- a/intl/uconv/nsIUnicodeEncoder.h +++ b/intl/uconv/nsIUnicodeEncoder.h @@ -96,12 +96,6 @@ public: * the first of a surrogate pair. * NS_ERROR_UENC_NOMAPPING if character without mapping * was encountered and the behavior was set to "signal". - * In the case of an unmappable BMP character, aDestLength - * must indicate that the unmappable character was - * consumed by the encoder (unlike in the decode API!). - * In the case of an unmappable astral character, - * aDestLength must indicate that the high surrogate was - * consumed by the encoder but the low surrogate was not. 
*/ NS_IMETHOD Convert(const char16_t * aSrc, int32_t * aSrcLength, char * aDest, int32_t * aDestLength) = 0; diff --git a/intl/uconv/nsUConvModule.cpp b/intl/uconv/nsUConvModule.cpp index 98ec9772b2ef..433c8791319b 100644 --- a/intl/uconv/nsUConvModule.cpp +++ b/intl/uconv/nsUConvModule.cpp @@ -107,8 +107,12 @@ #include "nsUnicodeToISO2022JP.h" // ucvtw +#include "nsUCvTWCID.h" +#include "nsUCvTWDll.h" #include "nsBIG5ToUnicode.h" #include "nsUnicodeToBIG5.h" +#include "nsBIG5HKSCSToUnicode.h" +#include "nsUnicodeToBIG5HKSCS.h" // ucvko #include "nsUCvKOCID.h" @@ -180,6 +184,7 @@ NS_UCONV_REG_UNREG("EUC-JP", NS_EUCJPTOUNICODE_CID, NS_UNICODETOEUCJP_CID) // ucvtw NS_UCONV_REG_UNREG("Big5", NS_BIG5TOUNICODE_CID, NS_UNICODETOBIG5_CID) +NS_UCONV_REG_UNREG("Big5-HKSCS", NS_BIG5HKSCSTOUNICODE_CID, NS_UNICODETOBIG5HKSCS_CID) // ucvko NS_UCONV_REG_UNREG("EUC-KR", NS_EUCKRTOUNICODE_CID, NS_UNICODETOEUCKR_CID) @@ -209,8 +214,6 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsISO2022JPToUnicodeV2) NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToISO2022JP) // ucvtw -NS_GENERIC_FACTORY_CONSTRUCTOR(nsBIG5ToUnicode) -NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToBIG5) // ucvko @@ -244,6 +247,23 @@ const uint16_t g_ASCIIMappingTable[] = { 0x0001, 0x0004, 0x0005, 0x0008, 0x0000, 0x0000, 0x007F, 0x0000 }; +// ucvtw +const uint16_t g_ufBig5Mapping[] = { +#include "big5.uf" +}; + +const uint16_t g_utBIG5Mapping[] = { +#include "big5.ut" +}; + +const uint16_t g_ufBig5HKSCSMapping[] = { +#include "hkscs.uf" +}; + +const uint16_t g_utBig5HKSCSMapping[] = { +#include "hkscs.ut" +}; + // ucvko const uint16_t g_utKSC5601Mapping[] = { #include "u20kscgl.ut" @@ -357,6 +377,8 @@ NS_DEFINE_NAMED_CID(NS_UNICODETOEUCJP_CID); NS_DEFINE_NAMED_CID(NS_UNICODETOISO2022JP_CID); NS_DEFINE_NAMED_CID(NS_UNICODETOBIG5_CID); NS_DEFINE_NAMED_CID(NS_BIG5TOUNICODE_CID); +NS_DEFINE_NAMED_CID(NS_UNICODETOBIG5HKSCS_CID); +NS_DEFINE_NAMED_CID(NS_BIG5HKSCSTOUNICODE_CID); NS_DEFINE_NAMED_CID(NS_EUCKRTOUNICODE_CID); 
NS_DEFINE_NAMED_CID(NS_UNICODETOEUCKR_CID); NS_DEFINE_NAMED_CID(NS_GBKTOUNICODE_CID); @@ -459,6 +481,8 @@ static const mozilla::Module::CIDEntry kUConvCIDs[] = { { &kNS_UNICODETOISO2022JP_CID, false, nullptr, nsUnicodeToISO2022JPConstructor }, { &kNS_UNICODETOBIG5_CID, false, nullptr, nsUnicodeToBIG5Constructor }, { &kNS_BIG5TOUNICODE_CID, false, nullptr, nsBIG5ToUnicodeConstructor }, + { &kNS_UNICODETOBIG5HKSCS_CID, false, nullptr, nsUnicodeToBIG5HKSCSConstructor }, + { &kNS_BIG5HKSCSTOUNICODE_CID, false, nullptr, nsBIG5HKSCSToUnicodeConstructor }, { &kNS_EUCKRTOUNICODE_CID, false, nullptr, nsCP949ToUnicodeConstructor }, { &kNS_UNICODETOEUCKR_CID, false, nullptr, nsUnicodeToCP949Constructor }, { &kNS_GBKTOUNICODE_CID, false, nullptr, nsGB18030ToUnicodeConstructor }, @@ -563,6 +587,8 @@ static const mozilla::Module::ContractIDEntry kUConvContracts[] = { { NS_UNICODEENCODER_CONTRACTID_BASE "ISO-2022-JP", &kNS_UNICODETOISO2022JP_CID }, { NS_UNICODEENCODER_CONTRACTID_BASE "Big5", &kNS_UNICODETOBIG5_CID }, { NS_UNICODEDECODER_CONTRACTID_BASE "Big5", &kNS_BIG5TOUNICODE_CID }, + { NS_UNICODEENCODER_CONTRACTID_BASE "Big5-HKSCS", &kNS_UNICODETOBIG5HKSCS_CID }, + { NS_UNICODEDECODER_CONTRACTID_BASE "Big5-HKSCS", &kNS_BIG5HKSCSTOUNICODE_CID }, { NS_UNICODEDECODER_CONTRACTID_BASE "EUC-KR", &kNS_EUCKRTOUNICODE_CID }, { NS_UNICODEENCODER_CONTRACTID_BASE "EUC-KR", &kNS_UNICODETOEUCKR_CID }, { NS_UNICODEDECODER_CONTRACTID_BASE "gbk", &kNS_GBKTOUNICODE_CID }, diff --git a/intl/uconv/tests/mochitest.ini b/intl/uconv/tests/mochitest.ini index 51ded9433ce9..ca1382c3f24e 100644 --- a/intl/uconv/tests/mochitest.ini +++ b/intl/uconv/tests/mochitest.ini @@ -12,4 +12,3 @@ skip-if = toolkit == 'android' #bug 775227 [test_unicode_noncharacters_gb18030.html] [test_unicode_noncharacters_utf8.html] [test_utf8_overconsumption.html] -[test_big5_encoder.html] diff --git a/intl/uconv/tests/test_big5_encoder.html b/intl/uconv/tests/test_big5_encoder.html deleted file mode 100644 index 
33970d01017f..000000000000 --- a/intl/uconv/tests/test_big5_encoder.html +++ /dev/null @@ -1,43 +0,0 @@ - - - - - - Test for Unicode non-characters - - - - -
-
-
-Mozilla Bug 912470 -

-