From 61f320aa9df23ad8cf81c81afb6b58f865dc584a Mon Sep 17 00:00:00 2001 From: Daniel Holbert Date: Fri, 5 Dec 2008 11:52:01 -0800 Subject: [PATCH] Backed out changeset 78d662c2c878 (Bug 335531) on suspicion of causing mochitest failures in test_bug399284.html on linux & windows unittest boxes. --- content/base/public/nsContentUtils.h | 2 +- content/base/src/nsContentUtils.cpp | 18 +- content/base/src/nsScriptLoader.cpp | 4 +- .../en-US/chrome/charsetTitles.properties | 9 +- .../src/base/nsUniversalDetector.cpp | 8 +- intl/chardet/src/nsMetaCharsetObserver.cpp | 2 - intl/uconv/src/charsetalias.properties | 1 - intl/uconv/src/nsUConvModule.cpp | 15 +- intl/uconv/tests/unit/test_bug335531.js | 228 ------------------ intl/uconv/ucvlatin/nsUCvLatinCID.h | 5 - intl/uconv/ucvlatin/nsUTF32ToUnicode.cpp | 90 +------ intl/uconv/ucvlatin/nsUTF32ToUnicode.h | 71 ++---- intl/uconv/ucvlatin/nsUnicodeToUTF32.cpp | 27 +-- intl/uconv/ucvlatin/nsUnicodeToUTF32.h | 57 ++--- layout/style/nsCSSLoader.cpp | 19 +- .../converters/nsUnknownDecoder.cpp | 7 +- parser/htmlparser/src/nsParser.cpp | 11 +- .../en-US/chrome/global/intl.properties | 2 +- 18 files changed, 86 insertions(+), 490 deletions(-) delete mode 100644 intl/uconv/tests/unit/test_bug335531.js diff --git a/content/base/public/nsContentUtils.h b/content/base/public/nsContentUtils.h index 96bba708478a..a8d6d63724c9 100644 --- a/content/base/public/nsContentUtils.h +++ b/content/base/public/nsContentUtils.h @@ -523,7 +523,7 @@ public: * @return boolean indicating whether a BOM was detected. */ static PRBool CheckForBOM(const unsigned char* aBuffer, PRUint32 aLength, - nsACString& aCharset, PRBool *bigEndian = nsnull); + nsACString& aCharset); /** diff --git a/content/base/src/nsContentUtils.cpp b/content/base/src/nsContentUtils.cpp index 097b4df04c57..2a26782f6991 100644 --- a/content/base/src/nsContentUtils.cpp +++ b/content/base/src/nsContentUtils.cpp @@ -3171,7 +3171,7 @@ nsContentUtils::ConvertStringFromCharset(const nsACString& aCharset, /* static */ PRBool nsContentUtils::CheckForBOM(const unsigned char* aBuffer, PRUint32 aLength, - nsACString& aCharset, PRBool *bigEndian) + nsACString& aCharset) { PRBool found = PR_TRUE; aCharset.Truncate(); @@ -3186,30 +3186,22 @@ nsContentUtils::CheckForBOM(const unsigned char* aBuffer, PRUint32 aLength, aBuffer[1] == 0x00 && aBuffer[2] == 0xFE && aBuffer[3] == 0xFF) { - aCharset = "UTF-32"; - if (bigEndian) - *bigEndian = PR_TRUE; + aCharset = "UTF-32BE"; } else if (aLength >= 4 && aBuffer[0] == 0xFF && aBuffer[1] == 0xFE && aBuffer[2] == 0x00 && aBuffer[3] == 0x00) { - aCharset = "UTF-32"; - if (bigEndian) - *bigEndian = PR_FALSE; + aCharset = "UTF-32LE"; } else if (aLength >= 2 && aBuffer[0] == 0xFE && aBuffer[1] == 0xFF) { - aCharset = "UTF-16"; - if (bigEndian) - *bigEndian = PR_TRUE; + aCharset = "UTF-16BE"; } else if (aLength >= 2 && aBuffer[0] == 0xFF && aBuffer[1] == 0xFE) { - aCharset = "UTF-16"; - if (bigEndian) - *bigEndian = PR_FALSE; + aCharset = "UTF-16LE"; } else { found = PR_FALSE; } diff --git a/content/base/src/nsScriptLoader.cpp b/content/base/src/nsScriptLoader.cpp index 2e38624eac12..3279cbb4eabd 100644 --- a/content/base/src/nsScriptLoader.cpp +++ b/content/base/src/nsScriptLoader.cpp @@ -766,14 +766,14 @@ DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsCString& oChars if (0xFF == aBytes[1]) { // FE FF // UTF-16, big-endian - oCharset.Assign("UTF-16"); + oCharset.Assign("UTF-16BE"); } break; case 0xFF: if (0xFE == aBytes[1]) { // FF FE // UTF-16, little-endian - oCharset.Assign("UTF-16"); + oCharset.Assign("UTF-16LE"); } break; } diff --git a/dom/locales/en-US/chrome/charsetTitles.properties b/dom/locales/en-US/chrome/charsetTitles.properties index 441750b6192d..115a7e96b093 100644 --- a/dom/locales/en-US/chrome/charsetTitles.properties +++ b/dom/locales/en-US/chrome/charsetTitles.properties @@ -80,11 +80,10 @@ iso-2022-kr.title = Korean (ISO-2022-KR) utf-7.title = Unicode (UTF-7) utf-8.title = Unicode (UTF-8) utf-16.title = Unicode (UTF-16) -utf-16le.title = Unicode (UTF-16LE) -utf-16be.title = Unicode (UTF-16BE) -utf-32.title = Unicode (UTF-32) -utf-32le.title = Unicode (UTF-32LE) -utf-32be.title = Unicode (UTF-32BE) +utf-16le.title = Unicode (UTF-16 Little Endian) +utf-16be.title = Unicode (UTF-16 Big Endian) +utf-32le.title = Unicode (UTF-32 Little Endian) +utf-32be.title = Unicode (UTF-32 Big Endian) iso-8859-5.title = Cyrillic (ISO-8859-5) iso-ir-111.title = Cyrillic (ISO-IR-111) windows-1251.title = Cyrillic (Windows-1251) diff --git a/extensions/universalchardet/src/base/nsUniversalDetector.cpp b/extensions/universalchardet/src/base/nsUniversalDetector.cpp index 0a3a79fabd0a..5205520bbf9a 100644 --- a/extensions/universalchardet/src/base/nsUniversalDetector.cpp +++ b/extensions/universalchardet/src/base/nsUniversalDetector.cpp @@ -125,12 +125,12 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) mDetectedCharset = "X-ISO-10646-UCS-4-3412"; else if ('\xFF' == aBuf[1]) // FE FF UTF-16, big endian BOM - mDetectedCharset = "UTF-16"; + mDetectedCharset = "UTF-16BE"; break; case '\x00': if (('\x00' == aBuf[1]) && ('\xFE' == aBuf[2]) && ('\xFF' == aBuf[3])) // 00 00 FE FF UTF-32, big-endian BOM - mDetectedCharset = "UTF-32"; + mDetectedCharset = "UTF-32BE"; else if (('\x00' == aBuf[1]) && ('\xFF' == aBuf[2]) && ('\xFE' == aBuf[3])) // 00 00 FF FE UCS-4, unusual octet order BOM (2143) mDetectedCharset = "X-ISO-10646-UCS-4-2143"; @@ -138,10 +138,10 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) case '\xFF': if (('\xFE' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3])) // FF FE 00 00 UTF-32, little-endian BOM - mDetectedCharset = "UTF-32"; + mDetectedCharset = "UTF-32LE"; else if ('\xFE' == aBuf[1]) // FF FE UTF-16, little endian BOM - mDetectedCharset = "UTF-16"; + mDetectedCharset = "UTF-16LE"; break; } // switch diff --git a/intl/chardet/src/nsMetaCharsetObserver.cpp b/intl/chardet/src/nsMetaCharsetObserver.cpp index 50da1bd9849c..a44ad6a77a9c 100644 --- a/intl/chardet/src/nsMetaCharsetObserver.cpp +++ b/intl/chardet/src/nsMetaCharsetObserver.cpp @@ -296,7 +296,6 @@ NS_IMETHODIMP nsMetaCharsetObserver::Notify( if (!preferred.EqualsLiteral("UTF-16") && !preferred.EqualsLiteral("UTF-16BE") && !preferred.EqualsLiteral("UTF-16LE") && - !preferred.EqualsLiteral("UTF-32") && !preferred.EqualsLiteral("UTF-32BE") && !preferred.EqualsLiteral("UTF-32LE")) { // Propagate the error message so that the parser can @@ -376,7 +375,6 @@ NS_IMETHODIMP nsMetaCharsetObserver::GetCharsetFromCompatibilityTag( !preferred.EqualsLiteral("UTF-16") && !preferred.EqualsLiteral("UTF-16BE") && !preferred.EqualsLiteral("UTF-16LE") && - !preferred.EqualsLiteral("UTF-32") && !preferred.EqualsLiteral("UTF-32BE") && !preferred.EqualsLiteral("UTF-32LE")) AppendASCIItoUTF16(preferred, aCharset); diff --git a/intl/uconv/src/charsetalias.properties b/intl/uconv/src/charsetalias.properties index ff4efd32e054..d35f55d5c40d 100644 --- a/intl/uconv/src/charsetalias.properties +++ b/intl/uconv/src/charsetalias.properties @@ -85,7 +85,6 @@ iso-2022-kr=ISO-2022-KR iso-2022-jp=ISO-2022-JP utf-32be=UTF-32BE utf-32le=UTF-32LE -utf-32=UTF-32 utf-16be=UTF-16BE utf-16le=UTF-16LE utf-16=UTF-16 diff --git a/intl/uconv/src/nsUConvModule.cpp b/intl/uconv/src/nsUConvModule.cpp index ab59be02fb5b..d9a727880992 100644 --- a/intl/uconv/src/nsUConvModule.cpp +++ b/intl/uconv/src/nsUConvModule.cpp @@ -344,7 +344,6 @@ NS_UCONV_REG_UNREG("x-imap4-modified-utf7", NS_MUTF7TOUNICODE_CID, NS_UNICODETOM NS_UCONV_REG_UNREG("UTF-16", NS_UTF16TOUNICODE_CID, NS_UNICODETOUTF16_CID) NS_UCONV_REG_UNREG("UTF-16BE", NS_UTF16BETOUNICODE_CID, NS_UNICODETOUTF16BE_CID) NS_UCONV_REG_UNREG("UTF-16LE", NS_UTF16LETOUNICODE_CID, NS_UNICODETOUTF16LE_CID) -NS_UCONV_REG_UNREG("UTF-32", NS_UTF32TOUNICODE_CID, NS_UNICODETOUTF32_CID) NS_UCONV_REG_UNREG("UTF-32BE", NS_UTF32BETOUNICODE_CID, NS_UNICODETOUTF32BE_CID) NS_UCONV_REG_UNREG("UTF-32LE", NS_UTF32LETOUNICODE_CID, NS_UNICODETOUTF32LE_CID) NS_UCONV_REG_UNREG("T.61-8bit", NS_T61TOUNICODE_CID, NS_UNICODETOT61_CID) @@ -419,7 +418,6 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsMUTF7ToUnicode) NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF16ToUnicode) NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF16BEToUnicode) NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF16LEToUnicode) -NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF32ToUnicode) NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF32BEToUnicode) NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF32LEToUnicode) NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF7) @@ -429,7 +427,6 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF16LE) NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF16) NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF32BE) NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF32LE) -NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF32) NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToTSCII) NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToTamilTTF) @@ -970,11 +967,6 @@ static const nsModuleComponentInfo components[] = NS_UNICODEDECODER_CONTRACTID_BASE "UTF-16LE", nsUTF16LEToUnicodeConstructor , }, - { - DECODER_NAME_BASE "UTF-32" , NS_UTF32TOUNICODE_CID, - NS_UNICODEDECODER_CONTRACTID_BASE "UTF-32", - nsUTF32ToUnicodeConstructor , - }, { DECODER_NAME_BASE "UTF-32BE" , NS_UTF32BETOUNICODE_CID, NS_UNICODEDECODER_CONTRACTID_BASE "UTF-32BE", @@ -1290,12 +1282,7 @@ static const nsModuleComponentInfo components[] = NS_UNICODEENCODER_CONTRACTID_BASE "UTF-32LE", nsUnicodeToUTF32LEConstructor, }, - { - ENCODER_NAME_BASE "UTF-32" , NS_UNICODETOUTF32_CID, - NS_UNICODEENCODER_CONTRACTID_BASE "UTF-32", - nsUnicodeToUTF32Constructor, - }, - { + { ENCODER_NAME_BASE "T.61-8bit" , NS_UNICODETOT61_CID, NS_UNICODEENCODER_CONTRACTID_BASE "T.61-8bit", nsUnicodeToT61Constructor, diff --git a/intl/uconv/tests/unit/test_bug335531.js b/intl/uconv/tests/unit/test_bug335531.js deleted file mode 100644 index 4509495c1153..000000000000 --- a/intl/uconv/tests/unit/test_bug335531.js +++ /dev/null @@ -1,228 +0,0 @@ -/* Test case for bug 335531 - * - * Uses nsIConverterInputStream to decode UTF-16 text with all combinations - * of UTF-16BE and UTF-16LE with and without BOM. - * - * Sample text is: "Все счастливые семьи похожи друг на друга, каждая несчастливая семья несчастлива по-своему." - * - * The enclosing quotation marks are included in the sample text to test that - * UTF-16LE is recognized even when there is no BOM and the UTF-16LE decoder is - * not explicitly called. This only works when the first character of the text - * is an eight-bit character. - */ - -const beBOM="%00%00%FE%FF"; -const leBOM="%FF%FE%00%00"; -const outBOM="\uFEFF"; -const sampleUTF32BE="%00%00%00%22%00%00%04%12%00%00%04%41%00%00%04%35%00%00%00%20%00%00%04%41%00%00%04%47%00%00%04%30%00%00%04%41%00%00%04%42%00%00%04%3B%00%00%04%38%00%00%04%32%00%00%04%4B%00%00%04%35%00%00%00%20%00%00%04%41%00%00%04%35%00%00%04%3C%00%00%04%4C%00%00%04%38%00%00%00%20%00%00%04%3F%00%00%04%3E%00%00%04%45%00%00%04%3E%00%00%04%36%00%00%04%38%00%00%00%20%00%00%04%34%00%00%04%40%00%00%04%43%00%00%04%33%00%00%00%20%00%00%04%3D%00%00%04%30%00%00%00%20%00%00%04%34%00%00%04%40%00%00%04%43%00%00%04%33%00%00%04%30%00%00%00%2C%00%00%00%20%00%00%04%3A%00%00%04%30%00%00%04%36%00%00%04%34%00%00%04%30%00%00%04%4F%00%00%00%20%00%00%04%3D%00%00%04%35%00%00%04%41%00%00%04%47%00%00%04%30%00%00%04%41%00%00%04%42%00%00%04%3B%00%00%04%38%00%00%04%32%00%00%04%30%00%00%04%4F%00%00%00%20%00%00%04%41%00%00%04%35%00%00%04%3C%00%00%04%4C%00%00%04%4F%00%00%00%20%00%00%04%3D%00%00%04%35%00%00%04%41%00%00%04%47%00%00%04%30%00%00%04%41%00%00%04%42%00%00%04%3B%00%00%04%38%00%00%04%32%00%00%04%30%00%00%00%20%00%00%04%3F%00%00%04%3E%00%00%00%2D%00%00%04%41%00%00%04%32%00%00%04%3E%00%00%04%35%00%00%04%3C%00%00%04%43%00%00%00%2E%00%00%00%22"; -const sampleUTF32LE="%22%00%00%00%12%04%00%00%41%04%00%00%35%04%00%00%20%00%00%00%41%04%00%00%47%04%00%00%30%04%00%00%41%04%00%00%42%04%00%00%3B%04%00%00%38%04%00%00%32%04%00%00%4B%04%00%00%35%04%00%00%20%00%00%00%41%04%00%00%35%04%00%00%3C%04%00%00%4C%04%00%00%38%04%00%00%20%00%00%00%3F%04%00%00%3E%04%00%00%45%04%00%00%3E%04%00%00%36%04%00%00%38%04%00%00%20%00%00%00%34%04%00%00%40%04%00%00%43%04%00%00%33%04%00%00%20%00%00%00%3D%04%00%00%30%04%00%00%20%00%00%00%34%04%00%00%40%04%00%00%43%04%00%00%33%04%00%00%30%04%00%00%2C%00%00%00%20%00%00%00%3A%04%00%00%30%04%00%00%36%04%00%00%34%04%00%00%30%04%00%00%4F%04%00%00%20%00%00%00%3D%04%00%00%35%04%00%00%41%04%00%00%47%04%00%00%30%04%00%00%41%04%00%00%42%04%00%00%3B%04%00%00%38%04%00%00%32%04%00%00%30%04%00%00%4F%04%00%00%20%00%00%00%41%04%00%00%35%04%00%00%3C%04%00%00%4C%04%00%00%4F%04%00%00%20%00%00%00%3D%04%00%00%35%04%00%00%41%04%00%00%47%04%00%00%30%04%00%00%41%04%00%00%42%04%00%00%3B%04%00%00%38%04%00%00%32%04%00%00%30%04%00%00%20%00%00%00%3F%04%00%00%3E%04%00%00%2D%00%00%00%41%04%00%00%32%04%00%00%3E%04%00%00%35%04%00%00%3C%04%00%00%43%04%00%00%2E%00%00%00%22%00%00%00"; -const expectedNoBOM = "\"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443.\""; - -function makeText(withBOM, charset) -{ - var theText = eval("sample" + charset); - if (withBOM) { - if (charset == "UTF32BE") { - theText = beBOM + theText; - } else { - theText = leBOM + theText; - } - } - return theText; -} - -function testCase(withBOM, charset, charsetDec, decoder, bufferLength) -{ - var dataURI = "data:text/plain;charset=" + charsetDec + "," + - makeText(withBOM, charset); - - var IOService = Components.Constructor("@mozilla.org/network/io-service;1", - "nsIIOService"); - var ConverterInputStream = - Components.Constructor("@mozilla.org/intl/converter-input-stream;1", - "nsIConverterInputStream", - "init"); - - var ios = new IOService(); - var channel = ios.newChannel(dataURI, "", null); - var testInputStream = channel.open(); - var testConverter = new ConverterInputStream(testInputStream, - decoder, - bufferLength, - 0xFFFD); - - if (!(testConverter instanceof - Components.interfaces.nsIUnicharLineInputStream)) - throw "not line input stream"; - - var outStr = ""; - var more; - do { - // read the line and check for eof - var line = {}; - more = testConverter.readLine(line); - outStr += line.value; - } while (more); - - var expected = expectedNoBOM; - if (withBOM) { - // BE / LE decoder wouldn't strip the BOM - if (decoder == "UTF-32BE" || decoder == "UTF-32LE") { - expected = outBOM + expectedNoBOM; - } - } - - do_check_eq(outStr, expected); -} - -// Tests conversion of one to three byte(s) from UTF-32 to Unicode - -const expectedString = "\ufffd"; - -const charset = "UTF-32"; - -function testCase2(inString) { - var ScriptableUnicodeConverter = - Components.Constructor("@mozilla.org/intl/scriptableunicodeconverter", - "nsIScriptableUnicodeConverter"); - - var converter = new ScriptableUnicodeConverter(); - converter.charset = charset; - var outString; - try { - outString = converter.ConvertToUnicode(inString) + converter.Finish(); - } catch(e) { - outString = "\ufffd"; - } - do_check_eq(escape(outString), escape(expectedString)); -} - -/* - * Uses nsIConverterInputStream to decode UTF-32 text with surrogate characters - * - * Sample text is: "g" in Mathematical Bold Symbolls (U+1D420) - * - * The test uses buffers of 4 different lengths to test end of buffer in mid- - * UTF32 character - */ - -// Single supplementaly character -// expected: surrogate pair -const test0="%00%00%00%2D%00%00%00%2D%00%01%D4%20%00%00%00%2D%00%00%00%2D"; -const expected0 = "--\uD835\uDC20--"; -// High surrogate followed by low surrogate (invalid in UTF-32) -// expected: two replacement chars -const test1="%00%00%00%2D%00%00%00%2D%00%00%D8%35%00%00%DC%20%00%00%00%2D%00%00%00%2D"; -const expected1 = "--\uFFFD\uFFFD--"; -// Lone high surrogate -// expected: one replacement char -const test2="%00%00%00%2D%00%00%00%2D%00%00%D8%35%00%00%00%2D%00%00%00%2D"; -const expected2 = "--\uFFFD--"; -// Lone low surrogate -// expected: one replacement char -const test3="%00%00%00%2D%00%00%00%2D%00%00%DC%20%00%00%00%2D%00%00%00%2D"; -const expected3 = "--\uFFFD--"; -// Two high surrogates -// expected: two replacement chars -const test4="%00%00%00%2D%00%00%00%2D%00%00%D8%35%00%00%D8%35%00%00%00%2D%00%00%00%2D"; -const expected4 = "--\uFFFD\uFFFD--"; -// Two low surrogates -// expected: two replacement chars -const test5="%00%00%00%2D%00%00%00%2D%00%00%DC%20%00%00%DC%20%00%00%00%2D%00%00%00%2D"; -const expected5 = "--\uFFFD\uFFFD--"; -// Low surrogate followed by high surrogate -// expected: two replacement chars -const test6="%00%00%00%2D%00%00%00%2D%00%00%DC%20%00%00%D8%35%00%00%00%2D%00%00%00%2D"; -const expected6 = "--\uFFFD\uFFFD--"; -// Lone high surrogate followed by supplementaly character -// expected: replacement char followed by surrogate pair -const test7="%00%00%00%2D%00%00%00%2D%00%00%D8%35%00%01%D4%20%00%00%00%2D%00%00%00%2D"; -const expected7 = "--\uFFFD\uD835\uDC20--"; -// Lone low surrogate followed by supplementaly character -// expected: replacement char followed by surrogate pair -const test8="%00%00%00%2D%00%00%00%2D%00%00%DC%20%00%01%D4%20%00%00%00%2D%00%00%00%2D"; -const expected8 = "--\uFFFD\uD835\uDC20--"; -// Supplementaly character followed by lone high surrogate -// expected: surrogate pair followed by replacement char -const test9="%00%00%00%2D%00%00%00%2D%00%01%D4%20%00%00%D8%35%00%00%00%2D%00%00%00%2D"; -const expected9 = "--\uD835\uDC20\uFFFD--"; -// Supplementaly character followed by lone low surrogate -// expected: surrogate pair followed by replacement char -const test10="%00%00%00%2D%00%00%00%2D%00%01%D4%20%00%00%DC%20%00%00%00%2D%00%00%00%2D"; -const expected10 = "--\uD835\uDC20\uFFFD--"; -// Lone high surrogate at the end of the input -// expected: one replacement char (invalid in UTF-32) -const test11="%00%00%00%2D%00%00%00%2D%00%00%00%2D%00%00%00%2D%00%00%D8%35"; -const expected11 = "----\uFFFD"; -// Half code unit at the end of the input -// expected: nothing -const test12="%00%00%00%2D%00%00%00%2D%00%00%00%2D%00%00%00%2D%D8"; -const expected12 = "----"; - -function testCase3(testNumber, bufferLength) -{ - var dataURI = "data:text/plain;charset=UTF32BE," + eval("test" + testNumber); - - var IOService = Components.Constructor("@mozilla.org/network/io-service;1", - "nsIIOService"); - var ConverterInputStream = - Components.Constructor("@mozilla.org/intl/converter-input-stream;1", - "nsIConverterInputStream", - "init"); - - var ios = new IOService(); - var channel = ios.newChannel(dataURI, "", null); - var testInputStream = channel.open(); - var testConverter = new ConverterInputStream(testInputStream, - "UTF-32BE", - bufferLength, - 0xFFFD); - - if (!(testConverter instanceof - Components.interfaces.nsIUnicharLineInputStream)) - throw "not line input stream"; - - var outStr = ""; - var more; - do { - // read the line and check for eof - var line = {}; - more = testConverter.readLine(line); - outStr += line.value; - } while (more); - - // escape the strings before comparing for better readability - do_check_eq(escape(outStr), escape(eval("expected" + testNumber))); -} - -function run_test() -{ - /* BOM charset charset decoder buffer - declaration length */ - testCase(true, "UTF32LE", "UTF-32", "UTF-32", 64); - testCase(true, "UTF32BE", "UTF-32", "UTF-32", 64); - testCase(true, "UTF32LE", "UTF-32", "UTF-32LE", 64); - testCase(true, "UTF32BE", "UTF-32", "UTF-32BE", 64); - testCase(false, "UTF32LE", "UTF-32", "UTF-32", 64); - testCase(false, "UTF32BE", "UTF-32", "UTF-32", 64); - testCase(false, "UTF32LE", "UTF-32", "UTF-32LE", 64); - testCase(false, "UTF32BE", "UTF-32", "UTF-32BE", 64); - testCase(true, "UTF32LE", "UTF-32", "UTF-32", 65); - testCase(true, "UTF32BE", "UTF-32", "UTF-32", 65); - testCase(true, "UTF32LE", "UTF-32", "UTF-32LE", 65); - testCase(true, "UTF32BE", "UTF-32", "UTF-32BE", 65); - testCase(false, "UTF32LE", "UTF-32", "UTF-32", 65); - testCase(false, "UTF32BE", "UTF-32", "UTF-32", 65); - testCase(false, "UTF32LE", "UTF-32", "UTF-32LE", 65); - testCase(false, "UTF32BE", "UTF-32", "UTF-32BE", 65); - - testCase2("A"); - testCase2("AB"); - testCase2("ABC"); - - for (var test = 0; test <= 12; ++ test) { - for (var bufferLength = 4; bufferLength < 8; ++ bufferLength) { - testCase3(test, bufferLength); - } - } -} diff --git a/intl/uconv/ucvlatin/nsUCvLatinCID.h b/intl/uconv/ucvlatin/nsUCvLatinCID.h index 68abcaba14f8..09df7afe3b86 100644 --- a/intl/uconv/ucvlatin/nsUCvLatinCID.h +++ b/intl/uconv/ucvlatin/nsUCvLatinCID.h @@ -567,11 +567,6 @@ #define NS_UTF16TOUNICODE_CID \ { 0xd673255d, 0x1184, 0x400a, {0xb0, 0xb5, 0xee,0x9d, 0x12, 0x95, 0xbd, 0x85}} -// Class ID for our UTF32ToUnicode charset converter -// {30DCD313-73E1-447d-8339-37744952154E} -#define NS_UTF32TOUNICODE_CID \ - { 0x30dcd313, 0x73e1, 0x447d, {0x83, 0x39, 0x37, 0x74, 0x49, 0x52, 0x15, 0x4e}} - // Class ID for our UTF16LEToUnicode charset converter // {BA6151B7-1DFA-11d3-B3BF-00805F8A6670} #define NS_UTF16LETOUNICODE_CID \ diff --git a/intl/uconv/ucvlatin/nsUTF32ToUnicode.cpp b/intl/uconv/ucvlatin/nsUTF32ToUnicode.cpp index a16cb5bcb902..e659205fdf72 100644 --- a/intl/uconv/ucvlatin/nsUTF32ToUnicode.cpp +++ b/intl/uconv/ucvlatin/nsUTF32ToUnicode.cpp @@ -167,7 +167,7 @@ static nsresult ConvertCommon(const char * aSrc, //---------------------------------------------------------------------- // Class nsUTF32ToUnicode [implementation] -nsUTF32ToUnicodeBase::nsUTF32ToUnicodeBase() : nsBasicDecoderSupport() +nsUTF32ToUnicode::nsUTF32ToUnicode() : nsBasicDecoderSupport() { Reset(); } @@ -175,9 +175,9 @@ nsUTF32ToUnicodeBase::nsUTF32ToUnicodeBase() : nsBasicDecoderSupport() //---------------------------------------------------------------------- // Subclassing of nsDecoderSupport class [implementation] -NS_IMETHODIMP nsUTF32ToUnicodeBase::GetMaxLength(const char * aSrc, - PRInt32 aSrcLength, - PRInt32 * aDestLength) +NS_IMETHODIMP nsUTF32ToUnicode::GetMaxLength(const char * aSrc, + PRInt32 aSrcLength, + PRInt32 * aDestLength) { // Non-BMP characters take two PRUnichars(a pair of surrogate codepoints) // so that we have to divide by 2 instead of 4 for the worst case. @@ -189,7 +189,7 @@ NS_IMETHODIMP nsUTF32ToUnicodeBase::GetMaxLength(const char * aSrc, //---------------------------------------------------------------------- // Subclassing of nsBasicDecoderSupport class [implementation] -NS_IMETHODIMP nsUTF32ToUnicodeBase::Reset() +NS_IMETHODIMP nsUTF32ToUnicode::Reset() { // the number of additional bytes to read to complete UTF-32 4byte seq. mState = 0; @@ -203,7 +203,7 @@ NS_IMETHODIMP nsUTF32ToUnicodeBase::Reset() // Class nsUTF32BEToUnicode [implementation] //---------------------------------------------------------------------- -// Subclassing of nsUTF32ToUnicodeBase class [implementation] +// Subclassing of nsUTF32ToUnicode class [implementation] NS_IMETHODIMP nsUTF32BEToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLength, @@ -218,7 +218,7 @@ NS_IMETHODIMP nsUTF32BEToUnicode::Convert(const char * aSrc, // Class nsUTF32LEToUnicode [implementation] //---------------------------------------------------------------------- -// Subclassing of nsUTF32ToUnicodeBase class [implementation] +// Subclassing of nsUTF32ToUnicode class [implementation] NS_IMETHODIMP nsUTF32LEToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLength, @@ -229,81 +229,5 @@ NS_IMETHODIMP nsUTF32LEToUnicode::Convert(const char * aSrc, mBufferInc, PR_TRUE); } -//---------------------------------------------------------------------- -// Class nsUTF32ToUnicode [implementation] - -//---------------------------------------------------------------------- -// Subclassing of nsUTF32ToUnicodeBase class [implementation] - -NS_IMETHODIMP nsUTF32ToUnicode::Reset() -{ - nsresult rv = nsUTF32ToUnicodeBase::Reset(); - mState = 4; - mEndian = kUnknown; - mFoundBOM = PR_FALSE; - return rv; -} - -NS_IMETHODIMP nsUTF32ToUnicode::Convert(const char * aSrc, - PRInt32 * aSrcLength, - PRUnichar * aDest, - PRInt32 * aDestLength) -{ - PRBool foundBOM = PR_FALSE; - if (4 == mState) // Called for the first time. - { - if (*aSrcLength < 4) - return NS_ERROR_ILLEGAL_INPUT; - - // check if BOM (0xFEFF) is at the beginning, remove it if found, and - // set mEndian accordingly. - if (0xFF == PRUint8(aSrc[0]) && 0xFE == PRUint8(aSrc[1]) && - 0 == PRUint8(aSrc[2]) && 0 == PRUint8(aSrc[3])) { - aSrc += 4; - *aSrcLength -= 4; - mState = 0; - mEndian = kLittleEndian; - mFoundBOM = foundBOM = PR_TRUE; - } - else if (0 == PRUint8(aSrc[0]) && 0 == PRUint8(aSrc[1]) && - 0xFE == PRUint8(aSrc[2]) && 0xFF == PRUint8(aSrc[3])) { - aSrc += 4; - *aSrcLength -= 4; - mState = 0; - mEndian = kBigEndian; - mFoundBOM = foundBOM = PR_TRUE; - } - // BOM is not found, but we can use a simple heuristic to determine - // the endianness. Assume the first character is [U+0001, U+FFFF]. - // Not always valid, but it's very likely to hold for html/xml/css. -#if 0 // BE case will be handled below - else if (!aSrc[0] && !aSrc[1] && (aSrc[2] || aSrc[3])) { // 0x00 0x00 0xhh 0xhh (hh != 00) - mState = 0; - mEndian = kBigEndian; - } -#endif - else if ((aSrc[0] || aSrc[1]) && !aSrc[2] && !aSrc[3]) { // 0xhh 0xhh 0x00 0x00 (hh != 00) - mState = 0; - mEndian = kLittleEndian; - } - else { // Neither BOM nor 'plausible' byte patterns at the beginning. - // Just assume it's BE (following Unicode standard) - // and let the garbage show up in the browser. (security concern?) - // (bug 246194) - mState = 0; - mEndian = kBigEndian; - } - } - - nsresult rv = ConvertCommon(aSrc, aSrcLength, aDest, aDestLength, &mState, - mBufferInc, mEndian == kLittleEndian); - if (foundBOM) - *aSrcLength += 4; // need to consume BOM - - // If BOM is not found and we're to return NS_OK, signal that BOM - // is not found. Otherwise, return |rv| from |UTF16ConvertToUnicode| - return (rv == NS_OK && !mFoundBOM) ? NS_OK_UDEC_NOBOMFOUND : rv; -} - // XXX : What to do with 'unflushed' mBufferInc?? : Finish() diff --git a/intl/uconv/ucvlatin/nsUTF32ToUnicode.h b/intl/uconv/ucvlatin/nsUTF32ToUnicode.h index 28f2a3272609..fcb518ee8b19 100644 --- a/intl/uconv/ucvlatin/nsUTF32ToUnicode.h +++ b/intl/uconv/ucvlatin/nsUTF32ToUnicode.h @@ -42,27 +42,29 @@ #define nsUTF32ToUnicode_h___ //---------------------------------------------------------------------- -// Class nsUTF32ToUnicodeBase [declaration] +// Class nsUTF32ToUnicode [declaration] /** - * A character set converter from UTF-32 family to Unicode. - * The base class for UTF-32BE/UTF-32LE/UTF-32 to Unicode converters. + * A character set converter from UTF32 to Unicode. + * The base class for UTF32BE/UTF32LE to Unicode converters. * @created 08/Dec/2002 * @author Jungshik Shin */ -class nsUTF32ToUnicodeBase : public nsBasicDecoderSupport +class nsUTF32ToUnicode : public nsBasicDecoderSupport { +public: + + /** + * Class constructor. + */ + nsUTF32ToUnicode(); + protected: - /** - * Class constructor. accessible only by child classes - */ - nsUTF32ToUnicodeBase(); - // the number of additional bytes to read to complete an incomplete UTF-32 4byte seq. - PRUint16 mState; + PRUint16 mState; // buffer for an incomplete UTF-32 sequence. PRUint8 mBufferInc[4]; @@ -80,13 +82,13 @@ protected: // Class nsUTF32BEToUnicode [declaration] /** - * A character set converter from UTF-32BE to Unicode. - * A subclass of UTF32ToUnicodeBase. + * A character set converter from UTF32BE to Unicode. + * A subclass of UTF32ToUnicode. * @created 08/Dec/2002 * @author Jungshik Shin */ -class nsUTF32BEToUnicode : public nsUTF32ToUnicodeBase +class nsUTF32BEToUnicode : public nsUTF32ToUnicode { public: @@ -104,13 +106,13 @@ public: // Class nsUTF32LEToUnicode [declaration] /** - * A character set converter from UTF-32LE to Unicode. - * A subclass of UTF32ToUnicodeBase. + * A character set converter from UTF32LE to Unicode. + * A subclass of UTF32ToUnicode. * @created 08/Dec/2002 * @author Jungshik Shin */ -class nsUTF32LEToUnicode : public nsUTF32ToUnicodeBase +class nsUTF32LEToUnicode : public nsUTF32ToUnicode { public: @@ -123,42 +125,5 @@ public: }; -//---------------------------------------------------------------------- -// Class nsUTF32ToUnicode [declaration] - -/** - * A character set converter from UTF-32 to Unicode. - * A subclass of UTF32ToUnicodeBase. - * @created 08/Dec/2002 - * @author Jungshik Shin - */ - -class nsUTF32ToUnicode : public nsUTF32ToUnicodeBase -{ -public: - - /** - * Class constructor. - */ - nsUTF32ToUnicode() { Reset(); } - - //-------------------------------------------------------------------- - // Subclassing of nsBasicDecoderSupport class [declaration] - - NS_IMETHOD Convert(const char * aSrc, PRInt32 * aSrcLength, - PRUnichar * aDest, PRInt32 * aDestLength); - - //-------------------------------------------------------------------- - // Subclassing of nsUTF32ToUnicodeBase class [declaration] - - NS_IMETHOD Reset(); - -private: - - enum Endian {kUnknown, kBigEndian, kLittleEndian}; - Endian mEndian; - PRBool mFoundBOM; -}; - #endif /* nsUTF32ToUnicode_h___ */ diff --git a/intl/uconv/ucvlatin/nsUnicodeToUTF32.cpp b/intl/uconv/ucvlatin/nsUnicodeToUTF32.cpp index 4e816480d790..10de40dc268b 100644 --- a/intl/uconv/ucvlatin/nsUnicodeToUTF32.cpp +++ b/intl/uconv/ucvlatin/nsUnicodeToUTF32.cpp @@ -80,7 +80,6 @@ static nsresult ConvertCommon(const PRUnichar * aSrc, char * aDest, PRInt32 * aDestLength, PRUnichar * aHighSurrogate, - PRUnichar * aBOM, PRBool aIsLE) { const PRUnichar * src = aSrc; @@ -89,18 +88,6 @@ static nsresult ConvertCommon(const PRUnichar * aSrc, const char * destEnd = aDest + *aDestLength; PRUint32 ucs4; - // Handle BOM if necessary - if (0 != *aBOM) - { - if (*aDestLength < 4) { - *aSrcLength = *aDestLength = 0; - return NS_OK_UENC_MOREOUTPUT; - } - - *(PRUint32*)dest = *aBOM; - *aBOM = 0; - dest += 4; - } // left-over high surroage code point from the prev. run. if (*aHighSurrogate) @@ -207,21 +194,21 @@ static nsresult FinishCommon(char * aDest, //---------------------------------------------------------------------- // Class nsUnicodeToUTF32 [implementation] -NS_IMPL_ISUPPORTS1(nsUnicodeToUTF32Base, nsIUnicodeEncoder) +NS_IMPL_ISUPPORTS1(nsUnicodeToUTF32, nsIUnicodeEncoder) //---------------------------------------------------------------------- // Subclassing of nsIUnicodeEncoder class [implementation] -NS_IMETHODIMP nsUnicodeToUTF32Base::GetMaxLength(const PRUnichar * aSrc, - PRInt32 aSrcLength, - PRInt32 * aDestLength) +NS_IMETHODIMP nsUnicodeToUTF32::GetMaxLength(const PRUnichar * aSrc, + PRInt32 aSrcLength, + PRInt32 * aDestLength) { *aDestLength = aSrcLength * 4; return NS_OK; } -NS_IMETHODIMP nsUnicodeToUTF32Base::FillInfo(PRUint32 *aInfo) +NS_IMETHODIMP nsUnicodeToUTF32::FillInfo(PRUint32 *aInfo) { memset(aInfo, 0xFF, (0x10000L >> 3)); return NS_OK; @@ -241,7 +228,7 @@ NS_IMETHODIMP nsUnicodeToUTF32BE::Convert(const PRUnichar * aSrc, PRInt32 * aDestLength) { return ConvertCommon(aSrc, aSrcLength, aDest, aDestLength, - &mHighSurrogate, &mBOM, PR_FALSE); + &mHighSurrogate, PR_FALSE); } NS_IMETHODIMP nsUnicodeToUTF32BE::Finish(char * aDest, @@ -264,7 +251,7 @@ NS_IMETHODIMP nsUnicodeToUTF32LE::Convert(const PRUnichar * aSrc, PRInt32 * aDestLength) { return ConvertCommon(aSrc, aSrcLength, aDest, aDestLength, - &mHighSurrogate, &mBOM, PR_TRUE); + &mHighSurrogate, PR_TRUE); } NS_IMETHODIMP nsUnicodeToUTF32LE::Finish(char * aDest, diff --git a/intl/uconv/ucvlatin/nsUnicodeToUTF32.h b/intl/uconv/ucvlatin/nsUnicodeToUTF32.h index 69ce6468eada..2b73fa2bf0bb 100644 --- a/intl/uconv/ucvlatin/nsUnicodeToUTF32.h +++ b/intl/uconv/ucvlatin/nsUnicodeToUTF32.h @@ -45,24 +45,25 @@ // Class nsUnicodeToUTF32 [declaration] /** - * A character set converter from UTF-32 family to Unicode. - * The base class for UTF-32/UTF-32BE/UTF-32LE to Unicode converters. + * A character set converter from UTF32 to Unicode. + * The base class for UTF32BE/UTF32LE to Unicode converters. * @created 08/Dec/2002 * @author Jungshik Shin */ -class nsUnicodeToUTF32Base : public nsIUnicodeEncoder +class nsUnicodeToUTF32 : public nsIUnicodeEncoder { NS_DECL_ISUPPORTS -protected: +public: /** - * Class constructor. accessible only by child classes + * Class constructor. */ - nsUnicodeToUTF32Base() {mBOM = 0; mHighSurrogate = 0;} - virtual ~nsUnicodeToUTF32Base() {} + nsUnicodeToUTF32() {mHighSurrogate = 0;} + virtual ~nsUnicodeToUTF32() {} +protected: PRUnichar mHighSurrogate; NS_IMETHOD GetMaxLength(const PRUnichar * aSrc, PRInt32 aSrcLength, @@ -71,28 +72,26 @@ protected: //-------------------------------------------------------------------- // Subclassing of nsIUnicodeEncoder class [declaration] - NS_IMETHOD Reset() {mBOM = 0; mHighSurrogate = 0; return NS_OK;} + NS_IMETHOD Reset() {mHighSurrogate = 0; return NS_OK;} NS_IMETHOD FillInfo(PRUint32* aInfo); NS_IMETHOD SetOutputErrorBehavior(PRInt32 aBehavior, nsIUnicharEncoder * aEncoder, PRUnichar aChar) {return NS_OK;} -protected: - PRUnichar mBOM; }; //---------------------------------------------------------------------- // Class nsUnicodeToUTF32BE [declaration] /** - * A character set converter from Unicode to UTF-32BE. - * A subclass of UnicodeToUTF32Base. + * A character set converter from Unicode to UTF32BE. + * A subclass of UnicodeToUTF32. * @created 08/Dec/2002 * @author Jungshik Shin */ -class nsUnicodeToUTF32BE : public nsUnicodeToUTF32Base +class nsUnicodeToUTF32BE : public nsUnicodeToUTF32 { public: @@ -110,13 +109,13 @@ public: // Class nsUnicodeToUTF32LE [declaration] /** - * A character set converter from Unicode to UTF-32LE. - * A subclass of UnicodeToUTF32Base. + * A character set converter from Unicode to UTF32LE. + * A subclass of UnicodeToUTF32. * @created 08/Dec/2002 * @author Jungshik Shin */ -class nsUnicodeToUTF32LE : public nsUnicodeToUTF32Base +class nsUnicodeToUTF32LE : public nsUnicodeToUTF32 { public: @@ -128,31 +127,5 @@ public: }; -//---------------------------------------------------------------------- -// Class nsUnicodeToUTF32 [declaration] - -/** - * A character set converter from Unicode to UTF-32. - * A subclass of UnicodeToUTF32Base. - * @created 08/Dec/2002 - * @author Jungshik Shin - */ -#ifdef IS_LITTLE_ENDIAN -class nsUnicodeToUTF32 : public nsUnicodeToUTF32LE -#elif defined(IS_BIG_ENDIAN) -class nsUnicodeToUTF32 : public nsUnicodeToUTF32BE -#else -#error "Unknown endianness" -#endif -{ -public: - nsUnicodeToUTF32() {mBOM = 0xFEFF; mHighSurrogate = 0;}; - - //-------------------------------------------------------------------- - // Subclassing of nsUnicodeToUTF32Base class [declaration] - NS_IMETHOD Reset() {mBOM = 0xFEFF; mHighSurrogate = 0; return NS_OK;}; - -}; - #endif /* nsUnicodeToUTF32_h___ */ diff --git a/layout/style/nsCSSLoader.cpp b/layout/style/nsCSSLoader.cpp index deffc33ff3d9..4deb9445f8ea 100644 --- a/layout/style/nsCSSLoader.cpp +++ b/layout/style/nsCSSLoader.cpp @@ -456,7 +456,6 @@ static nsresult GetCharsetFromData(const unsigned char* aStyleSheetData, return NS_ERROR_NOT_AVAILABLE; PRUint32 step = 1; PRUint32 pos = 0; - PRBool bigEndian = PR_FALSE; // Determine the encoding type. If we have a BOM, set aCharset to the // charset listed for that BOM in http://www.w3.org/TR/REC-xml#sec-guessing; // that way even if we don't have a valid @charset rule we can use the BOM to @@ -490,18 +489,26 @@ static nsresult GetCharsetFromData(const unsigned char* aStyleSheetData, aCharset = "UTF-32"; } else if (nsContentUtils::CheckForBOM(aStyleSheetData, - aDataLength, aCharset, &bigEndian)) { + aDataLength, aCharset)) { if (aCharset.Equals("UTF-8")) { step = 1; pos = 3; } - else if (aCharset.Equals("UTF-32")) { + else if (aCharset.Equals("UTF-32BE")) { step = 4; - pos = bigEndian ? 7 : 4; + pos = 7; } - else if (aCharset.Equals("UTF-16")) { + else if (aCharset.Equals("UTF-32LE")) { + step = 4; + pos = 4; + } + else if (aCharset.Equals("UTF-16BE")) { step = 2; - pos = bigEndian ? 3 : 2; + pos = 3; + } + else if (aCharset.Equals("UTF-16LE")) { + step = 2; + pos = 2; } } else if (aStyleSheetData[0] == 0x00 && diff --git a/netwerk/streamconv/converters/nsUnknownDecoder.cpp b/netwerk/streamconv/converters/nsUnknownDecoder.cpp index 3ceeeb463140..c9a4f424020f 100644 --- a/netwerk/streamconv/converters/nsUnknownDecoder.cpp +++ b/netwerk/streamconv/converters/nsUnknownDecoder.cpp @@ -568,10 +568,11 @@ PRBool nsUnknownDecoder::LastDitchSniff(nsIRequest* aRequest) // are for 2-byte encodings and the UTF-8 BOM is 3 bytes). if (mBufferLen >= 4) { const unsigned char* buf = (const unsigned char*)mBuffer; - if ((buf[0] == 0xFE && buf[1] == 0xFF) || // UTF-16, Big Endian - (buf[0] == 0xFF && buf[1] == 0xFE) || // UTF-16 or UCS-4, Little Endian + if ((buf[0] == 0xFE && buf[1] == 0xFF) || // UTF-16BE + (buf[0] == 0xFF && buf[1] == 0xFE) || // UTF-16LE (buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF) || // UTF-8 - (buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && buf[3] == 0xFF)) { // UCS-4, Big Endian + (buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && buf[3] == 0xFF) || // UCS-4BE + (buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFF && buf[3] == 0xFE)) { // UCS-4 mContentType = TEXT_PLAIN; return PR_TRUE; diff --git a/parser/htmlparser/src/nsParser.cpp b/parser/htmlparser/src/nsParser.cpp index 672880548c3f..c6c81d59e1d3 100644 --- a/parser/htmlparser/src/nsParser.cpp +++ b/parser/htmlparser/src/nsParser.cpp @@ -2402,10 +2402,8 @@ nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext) } -#define UTF16_BOM "UTF-16" #define UTF16_BE "UTF-16BE" #define UTF16_LE "UTF-16LE" -#define UCS4_BOM "UTF-32" #define UCS4_BE "UTF-32BE" #define UCS4_LE "UTF-32LE" #define UCS4_2143 "X-ISO-10646-UCS-4-2143" @@ -2443,7 +2441,7 @@ DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, // 00 00 if((0xFE==aBytes[2]) && (0xFF==aBytes[3])) { // 00 00 FE FF UCS-4, big-endian machine (1234 order) - oCharset.Assign(UCS4_BOM); + oCharset.Assign(UCS4_BE); } else if((0x00==aBytes[2]) && (0x3C==aBytes[3])) { // 00 00 00 3C UCS-4, big-endian machine (1234 order) oCharset.Assign(UCS4_BE); @@ -2574,7 +2572,7 @@ DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, oCharset.Assign(UCS4_3412); } else { // FE FF UTF-16, big-endian - oCharset.Assign(UTF16_BOM); + oCharset.Assign(UTF16_BE); } oCharsetSource= kCharsetFromByteOrderMark; } @@ -2583,11 +2581,11 @@ DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, if(0xFE==aBytes[1]) { if(0x00==aBytes[2] && 0x00==aBytes[3]) // FF FE 00 00 UTF-32, little-endian - oCharset.Assign(UCS4_BOM); + oCharset.Assign(UCS4_LE); else // FF FE // UTF-16, little-endian - oCharset.Assign(UTF16_BOM); + oCharset.Assign(UTF16_LE); oCharsetSource= kCharsetFromByteOrderMark; } break; @@ -2782,7 +2780,6 @@ ParserWriteFunc(nsIInputStream* in, (!preferred.EqualsLiteral("UTF-16") && !preferred.EqualsLiteral("UTF-16BE") && !preferred.EqualsLiteral("UTF-16LE") && - !preferred.EqualsLiteral("UTF-32") && !preferred.EqualsLiteral("UTF-32BE") && !preferred.EqualsLiteral("UTF-32LE")))) { guess = preferred; diff --git a/toolkit/locales/en-US/chrome/global/intl.properties b/toolkit/locales/en-US/chrome/global/intl.properties index c7a4014e894c..dba12e393317 100644 --- a/toolkit/locales/en-US/chrome/global/intl.properties +++ b/toolkit/locales/en-US/chrome/global/intl.properties @@ -29,7 +29,7 @@ intl.charsetmenu.browser.more3=GB2312, x-gbk, gb18030, HZ-GB-2312, ISO-2022-CN, intl.charsetmenu.browser.more4=armscii-8, GEOSTD8, TIS-620, ISO-8859-11, windows-874, IBM857, ISO-8859-9, x-mac-turkish, windows-1254, x-viet-tcvn5712, VISCII, x-viet-vps, windows-1258, x-mac-devanagari, x-mac-gujarati, x-mac-gurmukhi intl.charsetmenu.browser.more5=ISO-8859-6, windows-1256, IBM864, x-mac-arabic, x-mac-farsi, ISO-8859-8-I, windows-1255, ISO-8859-8, IBM862, x-mac-hebrew # Localization Note: Never change the following entry. -intl.charsetmenu.browser.unicode=UTF-8, UTF-16LE, UTF-16BE, UTF-32, UTF-32LE, UTF-32BE +intl.charsetmenu.browser.unicode=UTF-8, UTF-16LE, UTF-16BE, UTF-32LE, UTF-32BE intl.charset.default=ISO-8859-1 intl.charset.detector= intl.charsetmenu.mailedit=ISO-8859-1, ISO-8859-15, ISO-8859-6, armscii-8, geostd8, ISO-8859-13, ISO-8859-14, ISO-8859-2, GB2312, GB18030, Big5, KOI8-R, windows-1251, KOI8-U, ISO-8859-7, ISO-8859-8-I, windows-1255, ISO-2022-JP, EUC-KR, ISO-8859-10, ISO-8859-3, TIS-620, ISO-8859-9, UTF-8, VISCII