diff --git a/Cargo.lock b/Cargo.lock index e7998b5654e9..e3e608e6e432 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1009,21 +1009,21 @@ name = "encoding_c" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "encoding_glue" version = "0.1.0" dependencies = [ - "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)", "nserror 0.1.0", "nsstring 0.1.0", ] [[package]] name = "encoding_rs" -version = "0.8.16" +version = "0.8.17" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1299,6 +1299,7 @@ dependencies = [ "profiler_helper 0.1.0", "rsdparsa_capi 0.1.0", "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "shift_or_euc_c 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "storage 0.1.0", "webrender_bindings 0.1.0", "xpcom 0.1.0", @@ -2017,7 +2018,7 @@ name = "nsstring" version = "0.1.0" dependencies = [ "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2773,6 +2774,24 @@ dependencies = [ "opaque-debug 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "shift_or_euc" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "shift_or_euc_c" 
+version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)", + "shift_or_euc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "shlex" version = "0.1.1" @@ -3805,7 +3824,7 @@ dependencies = [ "checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a" "checksum ena 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "25b4e5febb25f08c49f1b07dc33a182729a6b21edfb562b5aef95f78e0dbe5bb" "checksum encoding_c 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "769ecb8b33323998e482b218c0d13cd64c267609023b4b7ec3ee740714c318ee" -"checksum encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)" = "0535f350c60aac0b87ccf28319abc749391e912192255b0c00a2c12c6917bd73" +"checksum encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)" = "4155785c79f2f6701f185eb2e6b4caf0555ec03477cb4c70db67b465311620ed" "checksum env_logger 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0561146661ae44c579e993456bc76d11ce1e0c7d745e57b2fa7146b6e49fa2ad" "checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3" "checksum euclid 0.19.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d1a7698bdda3d7444a79d33bdc96e8b518d44ea3ff101d8492a6ca1207b886ea" @@ -3967,6 +3986,8 @@ dependencies = [ "checksum serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)" = "44dd2cfde475037451fa99b7e5df77aa3cfd1536575fa8e7a538ab36dcde49ae" "checksum sha-1 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "23962131a91661d643c98940b20fcaffe62d776a823247be80a48fcb8b6fce68" "checksum sha2 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"7b4d8bfd0e469f417657573d8451fb33d16cfe0989359b93baf3a1ffc639543d" +"checksum shift_or_euc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f930dea4685b9803954b9d74cdc175c6d946a22f2eafe5aa2e9a58cdcae7da8c" +"checksum shift_or_euc_c 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c81ec08c8a68c45c48d8ef58b80ce038cc9945891c4a4996761e2ec5cba05abc" "checksum shlex 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2" "checksum siphasher 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2ffc669b726f2bc9a3bcff66e5e23b56ba6bf70e22a34c3d7b6d0b3450b65b84" "checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23" diff --git a/browser/base/content/browser.js b/browser/base/content/browser.js index b27e04a4fb93..50cd63260e88 100644 --- a/browser/base/content/browser.js +++ b/browser/base/content/browser.js @@ -6615,6 +6615,9 @@ function handleDroppedLink(event, urlOrLinks, nameOrTriggeringPrincipal, trigger function BrowserSetForcedCharacterSet(aCharset) { if (aCharset) { + if (aCharset == "Japanese") { + aCharset = "Shift_JIS"; + } gBrowser.selectedBrowser.characterSet = aCharset; // Save the forced character-set PlacesUIUtils.setCharsetForPage(getWebNavigation().currentURI, @@ -6629,7 +6632,8 @@ function BrowserCharsetReload() { } function UpdateCurrentCharset(target) { - let selectedCharset = CharsetMenu.foldCharset(gBrowser.selectedBrowser.characterSet); + let selectedCharset = CharsetMenu.foldCharset(gBrowser.selectedBrowser.characterSet, + gBrowser.selectedBrowser.charsetAutodetected); for (let menuItem of target.getElementsByTagName("menuitem")) { let isSelected = menuItem.getAttribute("charset") === selectedCharset; menuItem.setAttribute("checked", isSelected); diff --git a/browser/components/customizableui/CustomizableWidgets.jsm 
b/browser/components/customizableui/CustomizableWidgets.jsm index d8c61e66cbd3..d87fbeb54f05 100644 --- a/browser/components/customizableui/CustomizableWidgets.jsm +++ b/browser/components/customizableui/CustomizableWidgets.jsm @@ -358,7 +358,8 @@ const CustomizableWidgets = [ }, updateCurrentCharset(aDocument) { let currentCharset = aDocument.defaultView.gBrowser.selectedBrowser.characterSet; - currentCharset = CharsetMenu.foldCharset(currentCharset); + let {charsetAutodetected} = aDocument.defaultView.gBrowser.selectedBrowser; + currentCharset = CharsetMenu.foldCharset(currentCharset, charsetAutodetected); let pinnedContainer = aDocument.getElementById("PanelUI-characterEncodingView-pinned"); let charsetContainer = aDocument.getElementById("PanelUI-characterEncodingView-charsets"); diff --git a/docshell/base/nsDocShell.cpp b/docshell/base/nsDocShell.cpp index 29d0b70aba90..17cc453c6388 100644 --- a/docshell/base/nsDocShell.cpp +++ b/docshell/base/nsDocShell.cpp @@ -1276,7 +1276,6 @@ nsDocShell::GatherCharsetMenuTelemetry() { case kCharsetFromDocTypeDefault: case kCharsetFromCache: case kCharsetFromParentFrame: - case kCharsetFromHintPrevDoc: // Changing charset on an unlabeled doc. 
if (isFileURL) { Telemetry::AccumulateCategorical( @@ -1892,6 +1891,26 @@ nsDocShell::GetMayEnableCharacterEncodingMenu( return NS_OK; } +NS_IMETHODIMP +nsDocShell::GetCharsetAutodetected(bool* aCharsetAutodetected) { + *aCharsetAutodetected = false; + if (!mContentViewer) { + return NS_OK; + } + Document* doc = mContentViewer->GetDocument(); + if (!doc) { + return NS_OK; + } + int32_t source = doc->GetDocumentCharacterSetSource(); + + if (source == kCharsetFromAutoDetection || + source == kCharsetFromUserForcedAutoDetection) { + *aCharsetAutodetected = true; + } + + return NS_OK; +} + NS_IMETHODIMP nsDocShell::GetDocShellEnumerator(int32_t aItemType, DocShellEnumeratorDirection aDirection, @@ -8372,11 +8391,11 @@ nsresult nsDocShell::SetupNewViewer(nsIContentViewer* aNewViewer) { const Encoding* forceCharset = nullptr; const Encoding* hintCharset = nullptr; - int32_t hintCharsetSource; - float textZoom; - float pageZoom; - float overrideDPPX; - bool styleDisabled; + int32_t hintCharsetSource = kCharsetUninitialized; + float textZoom = 1.0; + float pageZoom = 1.0; + float overrideDPPX = 1.0; + bool styleDisabled = false; // |newMUDV| also serves as a flag to set the data from the above vars nsCOMPtr newCv; @@ -10188,6 +10207,8 @@ nsresult nsDocShell::DoURILoad(nsDocShellLoadState* aLoadState, MOZ_ASSERT(NS_SUCCEEDED(rv)); } + Unused << rv; // Keep Coverity happy + nsCOMPtr props(do_QueryInterface(channel)); if (props) { // save true referrer for those who need it (e.g. xpinstall whitelisting) diff --git a/docshell/base/nsIDocShell.idl b/docshell/base/nsIDocShell.idl index 484dc90d1b85..c26d63b40246 100644 --- a/docshell/base/nsIDocShell.idl +++ b/docshell/base/nsIDocShell.idl @@ -878,6 +878,11 @@ interface nsIDocShell : nsIDocShellTreeItem */ [infallible] readonly attribute boolean mayEnableCharacterEncodingMenu; + /** + * Indicates that the character encoding was autodetected. 
+ */ + [infallible] readonly attribute boolean charsetAutodetected; + attribute nsIEditor editor; readonly attribute boolean editable; /* this docShell is editable */ readonly attribute boolean hasEditingSession; /* this docShell has an editing session */ diff --git a/docshell/test/browser/browser.ini b/docshell/test/browser/browser.ini index 480c1d491c6e..417a1684f2eb 100644 --- a/docshell/test/browser/browser.ini +++ b/docshell/test/browser/browser.ini @@ -44,6 +44,14 @@ support-files = file_bug1415918_beforeunload_iframe_2.html file_bug1415918_beforeunload_iframe.html file_bug1415918_beforeunload.html + file_bug1543077-1-child.html + file_bug1543077-1.html + file_bug1543077-2-child.html + file_bug1543077-2.html + file_bug1543077-3-child.html + file_bug1543077-3.html + file_bug1543077-4-child.html + file_bug1543077-4.html file_multiple_pushState.html print_postdata.sjs test-form_sjis.html @@ -60,6 +68,10 @@ support-files = onpageshow_message.html file_cross_process_csp_inheritance.html +[browser_bug1543077-1.js] +[browser_bug1543077-2.js] +[browser_bug1543077-3.js] +[browser_bug1543077-4.js] [browser_bug1206879.js] [browser_bug1309900_crossProcessHistoryNavigation.js] [browser_bug1328501.js] diff --git a/docshell/test/browser/browser_bug1543077-1.js b/docshell/test/browser/browser_bug1543077-1.js new file mode 100644 index 000000000000..3bbdd307b902 --- /dev/null +++ b/docshell/test/browser/browser_bug1543077-1.js @@ -0,0 +1,18 @@ +function test() { + var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/"; + runCharsetTest(rootDir + "file_bug1543077-1.html", afterOpen, "Japanese", afterChangeCharset); +} + +function afterOpen() { + is(content.document.documentElement.textContent.indexOf("\u00A4"), 131, "Parent doc should be windows-1252 initially"); + + is(content.frames[0].document.documentElement.textContent.indexOf("\u00A4"), 87, "Child doc should be windows-1252 initially"); +} + +function afterChangeCharset() { + 
is(content.document.documentElement.textContent.indexOf("\u3042"), 131, "Parent doc should decode as EUC-JP subsequently"); + is(content.frames[0].document.documentElement.textContent.indexOf("\u3042"), 87, "Child doc should decode as EUC-JP subsequently"); + + is(content.document.characterSet, "EUC-JP", "Parent doc should report EUC-JP subsequently"); + is(content.frames[0].document.characterSet, "EUC-JP", "Child doc should report EUC-JP subsequently"); +} diff --git a/docshell/test/browser/browser_bug1543077-2.js b/docshell/test/browser/browser_bug1543077-2.js new file mode 100644 index 000000000000..6a0f6ae62d7c --- /dev/null +++ b/docshell/test/browser/browser_bug1543077-2.js @@ -0,0 +1,18 @@ +function test() { + var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/"; + runCharsetTest(rootDir + "file_bug1543077-2.html", afterOpen, "Japanese", afterChangeCharset); +} + +function afterOpen() { + is(content.document.documentElement.textContent.indexOf("\u201A"), 134, "Parent doc should be windows-1252 initially"); + + is(content.frames[0].document.documentElement.textContent.indexOf("\u201A"), 90, "Child doc should be windows-1252 initially"); +} + +function afterChangeCharset() { + is(content.document.documentElement.textContent.indexOf("\u3042"), 134, "Parent doc should decode as Shift_JIS subsequently"); + is(content.frames[0].document.documentElement.textContent.indexOf("\u3042"), 90, "Child doc should decode as Shift_JIS subsequently"); + + is(content.document.characterSet, "Shift_JIS", "Parent doc should report Shift_JIS subsequently"); + is(content.frames[0].document.characterSet, "Shift_JIS", "Child doc should report Shift_JIS subsequently"); +} diff --git a/docshell/test/browser/browser_bug1543077-3.js b/docshell/test/browser/browser_bug1543077-3.js new file mode 100644 index 000000000000..bf998332bd35 --- /dev/null +++ b/docshell/test/browser/browser_bug1543077-3.js @@ -0,0 +1,18 @@ +function test() { + var rootDir = 
"http://mochi.test:8888/browser/docshell/test/browser/"; + runCharsetTest(rootDir + "file_bug1543077-3.html", afterOpen, "Japanese", afterChangeCharset); +} + +function afterOpen() { + is(content.document.documentElement.textContent.indexOf("\u001B"), 136, "Parent doc should be windows-1252 initially"); + + is(content.frames[0].document.documentElement.textContent.indexOf("\u001B"), 92, "Child doc should be windows-1252 initially"); +} + +function afterChangeCharset() { + is(content.document.documentElement.textContent.indexOf("\u3042"), 136, "Parent doc should decode as ISO-2022-JP subsequently"); + is(content.frames[0].document.documentElement.textContent.indexOf("\u3042"), 92, "Child doc should decode as ISO-2022-JP subsequently"); + + is(content.document.characterSet, "ISO-2022-JP", "Parent doc should report ISO-2022-JP subsequently"); + is(content.frames[0].document.characterSet, "ISO-2022-JP", "Child doc should report ISO-2022-JP subsequently"); +} diff --git a/docshell/test/browser/browser_bug1543077-4.js b/docshell/test/browser/browser_bug1543077-4.js new file mode 100644 index 000000000000..24b400d42f3f --- /dev/null +++ b/docshell/test/browser/browser_bug1543077-4.js @@ -0,0 +1,18 @@ +function test() { + var rootDir = "http://mochi.test:8888/browser/docshell/test/browser/"; + runCharsetTest(rootDir + "file_bug1543077-4.html", afterOpen, "Japanese", afterChangeCharset); +} + +function afterOpen() { + is(content.document.documentElement.textContent.indexOf("\u00A4"), 131, "Parent doc should be windows-1252 initially"); + + is(content.frames[0].document.documentElement.textContent.indexOf("\u201A"), 90, "Child doc should be windows-1252 initially"); +} + +function afterChangeCharset() { + is(content.document.documentElement.textContent.indexOf("\u3042"), 131, "Parent doc should decode as EUC-JP subsequently"); + is(content.frames[0].document.documentElement.textContent.indexOf("\u3042"), 90, "Child doc should decode as Shift_JIS subsequently"); + + 
is(content.document.characterSet, "EUC-JP", "Parent doc should report EUC-JP subsequently"); + is(content.frames[0].document.characterSet, "Shift_JIS", "Child doc should report Shift_JIS subsequently"); +} diff --git a/docshell/test/browser/file_bug1543077-1-child.html b/docshell/test/browser/file_bug1543077-1-child.html new file mode 100644 index 000000000000..d244b27717bf --- /dev/null +++ b/docshell/test/browser/file_bug1543077-1-child.html @@ -0,0 +1,11 @@ + + + + +No encoding declaration in parent or child + + +

Hiragana letter a if decoded as EUC-JP: ¤¢

+ + + diff --git a/docshell/test/browser/file_bug1543077-1.html b/docshell/test/browser/file_bug1543077-1.html new file mode 100644 index 000000000000..4d37ec18fc4a --- /dev/null +++ b/docshell/test/browser/file_bug1543077-1.html @@ -0,0 +1,16 @@ + + + + +No encoding declaration in parent or child + + +

No encoding declaration in parent or child

+ +

Hiragana letter a if decoded as EUC-JP: ¤¢

+ + + + + + diff --git a/docshell/test/browser/file_bug1543077-2-child.html b/docshell/test/browser/file_bug1543077-2-child.html new file mode 100644 index 000000000000..c3415e97a490 --- /dev/null +++ b/docshell/test/browser/file_bug1543077-2-child.html @@ -0,0 +1,11 @@ + + + + +No encoding declaration in parent or child + + +

Hiragana letter a if decoded as Shift_JIS: ‚ 

+ + + diff --git a/docshell/test/browser/file_bug1543077-2.html b/docshell/test/browser/file_bug1543077-2.html new file mode 100644 index 000000000000..c0ad81570f7c --- /dev/null +++ b/docshell/test/browser/file_bug1543077-2.html @@ -0,0 +1,16 @@ + + + + +No encoding declaration in parent or child + + +

No encoding declaration in parent or child

+ +

Hiragana letter a if decoded as Shift_JIS: ‚ 

+ + + + + + diff --git a/docshell/test/browser/file_bug1543077-3-child.html b/docshell/test/browser/file_bug1543077-3-child.html new file mode 100644 index 000000000000..858a4623ed72 --- /dev/null +++ b/docshell/test/browser/file_bug1543077-3-child.html @@ -0,0 +1,11 @@ + + + + +No encoding declaration in parent or child + + +

Hiragana letter a if decoded as ISO-2022-JP: $B$"(B

+ + + diff --git a/docshell/test/browser/file_bug1543077-3.html b/docshell/test/browser/file_bug1543077-3.html new file mode 100644 index 000000000000..c4f467dd3fb0 --- /dev/null +++ b/docshell/test/browser/file_bug1543077-3.html @@ -0,0 +1,16 @@ + + + + +No encoding declaration in parent or child + + +

No encoding declaration in parent or child

+ +

Hiragana letter a if decoded as ISO-2022-JP: $B$"(B

+ + + + + + diff --git a/docshell/test/browser/file_bug1543077-4-child.html b/docshell/test/browser/file_bug1543077-4-child.html new file mode 100644 index 000000000000..c3415e97a490 --- /dev/null +++ b/docshell/test/browser/file_bug1543077-4-child.html @@ -0,0 +1,11 @@ + + + + +No encoding declaration in parent or child + + +

Hiragana letter a if decoded as Shift_JIS: ‚ 

+ + + diff --git a/docshell/test/browser/file_bug1543077-4.html b/docshell/test/browser/file_bug1543077-4.html new file mode 100644 index 000000000000..b8feb4cba66f --- /dev/null +++ b/docshell/test/browser/file_bug1543077-4.html @@ -0,0 +1,16 @@ + + + + +No encoding declaration in parent or child + + +

No encoding declaration in parent or child

+ +

Hiragana letter a if decoded as EUC-JP: ¤¢

+ + + + + + diff --git a/dom/interfaces/base/nsIBrowser.idl b/dom/interfaces/base/nsIBrowser.idl index 254eccd9d815..a8a7f0878ae9 100644 --- a/dom/interfaces/base/nsIBrowser.idl +++ b/dom/interfaces/base/nsIBrowser.idl @@ -93,6 +93,12 @@ interface nsIBrowser : nsISupports */ attribute boolean mayEnableCharacterEncodingMenu; + /** + * Whether or not the character encoding was detected by analyzing + * content (as opposed to reading a protocol label). + */ + attribute boolean charsetAutodetected; + /** * Called by Gecko to update the browser when its state changes. * diff --git a/dom/ipc/BrowserChild.cpp b/dom/ipc/BrowserChild.cpp index 9cc6bb22c612..f416f020ebbb 100644 --- a/dom/ipc/BrowserChild.cpp +++ b/dom/ipc/BrowserChild.cpp @@ -3539,6 +3539,8 @@ NS_IMETHODIMP BrowserChild::OnStateChange(nsIWebProgress* aWebProgress, stateChangeData->isNavigating() = docShell->GetIsNavigating(); stateChangeData->mayEnableCharacterEncodingMenu() = docShell->GetMayEnableCharacterEncodingMenu(); + stateChangeData->charsetAutodetected() = + docShell->GetCharsetAutodetected(); if (document && aStateFlags & nsIWebProgressListener::STATE_STOP) { document->GetContentType(stateChangeData->contentType()); diff --git a/dom/ipc/BrowserParent.cpp b/dom/ipc/BrowserParent.cpp index 843c2dbd5d27..e6ceff4290d5 100644 --- a/dom/ipc/BrowserParent.cpp +++ b/dom/ipc/BrowserParent.cpp @@ -2394,6 +2394,8 @@ mozilla::ipc::IPCResult BrowserParent::RecvOnStateChange( Unused << browser->SetIsNavigating(aStateChangeData->isNavigating()); Unused << browser->SetMayEnableCharacterEncodingMenu( aStateChangeData->mayEnableCharacterEncodingMenu()); + Unused << browser->SetCharsetAutodetected( + aStateChangeData->charsetAutodetected()); Unused << browser->UpdateForStateChange(aStateChangeData->charset(), aStateChangeData->documentURI(), aStateChangeData->contentType()); diff --git a/dom/ipc/PBrowser.ipdl b/dom/ipc/PBrowser.ipdl index 026fa6a6c1b4..df7f92f7ee03 100644 --- a/dom/ipc/PBrowser.ipdl +++ 
b/dom/ipc/PBrowser.ipdl @@ -123,6 +123,7 @@ struct WebProgressStateChangeData { bool isNavigating; bool mayEnableCharacterEncodingMenu; + bool charsetAutodetected; // The following fields are only set when the aStateFlags param passed with // this struct is |nsIWebProgress.STATE_STOP|. diff --git a/extensions/universalchardet/moz.build b/extensions/universalchardet/moz.build index 84696a0ff6e7..874d696f9575 100644 --- a/extensions/universalchardet/moz.build +++ b/extensions/universalchardet/moz.build @@ -4,7 +4,6 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. -DIRS += ['src'] TEST_DIRS += ['tests'] with Files('**'): diff --git a/extensions/universalchardet/src/base/CharDistribution.cpp b/extensions/universalchardet/src/base/CharDistribution.cpp deleted file mode 100644 index f2aea48db586..000000000000 --- a/extensions/universalchardet/src/base/CharDistribution.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#include "CharDistribution.h" - -#include "JISFreq.tab" -#include "mozilla/ArrayUtils.h" - -#define SURE_YES 0.99f -#define SURE_NO 0.01f - -// return confidence base on received data -float CharDistributionAnalysis::GetConfidence(void) { - // if we didn't receive any character in our consideration range, or the - // number of frequent characters is below the minimum threshold, return - // negative answer - if (mTotalChars <= 0 || mFreqChars <= mDataThreshold) return SURE_NO; - - if (mTotalChars != mFreqChars) { - float r = - mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio); - - if (r < SURE_YES) return r; - } - // normalize confidence, (we don't want to be 100% sure) - return SURE_YES; -} - -SJISDistributionAnalysis::SJISDistributionAnalysis() { - mCharToFreqOrder = JISCharToFreqOrder; - mTableSize = mozilla::ArrayLength(JISCharToFreqOrder); - mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO; -} - -EUCJPDistributionAnalysis::EUCJPDistributionAnalysis() { - mCharToFreqOrder = JISCharToFreqOrder; - mTableSize = mozilla::ArrayLength(JISCharToFreqOrder); - mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO; -} diff --git a/extensions/universalchardet/src/base/CharDistribution.h b/extensions/universalchardet/src/base/CharDistribution.h deleted file mode 100644 index b11da9e79186..000000000000 --- a/extensions/universalchardet/src/base/CharDistribution.h +++ /dev/null @@ -1,201 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#ifndef CharDistribution_h__ -#define CharDistribution_h__ - -#include "nscore.h" - -#define ENOUGH_DATA_THRESHOLD 1024 - -class CharDistributionAnalysis { - public: - CharDistributionAnalysis() { Reset(); } - - // feed a block of data and do distribution analysis - void HandleData(const char* aBuf, uint32_t aLen) {} - - // Feed a character with known length - void HandleOneChar(const char* aStr, uint32_t aCharLen) { - int32_t order; - - // we only care about 2-bytes character in our distribution analysis - order = (aCharLen == 2) ? GetOrder(aStr) : -1; - - if (order >= 0) { - mTotalChars++; - // order is valid - if ((uint32_t)order < mTableSize) { - if (512 > mCharToFreqOrder[order]) mFreqChars++; - } - } - } - - // return confidence base on existing data - float GetConfidence(void); - - // Reset analyser, clear any state - void Reset() { - mDone = false; - mTotalChars = 0; - mFreqChars = 0; - mDataThreshold = 0; - } - - // It is not necessary to receive all data to draw conclusion. For charset - // detection, - // certain amount of data is enough - bool GotEnoughData() { return mTotalChars > ENOUGH_DATA_THRESHOLD; } - - protected: - // we do not handle character base on its original encoding string, but - // convert this encoding string to a number, here called order. - // This allow multiple encoding of a language to share one frequency table - virtual int32_t GetOrder(const char* str) { return -1; } - - // If this flag is set to true, detection is done and conclusion has been made - bool mDone; - - // The number of characters whose frequency order is less than 512 - uint32_t mFreqChars; - - // Total character encounted. 
- uint32_t mTotalChars; - - // Number of hi-byte characters needed to trigger detection - uint32_t mDataThreshold; - - // Mapping table to get frequency order from char order (get from GetOrder()) - const int16_t* mCharToFreqOrder; - - // Size of above table - uint32_t mTableSize; - - // This is a constant value varies from language to language, it is used in - // calculating confidence. See my paper for further detail. - float mTypicalDistributionRatio; -}; - -class EUCTWDistributionAnalysis : public CharDistributionAnalysis { - public: - EUCTWDistributionAnalysis(); - - protected: - // for euc-TW encoding, we are interested - // first byte range: 0xc4 -- 0xfe - // second byte range: 0xa1 -- 0xfe - // no validation needed here. State machine has done that - int32_t GetOrder(const char* str) override { - if ((unsigned char)*str >= (unsigned char)0xc4) - return 94 * ((unsigned char)str[0] - (unsigned char)0xc4) + - (unsigned char)str[1] - (unsigned char)0xa1; - else - return -1; - } -}; - -class EUCKRDistributionAnalysis : public CharDistributionAnalysis { - public: - EUCKRDistributionAnalysis(); - - protected: - // for euc-KR encoding, we are interested - // first byte range: 0xb0 -- 0xfe - // second byte range: 0xa1 -- 0xfe - // no validation needed here. State machine has done that - int32_t GetOrder(const char* str) override { - if ((unsigned char)*str >= (unsigned char)0xb0) - return 94 * ((unsigned char)str[0] - (unsigned char)0xb0) + - (unsigned char)str[1] - (unsigned char)0xa1; - else - return -1; - } -}; - -class GB2312DistributionAnalysis : public CharDistributionAnalysis { - public: - GB2312DistributionAnalysis(); - - protected: - // for GB2312 encoding, we are interested - // first byte range: 0xb0 -- 0xfe - // second byte range: 0xa1 -- 0xfe - // no validation needed here. 
State machine has done that - int32_t GetOrder(const char* str) override { - if ((unsigned char)*str >= (unsigned char)0xb0 && - (unsigned char)str[1] >= (unsigned char)0xa1) - return 94 * ((unsigned char)str[0] - (unsigned char)0xb0) + - (unsigned char)str[1] - (unsigned char)0xa1; - else - return -1; - } -}; - -class Big5DistributionAnalysis : public CharDistributionAnalysis { - public: - Big5DistributionAnalysis(); - - protected: - // for big5 encoding, we are interested - // first byte range: 0xa4 -- 0xfe - // second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe - // no validation needed here. State machine has done that - int32_t GetOrder(const char* str) override { - if ((unsigned char)*str >= (unsigned char)0xa4) - if ((unsigned char)str[1] >= (unsigned char)0xa1) - return 157 * ((unsigned char)str[0] - (unsigned char)0xa4) + - (unsigned char)str[1] - (unsigned char)0xa1 + 63; - else - return 157 * ((unsigned char)str[0] - (unsigned char)0xa4) + - (unsigned char)str[1] - (unsigned char)0x40; - else - return -1; - } -}; - -class SJISDistributionAnalysis : public CharDistributionAnalysis { - public: - SJISDistributionAnalysis(); - - protected: - // for sjis encoding, we are interested - // first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe - // second byte range: 0x40 -- 0x7e, 0x81 -- oxfe - // no validation needed here. 
State machine has done that - int32_t GetOrder(const char* str) override { - int32_t order; - if ((unsigned char)*str >= (unsigned char)0x81 && - (unsigned char)*str <= (unsigned char)0x9f) - order = 188 * ((unsigned char)str[0] - (unsigned char)0x81); - else if ((unsigned char)*str >= (unsigned char)0xe0 && - (unsigned char)*str <= (unsigned char)0xef) - order = 188 * ((unsigned char)str[0] - (unsigned char)0xe0 + 31); - else - return -1; - order += (unsigned char)*(str + 1) - 0x40; - if ((unsigned char)str[1] > (unsigned char)0x7f) order--; - return order; - } -}; - -class EUCJPDistributionAnalysis : public CharDistributionAnalysis { - public: - EUCJPDistributionAnalysis(); - - protected: - // for euc-JP encoding, we are interested - // first byte range: 0xa0 -- 0xfe - // second byte range: 0xa1 -- 0xfe - // no validation needed here. State machine has done that - int32_t GetOrder(const char* str) override { - if ((unsigned char)*str >= (unsigned char)0xa0) - return 94 * ((unsigned char)str[0] - (unsigned char)0xa1) + - (unsigned char)str[1] - (unsigned char)0xa1; - else - return -1; - } -}; - -#endif // CharDistribution_h__ diff --git a/extensions/universalchardet/src/base/JISFreq.tab b/extensions/universalchardet/src/base/JISFreq.tab deleted file mode 100644 index d3815caa4276..000000000000 --- a/extensions/universalchardet/src/base/JISFreq.tab +++ /dev/null @@ -1,554 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -//Sampling from about 20M text materials include literature and computer technology - -// Japanese frequency table, applied to both S-JIS and EUC-JP -//They are sorted in order. 
- -/****************************************************************************** - * 128 --> 0.77094 - * 256 --> 0.85710 - * 512 --> 0.92635 - * 1024 --> 0.97130 - * 2048 --> 0.99431 - * - * Idea Distribution Ratio = 0.92635 / (1-0.92635) = 12.58 - * Random Distribution Ration = 512 / (2965+62+83+86-512) = 0.191 - * - * Typical Distribution Ratio, 25% of IDR - *****************************************************************************/ - -#define JIS_TYPICAL_DISTRIBUTION_RATIO (float) 3.0 - -// Char to FreqOrder table -static const int16_t JISCharToFreqOrder[] = -{ - 40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, // 16 -3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, // 32 -1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, // 48 -2042,1061,1062, 48, 49, 44, 45, 433, 434,1040,1041, 996, 787,2997,1255,4305, // 64 -2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, // 80 -5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, // 96 -1691,3297,4306,4307,4611, 452,3376,1182,2713,3688,3069,4308,5083,5084,5085,5086, // 112 -5087,5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102, // 128 -5103,5104,5105,5106,5107,5108,5109,5110,5111,5112,4097,5113,5114,5115,5116,5117, // 144 -5118,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,5130,5131,5132,5133, // 160 -5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,5149, // 176 -5150,5151,5152,4612,5153,5154,5155,5156,5157,5158,5159,5160,5161,5162,5163,5164, // 192 -5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,1472, 598, 618, 820,1205, // 208 -1309,1412,1858,1307,1692,5176,5177,5178,5179,5180,5181,5182,1142,1452,1234,1172, // 224 -1875,2043,2149,1793,1382,2973, 925,2404,1067,1241, 960,1377,2935,1491, 919,1217, // 240 -1865,2030,1406,1499,2749,4098,5183,5184,5185,5186,5187,5188,2561,4099,3117,1804, // 256 
-2049,3689,4309,3513,1663,5189,3166,3118,3298,1587,1561,3433,5190,3119,1625,2998, // 272 -3299,4613,1766,3690,2786,4614,5191,5192,5193,5194,2161, 26,3377, 2,3929, 20, // 288 -3691, 47,4100, 50, 17, 16, 35, 268, 27, 243, 42, 155, 24, 154, 29, 184, // 304 - 4, 91, 14, 92, 53, 396, 33, 289, 9, 37, 64, 620, 21, 39, 321, 5, // 320 - 12, 11, 52, 13, 3, 208, 138, 0, 7, 60, 526, 141, 151,1069, 181, 275, // 336 -1591, 83, 132,1475, 126, 331, 829, 15, 69, 160, 59, 22, 157, 55,1079, 312, // 352 - 109, 38, 23, 25, 10, 19, 79,5195, 61, 382,1124, 8, 30,5196,5197,5198, // 368 -5199,5200,5201,5202,5203,5204,5205,5206, 89, 62, 74, 34,2416, 112, 139, 196, // 384 - 271, 149, 84, 607, 131, 765, 46, 88, 153, 683, 76, 874, 101, 258, 57, 80, // 400 - 32, 364, 121,1508, 169,1547, 68, 235, 145,2999, 41, 360,3027, 70, 63, 31, // 416 - 43, 259, 262,1383, 99, 533, 194, 66, 93, 846, 217, 192, 56, 106, 58, 565, // 432 - 280, 272, 311, 256, 146, 82, 308, 71, 100, 128, 214, 655, 110, 261, 104,1140, // 448 - 54, 51, 36, 87, 67,3070, 185,2618,2936,2020, 28,1066,2390,2059,5207,5208, // 464 -5209,5210,5211,5212,5213,5214,5215,5216,4615,5217,5218,5219,5220,5221,5222,5223, // 480 -5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,5235,5236,3514,5237,5238, // 496 -5239,5240,5241,5242,5243,5244,2297,2031,4616,4310,3692,5245,3071,5246,3598,5247, // 512 -4617,3231,3515,5248,4101,4311,4618,3808,4312,4102,5249,4103,4104,3599,5250,5251, // 528 -5252,5253,5254,5255,5256,5257,5258,5259,5260,5261,5262,5263,5264,5265,5266,5267, // 544 -5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5281,5282,5283, // 560 -5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,5294,5295,5296,5297,5298,5299, // 576 -5300,5301,5302,5303,5304,5305,5306,5307,5308,5309,5310,5311,5312,5313,5314,5315, // 592 -5316,5317,5318,5319,5320,5321,5322,5323,5324,5325,5326,5327,5328,5329,5330,5331, // 608 -5332,5333,5334,5335,5336,5337,5338,5339,5340,5341,5342,5343,5344,5345,5346,5347, // 624 
-5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,5359,5360,5361,5362,5363, // 640 -5364,5365,5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379, // 656 -5380,5381, 363, 642,2787,2878,2788,2789,2316,3232,2317,3434,2011, 165,1942,3930, // 672 -3931,3932,3933,5382,4619,5383,4620,5384,5385,5386,5387,5388,5389,5390,5391,5392, // 688 -5393,5394,5395,5396,5397,5398,5399,5400,5401,5402,5403,5404,5405,5406,5407,5408, // 704 -5409,5410,5411,5412,5413,5414,5415,5416,5417,5418,5419,5420,5421,5422,5423,5424, // 720 -5425,5426,5427,5428,5429,5430,5431,5432,5433,5434,5435,5436,5437,5438,5439,5440, // 736 -5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456, // 752 -5457,5458,5459,5460,5461,5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472, // 768 -5473,5474,5475,5476,5477,5478,5479,5480,5481,5482,5483,5484,5485,5486,5487,5488, // 784 -5489,5490,5491,5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504, // 800 -5505,5506,5507,5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520, // 816 -5521,5522,5523,5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536, // 832 -5537,5538,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548,5549,5550,5551,5552, // 848 -5553,5554,5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568, // 864 -5569,5570,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584, // 880 -5585,5586,5587,5588,5589,5590,5591,5592,5593,5594,5595,5596,5597,5598,5599,5600, // 896 -5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,5615,5616, // 912 -5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,5632, // 928 -5633,5634,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,5647,5648, // 944 -5649,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,5661,5662,5663,5664, // 960 -5665,5666,5667,5668,5669,5670,5671,5672,5673,5674,5675,5676,5677,5678,5679,5680, // 976 
-5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692,5693,5694,5695,5696, // 992 -5697,5698,5699,5700,5701,5702,5703,5704,5705,5706,5707,5708,5709,5710,5711,5712, // 1008 -5713,5714,5715,5716,5717,5718,5719,5720,5721,5722,5723,5724,5725,5726,5727,5728, // 1024 -5729,5730,5731,5732,5733,5734,5735,5736,5737,5738,5739,5740,5741,5742,5743,5744, // 1040 -5745,5746,5747,5748,5749,5750,5751,5752,5753,5754,5755,5756,5757,5758,5759,5760, // 1056 -5761,5762,5763,5764,5765,5766,5767,5768,5769,5770,5771,5772,5773,5774,5775,5776, // 1072 -5777,5778,5779,5780,5781,5782,5783,5784,5785,5786,5787,5788,5789,5790,5791,5792, // 1088 -5793,5794,5795,5796,5797,5798,5799,5800,5801,5802,5803,5804,5805,5806,5807,5808, // 1104 -5809,5810,5811,5812,5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824, // 1120 -5825,5826,5827,5828,5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840, // 1136 -5841,5842,5843,5844,5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856, // 1152 -5857,5858,5859,5860,5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872, // 1168 -5873,5874,5875,5876,5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888, // 1184 -5889,5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904, // 1200 -5905,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, // 1216 -5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936, // 1232 -5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952, // 1248 -5953,5954,5955,5956,5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968, // 1264 -5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984, // 1280 -5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000, // 1296 -6001,6002,6003,6004,6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016, // 1312 -6017,6018,6019,6020,6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032, // 1328 
-6033,6034,6035,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048, // 1344 -6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064, // 1360 -6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080, // 1376 -6081,6082,6083,6084,6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096, // 1392 -6097,6098,6099,6100,6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112, // 1408 -6113,6114,2044,2060,4621, 997,1235, 473,1186,4622, 920,3378,6115,6116, 379,1108, // 1424 -4313,2657,2735,3934,6117,3809, 636,3233, 573,1026,3693,3435,2974,3300,2298,4105, // 1440 - 854,2937,2463, 393,2581,2417, 539, 752,1280,2750,2480, 140,1161, 440, 708,1569, // 1456 - 665,2497,1746,1291,1523,3000, 164,1603, 847,1331, 537,1997, 486, 508,1693,2418, // 1472 -1970,2227, 878,1220, 299,1030, 969, 652,2751, 624,1137,3301,2619, 65,3302,2045, // 1488 -1761,1859,3120,1930,3694,3516, 663,1767, 852, 835,3695, 269, 767,2826,2339,1305, // 1504 - 896,1150, 770,1616,6118, 506,1502,2075,1012,2519, 775,2520,2975,2340,2938,4314, // 1520 -3028,2086,1224,1943,2286,6119,3072,4315,2240,1273,1987,3935,1557, 175, 597, 985, // 1536 -3517,2419,2521,1416,3029, 585, 938,1931,1007,1052,1932,1685,6120,3379,4316,4623, // 1552 - 804, 599,3121,1333,2128,2539,1159,1554,2032,3810, 687,2033,2904, 952, 675,1467, // 1568 -3436,6121,2241,1096,1786,2440,1543,1924, 980,1813,2228, 781,2692,1879, 728,1918, // 1584 -3696,4624, 548,1950,4625,1809,1088,1356,3303,2522,1944, 502, 972, 373, 513,2827, // 1600 - 586,2377,2391,1003,1976,1631,6122,2464,1084, 648,1776,4626,2141, 324, 962,2012, // 1616 -2177,2076,1384, 742,2178,1448,1173,1810, 222, 102, 301, 445, 125,2420, 662,2498, // 1632 - 277, 200,1476,1165,1068, 224,2562,1378,1446, 450,1880, 659, 791, 582,4627,2939, // 1648 -3936,1516,1274, 555,2099,3697,1020,1389,1526,3380,1762,1723,1787,2229, 412,2114, // 1664 -1900,2392,3518, 512,2597, 427,1925,2341,3122,1653,1686,2465,2499, 697, 330, 273, // 1680 - 380,2162, 951, 
832, 780, 991,1301,3073, 965,2270,3519, 668,2523,2636,1286, 535, // 1696 -1407, 518, 671, 957,2658,2378, 267, 611,2197,3030,6123, 248,2299, 967,1799,2356, // 1712 - 850,1418,3437,1876,1256,1480,2828,1718,6124,6125,1755,1664,2405,6126,4628,2879, // 1728 -2829, 499,2179, 676,4629, 557,2329,2214,2090, 325,3234, 464, 811,3001, 992,2342, // 1744 -2481,1232,1469, 303,2242, 466,1070,2163, 603,1777,2091,4630,2752,4631,2714, 322, // 1760 -2659,1964,1768, 481,2188,1463,2330,2857,3600,2092,3031,2421,4632,2318,2070,1849, // 1776 -2598,4633,1302,2254,1668,1701,2422,3811,2905,3032,3123,2046,4106,1763,1694,4634, // 1792 -1604, 943,1724,1454, 917, 868,2215,1169,2940, 552,1145,1800,1228,1823,1955, 316, // 1808 -1080,2510, 361,1807,2830,4107,2660,3381,1346,1423,1134,4108,6127, 541,1263,1229, // 1824 -1148,2540, 545, 465,1833,2880,3438,1901,3074,2482, 816,3937, 713,1788,2500, 122, // 1840 -1575, 195,1451,2501,1111,6128, 859, 374,1225,2243,2483,4317, 390,1033,3439,3075, // 1856 -2524,1687, 266, 793,1440,2599, 946, 779, 802, 507, 897,1081, 528,2189,1292, 711, // 1872 -1866,1725,1167,1640, 753, 398,2661,1053, 246, 348,4318, 137,1024,3440,1600,2077, // 1888 -2129, 825,4319, 698, 238, 521, 187,2300,1157,2423,1641,1605,1464,1610,1097,2541, // 1904 -1260,1436, 759,2255,1814,2150, 705,3235, 409,2563,3304, 561,3033,2005,2564, 726, // 1920 -1956,2343,3698,4109, 949,3812,3813,3520,1669, 653,1379,2525, 881,2198, 632,2256, // 1936 -1027, 778,1074, 733,1957, 514,1481,2466, 554,2180, 702,3938,1606,1017,1398,6129, // 1952 -1380,3521, 921, 993,1313, 594, 449,1489,1617,1166, 768,1426,1360, 495,1794,3601, // 1968 -1177,3602,1170,4320,2344, 476, 425,3167,4635,3168,1424, 401,2662,1171,3382,1998, // 1984 -1089,4110, 477,3169, 474,6130,1909, 596,2831,1842, 494, 693,1051,1028,1207,3076, // 2000 - 606,2115, 727,2790,1473,1115, 743,3522, 630, 805,1532,4321,2021, 366,1057, 838, // 2016 - 684,1114,2142,4322,2050,1492,1892,1808,2271,3814,2424,1971,1447,1373,3305,1090, // 2032 -1536,3939,3523,3306,1455,2199, 336, 
369,2331,1035, 584,2393, 902, 718,2600,6131, // 2048 -2753, 463,2151,1149,1611,2467, 715,1308,3124,1268, 343,1413,3236,1517,1347,2663, // 2064 -2093,3940,2022,1131,1553,2100,2941,1427,3441,2942,1323,2484,6132,1980, 872,2368, // 2080 -2441,2943, 320,2369,2116,1082, 679,1933,3941,2791,3815, 625,1143,2023, 422,2200, // 2096 -3816,6133, 730,1695, 356,2257,1626,2301,2858,2637,1627,1778, 937, 883,2906,2693, // 2112 -3002,1769,1086, 400,1063,1325,3307,2792,4111,3077, 456,2345,1046, 747,6134,1524, // 2128 - 884,1094,3383,1474,2164,1059, 974,1688,2181,2258,1047, 345,1665,1187, 358, 875, // 2144 -3170, 305, 660,3524,2190,1334,1135,3171,1540,1649,2542,1527, 927, 968,2793, 885, // 2160 -1972,1850, 482, 500,2638,1218,1109,1085,2543,1654,2034, 876, 78,2287,1482,1277, // 2176 - 861,1675,1083,1779, 724,2754, 454, 397,1132,1612,2332, 893, 672,1237, 257,2259, // 2192 -2370, 135,3384, 337,2244, 547, 352, 340, 709,2485,1400, 788,1138,2511, 540, 772, // 2208 -1682,2260,2272,2544,2013,1843,1902,4636,1999,1562,2288,4637,2201,1403,1533, 407, // 2224 - 576,3308,1254,2071, 978,3385, 170, 136,1201,3125,2664,3172,2394, 213, 912, 873, // 2240 -3603,1713,2202, 699,3604,3699, 813,3442, 493, 531,1054, 468,2907,1483, 304, 281, // 2256 -4112,1726,1252,2094, 339,2319,2130,2639, 756,1563,2944, 748, 571,2976,1588,2425, // 2272 -2715,1851,1460,2426,1528,1392,1973,3237, 288,3309, 685,3386, 296, 892,2716,2216, // 2288 -1570,2245, 722,1747,2217, 905,3238,1103,6135,1893,1441,1965, 251,1805,2371,3700, // 2304 -2601,1919,1078, 75,2182,1509,1592,1270,2640,4638,2152,6136,3310,3817, 524, 706, // 2320 -1075, 292,3818,1756,2602, 317, 98,3173,3605,3525,1844,2218,3819,2502, 814, 567, // 2336 - 385,2908,1534,6137, 534,1642,3239, 797,6138,1670,1529, 953,4323, 188,1071, 538, // 2352 - 178, 729,3240,2109,1226,1374,2000,2357,2977, 731,2468,1116,2014,2051,6139,1261, // 2368 -1593, 803,2859,2736,3443, 556, 682, 823,1541,6140,1369,2289,1706,2794, 845, 462, // 2384 -2603,2665,1361, 387, 162,2358,1740, 
739,1770,1720,1304,1401,3241,1049, 627,1571, // 2400 -2427,3526,1877,3942,1852,1500, 431,1910,1503, 677, 297,2795, 286,1433,1038,1198, // 2416 -2290,1133,1596,4113,4639,2469,1510,1484,3943,6141,2442, 108, 712,4640,2372, 866, // 2432 -3701,2755,3242,1348, 834,1945,1408,3527,2395,3243,1811, 824, 994,1179,2110,1548, // 2448 -1453, 790,3003, 690,4324,4325,2832,2909,3820,1860,3821, 225,1748, 310, 346,1780, // 2464 -2470, 821,1993,2717,2796, 828, 877,3528,2860,2471,1702,2165,2910,2486,1789, 453, // 2480 - 359,2291,1676, 73,1164,1461,1127,3311, 421, 604, 314,1037, 589, 116,2487, 737, // 2496 - 837,1180, 111, 244, 735,6142,2261,1861,1362, 986, 523, 418, 581,2666,3822, 103, // 2512 - 855, 503,1414,1867,2488,1091, 657,1597, 979, 605,1316,4641,1021,2443,2078,2001, // 2528 -1209, 96, 587,2166,1032, 260,1072,2153, 173, 94, 226,3244, 819,2006,4642,4114, // 2544 -2203, 231,1744, 782, 97,2667, 786,3387, 887, 391, 442,2219,4326,1425,6143,2694, // 2560 - 633,1544,1202, 483,2015, 592,2052,1958,2472,1655, 419, 129,4327,3444,3312,1714, // 2576 -1257,3078,4328,1518,1098, 865,1310,1019,1885,1512,1734, 469,2444, 148, 773, 436, // 2592 -1815,1868,1128,1055,4329,1245,2756,3445,2154,1934,1039,4643, 579,1238, 932,2320, // 2608 - 353, 205, 801, 115,2428, 944,2321,1881, 399,2565,1211, 678, 766,3944, 335,2101, // 2624 -1459,1781,1402,3945,2737,2131,1010, 844, 981,1326,1013, 550,1816,1545,2620,1335, // 2640 -1008, 371,2881, 936,1419,1613,3529,1456,1395,2273,1834,2604,1317,2738,2503, 416, // 2656 -1643,4330, 806,1126, 229, 591,3946,1314,1981,1576,1837,1666, 347,1790, 977,3313, // 2672 - 764,2861,1853, 688,2429,1920,1462, 77, 595, 415,2002,3034, 798,1192,4115,6144, // 2688 -2978,4331,3035,2695,2582,2072,2566, 430,2430,1727, 842,1396,3947,3702, 613, 377, // 2704 - 278, 236,1417,3388,3314,3174, 757,1869, 107,3530,6145,1194, 623,2262, 207,1253, // 2720 -2167,3446,3948, 492,1117,1935, 536,1838,2757,1246,4332, 696,2095,2406,1393,1572, // 2736 -3175,1782, 583, 190, 253,1390,2230, 830,3126,3389, 
934,3245,1703,1749,2979,1870, // 2752 -2545,1656,2204, 869,2346,4116,3176,1817, 496,1764,4644, 942,1504, 404,1903,1122, // 2768 -1580,3606,2945,1022, 515, 372,1735, 955,2431,3036,6146,2797,1110,2302,2798, 617, // 2784 -6147, 441, 762,1771,3447,3607,3608,1904, 840,3037, 86, 939,1385, 572,1370,2445, // 2800 -1336, 114,3703, 898, 294, 203,3315, 703,1583,2274, 429, 961,4333,1854,1951,3390, // 2816 -2373,3704,4334,1318,1381, 966,1911,2322,1006,1155, 309, 989, 458,2718,1795,1372, // 2832 -1203, 252,1689,1363,3177, 517,1936, 168,1490, 562, 193,3823,1042,4117,1835, 551, // 2848 - 470,4645, 395, 489,3448,1871,1465,2583,2641, 417,1493, 279,1295, 511,1236,1119, // 2864 - 72,1231,1982,1812,3004, 871,1564, 984,3449,1667,2696,2096,4646,2347,2833,1673, // 2880 -3609, 695,3246,2668, 807,1183,4647, 890, 388,2333,1801,1457,2911,1765,1477,1031, // 2896 -3316,3317,1278,3391,2799,2292,2526, 163,3450,4335,2669,1404,1802,6148,2323,2407, // 2912 -1584,1728,1494,1824,1269, 298, 909,3318,1034,1632, 375, 776,1683,2061, 291, 210, // 2928 -1123, 809,1249,1002,2642,3038, 206,1011,2132, 144, 975, 882,1565, 342, 667, 754, // 2944 -1442,2143,1299,2303,2062, 447, 626,2205,1221,2739,2912,1144,1214,2206,2584, 760, // 2960 -1715, 614, 950,1281,2670,2621, 810, 577,1287,2546,4648, 242,2168, 250,2643, 691, // 2976 - 123,2644, 647, 313,1029, 689,1357,2946,1650, 216, 771,1339,1306, 808,2063, 549, // 2992 - 913,1371,2913,2914,6149,1466,1092,1174,1196,1311,2605,2396,1783,1796,3079, 406, // 3008 -2671,2117,3949,4649, 487,1825,2220,6150,2915, 448,2348,1073,6151,2397,1707, 130, // 3024 - 900,1598, 329, 176,1959,2527,1620,6152,2275,4336,3319,1983,2191,3705,3610,2155, // 3040 -3706,1912,1513,1614,6153,1988, 646, 392,2304,1589,3320,3039,1826,1239,1352,1340, // 3056 -2916, 505,2567,1709,1437,2408,2547, 906,6154,2672, 384,1458,1594,1100,1329, 710, // 3072 - 423,3531,2064,2231,2622,1989,2673,1087,1882, 333, 841,3005,1296,2882,2379, 580, // 3088 -1937,1827,1293,2585, 601, 574, 249,1772,4118,2079,1120, 645, 
901,1176,1690, 795, // 3104 -2207, 478,1434, 516,1190,1530, 761,2080, 930,1264, 355, 435,1552, 644,1791, 987, // 3120 - 220,1364,1163,1121,1538, 306,2169,1327,1222, 546,2645, 218, 241, 610,1704,3321, // 3136 -1984,1839,1966,2528, 451,6155,2586,3707,2568, 907,3178, 254,2947, 186,1845,4650, // 3152 - 745, 432,1757, 428,1633, 888,2246,2221,2489,3611,2118,1258,1265, 956,3127,1784, // 3168 -4337,2490, 319, 510, 119, 457,3612, 274,2035,2007,4651,1409,3128, 970,2758, 590, // 3184 -2800, 661,2247,4652,2008,3950,1420,1549,3080,3322,3951,1651,1375,2111, 485,2491, // 3200 -1429,1156,6156,2548,2183,1495, 831,1840,2529,2446, 501,1657, 307,1894,3247,1341, // 3216 - 666, 899,2156,1539,2549,1559, 886, 349,2208,3081,2305,1736,3824,2170,2759,1014, // 3232 -1913,1386, 542,1397,2948, 490, 368, 716, 362, 159, 282,2569,1129,1658,1288,1750, // 3248 -2674, 276, 649,2016, 751,1496, 658,1818,1284,1862,2209,2087,2512,3451, 622,2834, // 3264 - 376, 117,1060,2053,1208,1721,1101,1443, 247,1250,3179,1792,3952,2760,2398,3953, // 3280 -6157,2144,3708, 446,2432,1151,2570,3452,2447,2761,2835,1210,2448,3082, 424,2222, // 3296 -1251,2449,2119,2836, 504,1581,4338, 602, 817, 857,3825,2349,2306, 357,3826,1470, // 3312 -1883,2883, 255, 958, 929,2917,3248, 302,4653,1050,1271,1751,2307,1952,1430,2697, // 3328 -2719,2359, 354,3180, 777, 158,2036,4339,1659,4340,4654,2308,2949,2248,1146,2232, // 3344 -3532,2720,1696,2623,3827,6158,3129,1550,2698,1485,1297,1428, 637, 931,2721,2145, // 3360 - 914,2550,2587, 81,2450, 612, 827,2646,1242,4655,1118,2884, 472,1855,3181,3533, // 3376 -3534, 569,1353,2699,1244,1758,2588,4119,2009,2762,2171,3709,1312,1531,6159,1152, // 3392 -1938, 134,1830, 471,3710,2276,1112,1535,3323,3453,3535, 982,1337,2950, 488, 826, // 3408 - 674,1058,1628,4120,2017, 522,2399, 211, 568,1367,3454, 350, 293,1872,1139,3249, // 3424 -1399,1946,3006,1300,2360,3324, 588, 736,6160,2606, 744, 669,3536,3828,6161,1358, // 3440 - 199, 723, 848, 933, 851,1939,1505,1514,1338,1618,1831,4656,1634,3613, 443,2740, 
// 3456 -3829, 717,1947, 491,1914,6162,2551,1542,4121,1025,6163,1099,1223, 198,3040,2722, // 3472 - 370, 410,1905,2589, 998,1248,3182,2380, 519,1449,4122,1710, 947, 928,1153,4341, // 3488 -2277, 344,2624,1511, 615, 105, 161,1212,1076,1960,3130,2054,1926,1175,1906,2473, // 3504 - 414,1873,2801,6164,2309, 315,1319,3325, 318,2018,2146,2157, 963, 631, 223,4342, // 3520 -4343,2675, 479,3711,1197,2625,3712,2676,2361,6165,4344,4123,6166,2451,3183,1886, // 3536 -2184,1674,1330,1711,1635,1506, 799, 219,3250,3083,3954,1677,3713,3326,2081,3614, // 3552 -1652,2073,4657,1147,3041,1752, 643,1961, 147,1974,3955,6167,1716,2037, 918,3007, // 3568 -1994, 120,1537, 118, 609,3184,4345, 740,3455,1219, 332,1615,3830,6168,1621,2980, // 3584 -1582, 783, 212, 553,2350,3714,1349,2433,2082,4124, 889,6169,2310,1275,1410, 973, // 3600 - 166,1320,3456,1797,1215,3185,2885,1846,2590,2763,4658, 629, 822,3008, 763, 940, // 3616 -1990,2862, 439,2409,1566,1240,1622, 926,1282,1907,2764, 654,2210,1607, 327,1130, // 3632 -3956,1678,1623,6170,2434,2192, 686, 608,3831,3715, 903,3957,3042,6171,2741,1522, // 3648 -1915,1105,1555,2552,1359, 323,3251,4346,3457, 738,1354,2553,2311,2334,1828,2003, // 3664 -3832,1753,2351,1227,6172,1887,4125,1478,6173,2410,1874,1712,1847, 520,1204,2607, // 3680 - 264,4659, 836,2677,2102, 600,4660,3833,2278,3084,6174,4347,3615,1342, 640, 532, // 3696 - 543,2608,1888,2400,2591,1009,4348,1497, 341,1737,3616,2723,1394, 529,3252,1321, // 3712 - 983,4661,1515,2120, 971,2592, 924, 287,1662,3186,4349,2700,4350,1519, 908,1948, // 3728 -2452, 156, 796,1629,1486,2223,2055, 694,4126,1259,1036,3392,1213,2249,2742,1889, // 3744 -1230,3958,1015, 910, 408, 559,3617,4662, 746, 725, 935,4663,3959,3009,1289, 563, // 3760 - 867,4664,3960,1567,2981,2038,2626, 988,2263,2381,4351, 143,2374, 704,1895,6175, // 3776 -1188,3716,2088, 673,3085,2362,4352, 484,1608,1921,2765,2918, 215, 904,3618,3537, // 3792 - 894, 509, 976,3043,2701,3961,4353,2837,2982, 498,6176,6177,1102,3538,1332,3393, // 3808 
-1487,1636,1637, 233, 245,3962, 383, 650, 995,3044, 460,1520,1206,2352, 749,3327, // 3824 - 530, 700, 389,1438,1560,1773,3963,2264, 719,2951,2724,3834, 870,1832,1644,1000, // 3840 - 839,2474,3717, 197,1630,3394, 365,2886,3964,1285,2133, 734, 922, 818,1106, 732, // 3856 - 480,2083,1774,3458, 923,2279,1350, 221,3086, 85,2233,2234,3835,1585,3010,2147, // 3872 -1387,1705,2382,1619,2475, 133, 239,2802,1991,1016,2084,2383, 411,2838,1113, 651, // 3888 -1985,1160,3328, 990,1863,3087,1048,1276,2647, 265,2627,1599,3253,2056, 150, 638, // 3904 -2019, 656, 853, 326,1479, 680,1439,4354,1001,1759, 413,3459,3395,2492,1431, 459, // 3920 -4355,1125,3329,2265,1953,1450,2065,2863, 849, 351,2678,3131,3254,3255,1104,1577, // 3936 - 227,1351,1645,2453,2193,1421,2887, 812,2121, 634, 95,2435, 201,2312,4665,1646, // 3952 -1671,2743,1601,2554,2702,2648,2280,1315,1366,2089,3132,1573,3718,3965,1729,1189, // 3968 - 328,2679,1077,1940,1136, 558,1283, 964,1195, 621,2074,1199,1743,3460,3619,1896, // 3984 -1916,1890,3836,2952,1154,2112,1064, 862, 378,3011,2066,2113,2803,1568,2839,6178, // 4000 -3088,2919,1941,1660,2004,1992,2194, 142, 707,1590,1708,1624,1922,1023,1836,1233, // 4016 -1004,2313, 789, 741,3620,6179,1609,2411,1200,4127,3719,3720,4666,2057,3721, 593, // 4032 -2840, 367,2920,1878,6180,3461,1521, 628,1168, 692,2211,2649, 300, 720,2067,2571, // 4048 -2953,3396, 959,2504,3966,3539,3462,1977, 701,6181, 954,1043, 800, 681, 183,3722, // 4064 -1803,1730,3540,4128,2103, 815,2314, 174, 467, 230,2454,1093,2134, 755,3541,3397, // 4080 -1141,1162,6182,1738,2039, 270,3256,2513,1005,1647,2185,3837, 858,1679,1897,1719, // 4096 -2954,2324,1806, 402, 670, 167,4129,1498,2158,2104, 750,6183, 915, 189,1680,1551, // 4112 - 455,4356,1501,2455, 405,1095,2955, 338,1586,1266,1819, 570, 641,1324, 237,1556, // 4128 -2650,1388,3723,6184,1368,2384,1343,1978,3089,2436, 879,3724, 792,1191, 758,3012, // 4144 -1411,2135,1322,4357, 240,4667,1848,3725,1574,6185, 420,3045,1546,1391, 714,4358, // 4160 -1967, 941,1864, 863, 
664, 426, 560,1731,2680,1785,2864,1949,2363, 403,3330,1415, // 4176 -1279,2136,1697,2335, 204, 721,2097,3838, 90,6186,2085,2505, 191,3967, 124,2148, // 4192 -1376,1798,1178,1107,1898,1405, 860,4359,1243,1272,2375,2983,1558,2456,1638, 113, // 4208 -3621, 578,1923,2609, 880, 386,4130, 784,2186,2266,1422,2956,2172,1722, 497, 263, // 4224 -2514,1267,2412,2610, 177,2703,3542, 774,1927,1344, 616,1432,1595,1018, 172,4360, // 4240 -2325, 911,4361, 438,1468,3622, 794,3968,2024,2173,1681,1829,2957, 945, 895,3090, // 4256 - 575,2212,2476, 475,2401,2681, 785,2744,1745,2293,2555,1975,3133,2865, 394,4668, // 4272 -3839, 635,4131, 639, 202,1507,2195,2766,1345,1435,2572,3726,1908,1184,1181,2457, // 4288 -3727,3134,4362, 843,2611, 437, 916,4669, 234, 769,1884,3046,3047,3623, 833,6187, // 4304 -1639,2250,2402,1355,1185,2010,2047, 999, 525,1732,1290,1488,2612, 948,1578,3728, // 4320 -2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, // 4336 -1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, // 4352 -2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, // 4368 //last 512 - -/*************************************************************************************** - *Everything below is of no interest for detection purpose * - *************************************************************************************** - -2138,2122,3730,2888,1995,1820,1044,6190,6191,6192,6193,6194,6195,6196,6197,6198, // 4384 -6199,6200,6201,6202,6203,6204,6205,4670,6206,6207,6208,6209,6210,6211,6212,6213, // 4400 -6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,6224,6225,6226,6227,6228,6229, // 4416 -6230,6231,6232,6233,6234,6235,6236,6237,3187,6238,6239,3969,6240,6241,6242,6243, // 4432 -6244,4671,6245,6246,4672,6247,6248,4133,6249,6250,4364,6251,2923,2556,2613,4673, // 4448 -4365,3970,6252,6253,6254,6255,4674,6256,6257,6258,2768,2353,4366,4675,4676,3188, // 4464 
-4367,3463,6259,4134,4677,4678,6260,2267,6261,3842,3332,4368,3543,6262,6263,6264, // 4480 -3013,1954,1928,4135,4679,6265,6266,2478,3091,6267,4680,4369,6268,6269,1699,6270, // 4496 -3544,4136,4681,6271,4137,6272,4370,2804,6273,6274,2593,3971,3972,4682,6275,2236, // 4512 -4683,6276,6277,4684,6278,6279,4138,3973,4685,6280,6281,3258,6282,6283,6284,6285, // 4528 -3974,4686,2841,3975,6286,6287,3545,6288,6289,4139,4687,4140,6290,4141,6291,4142, // 4544 -6292,6293,3333,6294,6295,6296,4371,6297,3399,6298,6299,4372,3976,6300,6301,6302, // 4560 -4373,6303,6304,3843,3731,6305,4688,4374,6306,6307,3259,2294,6308,3732,2530,4143, // 4576 -6309,4689,6310,6311,6312,3048,6313,6314,4690,3733,2237,6315,6316,2282,3334,6317, // 4592 -6318,3844,6319,6320,4691,6321,3400,4692,6322,4693,6323,3049,6324,4375,6325,3977, // 4608 -6326,6327,6328,3546,6329,4694,3335,6330,4695,4696,6331,6332,6333,6334,4376,3978, // 4624 -6335,4697,3979,4144,6336,3980,4698,6337,6338,6339,6340,6341,4699,4700,4701,6342, // 4640 -6343,4702,6344,6345,4703,6346,6347,4704,6348,4705,4706,3135,6349,4707,6350,4708, // 4656 -6351,4377,6352,4709,3734,4145,6353,2506,4710,3189,6354,3050,4711,3981,6355,3547, // 4672 -3014,4146,4378,3735,2651,3845,3260,3136,2224,1986,6356,3401,6357,4712,2594,3627, // 4688 -3137,2573,3736,3982,4713,3628,4714,4715,2682,3629,4716,6358,3630,4379,3631,6359, // 4704 -6360,6361,3983,6362,6363,6364,6365,4147,3846,4717,6366,6367,3737,2842,6368,4718, // 4720 -2628,6369,3261,6370,2386,6371,6372,3738,3984,4719,3464,4720,3402,6373,2924,3336, // 4736 -4148,2866,6374,2805,3262,4380,2704,2069,2531,3138,2806,2984,6375,2769,6376,4721, // 4752 -4722,3403,6377,6378,3548,6379,6380,2705,3092,1979,4149,2629,3337,2889,6381,3338, // 4768 -4150,2557,3339,4381,6382,3190,3263,3739,6383,4151,4723,4152,2558,2574,3404,3191, // 4784 -6384,6385,4153,6386,4724,4382,6387,6388,4383,6389,6390,4154,6391,4725,3985,6392, // 4800 -3847,4155,6393,6394,6395,6396,6397,3465,6398,4384,6399,6400,6401,6402,6403,6404, // 4816 
-4156,6405,6406,6407,6408,2123,6409,6410,2326,3192,4726,6411,6412,6413,6414,4385, // 4832 -4157,6415,6416,4158,6417,3093,3848,6418,3986,6419,6420,3849,6421,6422,6423,4159, // 4848 -6424,6425,4160,6426,3740,6427,6428,6429,6430,3987,6431,4727,6432,2238,6433,6434, // 4864 -4386,3988,6435,6436,3632,6437,6438,2843,6439,6440,6441,6442,3633,6443,2958,6444, // 4880 -6445,3466,6446,2364,4387,3850,6447,4388,2959,3340,6448,3851,6449,4728,6450,6451, // 4896 -3264,4729,6452,3193,6453,4389,4390,2706,3341,4730,6454,3139,6455,3194,6456,3051, // 4912 -2124,3852,1602,4391,4161,3853,1158,3854,4162,3989,4392,3990,4731,4732,4393,2040, // 4928 -4163,4394,3265,6457,2807,3467,3855,6458,6459,6460,3991,3468,4733,4734,6461,3140, // 4944 -2960,6462,4735,6463,6464,6465,6466,4736,4737,4738,4739,6467,6468,4164,2403,3856, // 4960 -6469,6470,2770,2844,6471,4740,6472,6473,6474,6475,6476,6477,6478,3195,6479,4741, // 4976 -4395,6480,2867,6481,4742,2808,6482,2493,4165,6483,6484,6485,6486,2295,4743,6487, // 4992 -6488,6489,3634,6490,6491,6492,6493,6494,6495,6496,2985,4744,6497,6498,4745,6499, // 5008 -6500,2925,3141,4166,6501,6502,4746,6503,6504,4747,6505,6506,6507,2890,6508,6509, // 5024 -6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,3469,4167,6520,6521,6522,4748, // 5040 -4396,3741,4397,4749,4398,3342,2125,4750,6523,4751,4752,4753,3052,6524,2961,4168, // 5056 -6525,4754,6526,4755,4399,2926,4169,6527,3857,6528,4400,4170,6529,4171,6530,6531, // 5072 -2595,6532,6533,6534,6535,3635,6536,6537,6538,6539,6540,6541,6542,4756,6543,6544, // 5088 -6545,6546,6547,6548,4401,6549,6550,6551,6552,4402,3405,4757,4403,6553,6554,6555, // 5104 -4172,3742,6556,6557,6558,3992,3636,6559,6560,3053,2726,6561,3549,4173,3054,4404, // 5120 -6562,6563,3993,4405,3266,3550,2809,4406,6564,6565,6566,4758,4759,6567,3743,6568, // 5136 -4760,3744,4761,3470,6569,6570,6571,4407,6572,3745,4174,6573,4175,2810,4176,3196, // 5152 -4762,6574,4177,6575,6576,2494,2891,3551,6577,6578,3471,6579,4408,6580,3015,3197, // 5168 
-6581,3343,2532,3994,3858,6582,3094,3406,4409,6583,2892,4178,4763,4410,3016,4411, // 5184 -6584,3995,3142,3017,2683,6585,4179,6586,6587,4764,4412,6588,6589,4413,6590,2986, // 5200 -6591,2962,3552,6592,2963,3472,6593,6594,4180,4765,6595,6596,2225,3267,4414,6597, // 5216 -3407,3637,4766,6598,6599,3198,6600,4415,6601,3859,3199,6602,3473,4767,2811,4416, // 5232 -1856,3268,3200,2575,3996,3997,3201,4417,6603,3095,2927,6604,3143,6605,2268,6606, // 5248 -3998,3860,3096,2771,6607,6608,3638,2495,4768,6609,3861,6610,3269,2745,4769,4181, // 5264 -3553,6611,2845,3270,6612,6613,6614,3862,6615,6616,4770,4771,6617,3474,3999,4418, // 5280 -4419,6618,3639,3344,6619,4772,4182,6620,2126,6621,6622,6623,4420,4773,6624,3018, // 5296 -6625,4774,3554,6626,4183,2025,3746,6627,4184,2707,6628,4421,4422,3097,1775,4185, // 5312 -3555,6629,6630,2868,6631,6632,4423,6633,6634,4424,2414,2533,2928,6635,4186,2387, // 5328 -6636,4775,6637,4187,6638,1891,4425,3202,3203,6639,6640,4776,6641,3345,6642,6643, // 5344 -3640,6644,3475,3346,3641,4000,6645,3144,6646,3098,2812,4188,3642,3204,6647,3863, // 5360 -3476,6648,3864,6649,4426,4001,6650,6651,6652,2576,6653,4189,4777,6654,6655,6656, // 5376 -2846,6657,3477,3205,4002,6658,4003,6659,3347,2252,6660,6661,6662,4778,6663,6664, // 5392 -6665,6666,6667,6668,6669,4779,4780,2048,6670,3478,3099,6671,3556,3747,4004,6672, // 5408 -6673,6674,3145,4005,3748,6675,6676,6677,6678,6679,3408,6680,6681,6682,6683,3206, // 5424 -3207,6684,6685,4781,4427,6686,4782,4783,4784,6687,6688,6689,4190,6690,6691,3479, // 5440 -6692,2746,6693,4428,6694,6695,6696,6697,6698,6699,4785,6700,6701,3208,2727,6702, // 5456 -3146,6703,6704,3409,2196,6705,4429,6706,6707,6708,2534,1996,6709,6710,6711,2747, // 5472 -6712,6713,6714,4786,3643,6715,4430,4431,6716,3557,6717,4432,4433,6718,6719,6720, // 5488 -6721,3749,6722,4006,4787,6723,6724,3644,4788,4434,6725,6726,4789,2772,6727,6728, // 5504 -6729,6730,6731,2708,3865,2813,4435,6732,6733,4790,4791,3480,6734,6735,6736,6737, // 5520 
-4436,3348,6738,3410,4007,6739,6740,4008,6741,6742,4792,3411,4191,6743,6744,6745, // 5536 -6746,6747,3866,6748,3750,6749,6750,6751,6752,6753,6754,6755,3867,6756,4009,6757, // 5552 -4793,4794,6758,2814,2987,6759,6760,6761,4437,6762,6763,6764,6765,3645,6766,6767, // 5568 -3481,4192,6768,3751,6769,6770,2174,6771,3868,3752,6772,6773,6774,4193,4795,4438, // 5584 -3558,4796,4439,6775,4797,6776,6777,4798,6778,4799,3559,4800,6779,6780,6781,3482, // 5600 -6782,2893,6783,6784,4194,4801,4010,6785,6786,4440,6787,4011,6788,6789,6790,6791, // 5616 -6792,6793,4802,6794,6795,6796,4012,6797,6798,6799,6800,3349,4803,3483,6801,4804, // 5632 -4195,6802,4013,6803,6804,4196,6805,4014,4015,6806,2847,3271,2848,6807,3484,6808, // 5648 -6809,6810,4441,6811,4442,4197,4443,3272,4805,6812,3412,4016,1579,6813,6814,4017, // 5664 -6815,3869,6816,2964,6817,4806,6818,6819,4018,3646,6820,6821,4807,4019,4020,6822, // 5680 -6823,3560,6824,6825,4021,4444,6826,4198,6827,6828,4445,6829,6830,4199,4808,6831, // 5696 -6832,6833,3870,3019,2458,6834,3753,3413,3350,6835,4809,3871,4810,3561,4446,6836, // 5712 -6837,4447,4811,4812,6838,2459,4448,6839,4449,6840,6841,4022,3872,6842,4813,4814, // 5728 -6843,6844,4815,4200,4201,4202,6845,4023,6846,6847,4450,3562,3873,6848,6849,4816, // 5744 -4817,6850,4451,4818,2139,6851,3563,6852,6853,3351,6854,6855,3352,4024,2709,3414, // 5760 -4203,4452,6856,4204,6857,6858,3874,3875,6859,6860,4819,6861,6862,6863,6864,4453, // 5776 -3647,6865,6866,4820,6867,6868,6869,6870,4454,6871,2869,6872,6873,4821,6874,3754, // 5792 -6875,4822,4205,6876,6877,6878,3648,4206,4455,6879,4823,6880,4824,3876,6881,3055, // 5808 -4207,6882,3415,6883,6884,6885,4208,4209,6886,4210,3353,6887,3354,3564,3209,3485, // 5824 -2652,6888,2728,6889,3210,3755,6890,4025,4456,6891,4825,6892,6893,6894,6895,4211, // 5840 -6896,6897,6898,4826,6899,6900,4212,6901,4827,6902,2773,3565,6903,4828,6904,6905, // 5856 -6906,6907,3649,3650,6908,2849,3566,6909,3567,3100,6910,6911,6912,6913,6914,6915, // 5872 
-4026,6916,3355,4829,3056,4457,3756,6917,3651,6918,4213,3652,2870,6919,4458,6920, // 5888 -2438,6921,6922,3757,2774,4830,6923,3356,4831,4832,6924,4833,4459,3653,2507,6925, // 5904 -4834,2535,6926,6927,3273,4027,3147,6928,3568,6929,6930,6931,4460,6932,3877,4461, // 5920 -2729,3654,6933,6934,6935,6936,2175,4835,2630,4214,4028,4462,4836,4215,6937,3148, // 5936 -4216,4463,4837,4838,4217,6938,6939,2850,4839,6940,4464,6941,6942,6943,4840,6944, // 5952 -4218,3274,4465,6945,6946,2710,6947,4841,4466,6948,6949,2894,6950,6951,4842,6952, // 5968 -4219,3057,2871,6953,6954,6955,6956,4467,6957,2711,6958,6959,6960,3275,3101,4843, // 5984 -6961,3357,3569,6962,4844,6963,6964,4468,4845,3570,6965,3102,4846,3758,6966,4847, // 6000 -3878,4848,4849,4029,6967,2929,3879,4850,4851,6968,6969,1733,6970,4220,6971,6972, // 6016 -6973,6974,6975,6976,4852,6977,6978,6979,6980,6981,6982,3759,6983,6984,6985,3486, // 6032 -3487,6986,3488,3416,6987,6988,6989,6990,6991,6992,6993,6994,6995,6996,6997,4853, // 6048 -6998,6999,4030,7000,7001,3211,7002,7003,4221,7004,7005,3571,4031,7006,3572,7007, // 6064 -2614,4854,2577,7008,7009,2965,3655,3656,4855,2775,3489,3880,4222,4856,3881,4032, // 6080 -3882,3657,2730,3490,4857,7010,3149,7011,4469,4858,2496,3491,4859,2283,7012,7013, // 6096 -7014,2365,4860,4470,7015,7016,3760,7017,7018,4223,1917,7019,7020,7021,4471,7022, // 6112 -2776,4472,7023,7024,7025,7026,4033,7027,3573,4224,4861,4034,4862,7028,7029,1929, // 6128 -3883,4035,7030,4473,3058,7031,2536,3761,3884,7032,4036,7033,2966,2895,1968,4474, // 6144 -3276,4225,3417,3492,4226,2105,7034,7035,1754,2596,3762,4227,4863,4475,3763,4864, // 6160 -3764,2615,2777,3103,3765,3658,3418,4865,2296,3766,2815,7036,7037,7038,3574,2872, // 6176 -3277,4476,7039,4037,4477,7040,7041,4038,7042,7043,7044,7045,7046,7047,2537,7048, // 6192 -7049,7050,7051,7052,7053,7054,4478,7055,7056,3767,3659,4228,3575,7057,7058,4229, // 6208 -7059,7060,7061,3660,7062,3212,7063,3885,4039,2460,7064,7065,7066,7067,7068,7069, // 6224 
-7070,7071,7072,7073,7074,4866,3768,4867,7075,7076,7077,7078,4868,3358,3278,2653, // 6240 -7079,7080,4479,3886,7081,7082,4869,7083,7084,7085,7086,7087,7088,2538,7089,7090, // 6256 -7091,4040,3150,3769,4870,4041,2896,3359,4230,2930,7092,3279,7093,2967,4480,3213, // 6272 -4481,3661,7094,7095,7096,7097,7098,7099,7100,7101,7102,2461,3770,7103,7104,4231, // 6288 -3151,7105,7106,7107,4042,3662,7108,7109,4871,3663,4872,4043,3059,7110,7111,7112, // 6304 -3493,2988,7113,4873,7114,7115,7116,3771,4874,7117,7118,4232,4875,7119,3576,2336, // 6320 -4876,7120,4233,3419,4044,4877,4878,4482,4483,4879,4484,4234,7121,3772,4880,1045, // 6336 -3280,3664,4881,4882,7122,7123,7124,7125,4883,7126,2778,7127,4485,4486,7128,4884, // 6352 -3214,3887,7129,7130,3215,7131,4885,4045,7132,7133,4046,7134,7135,7136,7137,7138, // 6368 -7139,7140,7141,7142,7143,4235,7144,4886,7145,7146,7147,4887,7148,7149,7150,4487, // 6384 -4047,4488,7151,7152,4888,4048,2989,3888,7153,3665,7154,4049,7155,7156,7157,7158, // 6400 -7159,7160,2931,4889,4890,4489,7161,2631,3889,4236,2779,7162,7163,4891,7164,3060, // 6416 -7165,1672,4892,7166,4893,4237,3281,4894,7167,7168,3666,7169,3494,7170,7171,4050, // 6432 -7172,7173,3104,3360,3420,4490,4051,2684,4052,7174,4053,7175,7176,7177,2253,4054, // 6448 -7178,7179,4895,7180,3152,3890,3153,4491,3216,7181,7182,7183,2968,4238,4492,4055, // 6464 -7184,2990,7185,2479,7186,7187,4493,7188,7189,7190,7191,7192,4896,7193,4897,2969, // 6480 -4494,4898,7194,3495,7195,7196,4899,4495,7197,3105,2731,7198,4900,7199,7200,7201, // 6496 -4056,7202,3361,7203,7204,4496,4901,4902,7205,4497,7206,7207,2315,4903,7208,4904, // 6512 -7209,4905,2851,7210,7211,3577,7212,3578,4906,7213,4057,3667,4907,7214,4058,2354, // 6528 -3891,2376,3217,3773,7215,7216,7217,7218,7219,4498,7220,4908,3282,2685,7221,3496, // 6544 -4909,2632,3154,4910,7222,2337,7223,4911,7224,7225,7226,4912,4913,3283,4239,4499, // 6560 -7227,2816,7228,7229,7230,7231,7232,7233,7234,4914,4500,4501,7235,7236,7237,2686, // 6576 
-7238,4915,7239,2897,4502,7240,4503,7241,2516,7242,4504,3362,3218,7243,7244,7245, // 6592 -4916,7246,7247,4505,3363,7248,7249,7250,7251,3774,4506,7252,7253,4917,7254,7255, // 6608 -3284,2991,4918,4919,3219,3892,4920,3106,3497,4921,7256,7257,7258,4922,7259,4923, // 6624 -3364,4507,4508,4059,7260,4240,3498,7261,7262,4924,7263,2992,3893,4060,3220,7264, // 6640 -7265,7266,7267,7268,7269,4509,3775,7270,2817,7271,4061,4925,4510,3776,7272,4241, // 6656 -4511,3285,7273,7274,3499,7275,7276,7277,4062,4512,4926,7278,3107,3894,7279,7280, // 6672 -4927,7281,4513,7282,7283,3668,7284,7285,4242,4514,4243,7286,2058,4515,4928,4929, // 6688 -4516,7287,3286,4244,7288,4517,7289,7290,7291,3669,7292,7293,4930,4931,4932,2355, // 6704 -4933,7294,2633,4518,7295,4245,7296,7297,4519,7298,7299,4520,4521,4934,7300,4246, // 6720 -4522,7301,7302,7303,3579,7304,4247,4935,7305,4936,7306,7307,7308,7309,3777,7310, // 6736 -4523,7311,7312,7313,4248,3580,7314,4524,3778,4249,7315,3581,7316,3287,7317,3221, // 6752 -7318,4937,7319,7320,7321,7322,7323,7324,4938,4939,7325,4525,7326,7327,7328,4063, // 6768 -7329,7330,4940,7331,7332,4941,7333,4526,7334,3500,2780,1741,4942,2026,1742,7335, // 6784 -7336,3582,4527,2388,7337,7338,7339,4528,7340,4250,4943,7341,7342,7343,4944,7344, // 6800 -7345,7346,3020,7347,4945,7348,7349,7350,7351,3895,7352,3896,4064,3897,7353,7354, // 6816 -7355,4251,7356,7357,3898,7358,3779,7359,3780,3288,7360,7361,4529,7362,4946,4530, // 6832 -2027,7363,3899,4531,4947,3222,3583,7364,4948,7365,7366,7367,7368,4949,3501,4950, // 6848 -3781,4951,4532,7369,2517,4952,4252,4953,3155,7370,4954,4955,4253,2518,4533,7371, // 6864 -7372,2712,4254,7373,7374,7375,3670,4956,3671,7376,2389,3502,4065,7377,2338,7378, // 6880 -7379,7380,7381,3061,7382,4957,7383,7384,7385,7386,4958,4534,7387,7388,2993,7389, // 6896 -3062,7390,4959,7391,7392,7393,4960,3108,4961,7394,4535,7395,4962,3421,4536,7396, // 6912 -4963,7397,4964,1857,7398,4965,7399,7400,2176,3584,4966,7401,7402,3422,4537,3900, // 6928 
-3585,7403,3782,7404,2852,7405,7406,7407,4538,3783,2654,3423,4967,4539,7408,3784, // 6944 -3586,2853,4540,4541,7409,3901,7410,3902,7411,7412,3785,3109,2327,3903,7413,7414, // 6960 -2970,4066,2932,7415,7416,7417,3904,3672,3424,7418,4542,4543,4544,7419,4968,7420, // 6976 -7421,4255,7422,7423,7424,7425,7426,4067,7427,3673,3365,4545,7428,3110,2559,3674, // 6992 -7429,7430,3156,7431,7432,3503,7433,3425,4546,7434,3063,2873,7435,3223,4969,4547, // 7008 -4548,2898,4256,4068,7436,4069,3587,3786,2933,3787,4257,4970,4971,3788,7437,4972, // 7024 -3064,7438,4549,7439,7440,7441,7442,7443,4973,3905,7444,2874,7445,7446,7447,7448, // 7040 -3021,7449,4550,3906,3588,4974,7450,7451,3789,3675,7452,2578,7453,4070,7454,7455, // 7056 -7456,4258,3676,7457,4975,7458,4976,4259,3790,3504,2634,4977,3677,4551,4260,7459, // 7072 -7460,7461,7462,3907,4261,4978,7463,7464,7465,7466,4979,4980,7467,7468,2213,4262, // 7088 -7469,7470,7471,3678,4981,7472,2439,7473,4263,3224,3289,7474,3908,2415,4982,7475, // 7104 -4264,7476,4983,2655,7477,7478,2732,4552,2854,2875,7479,7480,4265,7481,4553,4984, // 7120 -7482,7483,4266,7484,3679,3366,3680,2818,2781,2782,3367,3589,4554,3065,7485,4071, // 7136 -2899,7486,7487,3157,2462,4072,4555,4073,4985,4986,3111,4267,2687,3368,4556,4074, // 7152 -3791,4268,7488,3909,2783,7489,2656,1962,3158,4557,4987,1963,3159,3160,7490,3112, // 7168 -4988,4989,3022,4990,4991,3792,2855,7491,7492,2971,4558,7493,7494,4992,7495,7496, // 7184 -7497,7498,4993,7499,3426,4559,4994,7500,3681,4560,4269,4270,3910,7501,4075,4995, // 7200 -4271,7502,7503,4076,7504,4996,7505,3225,4997,4272,4077,2819,3023,7506,7507,2733, // 7216 -4561,7508,4562,7509,3369,3793,7510,3590,2508,7511,7512,4273,3113,2994,2616,7513, // 7232 -7514,7515,7516,7517,7518,2820,3911,4078,2748,7519,7520,4563,4998,7521,7522,7523, // 7248 -7524,4999,4274,7525,4564,3682,2239,4079,4565,7526,7527,7528,7529,5000,7530,7531, // 7264 -5001,4275,3794,7532,7533,7534,3066,5002,4566,3161,7535,7536,4080,7537,3162,7538, // 7280 
-7539,4567,7540,7541,7542,7543,7544,7545,5003,7546,4568,7547,7548,7549,7550,7551, // 7296 -7552,7553,7554,7555,7556,5004,7557,7558,7559,5005,7560,3795,7561,4569,7562,7563, // 7312 -7564,2821,3796,4276,4277,4081,7565,2876,7566,5006,7567,7568,2900,7569,3797,3912, // 7328 -7570,7571,7572,4278,7573,7574,7575,5007,7576,7577,5008,7578,7579,4279,2934,7580, // 7344 -7581,5009,7582,4570,7583,4280,7584,7585,7586,4571,4572,3913,7587,4573,3505,7588, // 7360 -5010,7589,7590,7591,7592,3798,4574,7593,7594,5011,7595,4281,7596,7597,7598,4282, // 7376 -5012,7599,7600,5013,3163,7601,5014,7602,3914,7603,7604,2734,4575,4576,4577,7605, // 7392 -7606,7607,7608,7609,3506,5015,4578,7610,4082,7611,2822,2901,2579,3683,3024,4579, // 7408 -3507,7612,4580,7613,3226,3799,5016,7614,7615,7616,7617,7618,7619,7620,2995,3290, // 7424 -7621,4083,7622,5017,7623,7624,7625,7626,7627,4581,3915,7628,3291,7629,5018,7630, // 7440 -7631,7632,7633,4084,7634,7635,3427,3800,7636,7637,4582,7638,5019,4583,5020,7639, // 7456 -3916,7640,3801,5021,4584,4283,7641,7642,3428,3591,2269,7643,2617,7644,4585,3592, // 7472 -7645,4586,2902,7646,7647,3227,5022,7648,4587,7649,4284,7650,7651,7652,4588,2284, // 7488 -7653,5023,7654,7655,7656,4589,5024,3802,7657,7658,5025,3508,4590,7659,7660,7661, // 7504 -1969,5026,7662,7663,3684,1821,2688,7664,2028,2509,4285,7665,2823,1841,7666,2689, // 7520 -3114,7667,3917,4085,2160,5027,5028,2972,7668,5029,7669,7670,7671,3593,4086,7672, // 7536 -4591,4087,5030,3803,7673,7674,7675,7676,7677,7678,7679,4286,2366,4592,4593,3067, // 7552 -2328,7680,7681,4594,3594,3918,2029,4287,7682,5031,3919,3370,4288,4595,2856,7683, // 7568 -3509,7684,7685,5032,5033,7686,7687,3804,2784,7688,7689,7690,7691,3371,7692,7693, // 7584 -2877,5034,7694,7695,3920,4289,4088,7696,7697,7698,5035,7699,5036,4290,5037,5038, // 7600 -5039,7700,7701,7702,5040,5041,3228,7703,1760,7704,5042,3229,4596,2106,4089,7705, // 7616 -4597,2824,5043,2107,3372,7706,4291,4090,5044,7707,4091,7708,5045,3025,3805,4598, // 7632 
-4292,4293,4294,3373,7709,4599,7710,5046,7711,7712,5047,5048,3806,7713,7714,7715, // 7648 -5049,7716,7717,7718,7719,4600,5050,7720,7721,7722,5051,7723,4295,3429,7724,7725, // 7664 -7726,7727,3921,7728,3292,5052,4092,7729,7730,7731,7732,7733,7734,7735,5053,5054, // 7680 -7736,7737,7738,7739,3922,3685,7740,7741,7742,7743,2635,5055,7744,5056,4601,7745, // 7696 -7746,2560,7747,7748,7749,7750,3923,7751,7752,7753,7754,7755,4296,2903,7756,7757, // 7712 -7758,7759,7760,3924,7761,5057,4297,7762,7763,5058,4298,7764,4093,7765,7766,5059, // 7728 -3925,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,3595,7777,4299,5060,4094, // 7744 -7778,3293,5061,7779,7780,4300,7781,7782,4602,7783,3596,7784,7785,3430,2367,7786, // 7760 -3164,5062,5063,4301,7787,7788,4095,5064,5065,7789,3374,3115,7790,7791,7792,7793, // 7776 -7794,7795,7796,3597,4603,7797,7798,3686,3116,3807,5066,7799,7800,5067,7801,7802, // 7792 -4604,4302,5068,4303,4096,7803,7804,3294,7805,7806,5069,4605,2690,7807,3026,7808, // 7808 -7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824, // 7824 -7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, // 7840 -7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,7856, // 7856 -7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,7872, // 7872 -7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,7888, // 7888 -7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,7904, // 7904 -7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,7920, // 7920 -7921,7922,7923,7924,3926,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, // 7936 -7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, // 7952 -7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, // 7968 -7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, // 7984 
-7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, // 8000 -8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, // 8016 -8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, // 8032 -8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, // 8048 -8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, // 8064 -8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, // 8080 -8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, // 8096 -8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, // 8112 -8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, // 8128 -8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, // 8144 -8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, // 8160 -8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, // 8176 -8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, // 8192 -8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, // 8208 -8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, // 8224 -8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, // 8240 -8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, // 8256 -8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271, // 8272 -****************************************************************************************/ - -}; - diff --git a/extensions/universalchardet/src/base/JpCntx.cpp b/extensions/universalchardet/src/base/JpCntx.cpp deleted file mode 100644 index 258dbd5dd128..000000000000 --- a/extensions/universalchardet/src/base/JpCntx.cpp +++ /dev/null @@ -1,600 +0,0 @@ -/* -*- Mode: C++; 
tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#include "nscore.h" -#include "JpCntx.h" - -// This is hiragana 2-char sequence table, the number in each cell represents -// its frequency category -const uint8_t jp2CharContext[83][83] = { - { - 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - }, - { - 2, 4, 0, 4, 0, 3, 0, 4, 0, 3, 4, 4, 4, 2, 4, 3, 3, 4, 3, 2, 3, - 3, 4, 2, 3, 3, 3, 2, 4, 1, 4, 3, 3, 1, 5, 4, 3, 4, 3, 4, 3, 5, - 3, 0, 3, 5, 4, 2, 0, 3, 1, 0, 3, 3, 0, 3, 3, 0, 1, 1, 0, 4, 3, - 0, 3, 3, 0, 4, 0, 2, 0, 3, 5, 5, 5, 5, 4, 0, 4, 1, 0, 3, 4, - }, - { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, - }, - { - 0, 4, 0, 5, 0, 5, 0, 4, 0, 4, 5, 4, 4, 3, 5, 3, 5, 1, 5, 3, 4, - 3, 4, 4, 3, 4, 3, 3, 4, 3, 5, 4, 4, 3, 5, 5, 3, 5, 5, 5, 3, 5, - 5, 3, 4, 5, 5, 3, 1, 3, 2, 0, 3, 4, 0, 4, 2, 0, 4, 2, 1, 5, 3, - 2, 3, 5, 0, 4, 0, 2, 0, 5, 4, 4, 5, 4, 5, 0, 4, 0, 0, 4, 4, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 3, 0, 4, 0, 3, 0, 3, 0, 4, 5, 4, 3, 3, 3, 3, 4, 3, 5, 4, 4, - 3, 5, 4, 4, 3, 4, 3, 4, 4, 4, 4, 5, 3, 4, 4, 3, 4, 5, 5, 4, 5, - 5, 1, 4, 5, 4, 3, 0, 3, 3, 1, 3, 3, 0, 4, 4, 0, 3, 3, 1, 5, 3, - 3, 
3, 5, 0, 4, 0, 3, 0, 4, 4, 3, 4, 3, 3, 0, 4, 1, 1, 3, 4, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 4, 0, 3, 0, 3, 0, 4, 0, 3, 4, 4, 3, 2, 2, 1, 2, 1, 3, 1, 3, - 3, 3, 3, 3, 4, 3, 1, 3, 3, 5, 3, 3, 0, 4, 3, 0, 5, 4, 3, 3, 5, - 4, 4, 3, 4, 4, 5, 0, 1, 2, 0, 1, 2, 0, 2, 2, 0, 1, 0, 0, 5, 2, - 2, 1, 4, 0, 3, 0, 1, 0, 4, 4, 3, 5, 4, 3, 0, 2, 1, 0, 4, 3, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 3, 0, 5, 0, 4, 0, 2, 1, 4, 4, 2, 4, 1, 4, 2, 4, 2, 4, 3, 3, - 3, 4, 3, 3, 3, 3, 1, 4, 2, 3, 3, 3, 1, 4, 4, 1, 1, 1, 4, 3, 3, - 2, 0, 2, 4, 3, 2, 0, 3, 3, 0, 3, 1, 1, 0, 0, 0, 3, 3, 0, 4, 2, - 2, 3, 4, 0, 4, 0, 3, 0, 4, 4, 5, 3, 4, 4, 0, 3, 0, 0, 1, 4, - }, - { - 1, 4, 0, 4, 0, 4, 0, 4, 0, 3, 5, 4, 4, 3, 4, 3, 5, 4, 3, 3, 4, - 3, 5, 4, 4, 4, 4, 3, 4, 2, 4, 3, 3, 1, 5, 4, 3, 2, 4, 5, 4, 5, - 5, 4, 4, 5, 4, 4, 0, 3, 2, 2, 3, 3, 0, 4, 3, 1, 3, 2, 1, 4, 3, - 3, 4, 5, 0, 3, 0, 2, 0, 4, 5, 5, 4, 5, 4, 0, 4, 0, 0, 5, 4, - }, - { - 0, 5, 0, 5, 0, 4, 0, 3, 0, 4, 4, 3, 4, 3, 3, 3, 4, 0, 4, 4, 4, - 3, 4, 3, 4, 3, 3, 1, 4, 2, 4, 3, 4, 0, 5, 4, 1, 4, 5, 4, 4, 5, - 3, 2, 4, 3, 4, 3, 2, 4, 1, 3, 3, 3, 2, 3, 2, 0, 4, 3, 3, 4, 3, - 3, 3, 4, 0, 4, 0, 3, 0, 4, 5, 4, 4, 4, 3, 0, 4, 1, 0, 1, 3, - }, - { - 0, 3, 1, 4, 0, 3, 0, 2, 0, 3, 4, 4, 3, 1, 4, 2, 3, 3, 4, 3, 4, - 3, 4, 3, 4, 4, 3, 2, 3, 1, 5, 4, 4, 1, 4, 4, 3, 5, 4, 4, 3, 5, - 5, 4, 3, 4, 4, 3, 1, 2, 3, 1, 2, 2, 0, 3, 2, 0, 3, 1, 0, 5, 3, - 3, 3, 4, 3, 3, 3, 3, 4, 4, 4, 4, 5, 4, 2, 0, 3, 3, 2, 4, 3, - }, - { - 0, 2, 0, 3, 0, 1, 0, 1, 0, 0, 3, 2, 0, 0, 2, 0, 1, 0, 2, 1, 3, - 3, 
3, 1, 2, 3, 1, 0, 1, 0, 4, 2, 1, 1, 3, 3, 0, 4, 3, 3, 1, 4, - 3, 3, 0, 3, 3, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 4, 1, - 0, 2, 3, 2, 2, 2, 1, 3, 3, 3, 4, 4, 3, 2, 0, 3, 1, 0, 3, 3, - }, - { - 0, 4, 0, 4, 0, 3, 0, 3, 0, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 3, 4, - 2, 4, 3, 4, 3, 3, 2, 4, 3, 4, 5, 4, 1, 4, 5, 3, 5, 4, 5, 3, 5, - 4, 0, 3, 5, 5, 3, 1, 3, 3, 2, 2, 3, 0, 3, 4, 1, 3, 3, 2, 4, 3, - 3, 3, 4, 0, 4, 0, 3, 0, 4, 5, 4, 4, 5, 3, 0, 4, 1, 0, 3, 4, - }, - { - 0, 2, 0, 3, 0, 3, 0, 0, 0, 2, 2, 2, 1, 0, 1, 0, 0, 0, 3, 0, 3, - 0, 3, 0, 1, 3, 1, 0, 3, 1, 3, 3, 3, 1, 3, 3, 3, 0, 1, 3, 1, 3, - 4, 0, 0, 3, 1, 1, 0, 3, 2, 0, 0, 0, 0, 1, 3, 0, 1, 0, 0, 3, 3, - 2, 0, 3, 0, 0, 0, 0, 0, 3, 4, 3, 4, 3, 3, 0, 3, 0, 0, 2, 3, - }, - { - 2, 3, 0, 3, 0, 2, 0, 1, 0, 3, 3, 4, 3, 1, 3, 1, 1, 1, 3, 1, 4, - 3, 4, 3, 3, 3, 0, 0, 3, 1, 5, 4, 3, 1, 4, 3, 2, 5, 5, 4, 4, 4, - 4, 3, 3, 4, 4, 4, 0, 2, 1, 1, 3, 2, 0, 1, 2, 0, 0, 1, 0, 4, 1, - 3, 3, 3, 0, 3, 0, 1, 0, 4, 4, 4, 5, 5, 3, 0, 2, 0, 0, 4, 4, - }, - { - 0, 2, 0, 1, 0, 3, 1, 3, 0, 2, 3, 3, 3, 0, 3, 1, 0, 0, 3, 0, 3, - 2, 3, 1, 3, 2, 1, 1, 0, 0, 4, 2, 1, 0, 2, 3, 1, 4, 3, 2, 0, 4, - 4, 3, 1, 3, 1, 3, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 1, - 1, 1, 2, 0, 3, 0, 0, 0, 3, 4, 2, 4, 3, 2, 0, 1, 0, 0, 3, 3, - }, - { - 0, 1, 0, 4, 0, 5, 0, 4, 0, 2, 4, 4, 2, 3, 3, 2, 3, 3, 5, 3, 3, - 3, 4, 3, 4, 2, 3, 0, 4, 3, 3, 3, 4, 1, 4, 3, 2, 1, 5, 5, 3, 4, - 5, 1, 3, 5, 4, 2, 0, 3, 3, 0, 1, 3, 0, 4, 2, 0, 1, 3, 1, 4, 3, - 3, 3, 3, 0, 3, 0, 1, 0, 3, 4, 4, 4, 5, 5, 0, 3, 0, 1, 4, 5, - }, - { - 0, 2, 0, 3, 0, 3, 0, 0, 0, 2, 3, 1, 3, 0, 4, 0, 1, 1, 3, 0, 3, - 4, 3, 2, 3, 1, 0, 3, 3, 2, 3, 1, 3, 0, 2, 3, 0, 2, 1, 4, 1, 2, - 2, 0, 0, 3, 3, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 3, 2, - 1, 3, 3, 0, 2, 0, 2, 0, 0, 3, 3, 1, 2, 4, 0, 3, 0, 2, 2, 3, - }, - { - 2, 4, 0, 5, 0, 4, 0, 4, 0, 2, 4, 4, 4, 3, 4, 3, 3, 3, 1, 2, 4, - 3, 4, 3, 4, 4, 5, 0, 3, 3, 3, 3, 2, 0, 4, 3, 1, 4, 3, 4, 1, 4, - 4, 3, 3, 4, 4, 3, 1, 2, 3, 0, 4, 2, 0, 4, 1, 0, 3, 3, 0, 4, 3, - 3, 3, 4, 
0, 4, 0, 2, 0, 3, 5, 3, 4, 5, 2, 0, 3, 0, 0, 4, 5, - }, - { - 0, 3, 0, 4, 0, 1, 0, 1, 0, 1, 3, 2, 2, 1, 3, 0, 3, 0, 2, 0, 2, - 0, 3, 0, 2, 0, 0, 0, 1, 0, 1, 1, 0, 0, 3, 1, 0, 0, 0, 4, 0, 3, - 1, 0, 2, 1, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 2, - 2, 3, 1, 0, 3, 0, 0, 0, 1, 4, 4, 4, 3, 0, 0, 4, 0, 0, 1, 4, - }, - { - 1, 4, 1, 5, 0, 3, 0, 3, 0, 4, 5, 4, 4, 3, 5, 3, 3, 4, 4, 3, 4, - 1, 3, 3, 3, 3, 2, 1, 4, 1, 5, 4, 3, 1, 4, 4, 3, 5, 4, 4, 3, 5, - 4, 3, 3, 4, 4, 4, 0, 3, 3, 1, 2, 3, 0, 3, 1, 0, 3, 3, 0, 5, 4, - 4, 4, 4, 4, 4, 3, 3, 5, 4, 4, 3, 3, 5, 4, 0, 3, 2, 0, 4, 4, - }, - { - 0, 2, 0, 3, 0, 1, 0, 0, 0, 1, 3, 3, 3, 2, 4, 1, 3, 0, 3, 1, 3, - 0, 2, 2, 1, 1, 0, 0, 2, 0, 4, 3, 1, 0, 4, 3, 0, 4, 4, 4, 1, 4, - 3, 1, 1, 3, 3, 1, 0, 2, 0, 0, 1, 3, 0, 0, 0, 0, 2, 0, 0, 4, 3, - 2, 4, 3, 5, 4, 3, 3, 3, 4, 3, 3, 4, 3, 3, 0, 2, 1, 0, 3, 3, - }, - { - 0, 2, 0, 4, 0, 3, 0, 2, 0, 2, 5, 5, 3, 4, 4, 4, 4, 1, 4, 3, 3, - 0, 4, 3, 4, 3, 1, 3, 3, 2, 4, 3, 0, 3, 4, 3, 0, 3, 4, 4, 2, 4, - 4, 0, 4, 5, 3, 3, 2, 2, 1, 1, 1, 2, 0, 1, 5, 0, 3, 3, 2, 4, 3, - 3, 3, 4, 0, 3, 0, 2, 0, 4, 4, 3, 5, 5, 0, 0, 3, 0, 2, 3, 3, - }, - { - 0, 3, 0, 4, 0, 3, 0, 1, 0, 3, 4, 3, 3, 1, 3, 3, 3, 0, 3, 1, 3, - 0, 4, 3, 3, 1, 1, 0, 3, 0, 3, 3, 0, 0, 4, 4, 0, 1, 5, 4, 3, 3, - 5, 0, 3, 3, 4, 3, 0, 2, 0, 1, 1, 1, 0, 1, 3, 0, 1, 2, 1, 3, 3, - 2, 3, 3, 0, 3, 0, 1, 0, 1, 3, 3, 4, 4, 1, 0, 1, 2, 2, 1, 3, - }, - { - 0, 1, 0, 4, 0, 4, 0, 3, 0, 1, 3, 3, 3, 2, 3, 1, 1, 0, 3, 0, 3, - 3, 4, 3, 2, 4, 2, 0, 1, 0, 4, 3, 2, 0, 4, 3, 0, 5, 3, 3, 2, 4, - 4, 4, 3, 3, 3, 4, 0, 1, 3, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 4, 2, - 3, 3, 3, 0, 3, 0, 0, 0, 4, 4, 4, 5, 3, 2, 0, 3, 3, 0, 3, 5, - }, - { - 0, 2, 0, 3, 0, 0, 0, 3, 0, 1, 3, 0, 2, 0, 0, 0, 1, 0, 3, 1, 1, - 3, 3, 0, 0, 3, 0, 0, 3, 0, 2, 3, 1, 0, 3, 1, 0, 3, 3, 2, 0, 4, - 2, 2, 0, 2, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, - 2, 0, 1, 0, 1, 0, 0, 0, 1, 3, 1, 2, 0, 0, 0, 1, 0, 0, 1, 4, - }, - { - 0, 3, 0, 3, 0, 5, 0, 1, 0, 2, 4, 3, 1, 3, 3, 2, 1, 1, 5, 2, 1, - 0, 5, 1, 
2, 0, 0, 0, 3, 3, 2, 2, 3, 2, 4, 3, 0, 0, 3, 3, 1, 3, - 3, 0, 2, 5, 3, 4, 0, 3, 3, 0, 1, 2, 0, 2, 2, 0, 3, 2, 0, 2, 2, - 3, 3, 3, 0, 2, 0, 1, 0, 3, 4, 4, 2, 5, 4, 0, 3, 0, 0, 3, 5, - }, - { - 0, 3, 0, 3, 0, 3, 0, 1, 0, 3, 3, 3, 3, 0, 3, 0, 2, 0, 2, 1, 1, - 0, 2, 0, 1, 0, 0, 0, 2, 1, 0, 0, 1, 0, 3, 2, 0, 0, 3, 3, 1, 2, - 3, 1, 0, 3, 3, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 3, - 1, 2, 3, 0, 3, 0, 1, 0, 3, 2, 1, 0, 4, 3, 0, 1, 1, 0, 3, 3, - }, - { - 0, 4, 0, 5, 0, 3, 0, 3, 0, 4, 5, 5, 4, 3, 5, 3, 4, 3, 5, 3, 3, - 2, 5, 3, 4, 4, 4, 3, 4, 3, 4, 5, 5, 3, 4, 4, 3, 4, 4, 5, 4, 4, - 4, 3, 4, 5, 5, 4, 2, 3, 4, 2, 3, 4, 0, 3, 3, 1, 4, 3, 2, 4, 3, - 3, 5, 5, 0, 3, 0, 3, 0, 5, 5, 5, 5, 4, 4, 0, 4, 0, 1, 4, 4, - }, - { - 0, 4, 0, 4, 0, 3, 0, 3, 0, 3, 5, 4, 4, 2, 3, 2, 5, 1, 3, 2, 5, - 1, 4, 2, 3, 2, 3, 3, 4, 3, 3, 3, 3, 2, 5, 4, 1, 3, 3, 5, 3, 4, - 4, 0, 4, 4, 3, 1, 1, 3, 1, 0, 2, 3, 0, 2, 3, 0, 3, 0, 0, 4, 3, - 1, 3, 4, 0, 3, 0, 2, 0, 4, 4, 4, 3, 4, 5, 0, 4, 0, 0, 3, 4, - }, - { - 0, 3, 0, 3, 0, 3, 1, 2, 0, 3, 4, 4, 3, 3, 3, 0, 2, 2, 4, 3, 3, - 1, 3, 3, 3, 1, 1, 0, 3, 1, 4, 3, 2, 3, 4, 4, 2, 4, 4, 4, 3, 4, - 4, 3, 2, 4, 4, 3, 1, 3, 3, 1, 3, 3, 0, 4, 1, 0, 2, 2, 1, 4, 3, - 2, 3, 3, 5, 4, 3, 3, 5, 4, 4, 3, 3, 0, 4, 0, 3, 2, 2, 4, 4, - }, - { - 0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 2, 1, 3, 0, 0, 0, 0, 0, 2, 0, 1, - 2, 1, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 1, 0, 1, 1, 3, 1, 0, 0, 0, - 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, - 2, 2, 0, 3, 4, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, - }, - { - 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 4, 1, 4, 0, 3, 0, 4, 0, 3, - 0, 4, 0, 3, 0, 3, 0, 4, 1, 5, 1, 4, 0, 0, 3, 0, 5, 0, 5, 2, 0, - 1, 0, 0, 0, 2, 1, 4, 0, 1, 3, 0, 0, 3, 0, 0, 3, 1, 1, 4, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 1, 4, 0, 5, 0, 3, 0, 2, 0, 3, 5, 4, 4, 3, 4, 3, 5, 3, 4, 3, 3, - 0, 4, 3, 3, 3, 3, 3, 3, 2, 4, 4, 3, 1, 3, 4, 4, 5, 4, 4, 3, 4, - 4, 1, 3, 5, 4, 3, 3, 3, 1, 2, 2, 3, 3, 1, 3, 1, 3, 3, 3, 5, 3, - 3, 4, 5, 0, 3, 
0, 3, 0, 3, 4, 3, 4, 4, 3, 0, 3, 0, 2, 4, 3, - }, - { - 0, 1, 0, 4, 0, 0, 0, 0, 0, 1, 4, 0, 4, 1, 4, 2, 4, 0, 3, 0, 1, - 0, 1, 0, 0, 0, 0, 0, 2, 0, 3, 1, 1, 1, 0, 3, 0, 0, 0, 1, 2, 1, - 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 3, 2, - 0, 2, 2, 0, 1, 0, 0, 0, 2, 3, 2, 3, 3, 0, 0, 0, 0, 2, 1, 0, - }, - { - 0, 5, 1, 5, 0, 3, 0, 3, 0, 5, 4, 4, 5, 1, 5, 3, 3, 0, 4, 3, 4, - 3, 5, 3, 4, 3, 3, 2, 4, 3, 4, 3, 3, 0, 3, 3, 1, 4, 4, 3, 4, 4, - 4, 3, 4, 5, 5, 3, 2, 3, 1, 1, 3, 3, 1, 3, 1, 1, 3, 3, 2, 4, 5, - 3, 3, 5, 0, 4, 0, 3, 0, 4, 4, 3, 5, 3, 3, 0, 3, 4, 0, 4, 3, - }, - { - 0, 5, 0, 5, 0, 3, 0, 2, 0, 4, 4, 3, 5, 2, 4, 3, 3, 3, 4, 4, 4, - 3, 5, 3, 5, 3, 3, 1, 4, 0, 4, 3, 3, 0, 3, 3, 0, 4, 4, 4, 4, 5, - 4, 3, 3, 5, 5, 3, 2, 3, 1, 2, 3, 2, 0, 1, 0, 0, 3, 2, 2, 4, 4, - 3, 1, 5, 0, 4, 0, 3, 0, 4, 3, 1, 3, 2, 1, 0, 3, 3, 0, 3, 3, - }, - { - 0, 4, 0, 5, 0, 5, 0, 4, 0, 4, 5, 5, 5, 3, 4, 3, 3, 2, 5, 4, 4, - 3, 5, 3, 5, 3, 4, 0, 4, 3, 4, 4, 3, 2, 4, 4, 3, 4, 5, 4, 4, 5, - 5, 0, 3, 5, 5, 4, 1, 3, 3, 2, 3, 3, 1, 3, 1, 0, 4, 3, 1, 4, 4, - 3, 4, 5, 0, 4, 0, 2, 0, 4, 3, 4, 4, 3, 3, 0, 4, 0, 0, 5, 5, - }, - { - 0, 4, 0, 4, 0, 5, 0, 1, 1, 3, 3, 4, 4, 3, 4, 1, 3, 0, 5, 1, 3, - 0, 3, 1, 3, 1, 1, 0, 3, 0, 3, 3, 4, 0, 4, 3, 0, 4, 4, 4, 3, 4, - 4, 0, 3, 5, 4, 1, 0, 3, 0, 0, 2, 3, 0, 3, 1, 0, 3, 1, 0, 3, 2, - 1, 3, 5, 0, 3, 0, 1, 0, 3, 2, 3, 3, 4, 4, 0, 2, 2, 0, 4, 4, - }, - { - 2, 4, 0, 5, 0, 4, 0, 3, 0, 4, 5, 5, 4, 3, 5, 3, 5, 3, 5, 3, 5, - 2, 5, 3, 4, 3, 3, 4, 3, 4, 5, 3, 2, 1, 5, 4, 3, 2, 3, 4, 5, 3, - 4, 1, 2, 5, 4, 3, 0, 3, 3, 0, 3, 2, 0, 2, 3, 0, 4, 1, 0, 3, 4, - 3, 3, 5, 0, 3, 0, 1, 0, 4, 5, 5, 5, 4, 3, 0, 4, 2, 0, 3, 5, - }, - { - 0, 5, 0, 4, 0, 4, 0, 2, 0, 5, 4, 3, 4, 3, 4, 3, 3, 3, 4, 3, 4, - 2, 5, 3, 5, 3, 4, 1, 4, 3, 4, 4, 4, 0, 3, 5, 0, 4, 4, 4, 4, 5, - 3, 1, 3, 4, 5, 3, 3, 3, 3, 3, 3, 3, 0, 2, 2, 0, 3, 3, 2, 4, 3, - 3, 3, 5, 3, 4, 1, 3, 3, 5, 3, 2, 0, 0, 0, 0, 4, 3, 1, 3, 3, - }, - { - 0, 1, 0, 3, 0, 3, 0, 1, 0, 1, 3, 3, 3, 2, 3, 3, 3, 0, 3, 0, 0, - 0, 3, 1, 3, 0, 
0, 0, 2, 2, 2, 3, 0, 0, 3, 2, 0, 1, 2, 4, 1, 3, - 3, 0, 0, 3, 3, 3, 0, 1, 0, 0, 2, 1, 0, 0, 3, 0, 3, 1, 0, 3, 0, - 0, 1, 3, 0, 2, 0, 1, 0, 3, 3, 1, 3, 3, 0, 0, 1, 1, 0, 3, 3, - }, - { - 0, 2, 0, 3, 0, 2, 1, 4, 0, 2, 2, 3, 1, 1, 3, 1, 1, 0, 2, 0, 3, - 1, 2, 3, 1, 3, 0, 0, 1, 0, 4, 3, 2, 3, 3, 3, 1, 4, 2, 3, 3, 3, - 3, 1, 0, 3, 1, 4, 0, 1, 1, 0, 1, 2, 0, 1, 1, 0, 1, 1, 0, 3, 1, - 3, 2, 2, 0, 1, 0, 0, 0, 2, 3, 3, 3, 1, 0, 0, 0, 0, 0, 2, 3, - }, - { - 0, 5, 0, 4, 0, 5, 0, 2, 0, 4, 5, 5, 3, 3, 4, 3, 3, 1, 5, 4, 4, - 2, 4, 4, 4, 3, 4, 2, 4, 3, 5, 5, 4, 3, 3, 4, 3, 3, 5, 5, 4, 5, - 5, 1, 3, 4, 5, 3, 1, 4, 3, 1, 3, 3, 0, 3, 3, 1, 4, 3, 1, 4, 5, - 3, 3, 5, 0, 4, 0, 3, 0, 5, 3, 3, 1, 4, 3, 0, 4, 0, 1, 5, 3, - }, - { - 0, 5, 0, 5, 0, 4, 0, 2, 0, 4, 4, 3, 4, 3, 3, 3, 3, 3, 5, 4, 4, - 4, 4, 4, 4, 5, 3, 3, 5, 2, 4, 4, 4, 3, 4, 4, 3, 3, 4, 4, 5, 5, - 3, 3, 4, 3, 4, 3, 3, 4, 3, 3, 3, 3, 1, 2, 2, 1, 4, 3, 3, 5, 4, - 4, 3, 4, 0, 4, 0, 3, 0, 4, 4, 4, 4, 4, 1, 0, 4, 2, 0, 2, 4, - }, - { - 0, 4, 0, 4, 0, 3, 0, 1, 0, 3, 5, 2, 3, 0, 3, 0, 2, 1, 4, 2, 3, - 3, 4, 1, 4, 3, 3, 2, 4, 1, 3, 3, 3, 0, 3, 3, 0, 0, 3, 3, 3, 5, - 3, 3, 3, 3, 3, 2, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 1, 0, 0, 3, 1, - 2, 2, 3, 0, 3, 0, 2, 0, 4, 4, 3, 3, 4, 1, 0, 3, 0, 0, 2, 4, - }, - { - 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, - 0, 2, 0, 1, 0, 0, 0, 0, 0, 3, 1, 3, 0, 3, 2, 0, 0, 0, 1, 0, 3, - 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 0, 2, 0, 0, 0, 0, 0, 0, 2, - }, - { - 0, 2, 1, 3, 0, 2, 0, 2, 0, 3, 3, 3, 3, 1, 3, 1, 3, 3, 3, 3, 3, - 3, 4, 2, 2, 1, 2, 1, 4, 0, 4, 3, 1, 3, 3, 3, 2, 4, 3, 5, 4, 3, - 3, 3, 3, 3, 3, 3, 0, 1, 3, 0, 2, 0, 0, 1, 0, 0, 1, 0, 0, 4, 2, - 0, 2, 3, 0, 3, 3, 0, 3, 3, 4, 2, 3, 1, 4, 0, 1, 2, 0, 2, 3, - }, - { - 0, 3, 0, 3, 0, 1, 0, 3, 0, 2, 3, 3, 3, 0, 3, 1, 2, 0, 3, 3, 2, - 3, 3, 2, 3, 2, 3, 1, 3, 0, 4, 3, 2, 0, 3, 3, 1, 4, 3, 3, 2, 3, - 4, 3, 1, 3, 3, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 4, 1, - 1, 0, 3, 0, 3, 1, 0, 
2, 3, 3, 3, 3, 3, 1, 0, 0, 2, 0, 3, 3, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2, 0, 3, 0, 0, 0, 0, 0, 0, - 0, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 3, 1, 0, 1, 0, 1, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 3, 0, 2, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 3, - }, - { - 0, 2, 0, 3, 1, 3, 0, 3, 0, 2, 3, 3, 3, 1, 3, 1, 3, 1, 3, 1, 3, - 3, 3, 1, 3, 0, 2, 3, 1, 1, 4, 3, 3, 2, 3, 3, 1, 2, 2, 4, 1, 3, - 3, 0, 1, 4, 2, 3, 0, 1, 3, 0, 3, 0, 0, 1, 3, 0, 2, 0, 0, 3, 3, - 2, 1, 3, 0, 3, 0, 2, 0, 3, 4, 4, 4, 3, 1, 0, 3, 0, 0, 3, 3, - }, - { - 0, 2, 0, 1, 0, 2, 0, 0, 0, 1, 3, 2, 2, 1, 3, 0, 1, 1, 3, 0, 3, - 2, 3, 1, 2, 0, 2, 0, 1, 1, 3, 3, 3, 0, 3, 3, 1, 1, 2, 3, 2, 3, - 3, 1, 2, 3, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 2, 1, - 2, 1, 3, 0, 3, 0, 0, 0, 3, 4, 4, 4, 3, 2, 0, 2, 0, 0, 2, 4, - }, - { - 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 3, - }, - { - 0, 3, 0, 3, 0, 2, 0, 3, 0, 3, 3, 3, 2, 3, 2, 2, 2, 0, 3, 1, 3, - 3, 3, 2, 3, 3, 0, 0, 3, 0, 3, 2, 2, 0, 2, 3, 1, 4, 3, 4, 3, 3, - 2, 3, 1, 5, 4, 4, 0, 3, 1, 2, 1, 3, 0, 3, 1, 1, 2, 0, 2, 3, 1, - 3, 1, 3, 0, 3, 0, 1, 0, 3, 3, 4, 4, 2, 1, 0, 2, 1, 0, 2, 4, - }, - { - 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 4, 2, 5, 1, 4, 0, 2, 0, 2, 1, 3, - 1, 4, 0, 2, 1, 0, 0, 2, 1, 4, 1, 1, 0, 3, 3, 0, 5, 1, 3, 2, 3, - 3, 1, 0, 3, 2, 3, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, - 1, 0, 3, 0, 2, 0, 1, 0, 3, 3, 3, 4, 3, 3, 0, 0, 0, 0, 2, 3, - }, - { - 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 1, 0, 0, 0, 0, 0, 3, - }, - { - 0, 1, 0, 3, 0, 4, 0, 3, 0, 2, 4, 3, 1, 0, 3, 2, 2, 1, 3, 1, 2, - 2, 3, 1, 1, 1, 2, 1, 
3, 0, 1, 2, 0, 1, 3, 2, 1, 3, 0, 5, 5, 1, - 0, 0, 1, 3, 2, 1, 0, 3, 0, 0, 1, 0, 0, 0, 0, 0, 3, 4, 0, 1, 1, - 1, 3, 2, 0, 2, 0, 1, 0, 2, 3, 3, 1, 2, 3, 0, 1, 0, 1, 0, 4, - }, - { - 0, 0, 0, 1, 0, 3, 0, 3, 0, 2, 2, 1, 0, 0, 4, 0, 3, 0, 3, 1, 3, - 0, 3, 0, 3, 0, 1, 0, 3, 0, 3, 1, 3, 0, 3, 3, 0, 0, 1, 2, 1, 1, - 1, 0, 1, 2, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, - 1, 2, 0, 0, 2, 0, 0, 0, 0, 2, 3, 3, 3, 3, 0, 0, 0, 0, 1, 4, - }, - { - 0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 3, 0, 1, 0, 2, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 2, 0, 2, 3, 0, 0, 2, 2, 3, 1, - 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 2, 0, 0, 0, 0, 2, 3, - }, - { - 2, 4, 0, 5, 0, 5, 0, 4, 0, 3, 4, 3, 3, 3, 4, 3, 3, 3, 4, 3, 4, - 4, 5, 4, 5, 5, 5, 2, 3, 0, 5, 5, 4, 1, 5, 4, 3, 1, 5, 4, 3, 4, - 4, 3, 3, 4, 3, 3, 0, 3, 2, 0, 2, 3, 0, 3, 0, 0, 3, 3, 0, 5, 3, - 2, 3, 3, 0, 3, 0, 3, 0, 3, 4, 5, 4, 5, 3, 0, 4, 3, 0, 3, 4, - }, - { - 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 3, 4, 3, 2, 3, 2, 3, 0, 4, 3, 3, - 3, 3, 3, 3, 3, 3, 0, 3, 2, 4, 3, 3, 1, 3, 4, 3, 4, 4, 4, 3, 4, - 4, 3, 2, 4, 4, 1, 0, 2, 0, 0, 1, 1, 0, 2, 0, 0, 3, 1, 0, 5, 3, - 2, 1, 3, 0, 3, 0, 1, 2, 4, 3, 2, 4, 3, 3, 0, 3, 2, 0, 4, 4, - }, - { - 0, 3, 0, 3, 0, 1, 0, 0, 0, 1, 4, 3, 3, 2, 3, 1, 3, 1, 4, 2, 3, - 2, 4, 2, 3, 4, 3, 0, 2, 2, 3, 3, 3, 0, 3, 3, 3, 0, 3, 4, 1, 3, - 3, 0, 3, 4, 3, 3, 0, 1, 1, 0, 1, 0, 0, 0, 4, 0, 3, 0, 0, 3, 1, - 2, 1, 3, 0, 4, 0, 1, 0, 4, 3, 3, 4, 3, 3, 0, 2, 0, 0, 3, 3, - }, - { - 0, 3, 0, 4, 0, 1, 0, 3, 0, 3, 4, 3, 3, 0, 3, 3, 3, 1, 3, 1, 3, - 3, 4, 3, 3, 3, 0, 0, 3, 1, 5, 3, 3, 1, 3, 3, 2, 5, 4, 3, 3, 4, - 5, 3, 2, 5, 3, 4, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 0, 4, 2, - 2, 1, 3, 0, 3, 0, 2, 0, 4, 4, 3, 5, 3, 2, 0, 1, 1, 0, 3, 4, - }, - { - 0, 5, 0, 4, 0, 5, 0, 2, 0, 4, 4, 3, 3, 2, 3, 3, 3, 1, 4, 3, 4, - 1, 5, 3, 4, 3, 4, 0, 4, 2, 4, 3, 4, 1, 5, 4, 0, 4, 4, 4, 4, 5, - 4, 1, 3, 5, 4, 2, 1, 4, 1, 1, 3, 2, 0, 3, 1, 0, 3, 2, 1, 4, 3, - 3, 3, 4, 0, 4, 0, 3, 0, 4, 
4, 4, 3, 3, 3, 0, 4, 2, 0, 3, 4, - }, - { - 1, 4, 0, 4, 0, 3, 0, 1, 0, 3, 3, 3, 1, 1, 3, 3, 2, 2, 3, 3, 1, - 0, 3, 2, 2, 1, 2, 0, 3, 1, 2, 1, 2, 0, 3, 2, 0, 2, 2, 3, 3, 4, - 3, 0, 3, 3, 1, 2, 0, 1, 1, 3, 1, 2, 0, 0, 3, 0, 1, 1, 0, 3, 2, - 2, 3, 3, 0, 3, 0, 0, 0, 2, 3, 3, 4, 3, 3, 0, 1, 0, 0, 1, 4, - }, - { - 0, 4, 0, 4, 0, 4, 0, 0, 0, 3, 4, 4, 3, 1, 4, 2, 3, 2, 3, 3, 3, - 1, 4, 3, 4, 0, 3, 0, 4, 2, 3, 3, 2, 2, 5, 4, 2, 1, 3, 4, 3, 4, - 3, 1, 3, 3, 4, 2, 0, 2, 1, 0, 3, 3, 0, 0, 2, 0, 3, 1, 0, 4, 4, - 3, 4, 3, 0, 4, 0, 1, 0, 2, 4, 4, 4, 4, 4, 0, 3, 2, 0, 3, 3, - }, - { - 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 0, 0, 1, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, - }, - { - 0, 2, 0, 3, 0, 4, 0, 4, 0, 1, 3, 3, 3, 0, 4, 0, 2, 1, 2, 1, 1, - 1, 2, 0, 3, 1, 1, 0, 1, 0, 3, 1, 0, 0, 3, 3, 2, 0, 1, 1, 0, 0, - 0, 0, 0, 1, 0, 2, 0, 2, 2, 0, 3, 1, 0, 0, 1, 0, 1, 1, 0, 1, 2, - 0, 3, 0, 0, 0, 0, 1, 0, 0, 3, 3, 4, 3, 1, 0, 1, 0, 3, 0, 2, - }, - { - 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, 1, 0, 2, 0, 3, 1, 0, 1, 3, 0, 0, - 0, 2, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 4, 0, 0, 0, 2, 3, 0, 1, - 4, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, 0, 3, - }, - { - 0, 2, 0, 5, 0, 5, 0, 1, 0, 2, 4, 3, 3, 2, 5, 1, 3, 2, 3, 3, 3, - 0, 4, 1, 2, 0, 3, 0, 4, 0, 2, 2, 1, 1, 5, 3, 0, 0, 1, 4, 2, 3, - 2, 0, 3, 3, 3, 2, 0, 2, 4, 1, 1, 2, 0, 1, 1, 0, 3, 1, 0, 1, 3, - 1, 2, 3, 0, 2, 0, 0, 0, 1, 3, 5, 4, 4, 4, 0, 3, 0, 0, 1, 3, - }, - { - 0, 4, 0, 5, 0, 4, 0, 4, 0, 4, 5, 4, 3, 3, 4, 3, 3, 3, 4, 3, 4, - 4, 5, 3, 4, 5, 4, 2, 4, 2, 3, 4, 3, 1, 4, 4, 1, 3, 5, 4, 4, 5, - 5, 4, 4, 5, 5, 5, 2, 3, 3, 1, 4, 3, 1, 3, 3, 0, 3, 3, 1, 4, 3, - 4, 4, 4, 0, 3, 0, 4, 0, 3, 3, 4, 4, 5, 0, 0, 4, 3, 0, 4, 5, - }, - { - 0, 4, 0, 4, 0, 3, 0, 3, 0, 3, 4, 4, 4, 3, 3, 2, 4, 3, 4, 3, 4, - 3, 5, 3, 4, 3, 2, 1, 4, 2, 
4, 4, 3, 1, 3, 4, 2, 4, 5, 5, 3, 4, - 5, 4, 1, 5, 4, 3, 0, 3, 2, 2, 3, 2, 1, 3, 1, 0, 3, 3, 3, 5, 3, - 3, 3, 5, 4, 4, 2, 3, 3, 4, 3, 3, 3, 2, 1, 0, 3, 2, 1, 4, 3, - }, - { - 0, 4, 0, 5, 0, 4, 0, 3, 0, 3, 5, 5, 3, 2, 4, 3, 4, 0, 5, 4, 4, - 1, 4, 4, 4, 3, 3, 3, 4, 3, 5, 5, 2, 3, 3, 4, 1, 2, 5, 5, 3, 5, - 5, 2, 3, 5, 5, 4, 0, 3, 2, 0, 3, 3, 1, 1, 5, 1, 4, 1, 0, 4, 3, - 2, 3, 5, 0, 4, 0, 3, 0, 5, 4, 3, 4, 3, 0, 0, 4, 1, 0, 4, 4, - }, - { - 1, 3, 0, 4, 0, 2, 0, 2, 0, 2, 5, 5, 3, 3, 3, 3, 3, 0, 4, 2, 3, - 4, 4, 4, 3, 4, 0, 0, 3, 4, 5, 4, 3, 3, 3, 3, 2, 5, 5, 4, 5, 5, - 5, 4, 3, 5, 5, 5, 1, 3, 1, 0, 1, 0, 0, 3, 2, 0, 4, 2, 0, 5, 2, - 3, 2, 4, 1, 3, 0, 3, 0, 4, 5, 4, 5, 4, 3, 0, 4, 2, 0, 5, 4, - }, - { - 0, 3, 0, 4, 0, 5, 0, 3, 0, 3, 4, 4, 3, 2, 3, 2, 3, 3, 3, 3, 3, - 2, 4, 3, 3, 2, 2, 0, 3, 3, 3, 3, 3, 1, 3, 3, 3, 0, 4, 4, 3, 4, - 4, 1, 1, 4, 4, 2, 0, 3, 1, 0, 1, 1, 0, 4, 1, 0, 2, 3, 1, 3, 3, - 1, 3, 4, 0, 3, 0, 1, 0, 3, 1, 3, 0, 0, 1, 0, 2, 0, 0, 4, 4, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 3, 0, 3, 0, 2, 0, 3, 0, 1, 5, 4, 3, 3, 3, 1, 4, 2, 1, 2, 3, - 4, 4, 2, 4, 4, 5, 0, 3, 1, 4, 3, 4, 0, 4, 3, 3, 3, 2, 3, 2, 5, - 3, 4, 3, 2, 2, 3, 0, 0, 3, 0, 2, 1, 0, 1, 2, 0, 0, 0, 0, 2, 1, - 1, 3, 1, 0, 2, 0, 4, 0, 3, 4, 4, 4, 5, 2, 0, 2, 0, 0, 1, 3, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, - 1, 1, 0, 0, 1, 1, 0, 0, 0, 4, 2, 1, 1, 0, 1, 0, 3, 2, 0, 0, 3, - 1, 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, - 1, 0, 0, 0, 2, 0, 0, 0, 1, 4, 0, 4, 2, 1, 0, 0, 0, 0, 0, 1, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 1, 0, 1, 0, 0, 0, 0, 3, 1, 0, 0, 0, 2, 0, 2, 1, 0, 0, 1, - 2, 1, 0, 1, 1, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, - 1, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 
0, 0, 0, 0, 0, 0, 0, 0, 2, - }, - { - 0, 4, 0, 4, 0, 4, 0, 3, 0, 4, 4, 3, 4, 2, 4, 3, 2, 0, 4, 4, 4, - 3, 5, 3, 5, 3, 3, 2, 4, 2, 4, 3, 4, 3, 1, 4, 0, 2, 3, 4, 4, 4, - 3, 3, 3, 4, 4, 4, 3, 4, 1, 3, 4, 3, 2, 1, 2, 1, 3, 3, 3, 4, 4, - 3, 3, 5, 0, 4, 0, 3, 0, 4, 3, 3, 3, 2, 1, 0, 3, 0, 0, 3, 3, - }, - { - 0, 4, 0, 3, 0, 3, 0, 3, 0, 3, 5, 5, 3, 3, 3, 3, 4, 3, 4, 3, 3, - 3, 4, 4, 4, 3, 3, 3, 3, 4, 3, 5, 3, 3, 1, 3, 2, 4, 5, 5, 5, 5, - 4, 3, 4, 5, 5, 3, 2, 2, 3, 3, 3, 3, 2, 3, 3, 1, 2, 3, 2, 4, 3, - 3, 3, 4, 0, 4, 0, 2, 0, 4, 3, 2, 2, 1, 2, 0, 3, 0, 0, 4, 1, - }, -}; - -#define MINIMUM_DATA_THRESHOLD 4 - -void JapaneseContextAnalysis::HandleData(const char* aBuf, uint32_t aLen) { - uint32_t charLen; - int32_t order; - uint32_t i; - - if (mDone) return; - - // The buffer we got is byte oriented, and a character may span in more than - // one buffers. In case the last one or two byte in last buffer is not - // complete, we record how many byte needed to complete that character and - // skip these bytes here. We can choose to record those bytes as well and - // analyse the character once it is complete, but since a character will not - // make much difference, by simply skipping this character will simply our - // logic and improve performance. 
- for (i = mNeedToSkipCharNum; i < aLen;) { - order = GetOrder(aBuf + i, &charLen); - i += charLen; - if (i > aLen) { - mNeedToSkipCharNum = i - aLen; - mLastCharOrder = -1; - } else { - if (order != -1 && mLastCharOrder != -1) { - mTotalRel++; - if (mTotalRel > MAX_REL_THRESHOLD) { - mDone = true; - break; - } - mRelSample[jp2CharContext[mLastCharOrder][order]]++; - } - mLastCharOrder = order; - } - } -} - -void JapaneseContextAnalysis::Reset() { - mTotalRel = 0; - for (uint32_t i = 0; i < NUM_OF_CATEGORY; i++) mRelSample[i] = 0; - mNeedToSkipCharNum = 0; - mLastCharOrder = -1; - mDone = false; - mDataThreshold = 0; -} -#define DONT_KNOW (float)-1 - -float JapaneseContextAnalysis::GetConfidence(void) { - // This is just one way to calculate confidence. It works well for me. - if (mTotalRel > mDataThreshold) - return ((float)(mTotalRel - mRelSample[0])) / mTotalRel; - else - return (float)DONT_KNOW; -} - -int32_t SJISContextAnalysis::GetOrder(const char* str, uint32_t* charLen) { - // find out current char's byte length - if (((unsigned char)*str >= (unsigned char)0x81 && - (unsigned char)*str <= (unsigned char)0x9f) || - ((unsigned char)*str >= (unsigned char)0xe0 && - (unsigned char)*str <= (unsigned char)0xfc)) - *charLen = 2; - else - *charLen = 1; - - // return its order if it is hiragana - if (*str == '\202' && (unsigned char)*(str + 1) >= (unsigned char)0x9f && - (unsigned char)*(str + 1) <= (unsigned char)0xf1) - return (unsigned char)*(str + 1) - (unsigned char)0x9f; - return -1; -} - -int32_t EUCJPContextAnalysis::GetOrder(const char* str, uint32_t* charLen) { - // find out current char's byte length - if ((unsigned char)*str == (unsigned char)0x8e || - ((unsigned char)*str >= (unsigned char)0xa1 && - (unsigned char)*str <= (unsigned char)0xfe)) - *charLen = 2; - else if ((unsigned char)*str == (unsigned char)0x8f) - *charLen = 3; - else - *charLen = 1; - - // return its order if it is hiragana - if ((unsigned char)*str == (unsigned char)0xa4 && - 
(unsigned char)*(str + 1) >= (unsigned char)0xa1 && - (unsigned char)*(str + 1) <= (unsigned char)0xf3) - return (unsigned char)*(str + 1) - (unsigned char)0xa1; - return -1; -} diff --git a/extensions/universalchardet/src/base/JpCntx.h b/extensions/universalchardet/src/base/JpCntx.h deleted file mode 100644 index 0f3a8671f3c2..000000000000 --- a/extensions/universalchardet/src/base/JpCntx.h +++ /dev/null @@ -1,97 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#ifndef __JPCNTX_H__ -#define __JPCNTX_H__ - -#define NUM_OF_CATEGORY 6 - -#include "nscore.h" - -#define ENOUGH_REL_THRESHOLD 100 -#define MAX_REL_THRESHOLD 1000 - -// hiragana frequency category table -extern const uint8_t jp2CharContext[83][83]; - -class JapaneseContextAnalysis { - public: - JapaneseContextAnalysis() { Reset(); } - - void HandleData(const char* aBuf, uint32_t aLen); - - void HandleOneChar(const char* aStr, uint32_t aCharLen) { - int32_t order; - - // if we received enough data, stop here - if (mTotalRel > MAX_REL_THRESHOLD) mDone = true; - if (mDone) return; - - // Only 2-bytes characters are of our interest - order = (aCharLen == 2) ? 
GetOrder(aStr) : -1; - if (order != -1 && mLastCharOrder != -1) { - mTotalRel++; - // count this sequence to its category counter - mRelSample[jp2CharContext[mLastCharOrder][order]]++; - } - mLastCharOrder = order; - } - - float GetConfidence(void); - void Reset(); - bool GotEnoughData() { return mTotalRel > ENOUGH_REL_THRESHOLD; } - - protected: - virtual int32_t GetOrder(const char* str, uint32_t* charLen) = 0; - virtual int32_t GetOrder(const char* str) = 0; - - // category counters, each integer counts sequences in its category - uint32_t mRelSample[NUM_OF_CATEGORY]; - - // total sequence received - uint32_t mTotalRel; - - // Number of sequences needed to trigger detection - uint32_t mDataThreshold; - - // The order of previous char - int32_t mLastCharOrder; - - // if last byte in current buffer is not the last byte of a character, we - // need to know how many byte to skip in next buffer. - uint32_t mNeedToSkipCharNum; - - // If this flag is set to true, detection is done and conclusion has been made - bool mDone; -}; - -class SJISContextAnalysis : public JapaneseContextAnalysis { - // SJISContextAnalysis(){}; - protected: - int32_t GetOrder(const char* str, uint32_t* charLen) override; - - int32_t GetOrder(const char* str) override { - // We only interested in Hiragana, so first byte is '\202' - if (*str == '\202' && (unsigned char)*(str + 1) >= (unsigned char)0x9f && - (unsigned char)*(str + 1) <= (unsigned char)0xf1) - return (unsigned char)*(str + 1) - (unsigned char)0x9f; - return -1; - } -}; - -class EUCJPContextAnalysis : public JapaneseContextAnalysis { - protected: - int32_t GetOrder(const char* str, uint32_t* charLen) override; - int32_t GetOrder(const char* str) override - // We only interested in Hiragana, so first byte is '\244' - { - if (*str == '\244' && (unsigned char)*(str + 1) >= (unsigned char)0xa1 && - (unsigned char)*(str + 1) <= (unsigned char)0xf3) - return (unsigned char)*(str + 1) - (unsigned char)0xa1; - return -1; - } -}; - -#endif 
/* __JPCNTX_H__ */ diff --git a/extensions/universalchardet/src/base/moz.build b/extensions/universalchardet/src/base/moz.build deleted file mode 100644 index c2dfdd344f1b..000000000000 --- a/extensions/universalchardet/src/base/moz.build +++ /dev/null @@ -1,21 +0,0 @@ -# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- -# vim: set filetype=python: -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -UNIFIED_SOURCES += [ - 'CharDistribution.cpp', - 'JpCntx.cpp', - 'nsCharSetProber.cpp', - 'nsEscCharsetProber.cpp', - 'nsEscSM.cpp', - 'nsEUCJPProber.cpp', - 'nsMBCSGroupProber.cpp', - 'nsMBCSSM.cpp', - 'nsSJISProber.cpp', - 'nsUniversalDetector.cpp', - 'nsUTF8Prober.cpp', -] - -FINAL_LIBRARY = 'xul' diff --git a/extensions/universalchardet/src/base/nsCharSetProber.cpp b/extensions/universalchardet/src/base/nsCharSetProber.cpp deleted file mode 100644 index 517b81373a29..000000000000 --- a/extensions/universalchardet/src/base/nsCharSetProber.cpp +++ /dev/null @@ -1,88 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#include "nsCharSetProber.h" - -// This filter applies to all scripts which do not use English characters -bool nsCharSetProber::FilterWithoutEnglishLetters(const char* aBuf, - uint32_t aLen, char** newBuf, - uint32_t& newLen) { - char* newptr; - char *prevPtr, *curPtr; - - bool meetMSB = false; - newptr = *newBuf = (char*)malloc(aLen); - if (!newptr) return false; - - for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf + aLen; curPtr++) { - if (*curPtr & 0x80) { - meetMSB = true; - } else if (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || - *curPtr > 'z') { - // current char is a symbol, most likely a punctuation. we treat it as - // segment delimiter - if (meetMSB && curPtr > prevPtr) - // this segment contains more than single symbol, and it has upper ASCII, - // we need to keep it - { - while (prevPtr < curPtr) *newptr++ = *prevPtr++; - prevPtr++; - *newptr++ = ' '; - meetMSB = false; - } else // ignore current segment. (either because it is just a symbol or - // just an English word) - prevPtr = curPtr + 1; - } - } - if (meetMSB && curPtr > prevPtr) - while (prevPtr < curPtr) *newptr++ = *prevPtr++; - - newLen = newptr - *newBuf; - - return true; -} - -// This filter applies to all scripts which contain both English characters and -// upper ASCII characters. -bool nsCharSetProber::FilterWithEnglishLetters(const char* aBuf, uint32_t aLen, - char** newBuf, - uint32_t& newLen) { - // do filtering to reduce load to probers - char* newptr; - char *prevPtr, *curPtr; - bool isInTag = false; - - newptr = *newBuf = (char*)malloc(aLen); - if (!newptr) return false; - - for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf + aLen; curPtr++) { - if (*curPtr == '>') - isInTag = false; - else if (*curPtr == '<') - isInTag = true; - - if (!(*curPtr & 0x80) && - (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z')) { - if (curPtr > prevPtr && - !isInTag) // Current segment contains more than just a symbol - // and it is not inside a tag, keep it. 
- { - while (prevPtr < curPtr) *newptr++ = *prevPtr++; - prevPtr++; - *newptr++ = ' '; - } else - prevPtr = curPtr + 1; - } - } - - // If the current segment contains more than just a symbol - // and it is not inside a tag then keep it. - if (!isInTag) - while (prevPtr < curPtr) *newptr++ = *prevPtr++; - - newLen = newptr - *newBuf; - - return true; -} diff --git a/extensions/universalchardet/src/base/nsCharSetProber.h b/extensions/universalchardet/src/base/nsCharSetProber.h deleted file mode 100644 index 29e749eaf1db..000000000000 --- a/extensions/universalchardet/src/base/nsCharSetProber.h +++ /dev/null @@ -1,44 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#ifndef nsCharSetProber_h__ -#define nsCharSetProber_h__ - -#include "nscore.h" - -//#define DEBUG_chardet // Uncomment this for debug dump. - -typedef enum { - eDetecting = 0, // We are still detecting, no sure answer yet, but caller can - // ask for confidence. - eFoundIt = 1, // That's a positive answer - eNotMe = 2 // Negative answer -} nsProbingState; - -#define SHORTCUT_THRESHOLD (float)0.95 - -class nsCharSetProber { - public: - virtual ~nsCharSetProber() {} - virtual const char* GetCharSetName() = 0; - virtual nsProbingState HandleData(const char* aBuf, uint32_t aLen) = 0; - virtual nsProbingState GetState(void) = 0; - virtual void Reset(void) = 0; - virtual float GetConfidence(void) = 0; - -#ifdef DEBUG_chardet - virtual void DumpStatus(){}; -#endif - - // Helper functions used in the Latin1 and Group probers. - // both functions Allocate a new buffer for newBuf. This buffer should be - // freed by the caller using free(). - // Both functions return false in case of memory allocation failure. 
- static bool FilterWithoutEnglishLetters(const char* aBuf, uint32_t aLen, - char** newBuf, uint32_t& newLen); - static bool FilterWithEnglishLetters(const char* aBuf, uint32_t aLen, - char** newBuf, uint32_t& newLen); -}; - -#endif /* nsCharSetProber_h__ */ diff --git a/extensions/universalchardet/src/base/nsCodingStateMachine.h b/extensions/universalchardet/src/base/nsCodingStateMachine.h deleted file mode 100644 index 7a43d8e68c13..000000000000 --- a/extensions/universalchardet/src/base/nsCodingStateMachine.h +++ /dev/null @@ -1,85 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#ifndef nsCodingStateMachine_h__ -#define nsCodingStateMachine_h__ - -#include "mozilla/ArrayUtils.h" - -#include "nsPkgInt.h" - -/* Apart from these 3 generic states, machine states are specific to - * each charset prober. 
- */ -#define eStart 0 -#define eError 1 -#define eItsMe 2 - -#define GETCLASS(c) GETFROMPCK(((unsigned char)(c)), mModel->classTable) - -// state machine model -typedef struct { - nsPkgInt classTable; - uint32_t classFactor; - nsPkgInt stateTable; - const uint32_t* charLenTable; -#ifdef DEBUG - const size_t charLenTableLength; -#endif - const char* name; -} SMModel; - -class nsCodingStateMachine { - public: - explicit nsCodingStateMachine(const SMModel* sm) : mModel(sm) { - mCurrentState = eStart; - } - uint32_t NextState(char c) { - // for each byte we get its class , if it is first byte, we also get byte - // length - uint32_t byteCls = GETCLASS(c); - if (mCurrentState == eStart) { - mCurrentBytePos = 0; - MOZ_ASSERT(byteCls < mModel->charLenTableLength); - mCurrentCharLen = mModel->charLenTable[byteCls]; - } - // from byte's class and stateTable, we get its next state - mCurrentState = GETFROMPCK(mCurrentState * mModel->classFactor + byteCls, - mModel->stateTable); - mCurrentBytePos++; - return mCurrentState; - } - uint32_t GetCurrentCharLen(void) { return mCurrentCharLen; } - void Reset(void) { mCurrentState = eStart; } - const char* GetCodingStateMachine() { return mModel->name; } - - protected: - uint32_t mCurrentState; - uint32_t mCurrentCharLen; - uint32_t mCurrentBytePos; - - const SMModel* mModel; -}; - -extern const SMModel UTF8SMModel; -extern const SMModel Big5SMModel; -extern const SMModel EUCJPSMModel; -extern const SMModel EUCKRSMModel; -extern const SMModel EUCTWSMModel; -extern const SMModel GB18030SMModel; -extern const SMModel SJISSMModel; - -extern const SMModel HZSMModel; -extern const SMModel ISO2022CNSMModel; -extern const SMModel ISO2022JPSMModel; -extern const SMModel ISO2022KRSMModel; - -#undef CHAR_LEN_TABLE -#ifdef DEBUG -# define CHAR_LEN_TABLE(x) x, mozilla::ArrayLength(x) -#else -# define CHAR_LEN_TABLE(x) x -#endif - -#endif /* nsCodingStateMachine_h__ */ diff --git a/extensions/universalchardet/src/base/nsEUCJPProber.cpp 
b/extensions/universalchardet/src/base/nsEUCJPProber.cpp deleted file mode 100644 index 905d5ca3451b..000000000000 --- a/extensions/universalchardet/src/base/nsEUCJPProber.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -// for japanese encoding, obeserve characteristic: -// 1, kana character (or hankaku?) often have hight frequency of appereance -// 2, kana character often exist in group -// 3, certain combination of kana is never used in japanese language - -#include "nsEUCJPProber.h" -#include "nsDebug.h" - -void nsEUCJPProber::Reset(void) { - mCodingSM->Reset(); - mState = eDetecting; - mContextAnalyser.Reset(); - mDistributionAnalyser.Reset(); -} - -nsProbingState nsEUCJPProber::HandleData(const char* aBuf, uint32_t aLen) { - NS_ASSERTION(aLen, "HandleData called with empty buffer"); - uint32_t codingState; - - for (uint32_t i = 0; i < aLen; i++) { - codingState = mCodingSM->NextState(aBuf[i]); - if (codingState == eItsMe) { - mState = eFoundIt; - break; - } - if (codingState == eStart) { - uint32_t charLen = mCodingSM->GetCurrentCharLen(); - - if (i == 0) { - mLastChar[1] = aBuf[0]; - mContextAnalyser.HandleOneChar(mLastChar, charLen); - mDistributionAnalyser.HandleOneChar(mLastChar, charLen); - } else { - mContextAnalyser.HandleOneChar(aBuf + i - 1, charLen); - mDistributionAnalyser.HandleOneChar(aBuf + i - 1, charLen); - } - } - } - - mLastChar[0] = aBuf[aLen - 1]; - - if (mState == eDetecting) - if (mContextAnalyser.GotEnoughData() && - GetConfidence() > SHORTCUT_THRESHOLD) - mState = eFoundIt; - - return mState; -} - -float nsEUCJPProber::GetConfidence(void) { - float contxtCf = mContextAnalyser.GetConfidence(); - float distribCf = mDistributionAnalyser.GetConfidence(); - - return 
(contxtCf > distribCf ? contxtCf : distribCf); -} diff --git a/extensions/universalchardet/src/base/nsEUCJPProber.h b/extensions/universalchardet/src/base/nsEUCJPProber.h deleted file mode 100644 index 5bedcdcc4c58..000000000000 --- a/extensions/universalchardet/src/base/nsEUCJPProber.h +++ /dev/null @@ -1,42 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -// for S-JIS encoding, obeserve characteristic: -// 1, kana character (or hankaku?) often have hight frequency of appereance -// 2, kana character often exist in group -// 3, certain combination of kana is never used in japanese language - -#ifndef nsEUCJPProber_h__ -#define nsEUCJPProber_h__ - -#include "nsCharSetProber.h" -#include "nsCodingStateMachine.h" -#include "JpCntx.h" -#include "CharDistribution.h" - -class nsEUCJPProber : public nsCharSetProber { - public: - nsEUCJPProber() { - mCodingSM = new nsCodingStateMachine(&EUCJPSMModel); - Reset(); - } - virtual ~nsEUCJPProber(void) { delete mCodingSM; } - nsProbingState HandleData(const char* aBuf, uint32_t aLen) override; - const char* GetCharSetName() override { return "EUC-JP"; } - nsProbingState GetState(void) override { return mState; } - void Reset(void) override; - float GetConfidence(void) override; - - protected: - nsCodingStateMachine* mCodingSM; - nsProbingState mState; - - EUCJPContextAnalysis mContextAnalyser; - EUCJPDistributionAnalysis mDistributionAnalyser; - - char mLastChar[2]; -}; - -#endif /* nsEUCJPProber_h__ */ diff --git a/extensions/universalchardet/src/base/nsEscCharsetProber.cpp b/extensions/universalchardet/src/base/nsEscCharsetProber.cpp deleted file mode 100644 index 436821956fc8..000000000000 --- a/extensions/universalchardet/src/base/nsEscCharsetProber.cpp +++ /dev/null @@ -1,37 
+0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#include "nsEscCharsetProber.h" -#include "nsUniversalDetector.h" - -nsEscCharSetProber::nsEscCharSetProber() { - mCodingSM = new nsCodingStateMachine(&ISO2022JPSMModel); - mState = eDetecting; - mDetectedCharset = nullptr; -} - -nsEscCharSetProber::~nsEscCharSetProber(void) {} - -void nsEscCharSetProber::Reset(void) { - mState = eDetecting; - mCodingSM->Reset(); - mDetectedCharset = nullptr; -} - -nsProbingState nsEscCharSetProber::HandleData(const char* aBuf, uint32_t aLen) { - uint32_t codingState; - uint32_t i; - - for (i = 0; i < aLen && mState == eDetecting; i++) { - codingState = mCodingSM->NextState(aBuf[i]); - if (codingState == eItsMe) { - mState = eFoundIt; - mDetectedCharset = mCodingSM->GetCodingStateMachine(); - return mState; - } - } - - return mState; -} diff --git a/extensions/universalchardet/src/base/nsEscCharsetProber.h b/extensions/universalchardet/src/base/nsEscCharsetProber.h deleted file mode 100644 index 3dadd31ba425..000000000000 --- a/extensions/universalchardet/src/base/nsEscCharsetProber.h +++ /dev/null @@ -1,31 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#ifndef nsEscCharSetProber_h__ -#define nsEscCharSetProber_h__ - -#include "nsCharSetProber.h" -#include "nsCodingStateMachine.h" -#include "nsAutoPtr.h" - -class nsEscCharSetProber : public nsCharSetProber { - public: - nsEscCharSetProber(); - virtual ~nsEscCharSetProber(void); - nsProbingState HandleData(const char* aBuf, uint32_t aLen) override; - const char* GetCharSetName() override { return mDetectedCharset; } - nsProbingState GetState(void) override { return mState; } - void Reset(void) override; - float GetConfidence(void) override { return (float)0.99; } - - protected: - void GetDistribution(uint32_t aCharLen, const char* aStr); - - nsAutoPtr mCodingSM; - nsProbingState mState; - const char* mDetectedCharset; -}; - -#endif /* nsEscCharSetProber_h__ */ diff --git a/extensions/universalchardet/src/base/nsEscSM.cpp b/extensions/universalchardet/src/base/nsEscSM.cpp deleted file mode 100644 index 9f1223467e9d..000000000000 --- a/extensions/universalchardet/src/base/nsEscSM.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -#include "nsCodingStateMachine.h" - -static const uint32_t ISO2022JP_cls[256 / 8] = { - PCK4BITS(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07 - PCK4BITS(0, 0, 0, 0, 0, 0, 2, 2), // 08 - 0f - PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17 - PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f - PCK4BITS(0, 0, 0, 0, 7, 0, 0, 0), // 20 - 27 - PCK4BITS(3, 0, 0, 0, 0, 0, 0, 0), // 28 - 2f - PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37 - PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f - PCK4BITS(6, 0, 4, 0, 8, 0, 0, 0), // 40 - 47 - PCK4BITS(0, 9, 5, 0, 0, 0, 0, 0), // 48 - 4f - PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57 - PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f - PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67 - PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f - PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77 - PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2) // f8 - ff -}; - -static const uint32_t ISO2022JP_st[9] = { - PCK4BITS(eStart, 3, eError, eStart, eStart, eStart, eStart, - eStart), // 00-07 - PCK4BITS(eStart, eStart, eError, eError, eError, eError, eError, - eError), // 08-0f - PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, - eItsMe), // 10-17 - PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError, - eError), // 18-1f - 
PCK4BITS(eError, 5, eError, eError, eError, 4, eError, eError), // 20-27 - PCK4BITS(eError, eError, eError, 6, eItsMe, eError, eItsMe, - eError), // 28-2f - PCK4BITS(eError, eError, eError, eError, eError, eError, eItsMe, - eItsMe), // 30-37 - PCK4BITS(eError, eError, eError, eItsMe, eError, eError, eError, - eError), // 38-3f - PCK4BITS(eError, eError, eError, eError, eItsMe, eError, eStart, - eStart) // 40-47 -}; - -static const uint32_t ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - -const SMModel ISO2022JPSMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls}, - 10, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st}, - CHAR_LEN_TABLE(ISO2022JPCharLenTable), - "ISO-2022-JP", -}; diff --git a/extensions/universalchardet/src/base/nsMBCSGroupProber.cpp b/extensions/universalchardet/src/base/nsMBCSGroupProber.cpp deleted file mode 100644 index 18e387212d3b..000000000000 --- a/extensions/universalchardet/src/base/nsMBCSGroupProber.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -#include - -#include "nsMBCSGroupProber.h" -#include "nsUniversalDetector.h" - -#if defined(DEBUG_chardet) || defined(DEBUG_jgmyers) -const char* ProberName[] = { - "UTF8", - "SJIS", - "EUCJP", -}; - -#endif - -nsMBCSGroupProber::nsMBCSGroupProber() { - mProbers[0] = new nsUTF8Prober(); - mProbers[1] = new nsSJISProber(); - mProbers[2] = new nsEUCJPProber(); - Reset(); -} - -nsMBCSGroupProber::~nsMBCSGroupProber() { - for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) { - delete mProbers[i]; - } -} - -const char* nsMBCSGroupProber::GetCharSetName() { - if (mBestGuess == -1) { - GetConfidence(); - if (mBestGuess == -1) mBestGuess = 0; - } - return mProbers[mBestGuess]->GetCharSetName(); -} - -void nsMBCSGroupProber::Reset(void) { - mActiveNum = 0; - for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) { - if (mProbers[i]) { - mProbers[i]->Reset(); - mIsActive[i] = true; - ++mActiveNum; - } else - mIsActive[i] = false; - } - mBestGuess = -1; - mState = eDetecting; - mKeepNext = 0; -} - -nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, uint32_t aLen) { - nsProbingState st; - uint32_t start = 0; - uint32_t keepNext = mKeepNext; - - // do filtering to reduce load to probers - for (uint32_t pos = 0; pos < aLen; ++pos) { - if (aBuf[pos] & 0x80) { - if (!keepNext) start = pos; - keepNext = 2; - } else if (keepNext) { - if (--keepNext == 0) { - for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) { - if (!mIsActive[i]) continue; - st = mProbers[i]->HandleData(aBuf + start, pos + 1 - start); - if (st == eFoundIt) { - mBestGuess = i; - mState = eFoundIt; - return mState; - } - } - } - } - } - - if (keepNext) { - for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) { - if (!mIsActive[i]) continue; - st = mProbers[i]->HandleData(aBuf + start, aLen - start); - if (st == eFoundIt) { - mBestGuess = i; - mState = eFoundIt; - return mState; - } - } - } - mKeepNext = keepNext; - - return mState; -} - -float nsMBCSGroupProber::GetConfidence(void) { - uint32_t i; - float bestConf = 0.0, cf; - 
- switch (mState) { - case eFoundIt: - return (float)0.99; - case eNotMe: - return (float)0.01; - default: - for (i = 0; i < NUM_OF_PROBERS; i++) { - if (!mIsActive[i]) continue; - cf = mProbers[i]->GetConfidence(); - if (bestConf < cf) { - bestConf = cf; - mBestGuess = i; - } - } - } - return bestConf; -} - -#ifdef DEBUG_chardet -void nsMBCSGroupProber::DumpStatus() { - uint32_t i; - float cf; - - GetConfidence(); - for (i = 0; i < NUM_OF_PROBERS; i++) { - if (!mIsActive[i]) - printf(" MBCS inactive: [%s] (confidence is too low).\r\n", - ProberName[i]); - else { - cf = mProbers[i]->GetConfidence(); - printf(" MBCS %1.3f: [%s]\r\n", cf, ProberName[i]); - } - } -} -#endif - -#ifdef DEBUG_jgmyers -void nsMBCSGroupProber::GetDetectorState( - nsUniversalDetector::DetectorState ( - &states)[nsUniversalDetector::NumDetectors], - uint32_t& offset) { - for (uint32_t i = 0; i < NUM_OF_PROBERS; ++i) { - states[offset].name = ProberName[i]; - states[offset].isActive = mIsActive[i]; - states[offset].confidence = - mIsActive[i] ? mProbers[i]->GetConfidence() : 0.0; - ++offset; - } -} -#endif /* DEBUG_jgmyers */ diff --git a/extensions/universalchardet/src/base/nsMBCSGroupProber.h b/extensions/universalchardet/src/base/nsMBCSGroupProber.h deleted file mode 100644 index efe761521bc9..000000000000 --- a/extensions/universalchardet/src/base/nsMBCSGroupProber.h +++ /dev/null @@ -1,43 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#ifndef nsMBCSGroupProber_h__ -#define nsMBCSGroupProber_h__ - -#include "nsSJISProber.h" -#include "nsUTF8Prober.h" -#include "nsEUCJPProber.h" - -#define NUM_OF_PROBERS 3 - -class nsMBCSGroupProber : public nsCharSetProber { - public: - nsMBCSGroupProber(); - virtual ~nsMBCSGroupProber(); - nsProbingState HandleData(const char* aBuf, uint32_t aLen) override; - const char* GetCharSetName() override; - nsProbingState GetState(void) override { return mState; } - void Reset(void) override; - float GetConfidence(void) override; - -#ifdef DEBUG_chardet - void DumpStatus(); -#endif -#ifdef DEBUG_jgmyers - void GetDetectorState(nsUniversalDetector::DetectorState ( - &states)[nsUniversalDetector::NumDetectors], - uint32_t& offset); -#endif - - protected: - nsProbingState mState; - nsCharSetProber* mProbers[NUM_OF_PROBERS]; - bool mIsActive[NUM_OF_PROBERS]; - int32_t mBestGuess; - uint32_t mActiveNum; - uint32_t mKeepNext; -}; - -#endif /* nsMBCSGroupProber_h__ */ diff --git a/extensions/universalchardet/src/base/nsMBCSSM.cpp b/extensions/universalchardet/src/base/nsMBCSSM.cpp deleted file mode 100644 index 771ea1fcf681..000000000000 --- a/extensions/universalchardet/src/base/nsMBCSSM.cpp +++ /dev/null @@ -1,200 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include "nsCodingStateMachine.h" - -/* -Modification from frank tang's original work: -. 0x00 is allowed as a legal character. Since some web pages contains this char -in text stream. 
-*/ - -static const uint32_t EUCJP_cls[256 / 8] = { - // PCK4BITS(5,4,4,4,4,4,4,4), // 00 - 07 - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 00 - 07 - PCK4BITS(4, 4, 4, 4, 4, 4, 5, 5), // 08 - 0f - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 10 - 17 - PCK4BITS(4, 4, 4, 5, 4, 4, 4, 4), // 18 - 1f - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 20 - 27 - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 28 - 2f - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 30 - 37 - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 38 - 3f - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 40 - 47 - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 48 - 4f - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 50 - 57 - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 58 - 5f - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 60 - 67 - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 68 - 6f - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 70 - 77 - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 78 - 7f - PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // 80 - 87 - PCK4BITS(5, 5, 5, 5, 5, 5, 1, 3), // 88 - 8f - PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // 90 - 97 - PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // 98 - 9f - PCK4BITS(5, 2, 2, 2, 2, 2, 2, 2), // a0 - a7 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df - PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e0 - e7 - PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e8 - ef - PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // f0 - f7 - PCK4BITS(0, 0, 0, 0, 0, 0, 0, 5) // f8 - ff -}; - -static const uint32_t EUCJP_st[5] = { - PCK4BITS(3, 4, 3, 5, eStart, eError, eError, eError), // 00-07 - PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, - eItsMe), // 08-0f - PCK4BITS(eItsMe, eItsMe, eStart, eError, eStart, eError, eError, - eError), // 10-17 - PCK4BITS(eError, eError, eStart, eError, eError, eError, 3, - eError), // 18-1f - PCK4BITS(3, eError, eError, 
eError, eStart, eStart, eStart, - eStart) // 20-27 -}; - -static const uint32_t EUCJPCharLenTable[] = {2, 2, 2, 3, 1, 0}; - -const SMModel EUCJPSMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_cls}, - 6, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_st}, - CHAR_LEN_TABLE(EUCJPCharLenTable), - "EUC-JP", -}; - -// sjis - -static const uint32_t SJIS_cls[256 / 8] = { - // PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07 - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07 - PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17 - PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27 - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 30 - 37 - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 38 - 3f - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 40 - 47 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 48 - 4f - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 50 - 57 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 58 - 5f - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 60 - 67 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 68 - 6f - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 70 - 77 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 1), // 78 - 7f - PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 80 - 87 - PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 88 - 8f - PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 90 - 97 - PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 98 - 9f - // 0xa0 is illegal in sjis encoding, but some pages does - // contain such byte. We need to be more error forgiven. 
- PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df - PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // e0 - e7 - PCK4BITS(3, 3, 3, 3, 3, 4, 4, 4), // e8 - ef - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // f0 - f7 - PCK4BITS(4, 4, 4, 4, 4, 0, 0, 0) // f8 - ff -}; - -static const uint32_t SJIS_st[3] = { - PCK4BITS(eError, eStart, eStart, 3, eError, eError, eError, - eError), // 00-07 - PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, - eItsMe), // 08-0f - PCK4BITS(eItsMe, eItsMe, eError, eError, eStart, eStart, eStart, - eStart) // 10-17 -}; - -static const uint32_t SJISCharLenTable[] = {0, 1, 1, 2, 0, 0}; - -const SMModel SJISSMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls}, - 6, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st}, - CHAR_LEN_TABLE(SJISCharLenTable), - "Shift_JIS", -}; - -static const uint32_t UTF8_cls[256 / 8] = { - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07 - PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17 - PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27 - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 30 - 37 - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 38 - 3f - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 40 - 47 - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 48 - 4f - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 50 - 57 - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 58 - 5f - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 60 - 67 - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 68 - 6f - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 70 - 77 - PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 78 - 7f - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 
2), // 80 - 87 - PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f - PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 90 - 97 - PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 98 - 9f - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // a0 - a7 - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // a8 - af - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // b0 - b7 - PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // b8 - bf - PCK4BITS(0, 0, 5, 5, 5, 5, 5, 5), // c0 - c7 - PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // c8 - cf - PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // d0 - d7 - PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // d8 - df - PCK4BITS(6, 7, 7, 7, 7, 7, 7, 7), // e0 - e7 - PCK4BITS(7, 7, 7, 7, 7, 8, 7, 7), // e8 - ef - PCK4BITS(9, 10, 10, 10, 11, 0, 0, 0), // f0 - f7 - PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0) // f8 - ff -}; - -static const uint32_t UTF8_st[15] = { - PCK4BITS(eError, eStart, eError, eError, eError, 3, 4, 5), // 00 - 07 - PCK4BITS(6, 7, 8, 9, eError, eError, eError, eError), // 08 - 0f - PCK4BITS(eError, eError, eError, eError, eError, eError, eError, - eError), // 10 - 17 - PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, - eItsMe), // 18 - 1f - PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eError, eError, eStart, - eStart), // 20 - 27 - PCK4BITS(eStart, eError, eError, eError, eError, eError, eError, - eError), // 28 - 2f - PCK4BITS(eError, eError, eError, eError, 3, eError, eError, - eError), // 30 - 37 - PCK4BITS(eError, eError, eError, eError, eError, eError, 3, 3), // 38 - 3f - PCK4BITS(3, eError, eError, eError, eError, eError, eError, - eError), // 40 - 47 - PCK4BITS(eError, eError, 3, 3, eError, eError, eError, eError), // 48 - 4f - PCK4BITS(eError, eError, eError, eError, eError, eError, 5, 5), // 50 - 57 - PCK4BITS(eError, eError, eError, eError, eError, eError, eError, - eError), // 58 - 5f - PCK4BITS(eError, eError, 5, 5, 5, eError, eError, eError), // 60 - 67 - PCK4BITS(eError, eError, eError, eError, eError, eError, 5, - eError), // 68 - 6f - PCK4BITS(eError, eError, eError, eError, eError, eError, eError, - eError) // 70 - 77 -}; 
- -static const uint32_t UTF8CharLenTable[] = {0, 1, 0, 0, 0, 2, 3, 3, 3, 4, 4, 4}; - -const SMModel UTF8SMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls}, - 12, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st}, - CHAR_LEN_TABLE(UTF8CharLenTable), - "UTF-8", -}; diff --git a/extensions/universalchardet/src/base/nsPkgInt.h b/extensions/universalchardet/src/base/nsPkgInt.h deleted file mode 100644 index 8e08ad6177d5..000000000000 --- a/extensions/universalchardet/src/base/nsPkgInt.h +++ /dev/null @@ -1,43 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#ifndef nsPkgInt_h__ -#define nsPkgInt_h__ -#include "nscore.h" - -typedef enum { eIdxSft4bits = 3, eIdxSft8bits = 2, eIdxSft16bits = 1 } nsIdxSft; - -typedef enum { eSftMsk4bits = 7, eSftMsk8bits = 3, eSftMsk16bits = 1 } nsSftMsk; - -typedef enum { eBitSft4bits = 2, eBitSft8bits = 3, eBitSft16bits = 4 } nsBitSft; - -typedef enum { - eUnitMsk4bits = 0x0000000FL, - eUnitMsk8bits = 0x000000FFL, - eUnitMsk16bits = 0x0000FFFFL -} nsUnitMsk; - -typedef struct nsPkgInt { - nsIdxSft idxsft; - nsSftMsk sftmsk; - nsBitSft bitsft; - nsUnitMsk unitmsk; - const uint32_t* const data; -} nsPkgInt; - -#define PCK16BITS(a, b) ((uint32_t)(((b) << 16) | (a))) - -#define PCK8BITS(a, b, c, d) \ - PCK16BITS(((uint32_t)(((b) << 8) | (a))), ((uint32_t)(((d) << 8) | (c)))) - -#define PCK4BITS(a, b, c, d, e, f, g, h) \ - PCK8BITS(((uint32_t)(((b) << 4) | (a))), ((uint32_t)(((d) << 4) | (c))), \ - ((uint32_t)(((f) << 4) | (e))), ((uint32_t)(((h) << 4) | (g)))) - -#define GETFROMPCK(i, c) \ - (((((c).data)[(i) >> (c).idxsft]) >> (((i) & (c).sftmsk) << (c).bitsft)) & \ - (c).unitmsk) - -#endif /* nsPkgInt_h__ */ diff --git 
a/extensions/universalchardet/src/base/nsSJISProber.cpp b/extensions/universalchardet/src/base/nsSJISProber.cpp deleted file mode 100644 index 7acc0b371c60..000000000000 --- a/extensions/universalchardet/src/base/nsSJISProber.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -// for S-JIS encoding, obeserve characteristic: -// 1, kana character (or hankaku?) often have hight frequency of appereance -// 2, kana character often exist in group -// 3, certain combination of kana is never used in japanese language - -#include "nsSJISProber.h" -#include "nsDebug.h" - -void nsSJISProber::Reset(void) { - mCodingSM->Reset(); - mState = eDetecting; - mContextAnalyser.Reset(); - mDistributionAnalyser.Reset(); -} - -nsProbingState nsSJISProber::HandleData(const char* aBuf, uint32_t aLen) { - NS_ASSERTION(aLen, "HandleData called with empty buffer"); - uint32_t codingState; - - for (uint32_t i = 0; i < aLen; i++) { - codingState = mCodingSM->NextState(aBuf[i]); - if (codingState == eItsMe) { - mState = eFoundIt; - break; - } - if (codingState == eStart) { - uint32_t charLen = mCodingSM->GetCurrentCharLen(); - if (i == 0) { - mLastChar[1] = aBuf[0]; - mContextAnalyser.HandleOneChar(mLastChar + 2 - charLen, charLen); - mDistributionAnalyser.HandleOneChar(mLastChar, charLen); - } else { - mContextAnalyser.HandleOneChar(aBuf + i + 1 - charLen, charLen); - mDistributionAnalyser.HandleOneChar(aBuf + i - 1, charLen); - } - } - } - - mLastChar[0] = aBuf[aLen - 1]; - - if (mState == eDetecting) - if (mContextAnalyser.GotEnoughData() && - GetConfidence() > SHORTCUT_THRESHOLD) - mState = eFoundIt; - - return mState; -} - -float nsSJISProber::GetConfidence(void) { - float contxtCf = mContextAnalyser.GetConfidence(); - 
float distribCf = mDistributionAnalyser.GetConfidence(); - - return (contxtCf > distribCf ? contxtCf : distribCf); -} diff --git a/extensions/universalchardet/src/base/nsSJISProber.h b/extensions/universalchardet/src/base/nsSJISProber.h deleted file mode 100644 index 8a2761eff9cd..000000000000 --- a/extensions/universalchardet/src/base/nsSJISProber.h +++ /dev/null @@ -1,42 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -// for S-JIS encoding, obeserve characteristic: -// 1, kana character (or hankaku?) often have hight frequency of appereance -// 2, kana character often exist in group -// 3, certain combination of kana is never used in japanese language - -#ifndef nsSJISProber_h__ -#define nsSJISProber_h__ - -#include "nsCharSetProber.h" -#include "nsCodingStateMachine.h" -#include "JpCntx.h" -#include "CharDistribution.h" - -class nsSJISProber : public nsCharSetProber { - public: - nsSJISProber() { - mCodingSM = new nsCodingStateMachine(&SJISSMModel); - Reset(); - } - virtual ~nsSJISProber(void) { delete mCodingSM; } - nsProbingState HandleData(const char* aBuf, uint32_t aLen) override; - const char* GetCharSetName() override { return "Shift_JIS"; } - nsProbingState GetState(void) override { return mState; } - void Reset(void) override; - float GetConfidence(void) override; - - protected: - nsCodingStateMachine* mCodingSM; - nsProbingState mState; - - SJISContextAnalysis mContextAnalyser; - SJISDistributionAnalysis mDistributionAnalyser; - - char mLastChar[2]; -}; - -#endif /* nsSJISProber_h__ */ diff --git a/extensions/universalchardet/src/base/nsUTF8Prober.cpp b/extensions/universalchardet/src/base/nsUTF8Prober.cpp deleted file mode 100644 index a4479848b85a..000000000000 --- 
a/extensions/universalchardet/src/base/nsUTF8Prober.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#include "nsUTF8Prober.h" - -void nsUTF8Prober::Reset(void) { - mCodingSM->Reset(); - mNumOfMBChar = 0; - mState = eDetecting; -} - -nsProbingState nsUTF8Prober::HandleData(const char* aBuf, uint32_t aLen) { - uint32_t codingState; - - for (uint32_t i = 0; i < aLen; i++) { - codingState = mCodingSM->NextState(aBuf[i]); - if (codingState == eItsMe) { - mState = eFoundIt; - break; - } - if (codingState == eStart) { - if (mCodingSM->GetCurrentCharLen() >= 2) mNumOfMBChar++; - } - } - - if (mState == eDetecting) - if (GetConfidence() > SHORTCUT_THRESHOLD) mState = eFoundIt; - return mState; -} - -#define ONE_CHAR_PROB (float)0.50 - -float nsUTF8Prober::GetConfidence(void) { - float unlike = (float)0.99; - - if (mNumOfMBChar < 6) { - for (uint32_t i = 0; i < mNumOfMBChar; i++) unlike *= ONE_CHAR_PROB; - return (float)1.0 - unlike; - } else - return (float)0.99; -} diff --git a/extensions/universalchardet/src/base/nsUTF8Prober.h b/extensions/universalchardet/src/base/nsUTF8Prober.h deleted file mode 100644 index 992cea8b0c99..000000000000 --- a/extensions/universalchardet/src/base/nsUTF8Prober.h +++ /dev/null @@ -1,32 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#ifndef nsUTF8Prober_h__ -#define nsUTF8Prober_h__ - -#include "nsCharSetProber.h" -#include "nsCodingStateMachine.h" - -class nsUTF8Prober : public nsCharSetProber { - public: - nsUTF8Prober() { - mNumOfMBChar = 0; - mCodingSM = new nsCodingStateMachine(&UTF8SMModel); - Reset(); - } - virtual ~nsUTF8Prober() { delete mCodingSM; } - nsProbingState HandleData(const char* aBuf, uint32_t aLen) override; - const char* GetCharSetName() override { return "UTF-8"; } - nsProbingState GetState(void) override { return mState; } - void Reset(void) override; - float GetConfidence(void) override; - - protected: - nsCodingStateMachine* mCodingSM; - nsProbingState mState; - uint32_t mNumOfMBChar; -}; - -#endif /* nsUTF8Prober_h__ */ diff --git a/extensions/universalchardet/src/base/nsUniversalDetector.cpp b/extensions/universalchardet/src/base/nsUniversalDetector.cpp deleted file mode 100644 index 263cc5739c21..000000000000 --- a/extensions/universalchardet/src/base/nsUniversalDetector.cpp +++ /dev/null @@ -1,179 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#include "nscore.h" - -#include "nsUniversalDetector.h" - -#include "nsMBCSGroupProber.h" -#include "nsEscCharsetProber.h" - -nsUniversalDetector::nsUniversalDetector() { - mDone = false; - mBestGuess = -1; // illegal value as signal - mInTag = false; - mMultibyteProber = nullptr; - mEscCharSetProber = nullptr; - - mStart = true; - mDetectedCharset = nullptr; - mGotData = false; - mInputState = ePureAscii; - mLastChar = '\0'; -} - -nsUniversalDetector::~nsUniversalDetector() { - delete mMultibyteProber; - delete mEscCharSetProber; -} - -void nsUniversalDetector::Reset() { - mDone = false; - mBestGuess = -1; // illegal value as signal - mInTag = false; - - mStart = true; - mDetectedCharset = nullptr; - mGotData = false; - mInputState = ePureAscii; - mLastChar = '\0'; - - if (mMultibyteProber) { - mMultibyteProber->Reset(); - } - - if (mEscCharSetProber) { - mEscCharSetProber->Reset(); - } -} - -//--------------------------------------------------------------------- -#define SHORTCUT_THRESHOLD (float)0.95 -#define MINIMUM_THRESHOLD (float)0.20 - -nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen) { - if (mDone) return NS_OK; - - if (aLen > 0) mGotData = true; - - // If the data starts with BOM, we know it is UTF - if (mStart) { - mStart = false; - if (aLen >= 2) { - switch (aBuf[0]) { - case '\xEF': - if ((aLen > 2) && ('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) { - // EF BB BF UTF-8 encoded BOM - mDetectedCharset = "UTF-8"; - } - break; - case '\xFE': - if ('\xFF' == aBuf[1]) { - // FE FF UTF-16, big endian BOM - mDetectedCharset = "UTF-16BE"; - } - break; - case '\xFF': - if ('\xFE' == aBuf[1]) { - // FF FE UTF-16, little endian BOM - mDetectedCharset = "UTF-16LE"; - } - break; - } // switch - } - - if (mDetectedCharset) { - mDone = true; - return NS_OK; - } - } - - uint32_t i; - for (i = 0; i < aLen; i++) { - // other than 0xa0, if every othe character is ascii, the page is ascii - if (aBuf[i] & '\x80' && - aBuf[i] != '\xA0') // Since 
many Ascii only page contains NBSP - { - // we got a non-ascii byte (high-byte) - if (mInputState != eHighbyte) { - // adjust state - mInputState = eHighbyte; - - // kill mEscCharSetProber if it is active - if (mEscCharSetProber) { - delete mEscCharSetProber; - mEscCharSetProber = nullptr; - } - - // start multibyte charset prober - if (!mMultibyteProber) { - mMultibyteProber = new nsMBCSGroupProber(); - } - } - } else { - // ok, just pure ascii so far - if ((ePureAscii == mInputState) && (aBuf[i] == '\033')) { - // found escape character - mInputState = eEscAscii; - } - mLastChar = aBuf[i]; - } - } - - nsProbingState st; - switch (mInputState) { - case eEscAscii: - if (nullptr == mEscCharSetProber) { - mEscCharSetProber = new nsEscCharSetProber(); - if (nullptr == mEscCharSetProber) return NS_ERROR_OUT_OF_MEMORY; - } - st = mEscCharSetProber->HandleData(aBuf, aLen); - if (st == eFoundIt) { - mDone = true; - mDetectedCharset = mEscCharSetProber->GetCharSetName(); - } - break; - case eHighbyte: - st = mMultibyteProber->HandleData(aBuf, aLen); - if (st == eFoundIt) { - mDone = true; - mDetectedCharset = mMultibyteProber->GetCharSetName(); - return NS_OK; - } - break; - - default: // pure ascii - ; // do nothing here - } - return NS_OK; -} - -//--------------------------------------------------------------------- -void nsUniversalDetector::DataEnd() { - if (!mGotData) { - // we haven't got any data yet, return immediately - // caller program sometimes call DataEnd before anything has been sent to - // detector - return; - } - - if (mDetectedCharset) { - mDone = true; - Report(mDetectedCharset); - return; - } - - switch (mInputState) { - case eHighbyte: { - // do not report anything because we are not confident of it, that's in - // fact a negative answer - if (mMultibyteProber->GetConfidence() > MINIMUM_THRESHOLD) - Report(mMultibyteProber->GetCharSetName()); - } break; - case eEscAscii: - break; - default:; - } -} diff --git 
a/extensions/universalchardet/src/base/nsUniversalDetector.h b/extensions/universalchardet/src/base/nsUniversalDetector.h deleted file mode 100644 index d16feff2912f..000000000000 --- a/extensions/universalchardet/src/base/nsUniversalDetector.h +++ /dev/null @@ -1,37 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#ifndef nsUniversalDetector_h__ -#define nsUniversalDetector_h__ - -class nsCharSetProber; - -typedef enum { ePureAscii = 0, eEscAscii = 1, eHighbyte = 2 } nsInputState; - -class nsUniversalDetector { - public: - nsUniversalDetector(); - virtual ~nsUniversalDetector(); - virtual nsresult HandleData(const char* aBuf, uint32_t aLen); - virtual void DataEnd(void); - - protected: - virtual void Report(const char* aCharset) = 0; - virtual void Reset(); - nsInputState mInputState; - bool mDone; - bool mInTag; - bool mStart; - bool mGotData; - char mLastChar; - const char* mDetectedCharset; - int32_t mBestGuess; - uint32_t mLanguageFilter; - - nsCharSetProber* mMultibyteProber; - nsCharSetProber* mEscCharSetProber; -}; - -#endif diff --git a/extensions/universalchardet/src/moz.build b/extensions/universalchardet/src/moz.build deleted file mode 100644 index db5b3ff62577..000000000000 --- a/extensions/universalchardet/src/moz.build +++ /dev/null @@ -1,8 +0,0 @@ -# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- -# vim: set filetype=python: -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -DIRS += ['base', 'xpcom'] - diff --git a/extensions/universalchardet/src/xpcom/moz.build b/extensions/universalchardet/src/xpcom/moz.build deleted file mode 100644 index 6039ea4d4b71..000000000000 --- a/extensions/universalchardet/src/xpcom/moz.build +++ /dev/null @@ -1,15 +0,0 @@ -# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- -# vim: set filetype=python: -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -UNIFIED_SOURCES += [ - 'nsUdetXPCOMWrapper.cpp', -] - -FINAL_LIBRARY = 'xul' - -LOCAL_INCLUDES += [ - '../base', -] diff --git a/extensions/universalchardet/src/xpcom/nsUdetXPCOMWrapper.cpp b/extensions/universalchardet/src/xpcom/nsUdetXPCOMWrapper.cpp deleted file mode 100644 index f4f79aeb3bef..000000000000 --- a/extensions/universalchardet/src/xpcom/nsUdetXPCOMWrapper.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#include "nscore.h" - -#include "nsUniversalDetector.h" -#include "nsUdetXPCOMWrapper.h" -#include "nsCharSetProber.h" // for DumpStatus - -#include "nsUniversalCharDetDll.h" -//---- for XPCOM -#include "nsIFactory.h" -#include "nsISupports.h" -#include "nsCOMPtr.h" - -//--------------------------------------------------------------------- -nsXPCOMDetector::nsXPCOMDetector() : nsUniversalDetector() {} -//--------------------------------------------------------------------- -nsXPCOMDetector::~nsXPCOMDetector() {} -//--------------------------------------------------------------------- - -NS_IMPL_ISUPPORTS(nsXPCOMDetector, nsICharsetDetector) - -//--------------------------------------------------------------------- -NS_IMETHODIMP nsXPCOMDetector::Init(nsICharsetDetectionObserver* aObserver) { - NS_ASSERTION(mObserver == nullptr, "Init twice"); - if (nullptr == aObserver) return NS_ERROR_ILLEGAL_VALUE; - - mObserver = aObserver; - return NS_OK; -} -//---------------------------------------------------------- -NS_IMETHODIMP nsXPCOMDetector::DoIt(const char* aBuf, uint32_t aLen, - bool* oDontFeedMe) { - NS_ASSERTION(mObserver != nullptr, "have not init yet"); - - if ((nullptr == aBuf) || (nullptr == oDontFeedMe)) - return NS_ERROR_ILLEGAL_VALUE; - - this->Reset(); - nsresult rv = this->HandleData(aBuf, aLen); - if (NS_FAILED(rv)) return rv; - - if (mDone) { - if (mDetectedCharset) Report(mDetectedCharset); - - *oDontFeedMe = true; - } - *oDontFeedMe = false; - return NS_OK; -} -//---------------------------------------------------------- -NS_IMETHODIMP nsXPCOMDetector::Done() { - NS_ASSERTION(mObserver != nullptr, "have not init yet"); -#ifdef DEBUG_chardet - for (int32_t i = 0; i < NUM_OF_CHARSET_PROBERS; i++) { - // If no data was received the array might stay filled with nulls - // the way it was initialized in the constructor. 
- if (mCharSetProbers[i]) mCharSetProbers[i]->DumpStatus(); - } -#endif - - this->DataEnd(); - return NS_OK; -} -//---------------------------------------------------------- -void nsXPCOMDetector::Report(const char* aCharset) { - NS_ASSERTION(mObserver != nullptr, "have not init yet"); -#ifdef DEBUG_chardet - printf("Universal Charset Detector report charset %s . \r\n", aCharset); -#endif - mObserver->Notify(aCharset, eBestAnswer); -} diff --git a/extensions/universalchardet/src/xpcom/nsUdetXPCOMWrapper.h b/extensions/universalchardet/src/xpcom/nsUdetXPCOMWrapper.h deleted file mode 100644 index 0a41e3b6e262..000000000000 --- a/extensions/universalchardet/src/xpcom/nsUdetXPCOMWrapper.h +++ /dev/null @@ -1,40 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#ifndef _nsUdetXPCOMWrapper_h__ -#define _nsUdetXPCOMWrapper_h__ -#include "nsISupports.h" -#include "nsICharsetDetector.h" -#include "nsIStringCharsetDetector.h" -#include "nsICharsetDetectionObserver.h" -#include "nsCOMPtr.h" -#include "nsIFactory.h" -#include "nsUniversalDetector.h" - -//===================================================================== -class nsXPCOMDetector : public nsUniversalDetector, public nsICharsetDetector { - NS_DECL_ISUPPORTS - public: - nsXPCOMDetector(); - NS_IMETHOD Init(nsICharsetDetectionObserver* aObserver) override; - NS_IMETHOD DoIt(const char* aBuf, uint32_t aLen, bool* oDontFeedMe) override; - NS_IMETHOD Done() override; - - protected: - virtual ~nsXPCOMDetector(); - virtual void Report(const char* aCharset) override; - - private: - nsCOMPtr mObserver; -}; - -//===================================================================== - -class nsJAPSMDetector final : public nsXPCOMDetector { - public: - nsJAPSMDetector() : nsXPCOMDetector() {} -}; - -#endif //_nsUdetXPCOMWrapper_h__ diff --git a/extensions/universalchardet/src/xpcom/nsUniversalCharDetDll.h b/extensions/universalchardet/src/xpcom/nsUniversalCharDetDll.h deleted file mode 100644 index 404cb4518eba..000000000000 --- a/extensions/universalchardet/src/xpcom/nsUniversalCharDetDll.h +++ /dev/null @@ -1,11 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#ifndef nsCharDetDll_h__ -#define nsCharDetDll_h__ - -#include "prtypes.h" - -#endif /* nsCharDetDll_h__ */ diff --git a/intl/Encoding.h b/intl/Encoding.h index 92147a9855b9..a7bfd9151330 100644 --- a/intl/Encoding.h +++ b/intl/Encoding.h @@ -252,6 +252,14 @@ class Encoding final { return encoding_is_ascii_compatible(this); } + /** + * Checks whether this is a Japanese legacy encoding. + */ + inline bool IsJapaneseLegacy() const { + return this == SHIFT_JIS_ENCODING || this == EUC_JP_ENCODING || + this == ISO_2022_JP_ENCODING; + } + /** * Returns the _output encoding_ of this encoding. This is UTF-8 for * UTF-16BE, UTF-16LE and replacement and the encoding itself otherwise. diff --git a/intl/JapaneseDetector.h b/intl/JapaneseDetector.h new file mode 100644 index 000000000000..86efcdcb2a22 --- /dev/null +++ b/intl/JapaneseDetector.h @@ -0,0 +1,124 @@ +// Copyright 2018 Mozilla Foundation. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// Mostly copied and pasted from +// third_party/rust/shift_or_euc/src/lib.rs , so +// "top-level directory of this distribution" above refers to +// third_party/rust/shift_or_euc/ + +#ifndef mozilla_JapaneseDetector_h +#define mozilla_JapaneseDetector_h + +#include "mozilla/Encoding.h" + +namespace mozilla { +class JapaneseDetector; +}; // namespace mozilla + +#define SHIFT_OR_EUC_DETECTOR mozilla::JapaneseDetector + +#include "shift_or_euc.h" + +namespace mozilla { + +/** + * A Japanese legacy encoding detector for detecting between Shift_JIS, + * EUC-JP, and, optionally, ISO-2022-JP _given_ the assumption that the + * encoding is one of those. + * + * # Principle of Operation + * + * The detector is based on two observations: + * + * 1. 
The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or + * EUC-JP, so encountering such an escape sequence (before non-ASCII has been + * encountered) can be taken as indication of ISO-2022-JP. + * 2. When normal (full-width) kana or common kanji encoded as Shift_JIS is + * decoded as EUC-JP, or vice versa, the result is either an error or + * half-width katakana, and it's very uncommon for Japanese HTML to have + * half-width katakana character before a normal kana or common kanji + * character. Therefore, if decoding as Shift_JIS results in error or + * half-width katakana, the detector decides that the content is EUC-JP, and + * vice versa. + * + * # Failure Modes + * + * The detector gives the wrong answer if the text has a half-width katakana + * character before normal kana or common kanji. Some uncommon kanji are + * undecidable. (All JIS X 0208 Level 1 kanji are decidable.) + * + * The half-width katakana issue is mainly relevant for old 8-bit JIS X + * 0201-only text files that would decode correctly as Shift_JIS but that the + * detector detects as EUC-JP. + * + * The undecidable kanji issue does not realistically show up when a full + * document is fed to the detector, because, realistically, in a full + * document, there is at least one kana or common kanji. It can occur, + * though, if the detector is only run on a prefix of a document and the + * prefix only contains the title of the document. It is possible for + * document title to consist entirely of undecidable kanji. (Indeed, + * Japanese Wikipedia has articles with such titles.) If the detector is + * undecided, a fallback to Shift_JIS should be used. + */ +class JapaneseDetector final { + public: + ~JapaneseDetector() {} + + static void operator delete(void* aDetector) { + shift_or_euc_detector_free(reinterpret_cast<JapaneseDetector*>(aDetector)); + } + + /** + * Instantiates the detector. If `aAllow2022` is `true` the possible + * guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. 
If + * `aAllow2022` is `false`, the possible guesses are Shift_JIS, EUC-JP, + * and undecided. + */ + static inline UniquePtr<JapaneseDetector> Create(bool aAllow2022) { + UniquePtr<JapaneseDetector> detector(shift_or_euc_detector_new(aAllow2022)); + return detector; + } + + /** + * Feeds bytes to the detector. If `aLast` is `true` the end of the stream + * is considered to occur immediately after the end of `aBuffer`. + * Otherwise, the stream is expected to continue. `aBuffer` may be empty. + * + * If you're running the detector only on a prefix of a complete + * document, _do not_ pass `aLast` as `true` after the prefix if the + * stream as a whole still contains more content. + * + * Returns `SHIFT_JIS_ENCODING` if the detector guessed + * Shift_JIS. Returns `EUC_JP_ENCODING` if the detector + * guessed EUC-JP. Returns `ISO_2022_JP_ENCODING` if the + * detector guessed ISO-2022-JP (only possible if `true` was passed as + * `aAllow2022` when instantiating the detector). Returns `nullptr` if the + * detector is undecided. If `nullptr` is returned even when passing `true` + * as `aLast`, falling back to Shift_JIS is the best guess for Web + * purposes. + * + * Do not call again after the method has returned non-`nullptr` or after + * the method has been called with `true` as `aLast`. (Asserts if the + * previous sentence isn't adhered to.) 
+ */ + inline const mozilla::Encoding* Feed(Span<const uint8_t> aBuffer, + bool aLast) { + return shift_or_euc_detector_feed(this, aBuffer.Elements(), + aBuffer.Length(), aLast); + } + + private: + JapaneseDetector() = delete; + JapaneseDetector(const JapaneseDetector&) = delete; + JapaneseDetector& operator=(const JapaneseDetector&) = delete; +}; + +}; // namespace mozilla + +#endif // mozilla_JapaneseDetector_h \ No newline at end of file diff --git a/intl/moz.build b/intl/moz.build index 5370a6def9ec..05d94899a111 100644 --- a/intl/moz.build +++ b/intl/moz.build @@ -26,11 +26,13 @@ DIRS += [ EXPORTS.mozilla += [ 'Encoding.h', + 'JapaneseDetector.h', ] EXPORTS += [ '../third_party/rust/encoding_c/include/encoding_rs.h', '../third_party/rust/encoding_c/include/encoding_rs_statics.h', + '../third_party/rust/shift_or_euc_c/include/shift_or_euc.h', ] with Files("**"): diff --git a/modules/libpref/init/StaticPrefList.h b/modules/libpref/init/StaticPrefList.h index 6528a44b657a..384e2a362310 100644 --- a/modules/libpref/init/StaticPrefList.h +++ b/modules/libpref/init/StaticPrefList.h @@ -3373,6 +3373,15 @@ VARCACHE_PREF( RelaxedAtomicBool, false ) +// Whether ISO-2022-JP is a permitted content-based encoding detection +// outcome. +VARCACHE_PREF( + Live, + "intl.charset.detector.iso2022jp.allowed", + intl_charset_detector_iso2022jp_allowed, + bool, true +) + //--------------------------------------------------------------------------- // Prefs starting with "layers." 
//--------------------------------------------------------------------------- diff --git a/parser/html/moz.build b/parser/html/moz.build index 9b33dd44b651..335dd4e14ac0 100644 --- a/parser/html/moz.build +++ b/parser/html/moz.build @@ -95,8 +95,6 @@ FINAL_LIBRARY = 'xul' LOCAL_INCLUDES += [ '/dom/base', - '/extensions/universalchardet/src/base', - '/extensions/universalchardet/src/xpcom', '/intl/chardet', ] diff --git a/parser/html/nsHtml5StreamParser.cpp b/parser/html/nsHtml5StreamParser.cpp index 19a6134032d6..486d76b4a8ba 100644 --- a/parser/html/nsHtml5StreamParser.cpp +++ b/parser/html/nsHtml5StreamParser.cpp @@ -31,7 +31,6 @@ #include "nsIThreadRetargetableRequest.h" #include "nsPrintfCString.h" #include "nsNetUtil.h" -#include "nsUdetXPCOMWrapper.h" #include "nsXULAppAPI.h" #include "mozilla/SchedulerGroup.h" #include "nsJSEnvironment.h" @@ -156,7 +155,7 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor, mBomState(eBomState::BOM_SNIFFING_NOT_STARTED), mCharsetSource(kCharsetUninitialized), mEncoding(WINDOWS_1252_ENCODING), - mFeedChardetIfEncoding(nullptr), + mFeedChardet(true), mReparseForbidden(false), mLastBuffer(nullptr), // Will be filled when starting mExecutor(aExecutor), @@ -181,6 +180,8 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor, mEventTarget(nsHtml5Module::GetStreamParserThread()->SerialEventTarget()), mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor)), mLoadFlusher(new nsHtml5LoadFlusher(aExecutor)), + mJapaneseDetector(mozilla::JapaneseDetector::Create( + StaticPrefs::intl_charset_detector_iso2022jp_allowed())), mInitialEncodingWasFromParentFrame(false), mHasHadErrors(false), mDecodingLocalFileAsUTF8(false), @@ -210,16 +211,11 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor, nsAutoCString detectorName; Preferences::GetLocalizedCString("intl.charset.detector", detectorName); if (!detectorName.IsEmpty()) { - // We recognize one of the three magic strings 
for the following languages. + // We recognize one of the two magic strings for Russian and Ukrainian. if (detectorName.EqualsLiteral("ruprob")) { mChardet = new nsRUProbDetector(); - mFeedChardetIfEncoding = WINDOWS_1251_ENCODING; } else if (detectorName.EqualsLiteral("ukprob")) { mChardet = new nsUKProbDetector(); - mFeedChardetIfEncoding = WINDOWS_1251_ENCODING; - } else if (detectorName.EqualsLiteral("ja_parallel_state_machine")) { - mChardet = new nsJAPSMDetector(); - mFeedChardetIfEncoding = SHIFT_JIS_ENCODING; } if (mChardet) { (void)mChardet->Init(this); @@ -263,7 +259,7 @@ NS_IMETHODIMP nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) { NS_ASSERTION(IsParserThread(), "Wrong thread!"); if (aConf == eBestAnswer || aConf == eSureAnswer) { - mFeedChardetIfEncoding = nullptr; // just in case + mFeedChardet = false; // just in case auto encoding = Encoding::ForLabelNoReplacement(nsDependentCString(aCharset)); if (!encoding) { @@ -271,8 +267,8 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) { } if (HasDecoder()) { if (mEncoding == encoding) { - NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection, - "Why are we running chardet at all?"); + MOZ_ASSERT(mCharsetSource < kCharsetFromAutoDetection, + "Why are we running chardet at all?"); mCharsetSource = kCharsetFromAutoDetection; mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); } else { @@ -294,6 +290,62 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) { return NS_OK; } +void nsHtml5StreamParser::FeedJapaneseDetector(Span aBuffer, + bool aLast) { + const Encoding* detected = mJapaneseDetector->Feed(aBuffer, aLast); + if (!detected) { + return; + } + mFeedChardet = false; + if (mDecodingLocalFileAsUTF8 && detected != ISO_2022_JP_ENCODING) { + return; + } + int32_t source = kCharsetFromAutoDetection; + if (mCharsetSource == kCharsetFromParentForced || + mCharsetSource == kCharsetFromUserForced) { + source = 
kCharsetFromUserForcedAutoDetection; + } + if (detected == mEncoding) { + MOZ_ASSERT(mCharsetSource < source, "Why are we running chardet at all?"); + mCharsetSource = source; + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); + } else if (HasDecoder()) { + // We've already committed to a decoder. Request a reload from the + // docshell. + mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(detected), source, 0); + FlushTreeOpsAndDisarmTimer(); + Interrupt(); + } else { + // Got a confident answer from the sniffing buffer. That code will + // take care of setting up the decoder. + mEncoding = WrapNotNull(detected); + mCharsetSource = source; + mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); + } +} + +void nsHtml5StreamParser::FeedDetector(Span aBuffer, + bool aLast) { + if (mEncoding->IsJapaneseLegacy()) { + FeedJapaneseDetector(aBuffer, aLast); + } else if (mEncoding == WINDOWS_1251_ENCODING && mChardet && + !mDecodingLocalFileAsUTF8) { + if (!aBuffer.IsEmpty()) { + bool dontFeed = false; + mozilla::Unused << mChardet->DoIt((const char*)aBuffer.Elements(), + aBuffer.Length(), &dontFeed); + if (dontFeed) { + mFeedChardet = false; + } + } + if (aLast) { + mozilla::Unused << mChardet->Done(); + } + } else { + mFeedChardet = false; + } +} + void nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL) { if (recordreplay::IsRecordingOrReplaying()) { nsAutoCString spec; @@ -335,6 +387,11 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment( MOZ_ASSERT(mEncoding != UTF_8_ENCODING); mUnicodeDecoder = UTF_8_ENCODING->NewDecoderWithBOMRemoval(); } else { + if (mCharsetSource >= kCharsetFromAutoDetection && + !(mCharsetSource == kCharsetFromUserForced || + mCharsetSource == kCharsetFromParentForced)) { + mFeedChardet = false; + } mDecodingLocalFileAsUTF8 = false; mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval(); } @@ -354,7 +411,7 @@ nsresult nsHtml5StreamParser::SetupDecodingFromBom( mDecodingLocalFileAsUTF8 = false; 
mUnicodeDecoder = mEncoding->NewDecoderWithoutBOMHandling(); mCharsetSource = kCharsetFromByteOrderMark; - mFeedChardetIfEncoding = nullptr; + mFeedChardet = false; mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); mSniffingBuffer = nullptr; mMetaScanner = nullptr; @@ -412,7 +469,7 @@ void nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin( } mCharsetSource = kCharsetFromIrreversibleAutoDetection; mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); - mFeedChardetIfEncoding = nullptr; + mFeedChardet = false; mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16", true, 0); } @@ -479,12 +536,38 @@ static void HandleProcessingInstruction(void* aUserData, XML_StopParser(ud->mExpat, false); } +void nsHtml5StreamParser::FinalizeSniffingWithDetector( + Span aFromSegment, uint32_t aCountToSniffingLimit, + bool aEof) { + if (mSniffingBuffer) { + FeedDetector(MakeSpan(mSniffingBuffer.get(), mSniffingLength), false); + } + if (mFeedChardet && !aFromSegment.IsEmpty()) { + // Avoid buffer boundary-dependent behavior when + // reparsing is forbidden. If reparse is forbidden, + // act as if we only saw the first 1024 bytes. + // When reparsing isn't forbidden, buffer boundaries + // can have an effect on whether the page is loaded + // once or twice. :-( + FeedDetector(mReparseForbidden ? aFromSegment.To(aCountToSniffingLimit) + : aFromSegment, + false); + } + if (mFeedChardet && aEof && + (!mReparseForbidden || aCountToSniffingLimit == aFromSegment.Length())) { + // Don't signal EOF if reparse is forbidden and we didn't pass all input + // to the detector above. 
+ mFeedChardet = false; + FeedDetector(Span(), true); + } +} + nsresult nsHtml5StreamParser::FinalizeSniffing(Span aFromSegment, uint32_t aCountToSniffingLimit, bool aEof) { - NS_ASSERTION(IsParserThread(), "Wrong thread!"); - NS_ASSERTION(mCharsetSource < kCharsetFromParentForced, - "Should not finalize sniffing when using forced charset."); + MOZ_ASSERT(IsParserThread(), "Wrong thread!"); + MOZ_ASSERT(mCharsetSource < kCharsetFromUserForcedAutoDetection, + "Should not finalize sniffing with strong decision already made."); if (mMode == VIEW_SOURCE_XML) { static const XML_Memory_Handling_Suite memsuite = { (void* (*)(size_t))moz_xmalloc, (void* (*)(void*, size_t))moz_xrealloc, @@ -547,50 +630,15 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span aFromSegment, } // meta scan failed. - if (mCharsetSource >= kCharsetFromHintPrevDoc) { - mFeedChardetIfEncoding = nullptr; - return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment); + if (mCharsetSource < kCharsetFromMetaPrescan) { + // Check for BOMless UTF-16 with Basic + // Latin content for compat with IE. See bug 631751. + SniffBOMlessUTF16BasicLatin(aFromSegment.To(aCountToSniffingLimit)); } - // Check for BOMless UTF-16 with Basic - // Latin content for compat with IE. See bug 631751. - SniffBOMlessUTF16BasicLatin(aFromSegment.To(aCountToSniffingLimit)); // the charset may have been set now // maybe try chardet now; - if ((mFeedChardetIfEncoding == mEncoding) && !mDecodingLocalFileAsUTF8) { - bool dontFeed; - nsresult rv; - if (mSniffingBuffer) { - rv = mChardet->DoIt((const char*)mSniffingBuffer.get(), mSniffingLength, - &dontFeed); - if (dontFeed) { - mFeedChardetIfEncoding = nullptr; - } - NS_ENSURE_SUCCESS(rv, rv); - } - if ((mFeedChardetIfEncoding == mEncoding) && !aFromSegment.IsEmpty()) { - rv = mChardet->DoIt( - (const char*)aFromSegment.Elements(), - // Avoid buffer boundary-dependent behavior when - // reparsing is forbidden. 
If reparse is forbidden, - // act as if we only saw the first 1024 bytes. - // When reparsing isn't forbidden, buffer boundaries - // can have an effect on whether the page is loaded - // once or twice. :-( - mReparseForbidden ? aCountToSniffingLimit : aFromSegment.Length(), - &dontFeed); - if (dontFeed) { - mFeedChardetIfEncoding = nullptr; - } - NS_ENSURE_SUCCESS(rv, rv); - } - if ((mFeedChardetIfEncoding == mEncoding) && (aEof || mReparseForbidden)) { - // mReparseForbidden is checked so that we get to use the sniffing - // buffer with the best guess so far if we aren't allowed to guess - // better later. - mFeedChardetIfEncoding = nullptr; - rv = mChardet->Done(); - NS_ENSURE_SUCCESS(rv, rv); - } + if (mFeedChardet) { + FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit, aEof); // fall thru; callback may have changed charset } if (mCharsetSource == kCharsetUninitialized) { @@ -600,7 +648,7 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span aFromSegment, mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); } else if (mMode == LOAD_AS_DATA && mCharsetSource == kCharsetFromFallback) { NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR"); - NS_ASSERTION(!mFeedChardetIfEncoding, "Should not feed chardet for XHR"); + NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR"); NS_ASSERTION(mEncoding == UTF_8_ENCODING, "XHR should default to UTF-8"); // Now mark charset source as non-weak to signal that we have a decision mCharsetSource = kCharsetFromDocTypeDefault; @@ -687,7 +735,6 @@ nsresult nsHtml5StreamParser::SniffStreamBytes( // earlier call to SetDocumentCharset(), since we didn't find a BOM and // overwrite mEncoding. (Note that if the user has overridden the charset, // we don't come here but check for XSS-dangerous charsets first.) 
- mFeedChardetIfEncoding = nullptr; mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment); } @@ -719,12 +766,16 @@ nsresult nsHtml5StreamParser::SniffStreamBytes( (encoding->IsAsciiCompatible() || encoding == ISO_2022_JP_ENCODING)) { // Honor override + if (mEncoding->IsJapaneseLegacy()) { + mFeedChardet = true; + FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit, + false); + } return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( aFromSegment); } mEncoding = WrapNotNull(encoding); mCharsetSource = kCharsetFromMetaPrescan; - mFeedChardetIfEncoding = nullptr; mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( aFromSegment); @@ -733,6 +784,10 @@ nsresult nsHtml5StreamParser::SniffStreamBytes( if (mCharsetSource == kCharsetFromParentForced || mCharsetSource == kCharsetFromUserForced) { // meta not found, honor override + if (mEncoding->IsJapaneseLegacy()) { + mFeedChardet = true; + FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit, false); + } return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment); } return FinalizeSniffing(aFromSegment, countToSniffingLimit, false); @@ -761,7 +816,6 @@ nsresult nsHtml5StreamParser::SniffStreamBytes( } mEncoding = WrapNotNull(encoding); mCharsetSource = kCharsetFromMetaPrescan; - mFeedChardetIfEncoding = nullptr; mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment); } @@ -841,6 +895,12 @@ void nsHtml5StreamParser::ReDecodeLocalFile() { mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval(); mHasHadErrors = false; + // We need the detector to start with fresh state. + // Turn off ISO-2022-JP detection, because if this doc was + // ISO-2022-JP, it would have already been detected. 
+ mJapaneseDetector = mozilla::JapaneseDetector::Create(false); + mFeedChardet = true; + // Throw away previous decoded data mLastBuffer = mFirstBuffer; mLastBuffer->next = nullptr; @@ -856,7 +916,7 @@ void nsHtml5StreamParser::ReDecodeLocalFile() { void nsHtml5StreamParser::CommitLocalFileToUTF8() { MOZ_ASSERT(mDecodingLocalFileAsUTF8); mDecodingLocalFileAsUTF8 = false; - mFeedChardetIfEncoding = nullptr; + mFeedChardet = false; mEncoding = UTF_8_ENCODING; mCharsetSource = kCharsetFromFileURLGuess; mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); @@ -1000,7 +1060,7 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) { // This is the old Gecko behavior but the HTML5 spec disagrees. // Don't reparse on POST. mReparseForbidden = true; - mFeedChardetIfEncoding = nullptr; // can't restart anyway + mFeedChardet = false; // can't restart anyway } } @@ -1031,8 +1091,10 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) { mInitialEncodingWasFromParentFrame = true; } - if (mCharsetSource >= kCharsetFromAutoDetection) { - mFeedChardetIfEncoding = nullptr; + if (mCharsetSource >= kCharsetFromAutoDetection && + !(mCharsetSource == kCharsetFromParentForced || + mCharsetSource == kCharsetFromUserForced)) { + mFeedChardet = false; } if (mCharsetSource < kCharsetFromUtf8OnlyMime) { @@ -1041,11 +1103,11 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) { return NS_OK; } - // We are reloading a document.open()ed doc or loading JSON/WebVTT/etc. into - // a browsing context. In the latter case, there's no need to remove the - // BOM manually here, because the UTF-8 decoder removes it. + // We are loading JSON/WebVTT/etc. into a browsing context. + // There's no need to remove the BOM manually here, because + // the UTF-8 decoder removes it. mReparseForbidden = true; - mFeedChardetIfEncoding = nullptr; + mFeedChardet = false; // Instantiate the converter here to avoid BOM sniffing. 
mDecodingLocalFileAsUTF8 = false; @@ -1085,8 +1147,9 @@ void nsHtml5StreamParser::DoStopRequest() { return; } } - if ((mFeedChardetIfEncoding == mEncoding) && !mDecodingLocalFileAsUTF8) { - mChardet->Done(); + if (mFeedChardet) { + mFeedChardet = false; + FeedDetector(Span(), true); } MOZ_ASSERT(mUnicodeDecoder, @@ -1246,13 +1309,8 @@ void nsHtml5StreamParser::DoDataAvailable(Span aBuffer) { nsresult rv; if (HasDecoder()) { - if ((mFeedChardetIfEncoding == mEncoding) && !mDecodingLocalFileAsUTF8) { - bool dontFeed; - mChardet->DoIt((const char*)aBuffer.Elements(), aBuffer.Length(), - &dontFeed); - if (dontFeed) { - mFeedChardetIfEncoding = nullptr; - } + if (mFeedChardet) { + FeedDetector(aBuffer, false); } rv = WriteStreamBytes(aBuffer); } else { @@ -1411,7 +1469,7 @@ const Encoding* nsHtml5StreamParser::PreferredForInternalEncodingDecl( } } mCharsetSource = kCharsetFromMetaTag; // become confident - mFeedChardetIfEncoding = nullptr; // don't feed chardet when confident + mFeedChardet = false; // don't feed chardet when confident return nullptr; } @@ -1450,7 +1508,7 @@ bool nsHtml5StreamParser::internalEncodingDeclaration(nsHtml5String aEncoding) { // Avoid having the chardet ask for another restart after this restart // request. 
- mFeedChardetIfEncoding = nullptr; + mFeedChardet = false; mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(encoding), kCharsetFromMetaTag, mTokenizer->getLineNumber()); FlushTreeOpsAndDisarmTimer(); diff --git a/parser/html/nsHtml5StreamParser.h b/parser/html/nsHtml5StreamParser.h index e9dd80be431b..66f99a986894 100644 --- a/parser/html/nsHtml5StreamParser.h +++ b/parser/html/nsHtml5StreamParser.h @@ -11,6 +11,7 @@ #include "nsICharsetDetectionObserver.h" #include "nsHtml5MetaScanner.h" #include "mozilla/Encoding.h" +#include "mozilla/JapaneseDetector.h" #include "nsHtml5TreeOpExecutor.h" #include "nsHtml5OwningUTF16Buffer.h" #include "nsIInputStream.h" @@ -148,6 +149,16 @@ class nsHtml5StreamParser final : public nsICharsetDetectionObserver { // Not from an external interface + /** + * Pass a buffer to the JapaneseDetector. + */ + void FeedJapaneseDetector(mozilla::Span aBuffer, bool aLast); + + /** + * Pass a buffer to the Japanese or Cyrillic detector as appropriate. + */ + void FeedDetector(mozilla::Span aBuffer, bool aLast); + /** * Call this method once you've created a parser, and want to instruct it * about what charset to load @@ -282,6 +293,12 @@ class nsHtml5StreamParser final : public nsICharsetDetectionObserver { */ void SniffBOMlessUTF16BasicLatin(mozilla::Span aFromSegment); + /** + * Write the start of the stream to detector. + */ + void FinalizeSniffingWithDetector(mozilla::Span aFromSegment, + uint32_t aCountToSniffingLimit, bool aEof); + /** * scan failed. Try chardet if applicable. After this, the * the parser will have some encoding even if a last resolt fallback. @@ -411,9 +428,9 @@ class nsHtml5StreamParser final : public nsICharsetDetectionObserver { NotNull mEncoding; /** - * The character encoding that is the base expectation for detection. + * Whether the Cyrillic or Japanese detector should still be fed. 
*/ - const Encoding* mFeedChardetIfEncoding; + bool mFeedChardet; /** * Whether reparse is forbidden @@ -529,10 +546,15 @@ class nsHtml5StreamParser final : public nsICharsetDetectionObserver { nsCOMPtr mLoadFlusher; /** - * The chardet instance if chardet is enabled. + * The Cyrillic detector if enabled. */ nsCOMPtr mChardet; + /** + * The Japanese detector. + */ + mozilla::UniquePtr mJapaneseDetector; + /** * Whether the initial charset source was kCharsetFromParentFrame */ diff --git a/parser/nsCharsetSource.h b/parser/nsCharsetSource.h index c6b5ff4b60a9..2beece8fb9ce 100644 --- a/parser/nsCharsetSource.h +++ b/parser/nsCharsetSource.h @@ -23,6 +23,7 @@ enum { kCharsetFromOtherComponent, kCharsetFromParentForced, // propagates to child frames kCharsetFromUserForced, // propagates to child frames + kCharsetFromUserForcedAutoDetection, kCharsetFromByteOrderMark, kCharsetFromUtf8OnlyMime, // For JSON, WebVTT and such kCharsetFromBuiltIn, // resource: URLs diff --git a/third_party/rust/encoding_rs/.cargo-checksum.json b/third_party/rust/encoding_rs/.cargo-checksum.json index 7c1901dce515..5497fd022202 100644 --- a/third_party/rust/encoding_rs/.cargo-checksum.json +++ b/third_party/rust/encoding_rs/.cargo-checksum.json @@ -1 +1 @@ 
-{"files":{"CONTRIBUTING.md":"06c26277e8dbd3f57be2eb51b5e3285dc1cbbf8c11326df413868ae702e6a61c","COPYRIGHT":"8b98376eb373dcf81950474efe34b5576a8171460dff500cc58a1ed8d160cd57","Cargo.toml":"fd56e8d662553f0cc559f8ef7097effefbc815ac3485799b37dee9df08ec803c","Ideas.md":"b7452893f500163868d8de52c09addaf91e1632454ed02e892c467ed7ec39dbd","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"f2ad48641d9c997d9ae3b95d93d1cd6e1ab12ab4c44de89937c7bfabbd076a4a","README.md":"8ae2a3548dee23c19e20564a90e2fd0dfa600cf4c2dfcc538f3455f4462d7133","build.rs":"82747097b0bb8999cdaf689a9e46195f6df5d691ee90bcde8a7b79f16bd976f0","doc/Big5.txt":"f73a2edc5cb6c2d140ba6e07f4542e1c4a234950378acde1df93480f0ca0be0b","doc/EUC-JP.txt":"ee2818b907d0137f40a9ab9fd525fc700a44dbdddb6cf0c157a656566bae4bf1","doc/EUC-KR.txt":"71d9e2ccf3b124e8bdfb433c8cf2773fd878077038d0cec3c7237a50f4a78a30","doc/GBK.txt":"c1b522b5a799884e5001da661f42c5a8f4d0acb9ef1d74b206f22b5f65365606","doc/IBM866.txt":"a5a433e804d0f83af785015179fbc1d9b0eaf1f7960efcd04093e136b51fbd0e","doc/ISO-2022-JP.txt":"af86684f5a8f0e2868d7b2c292860140c3d2e5527530ca091f1b28198e8e2fe6","doc/ISO-8859-10.txt":"6d3949ad7c81ca176895101ed81a1db7df1060d64e262880b94bd31bb344ab4d","doc/ISO-8859-13.txt":"3951dd89cf93f7729148091683cf8511f4529388b7dc8dcd0d62eaed55be93fa","doc/ISO-8859-14.txt":"3d330784a0374fd255a38b47949675cc7168c800530534b0a01cac6edc623adc","doc/ISO-8859-15.txt":"24b1084aab5127a85aab99153f86e24694d0a3615f53b5ce23683f97cf66c47a","doc/ISO-8859-16.txt":"ce0272559b92ba76d7a7e476f6424ae4a5cc72e75b183611b08392e44add4d25","doc/ISO-8859-2.txt":"18ceff88c13d1b5ba455a3919b1e3de489045c4c3d2dd7e8527c125c75d54aad","doc/ISO-8859-3.txt":"21798404c68f4f5db59223362f24999da96968c0628427321fccce7d2849a130","doc/ISO-8859-4.txt":"d27f6520c6c5bfbcc19176b71d081cdb3bccde1622bb3e420d5680e812632d53","doc/ISO-8859-5.txt":"a10ec8d6ea7a78ad15da7275f6cb1a3365118527e28f9af6d0d5830501303f3a","doc/ISO-8859-6.txt":"ccda8a2efc96115336bdd7
7776637b9712425e44fbcf745353b9057fbef144e7","doc/ISO-8859-7.txt":"17900fa1f27a445958f0a77d7d9056be375a6bd7ee4492aa680c7c1500bab85e","doc/ISO-8859-8-I.txt":"8357555646d54265a9b9ffa3e68b08d132312f1561c60108ff9b8b1167b6ecf2","doc/ISO-8859-8.txt":"72cd6f3afb7b4a9c16a66a362473315770b7755d72c86c870e52fc3eba86c8af","doc/KOI8-R.txt":"839cf19a38da994488004ed7814b1f6151640156a9a2af02bf2efca745fb5966","doc/KOI8-U.txt":"0cc76624ed1f024183e2298b7e019957da2c70c8ca06e0fc4e6f353f50a5054f","doc/Shift_JIS.txt":"34c49141818cb9ddbcf59cc858f78a79be8ad148d563f26415108ae1f148443f","doc/UTF-16BE.txt":"e2e280d8acbaa6d2a6b3569d60e17500a285f2baa0df3363dd85537cd5a1ef8f","doc/UTF-16LE.txt":"70bdc170e3fc5298ba68f10125fb5eeb8b077036cc96bb4416c4de396f6d76c1","doc/UTF-8.txt":"ea7bae742e613010ced002cf4b601a737d2203fad65e115611451bc4428f548a","doc/gb18030.txt":"dc71378a8f07a2d8659f69ee81fb8791fef56ba86f124b429978285237bb4a7b","doc/macintosh.txt":"57491e53866711b4672d9b9ff35380b9dac9e0d8e3d6c20bdd6140603687c023","doc/replacement.txt":"4b6c3bbd7999d9d4108a281594bd02d13607e334a95465afff8c2c08d395f0e4","doc/windows-1250.txt":"61296bb6a21cdab602300d32ecfba434cb82de5ac3bc88d58710d2f125e28d39","doc/windows-1251.txt":"7deea1c61dea1485c8ff02db2c7d578db7a9aab63ab1cfd02ec04b515864689e","doc/windows-1252.txt":"933ef3bdddfce5ee132b9f1a1aa8b47423d2587bbe475b19028d0a6d38e180b6","doc/windows-1253.txt":"1a38748b88e99071a5c7b3d5456ead4caedeabab50d50d658be105bc113714de","doc/windows-1254.txt":"f8372f86c6f8d642563cd6ddc025260553292a39423df1683a98670bd7bf2b47","doc/windows-1255.txt":"4e5852494730054e2da258a74e1b9d780abbcdd8ce22ebc218ca2efe9e90493d","doc/windows-1256.txt":"c0879c5172abedead302a406e8f60d9cd9598694a0ffa4fd288ffe4fef7b8ea1","doc/windows-1257.txt":"c28a0c9f964fcb2b46d21f537c402446501a2800670481d6abf9fd9e9018d523","doc/windows-1258.txt":"5019ae4d61805c79aacbf17c93793342dbb098d65a1837783bc3e2c6d6a23602","doc/windows-874.txt":"4ef0e4501c5feba8b17aee1818602ed44b36ca8475db771ce2fc16d392cabecc","doc/x-mac-cyrillic.
txt":"58be154d8a888ca3d484b83b44f749823ef339ab27f14d90ca9a856f5050a8bd","doc/x-user-defined.txt":"f9cd07c4321bf5cfb0be4bdddd251072999b04a6cf7a6f5bc63709a84e2c1ffc","generate-encoding-data.py":"92ddec35a834b6bc815fffffe6d07d9938a90d3c4526298637d8624410d83078","rustfmt.toml":"85c1a3b4382fd89e991cbb81b70fb52780472edc064c963943cdaaa56e0a2030","src/ascii.rs":"800cfbe3036d0c97ce27e07a4fd05edbcb7354ebec20903d81c76136d734931c","src/big5.rs":"1c94b35813314775c3fa1b10923cf8e8f8eba8c465d9833ad4293594e16c17f2","src/data.rs":"9544c019c7360a669bd3adaa90b70331124abd1df59841db66e74912bcdb96a5","src/euc_jp.rs":"0842e4f564a36051c6b85c47bbb652efae2f2926e91491daf77e4ceeecb18163","src/euc_kr.rs":"8e68590efa65485583bf57cae44ebf6de535bac1d37232e7f0307a38425fb992","src/gb18030.rs":"d269efb5e5d175f9d2ecf01d5606955a284b6f00749bb0ee23d3412c83aa3d59","src/handles.rs":"0646bd091892ff7a76f34efccda4e5ddabe1e624e890baa9fdc9d48011d2d38b","src/iso_2022_jp.rs":"3adc380736f24a5de36bc1cf81049bbe64473de10e6f12774195e6213c27c322","src/lib.rs":"e2917fb9f605662ec4705d8c0b3c179f2264697a761191c3ec8101748cf717dc","src/macros.rs":"c7a019fd81d31de77569036ac36fd4e404b3f20144bbf79747faf4ea21538d09","src/mem.rs":"5498de31e816f51348b8d298d4fc9568da6b0b9363146f87ca5503131d33397f","src/replacement.rs":"182c2093a6edb162183ca5990554fd7b199d3011924a8d80d894ba98ee7c479e","src/shift_jis.rs":"1c0c69ba6c123fcf720276646074660193bf9e6fa4327fe0d739a3e67874e081","src/simd_funcs.rs":"857e61c1bda9d65286c23a6c3910d6814680bbc3064bf0ff92de5bc4f3edb6f3","src/single_byte.rs":"383d325dedbf3295acd50d880db1cecc29b69efe332ae2a37367cf40bf138ac4","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_d
ata/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab0
0ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"c962c7aeac3d9ef2aca70c9e21983b231d4cf998cb06879374b0401e5149d1da","src/testing.rs":"b299d27055f3b068de66cc10a75c024b881c48bc093627c01e0b1f8bd7d94666","src/utf_16.rs":"1ec4e1c8ed7e42e4de401c6d0f64c2835bd80c2a306f358959957d30e6ff1501","src/utf_8.rs":"f639fc5dccd5dcc2458936baa942237d0fd58ac398c83ea3f48e51dceb5b6a81","src/variant.rs":"619a8e604d2febe6a874e3ad73cddf3ef9e6011480aecf86f23708b313415251","src/x_user_defined.rs":"da51def859b870ced29cb87987f02d27b220eac0f222876cb72a1dc616f9d8ec"},"package":"0535f350c60aac0b87ccf28319abc749391e912192255b0c00a2c12c6917bd73"} \ No newline at end of file +{"files":{"CONTRIBUTING.md":"06c26277e8dbd3f57be2eb51b5e3285dc1cbbf8c11326df413868ae702e6a61c","COPYRIGHT":"8b98376eb373dcf81950474efe34b5576a8171460dff500cc58a1ed8d160cd57","Cargo.toml":"48048e755eafab9e99e5951e9eae17721454b59123a65d7450e15a9194ab2919","Ideas.md":"b7452893f500163868d8de52c09addaf91e1632454ed02e892c467ed7ec39dbd","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"f2ad48641d9c997d9ae3b95d93d1cd6e1ab12ab4c44de89937c7bfabbd076a4a","README.md":"6576f0d1746e40bbbd14c5c204106b4fa1c376e550feb01320a3feff14f23482","build.rs":"82747097b0bb8999cdaf689a9e46195f6df5d691ee90bcde8a7b79f16bd976f0","doc/Big5.txt":"f73a2edc5cb6c2d140ba6e07f4542e1c4a234950378acde1df93480f0ca0be0b","doc/EUC-JP.txt":"ee2818b907d0137f40a9ab9fd525fc700a44dbdddb6cf0c157a656566bae4bf1","doc/EUC-KR.txt":"71d9e2ccf3b124e8bdfb433c8cf2773fd878077038d0cec3c7237a50f4a78a30","doc/GBK.txt":"c1b522b5a799884e5001da661f42c5a8f4d0acb9ef1d74b206f22b5f65365606","doc/IBM866.txt":"a5a433e804d0f83af785015179fbc1d9b0eaf1f7960efcd04093e136b51fbd0e","doc/ISO-2022-JP.txt":"af86684f5a8f0e2868d7b2c292860140c3d2e5527530ca091f1b28198e8e2fe
6","doc/ISO-8859-10.txt":"6d3949ad7c81ca176895101ed81a1db7df1060d64e262880b94bd31bb344ab4d","doc/ISO-8859-13.txt":"3951dd89cf93f7729148091683cf8511f4529388b7dc8dcd0d62eaed55be93fa","doc/ISO-8859-14.txt":"3d330784a0374fd255a38b47949675cc7168c800530534b0a01cac6edc623adc","doc/ISO-8859-15.txt":"24b1084aab5127a85aab99153f86e24694d0a3615f53b5ce23683f97cf66c47a","doc/ISO-8859-16.txt":"ce0272559b92ba76d7a7e476f6424ae4a5cc72e75b183611b08392e44add4d25","doc/ISO-8859-2.txt":"18ceff88c13d1b5ba455a3919b1e3de489045c4c3d2dd7e8527c125c75d54aad","doc/ISO-8859-3.txt":"21798404c68f4f5db59223362f24999da96968c0628427321fccce7d2849a130","doc/ISO-8859-4.txt":"d27f6520c6c5bfbcc19176b71d081cdb3bccde1622bb3e420d5680e812632d53","doc/ISO-8859-5.txt":"a10ec8d6ea7a78ad15da7275f6cb1a3365118527e28f9af6d0d5830501303f3a","doc/ISO-8859-6.txt":"ccda8a2efc96115336bdd77776637b9712425e44fbcf745353b9057fbef144e7","doc/ISO-8859-7.txt":"17900fa1f27a445958f0a77d7d9056be375a6bd7ee4492aa680c7c1500bab85e","doc/ISO-8859-8-I.txt":"8357555646d54265a9b9ffa3e68b08d132312f1561c60108ff9b8b1167b6ecf2","doc/ISO-8859-8.txt":"72cd6f3afb7b4a9c16a66a362473315770b7755d72c86c870e52fc3eba86c8af","doc/KOI8-R.txt":"839cf19a38da994488004ed7814b1f6151640156a9a2af02bf2efca745fb5966","doc/KOI8-U.txt":"0cc76624ed1f024183e2298b7e019957da2c70c8ca06e0fc4e6f353f50a5054f","doc/Shift_JIS.txt":"34c49141818cb9ddbcf59cc858f78a79be8ad148d563f26415108ae1f148443f","doc/UTF-16BE.txt":"e2e280d8acbaa6d2a6b3569d60e17500a285f2baa0df3363dd85537cd5a1ef8f","doc/UTF-16LE.txt":"70bdc170e3fc5298ba68f10125fb5eeb8b077036cc96bb4416c4de396f6d76c1","doc/UTF-8.txt":"ea7bae742e613010ced002cf4b601a737d2203fad65e115611451bc4428f548a","doc/gb18030.txt":"dc71378a8f07a2d8659f69ee81fb8791fef56ba86f124b429978285237bb4a7b","doc/macintosh.txt":"57491e53866711b4672d9b9ff35380b9dac9e0d8e3d6c20bdd6140603687c023","doc/replacement.txt":"4b6c3bbd7999d9d4108a281594bd02d13607e334a95465afff8c2c08d395f0e4","doc/windows-1250.txt":"61296bb6a21cdab602300d32ecfba434cb82de5ac3bc88d5871
0d2f125e28d39","doc/windows-1251.txt":"7deea1c61dea1485c8ff02db2c7d578db7a9aab63ab1cfd02ec04b515864689e","doc/windows-1252.txt":"933ef3bdddfce5ee132b9f1a1aa8b47423d2587bbe475b19028d0a6d38e180b6","doc/windows-1253.txt":"1a38748b88e99071a5c7b3d5456ead4caedeabab50d50d658be105bc113714de","doc/windows-1254.txt":"f8372f86c6f8d642563cd6ddc025260553292a39423df1683a98670bd7bf2b47","doc/windows-1255.txt":"4e5852494730054e2da258a74e1b9d780abbcdd8ce22ebc218ca2efe9e90493d","doc/windows-1256.txt":"c0879c5172abedead302a406e8f60d9cd9598694a0ffa4fd288ffe4fef7b8ea1","doc/windows-1257.txt":"c28a0c9f964fcb2b46d21f537c402446501a2800670481d6abf9fd9e9018d523","doc/windows-1258.txt":"5019ae4d61805c79aacbf17c93793342dbb098d65a1837783bc3e2c6d6a23602","doc/windows-874.txt":"4ef0e4501c5feba8b17aee1818602ed44b36ca8475db771ce2fc16d392cabecc","doc/x-mac-cyrillic.txt":"58be154d8a888ca3d484b83b44f749823ef339ab27f14d90ca9a856f5050a8bd","doc/x-user-defined.txt":"f9cd07c4321bf5cfb0be4bdddd251072999b04a6cf7a6f5bc63709a84e2c1ffc","generate-encoding-data.py":"92ddec35a834b6bc815fffffe6d07d9938a90d3c4526298637d8624410d83078","rustfmt.toml":"85c1a3b4382fd89e991cbb81b70fb52780472edc064c963943cdaaa56e0a2030","src/ascii.rs":"800cfbe3036d0c97ce27e07a4fd05edbcb7354ebec20903d81c76136d734931c","src/big5.rs":"1c94b35813314775c3fa1b10923cf8e8f8eba8c465d9833ad4293594e16c17f2","src/data.rs":"9544c019c7360a669bd3adaa90b70331124abd1df59841db66e74912bcdb96a5","src/euc_jp.rs":"0842e4f564a36051c6b85c47bbb652efae2f2926e91491daf77e4ceeecb18163","src/euc_kr.rs":"8e68590efa65485583bf57cae44ebf6de535bac1d37232e7f0307a38425fb992","src/gb18030.rs":"d269efb5e5d175f9d2ecf01d5606955a284b6f00749bb0ee23d3412c83aa3d59","src/handles.rs":"0646bd091892ff7a76f34efccda4e5ddabe1e624e890baa9fdc9d48011d2d38b","src/iso_2022_jp.rs":"3adc380736f24a5de36bc1cf81049bbe64473de10e6f12774195e6213c27c322","src/lib.rs":"3f00c31f1620906c648cd79c5bf8d83e9ff1a776df3a159531ead05b998301e6","src/macros.rs":"c7a019fd81d31de77569036ac36fd4e404b3f20144bbf79747fa
f4ea21538d09","src/mem.rs":"5498de31e816f51348b8d298d4fc9568da6b0b9363146f87ca5503131d33397f","src/replacement.rs":"182c2093a6edb162183ca5990554fd7b199d3011924a8d80d894ba98ee7c479e","src/shift_jis.rs":"1c0c69ba6c123fcf720276646074660193bf9e6fa4327fe0d739a3e67874e081","src/simd_funcs.rs":"857e61c1bda9d65286c23a6c3910d6814680bbc3064bf0ff92de5bc4f3edb6f3","src/single_byte.rs":"383d325dedbf3295acd50d880db1cecc29b69efe332ae2a37367cf40bf138ac4","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_data/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a2011
6ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab00ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"c962c7aeac3d9ef2aca70c9e21983b231d4cf998cb06879374b0401e5149d1da","src/testing.rs":"b299d27055f3b068de66cc10a75c024b881c48bc093627c01e0b1f8bd7d94666","src/utf_16.rs":"1ec4e1c8ed7e42e4de401c6d0f64c2835bd80c2a306f358959957d30e6ff1501","src/utf_8.rs":"f639fc5dccd5dcc2458936baa942237d0fd58ac398c83ea3f48e51dceb5b6a81","src/variant.rs":"619a8e604d2febe6a874e3ad73cddf3ef9e6011480aecf86f23708b313415251","src/x_user_defined.rs":"da51def859b870ced29cb87987f02d27b220eac0f222876cb72a1dc616f9d8ec"},"package":"4155785c79f2f6701f185eb2e6b4caf0555ec03477cb4c70db67b465311620ed"} \ No newline at end of file diff --git a/third_party/rust/encoding_rs/Cargo.toml b/third_party/rust/encoding_rs/Cargo.toml index e29f19fb9afe..fd5958f77b70 100644 --- a/third_party/rust/encoding_rs/Cargo.toml +++ b/third_party/rust/encoding_rs/Cargo.toml @@ -12,7 +12,7 @@ [package] name = "encoding_rs" -version = "0.8.16" +version = 
"0.8.17" authors = ["Henri Sivonen "] description = "A Gecko-oriented implementation of the Encoding Standard" homepage = "https://docs.rs/encoding_rs/" @@ -35,7 +35,7 @@ optional = true version = "1.0" optional = true [dev-dependencies.bincode] -version = "0.8" +version = "1.0" [dev-dependencies.serde_derive] version = "1.0" diff --git a/third_party/rust/encoding_rs/README.md b/third_party/rust/encoding_rs/README.md index 8a72b515450e..095a921df615 100644 --- a/third_party/rust/encoding_rs/README.md +++ b/third_party/rust/encoding_rs/README.md @@ -404,6 +404,10 @@ To regenerate the generated code: ## Release Notes +### 0.8.17 + +* Update `bincode` (dev dependency) version requirement to 1.0. + ### 0.8.16 * Switch from the `simd` crate to `packed_simd`. diff --git a/third_party/rust/encoding_rs/src/lib.rs b/third_party/rust/encoding_rs/src/lib.rs index 23069375d6f8..56fbd2ed769b 100644 --- a/third_party/rust/encoding_rs/src/lib.rs +++ b/third_party/rust/encoding_rs/src/lib.rs @@ -11,7 +11,7 @@ feature = "cargo-clippy", allow(doc_markdown, inline_always, new_ret_no_self) )] -#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.16")] +#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.17")] //! encoding_rs is a Gecko-oriented Free Software / Open Source implementation //! of the [Encoding Standard](https://encoding.spec.whatwg.org/) in Rust. 
@@ -5635,7 +5635,7 @@ mod tests { let deserialized: Demo = serde_json::from_str(&serialized).unwrap(); assert_eq!(deserialized, demo); - let bincoded = bincode::serialize(&demo, bincode::Infinite).unwrap(); + let bincoded = bincode::serialize(&demo).unwrap(); let debincoded: Demo = bincode::deserialize(&bincoded[..]).unwrap(); assert_eq!(debincoded, demo); } diff --git a/third_party/rust/shift_or_euc/.cargo-checksum.json b/third_party/rust/shift_or_euc/.cargo-checksum.json new file mode 100644 index 000000000000..cc5816349a54 --- /dev/null +++ b/third_party/rust/shift_or_euc/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"CONTRIBUTING.md":"0e64fb3dd5a00e3fd528de6442de3f2ca851bd718c45cca0871aaf4eedac9ee1","COPYRIGHT":"3a7313aa2f19bf7095a2fd731c3d5e76f38d5e4640bd2a115d53032f24b2aa6c","Cargo.toml":"f9f41b76ecbe257a312ab09ed1208189b8dc9952d12d17a216fe2846d1d471c8","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"dac4dde23582d18b01701032860d8f8a1979fb2cf626060ca8de77e081a2a3d5","README.md":"b7148745a7ef59788e76fbe638d4b41c54dcaa1313a809f4630a020645f892a8","examples/detect.rs":"eb7239ccc802290ef24331db600ca1226198801dd86df86876b4b738ef4b8470","src/lib.rs":"f2a83db125d553af5c6fabae0487ef211aad62f2d93c4418dc510cbd425d472a"},"package":"f930dea4685b9803954b9d74cdc175c6d946a22f2eafe5aa2e9a58cdcae7da8c"} \ No newline at end of file diff --git a/third_party/rust/shift_or_euc/CONTRIBUTING.md b/third_party/rust/shift_or_euc/CONTRIBUTING.md new file mode 100644 index 000000000000..1d41d4c60ecc --- /dev/null +++ b/third_party/rust/shift_or_euc/CONTRIBUTING.md @@ -0,0 +1,38 @@ +If you send a pull request / patch, please observe the following. 
+ +## Licensing + +Since this crate is dual-licensed, +[section 5 of the Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0#contributions) +is considered to apply in the sense of Contributions being automatically +under the Apache License 2.0 or MIT dual license (see the `COPYRIGHT` file). +That is, by the act of offering a Contribution, you place your Contribution +under the Apache License 2.0 or MIT dual license stated in the `COPYRIGHT` +file. Please do not contribute if you aren't willing or allowed to license your +contributions in this manner. + +You are encouraged to dedicate test code that you contribute to the Public +Domain using the CC0 dedication. If you contribute test code that is not +dedicated to the Public Domain, please be sure not to put it in a part of +source code that the comments designate as being dedicated to the Public +Domain. + +## Copyright Notices + +If you require the addition of your copyright notice, it's up to you to edit in +your notice as part of your Contribution. Not adding a copyright notice is +taken as a waiver of copyright notice. + +## Compatibility with Stable Rust + +Please ensure that your Contribution compiles with the latest stable-channel +rustc. + +## rustfmt + +The `rustfmt` version used for this code is `rustfmt-nightly`. Please either +use that version or avoid using `rustfmt` (so as not to reformat all the code). + +## Unit tests + +Please ensure that `cargo test` succeeds. diff --git a/third_party/rust/shift_or_euc/COPYRIGHT b/third_party/rust/shift_or_euc/COPYRIGHT new file mode 100644 index 000000000000..1cacb3eb05ef --- /dev/null +++ b/third_party/rust/shift_or_euc/COPYRIGHT @@ -0,0 +1,9 @@ +shift_or_euc is copyright 2018 Mozilla Foundation. + +Licensed under the Apache License, Version 2.0 + or the MIT +license , +at your option. All files in the project carrying such +notice may not be copied, modified, or distributed except +according to those terms. 
diff --git a/third_party/rust/shift_or_euc/Cargo.toml b/third_party/rust/shift_or_euc/Cargo.toml new file mode 100644 index 000000000000..b28ae619a225 --- /dev/null +++ b/third_party/rust/shift_or_euc/Cargo.toml @@ -0,0 +1,30 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +edition = "2018" +name = "shift_or_euc" +version = "0.1.0" +authors = ["Henri Sivonen "] +description = "Detects among the Japanese legacy encodings" +homepage = "https://docs.rs/shift_or_euc/" +documentation = "https://docs.rs/shift_or_euc/" +readme = "README.md" +keywords = ["encoding", "web", "charset"] +categories = ["text-processing", "encoding", "web-programming", "internationalization"] +license = "MIT/Apache-2.0" +repository = "https://github.com/hsivonen/shift_or_euc" +[dependencies.encoding_rs] +version = "0.8.17" + +[dependencies.memchr] +version = "2.2.0" diff --git a/third_party/rust/shift_or_euc/LICENSE-APACHE b/third_party/rust/shift_or_euc/LICENSE-APACHE new file mode 100644 index 000000000000..d64569567334 --- /dev/null +++ b/third_party/rust/shift_or_euc/LICENSE-APACHE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/third_party/rust/shift_or_euc/LICENSE-MIT b/third_party/rust/shift_or_euc/LICENSE-MIT new file mode 100644 index 000000000000..9ac617754c14 --- /dev/null +++ b/third_party/rust/shift_or_euc/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2018 Mozilla Foundation + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/third_party/rust/shift_or_euc/README.md b/third_party/rust/shift_or_euc/README.md new file mode 100644 index 000000000000..e0c77d6fcc52 --- /dev/null +++ b/third_party/rust/shift_or_euc/README.md @@ -0,0 +1,73 @@ +# shift_or_euc + +[![Apache 2 / MIT dual-licensed](https://img.shields.io/badge/license-Apache%202%20%2F%20MIT-blue.svg)](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT) + +A Japanese legacy encoding detector for detecting between Shift_JIS, EUC-JP, +and, optionally, ISO-2022-JP _given_ the assumption that the encoding is one +of those. 
+ +This detector is generally more accurate (but see below about the failure +mode on half-width katakana) and decides much sooner than machine +learning-based detectors. To decide EUC-JP, machine learning-based detectors +try to gain confidence that the input looks like EUC-JP. To decide EUC-JP, +this detector instead looks for two simple rule-based signs of the input not +being Shift_JIS. + +As a consequence of not containing machine learning tables, the binary size +footprint that this crate adds on top of +[`encoding_rs`](https://docs.rs/crate/encoding_rs) is tiny. + +## Documentation + +[API documentation on docs.rs](https://docs.rs/crate/shift_or_euc) + +## Licensing + +See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT). + +## Sample Program Usage + +1. [Install Rust](https://rustup.rs/) +2. `git clone https://github.com/hsivonen/shift_or_euc` +3. `cd shift_or_euc` +4. `cargo run --example detect PATH_TO_FILE` + +The program prints one of: + +* Shift_JIS +* EUC-JP +* ISO-2022-JP +* Undecided + +## Principle of Operation + +The detector is based on two observations: + +1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or +EUC-JP, so encountering such an escape sequence (before non-ASCII has been +encountered) can be taken as indication of ISO-2022-JP. +2. When normal (full-width) kana or common kanji encoded as Shift_JIS is +decoded as EUC-JP, or vice versa, the result is either an error or half-width +katakana, and it's very uncommon for Japanese HTML to have half-width katakana +character before a normal kana or common kanji character. Therefore, if +decoding as Shift_JIS results in error or half-width katakana, the detector +decides that the content is EUC-JP, and vice versa. + +## Failure Modes + +The detector gives the wrong answer if the text has a half-width katakana +character before normal kana or common kanji. Some uncommon kanji are +undecidable.
(All JIS X 0208 Level 1 kanji are decidable.) + +The half-width katakana issue is mainly relevant for old 8-bit JIS X 0201-only +text files that would decode correctly as Shift_JIS but that the detector +detects as EUC-JP. + +The undecidable kanji issue does not realistically show up when a full +document is fed to the detector, because, realistically, in a full document, +there is at least one kana or common kanji. It can occur, though, if the +detector is only run on a prefix of a document and the prefix only contains +the title of the document. It is possible for a document title to consist +entirely of undecidable kanji. (Indeed, Japanese Wikipedia has articles with +such titles.) If the detector is undecided, falling back to Shift_JIS is +typically the better Web-oriented guess. \ No newline at end of file diff --git a/third_party/rust/shift_or_euc/examples/detect.rs b/third_party/rust/shift_or_euc/examples/detect.rs new file mode 100644 index 000000000000..9ab21a3561b4 --- /dev/null +++ b/third_party/rust/shift_or_euc/examples/detect.rs @@ -0,0 +1,56 @@ +// Copyright 2018 Mozilla Foundation. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms.
+ +use std::fs::File; +use std::io::Read; + +use shift_or_euc::Detector; + +fn main() { + let mut args = std::env::args_os(); + if args.next().is_none() { + eprintln!("Error: Program name missing from arguments."); + std::process::exit(-1); + } + if let Some(path) = args.next() { + if args.next().is_some() { + eprintln!("Error: Too many arguments."); + std::process::exit(-3); + } + if let Ok(mut file) = File::open(path) { + let mut buffer = [0u8; 4096]; + let mut detector = Detector::new(true); + loop { + if let Ok(num_read) = file.read(&mut buffer[..]) { + let opt_enc = if num_read == 0 { + detector.feed(b"", true) + } else { + detector.feed(&buffer[..num_read], false) + }; + if let Some(encoding) = opt_enc { + println!("{}", encoding.name()); + return; + } else if num_read == 0 { + println!("Undecided"); + return; + } + } else { + eprintln!("Error: Error reading file."); + std::process::exit(-5); + } + } + } else { + eprintln!("Error: Could not open file."); + std::process::exit(-4); + } + } else { + eprintln!("Error: One path argument needed."); + std::process::exit(-2); + } +} diff --git a/third_party/rust/shift_or_euc/src/lib.rs b/third_party/rust/shift_or_euc/src/lib.rs new file mode 100644 index 000000000000..978fc7f27e0f --- /dev/null +++ b/third_party/rust/shift_or_euc/src/lib.rs @@ -0,0 +1,278 @@ +// Copyright 2018 Mozilla Foundation. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![doc(html_root_url = "https://docs.rs/shift_or_euc/0.1.0")] + +//! A Japanese legacy encoding detector for detecting between Shift_JIS, +//! EUC-JP, and, optionally, ISO-2022-JP _given_ the assumption that the +//! encoding is one of those. +//! +//! This detector is generally more accurate (but see below about the failure +//! 
mode on half-width katakana) and decides much sooner than machine +//! learning-based detectors. To decide EUC-JP, machine learning-based +//! detectors try to gain confidence that the input looks like EUC-JP. To +//! decide EUC-JP, this detector instead looks for two simple rule-based +//! signs of the input not being Shift_JIS. +//! +//! As a consequence of not containing machine learning tables, the binary +//! size footprint that this crate adds on top of +//! [`encoding_rs`](https://docs.rs/crate/encoding_rs) is tiny. +//! +//! # Licensing +//! +//! See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT). +//! +//! # Principle of Operation +//! +//! The detector is based on two observations: +//! +//! 1. The ISO-2022-JP escape sequences don't normally occur in Shift_JIS or +//! EUC-JP, so encountering such an escape sequence (before non-ASCII has been +//! encountered) can be taken as indication of ISO-2022-JP. +//! 2. When normal (full-width) kana or common kanji encoded as Shift_JIS is +//! decoded as EUC-JP, or vice versa, the result is either an error or +//! half-width katakana, and it's very uncommon for Japanese HTML to have +//! half-width katakana character before a normal kana or common kanji +//! character. Therefore, if decoding as Shift_JIS results in error or +//! half-width katakana, the detector decides that the content is EUC-JP, and +//! vice versa. +//! +//! # Failure Modes +//! +//! The detector gives the wrong answer if the text has a half-width katakana +//! character before normal kana or common kanji. Some uncommon kanji are +//! undecidable. (All JIS X 0208 Level 1 kanji are decidable.) +//! +//! The half-width katakana issue is mainly relevant for old 8-bit JIS X +//! 0201-only text files that would decode correctly as Shift_JIS but that the +//! detector detects as EUC-JP. +//! +//! The undecidable kanji issue does not realistically show up when a full +//!
document is fed to the detector, because, realistically, in a full +//! document, there is at least one kana or common kanji. It can occur, +//! though, if the detector is only run on a prefix of a document and the +//! prefix only contains the title of the document. It is possible for +//! document title to consist entirely of undecidable kanji. (Indeed, +//! Japanese Wikipedia has articles with such titles.) If the detector is +//! undecided, falling back to Shift_JIS is typically the Web oriented better +//! guess. + +use encoding_rs::Decoder; +use encoding_rs::DecoderResult; +use encoding_rs::Encoding; +use encoding_rs::EUC_JP; +use encoding_rs::ISO_2022_JP; +use encoding_rs::SHIFT_JIS; + +/// Returns the index of the first non-ASCII byte or the first +/// 0x1B, whichever comes first, or the length of the buffer +/// if neither is found. +fn find_non_ascii_or_escape(buffer: &[u8]) -> usize { + let ascii_up_to = Encoding::ascii_valid_up_to(buffer); + if let Some(escape) = memchr::memchr(0x1B, &buffer[..ascii_up_to]) { + escape + } else { + ascii_up_to + } +} + +/// Feed decoder with one byte (if `last` is `false`) or EOF (if `last` is +/// `true`). `byte` is ignored if `last` is `true`. +/// Returns `true` if there was no rejection or `false` upon rejecting the +/// encoding hypothesis represented by this decoder. 
+#[inline(always)] +fn feed_decoder(decoder: &mut Decoder, byte: u8, last: bool) -> bool { + let mut output = [0u16; 1]; + let input = [byte]; + let (result, _read, written) = decoder.decode_to_utf16_without_replacement( + if last { b"" } else { &input }, + &mut output, + last, + ); + match result { + DecoderResult::InputEmpty => { + if written == 1 { + match output[0] { + 0xFF61...0xFF9F => { + return false; + } + _ => {} + } + } + } + DecoderResult::Malformed(_, _) => { + return false; + } + DecoderResult::OutputFull => { + unreachable!(); + } + } + true +} + +/// A detector for detecting the character encoding of input on the +/// precondition that the encoding is a Japanese legacy encoding. +pub struct Detector { + shift_jis_decoder: Decoder, + euc_jp_decoder: Decoder, + second_byte_in_escape: u8, + iso_2022_jp_disqualified: bool, + escape_seen: bool, + finished: bool, +} + +impl Detector { + /// Instantiates the detector. If `allow_2022` is `true` the possible + /// guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If + /// `allow_2022` is `false`, the possible guesses are Shift_JIS, EUC-JP, + /// and undecided. + pub fn new(allow_2022: bool) -> Self { + Detector { + shift_jis_decoder: SHIFT_JIS.new_decoder_without_bom_handling(), + euc_jp_decoder: EUC_JP.new_decoder_without_bom_handling(), + second_byte_in_escape: 0, + iso_2022_jp_disqualified: !allow_2022, + escape_seen: false, + finished: false, + } + } + + /// Feeds bytes to the detector. If `last` is `true` the end of the stream + /// is considered to occur immediately after the end of `buffer`. + /// Otherwise, the stream is expected to continue. `buffer` may be empty. + /// + /// If you're running the detector only on a prefix of a complete + /// document, _do not_ pass `last` as `true` after the prefix if the + /// stream as a whole still contains more content. + /// + /// Returns `Some(encoding_rs::SHIFT_JIS)` if the detector guessed + /// Shift_JIS. 
Returns `Some(encoding_rs::EUC_JP)` if the detector + /// guessed EUC-JP. Returns `Some(encoding_rs::ISO_2022_JP)` if the + /// detector guessed ISO-2022-JP (only possible if `true` was passed as + /// `allow_2022` when instantiating the detector). Returns `None` if the + /// detector is undecided. If `None` is returned even when passing `true` + /// as `last`, falling back to Shift_JIS is the best guess for Web + /// purposes. + /// + /// Do not call again after the method has returned `Some(_)` or after + /// the method has been called with `true` as `last`. + /// + /// # Panics + /// + /// If called after the method has returned `Some(_)` or after the method + /// has been called with `true` as `last`. + pub fn feed(&mut self, buffer: &[u8], last: bool) -> Option<&'static Encoding> { + assert!( + !self.finished, + "Tried to used a detector that has finished." + ); + self.finished = true; // Will change back to false unless we return early + let mut i = 0; + if !self.iso_2022_jp_disqualified { + if !self.escape_seen { + i = find_non_ascii_or_escape(buffer); + } + while i < buffer.len() { + let byte = buffer[i]; + if byte > 0x7F { + self.iso_2022_jp_disqualified = true; + break; + } + if !self.escape_seen && byte == 0x1B { + self.escape_seen = true; + i += 1; + continue; + } + if self.escape_seen && self.second_byte_in_escape == 0 { + self.second_byte_in_escape = byte; + i += 1; + continue; + } + match (self.second_byte_in_escape, byte) { + (0x28, 0x42) | (0x28, 0x4A) | (0x28, 0x49) | (0x24, 0x40) | (0x24, 0x42) => { + return Some(ISO_2022_JP); + } + _ => {} + } + if self.escape_seen { + self.iso_2022_jp_disqualified = true; + break; + } + i += 1; + } + } + for &byte in &buffer[i..] 
{ + if !feed_decoder(&mut self.euc_jp_decoder, byte, false) { + return Some(SHIFT_JIS); + } + if !feed_decoder(&mut self.shift_jis_decoder, byte, false) { + return Some(EUC_JP); + } + } + if last { + if !feed_decoder(&mut self.euc_jp_decoder, 0, true) { + return Some(SHIFT_JIS); + } + if !feed_decoder(&mut self.shift_jis_decoder, 0, true) { + return Some(EUC_JP); + } + return None; + } + self.finished = false; + None + } +} + +// Any copyright to the test code below this comment is dedicated to the +// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/ + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_iso_2022_jp() { + let mut detector = Detector::new(true); + assert_eq!( + detector.feed(b"abc\x1B\x28\x42\xFF", true), + Some(ISO_2022_JP) + ); + } + + #[test] + fn test_error_precedence() { + let mut detector = Detector::new(true); + assert_eq!(detector.feed(b"abc\xFF", true), Some(SHIFT_JIS)); + } + + #[test] + fn test_invalid_euc_jp() { + let mut detector = Detector::new(true); + assert_eq!(detector.feed(b"abc\x81\x40", true), Some(SHIFT_JIS)); + } + + #[test] + fn test_invalid_shift_jis() { + let mut detector = Detector::new(true); + assert_eq!(detector.feed(b"abc\xEB\xA8", true), Some(EUC_JP)); + } + + #[test] + fn test_invalid_shift_jis_before_invalid_euc_jp() { + let mut detector = Detector::new(true); + assert_eq!(detector.feed(b"abc\xEB\xA8\x81\x40", true), Some(EUC_JP)); + } + + #[test] + fn test_undecided() { + let mut detector = Detector::new(true); + assert_eq!(detector.feed(b"abc", false), None); + assert_eq!(detector.feed(b"abc", false), None); + } + +} diff --git a/third_party/rust/shift_or_euc_c/.cargo-checksum.json b/third_party/rust/shift_or_euc_c/.cargo-checksum.json new file mode 100644 index 000000000000..1ba22660be26 --- /dev/null +++ b/third_party/rust/shift_or_euc_c/.cargo-checksum.json @@ -0,0 +1 @@ 
+{"files":{"CONTRIBUTING.md":"0e64fb3dd5a00e3fd528de6442de3f2ca851bd718c45cca0871aaf4eedac9ee1","COPYRIGHT":"3a7313aa2f19bf7095a2fd731c3d5e76f38d5e4640bd2a115d53032f24b2aa6c","Cargo.toml":"342e5345f4fb433b89f397b07e4e7162376b30cbbc1d6f6ccb11523116e6ed6b","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"dac4dde23582d18b01701032860d8f8a1979fb2cf626060ca8de77e081a2a3d5","README.md":"a323f1f4537bc7b3f9b3b216c8ac5041b83aa0321f5349a52627aade947c6272","include/shift_or_euc.h":"47c3b9832cb7eb8995aa37dcc2e76be7d4f5c7b3fa6b43135e579831ab449cd8","src/lib.rs":"cab1898dd6724e0a0324a1e44f6348c107f13916da8873dba69c70dbc95ba9cd"},"package":"c81ec08c8a68c45c48d8ef58b80ce038cc9945891c4a4996761e2ec5cba05abc"} \ No newline at end of file diff --git a/third_party/rust/shift_or_euc_c/CONTRIBUTING.md b/third_party/rust/shift_or_euc_c/CONTRIBUTING.md new file mode 100644 index 000000000000..1d41d4c60ecc --- /dev/null +++ b/third_party/rust/shift_or_euc_c/CONTRIBUTING.md @@ -0,0 +1,38 @@ +If you send a pull request / patch, please observe the following. + +## Licensing + +Since this crate is dual-licensed, +[section 5 of the Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0#contributions) +is considered to apply in the sense of Contributions being automatically +under the Apache License 2.0 or MIT dual license (see the `COPYRIGHT` file). +That is, by the act of offering a Contribution, you place your Contribution +under the Apache License 2.0 or MIT dual license stated in the `COPYRIGHT` +file. Please do not contribute if you aren't willing or allowed to license your +contributions in this manner. + +You are encouraged to dedicate test code that you contribute to the Public +Domain using the CC0 dedication. If you contribute test code that is not +dedicated to the Public Domain, please be sure not to put it in a part of +source code that the comments designate as being dedicated to the Public +Domain. 
+ +## Copyright Notices + +If you require the addition of your copyright notice, it's up to you to edit in +your notice as part of your Contribution. Not adding a copyright notice is +taken as a waiver of copyright notice. + +## Compatibility with Stable Rust + +Please ensure that your Contribution compiles with the latest stable-channel +rustc. + +## rustfmt + +The `rustfmt` version used for this code is `rustfmt-nightly`. Please either +use that version or avoid using `rustfmt` (so as not to reformat all the code). + +## Unit tests + +Please ensure that `cargo test` succeeds. diff --git a/third_party/rust/shift_or_euc_c/COPYRIGHT b/third_party/rust/shift_or_euc_c/COPYRIGHT new file mode 100644 index 000000000000..1cacb3eb05ef --- /dev/null +++ b/third_party/rust/shift_or_euc_c/COPYRIGHT @@ -0,0 +1,9 @@ +shift_or_euc is copyright 2018 Mozilla Foundation. + +Licensed under the Apache License, Version 2.0 + or the MIT +license , +at your option. All files in the project carrying such +notice may not be copied, modified, or distributed except +according to those terms. diff --git a/third_party/rust/shift_or_euc_c/Cargo.toml b/third_party/rust/shift_or_euc_c/Cargo.toml new file mode 100644 index 000000000000..a7c91be4548e --- /dev/null +++ b/third_party/rust/shift_or_euc_c/Cargo.toml @@ -0,0 +1,30 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. 
If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +edition = "2018" +name = "shift_or_euc_c" +version = "0.1.0" +authors = ["Henri Sivonen "] +description = "C API for shift_or_euc" +homepage = "https://docs.rs/shift_or_euc_c/" +documentation = "https://docs.rs/shift_or_euc_c/" +readme = "README.md" +keywords = ["encoding", "web", "charset"] +categories = ["text-processing", "encoding", "web-programming", "internationalization"] +license = "MIT/Apache-2.0" +repository = "https://github.com/hsivonen/shift_or_euc_c" +[dependencies.encoding_rs] +version = "0.8.17" + +[dependencies.shift_or_euc] +version = "0.1.0" diff --git a/third_party/rust/shift_or_euc_c/LICENSE-APACHE b/third_party/rust/shift_or_euc_c/LICENSE-APACHE new file mode 100644 index 000000000000..d64569567334 --- /dev/null +++ b/third_party/rust/shift_or_euc_c/LICENSE-APACHE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/third_party/rust/shift_or_euc_c/LICENSE-MIT b/third_party/rust/shift_or_euc_c/LICENSE-MIT new file mode 100644 index 000000000000..9ac617754c14 --- /dev/null +++ b/third_party/rust/shift_or_euc_c/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2018 Mozilla Foundation + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/third_party/rust/shift_or_euc_c/README.md b/third_party/rust/shift_or_euc_c/README.md new file mode 100644 index 000000000000..a2e70b0f45ac --- /dev/null +++ b/third_party/rust/shift_or_euc_c/README.md @@ -0,0 +1,13 @@ +# shift_or_euc_c + +[![Apache 2 / MIT dual-licensed](https://img.shields.io/badge/license-Apache%202%20%2F%20MIT-blue.svg)](https://github.com/hsivonen/shift_or_euc_c/blob/master/COPYRIGHT) + +C API for [`shift_or_euc`](https://docs.rs/crate/shift_or_euc). + +## Documentation + +[API documentation on docs.rs](https://docs.rs/crate/shift_or_euc_c) + +## Licensing + +See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc_c/blob/master/COPYRIGHT). diff --git a/third_party/rust/shift_or_euc_c/include/shift_or_euc.h b/third_party/rust/shift_or_euc_c/include/shift_or_euc.h new file mode 100644 index 000000000000..3a5f4da6e518 --- /dev/null +++ b/third_party/rust/shift_or_euc_c/include/shift_or_euc.h @@ -0,0 +1,88 @@ +// Copyright 2018 Mozilla Foundation. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#ifndef shift_or_euc_h +#define shift_or_euc_h + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include "encoding_rs.h" + +#ifndef SHIFT_OR_EUC_DETECTOR +#define SHIFT_OR_EUC_DETECTOR Detector +#ifndef __cplusplus +typedef struct Detector_ Detector; +#endif +#endif + +/// Instantiates the detector. If `allow_2022` is `true` the possible +/// guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If +/// `allow_2022` is `false`, the possible guesses are Shift_JIS, EUC-JP, +/// and undecided. +/// +/// The instantiated detector must be freed after use using +/// `shift_or_euc_detector_free`. 
+SHIFT_OR_EUC_DETECTOR* shift_or_euc_detector_new(bool allow_2022); + +/// Deallocates a detector obtained from `shift_or_euc_detector_new`. +void shift_or_euc_detector_free(SHIFT_OR_EUC_DETECTOR* detector); + +/// Feeds bytes to the detector. If `last` is `true` the end of the stream +/// is considered to occur immediately after the end of `buffer`. +/// Otherwise, the stream is expected to continue. `buffer_len` may be zero. +/// `buffer` must not be `NULL` but may be undereferencable when +/// `buffer_len` is zero. +/// +/// If you're running the detector only on a prefix of a complete +/// document, _do not_ pass `last` as `true` after the prefix if the +/// stream as a whole still contains more content. +/// +/// Returns `SHIFT_JIS_ENCODING` if the detector guessed +/// Shift_JIS. Returns `EUC_JP_ENCODING` if the detector +/// guessed EUC-JP. Returns `ISO_2022_JP_ENCODING` if the +/// detector guessed ISO-2022-JP (only possible if `true` was passed as +/// `allow_2022` when instantiating the detector). Returns `NULL` if the +/// detector is undecided. If `NULL` is returned even when passing `true` +/// as `last`, falling back to Shift_JIS is the best guess for Web +/// purposes. +/// +/// Do not call again after the function has returned non-`NULL` or after +/// the function has been called with `true` as `last`. +/// +/// # Panics +/// +/// If called after the function has returned non-`NULL` or after the +/// function has been called with `true` as `last`. +/// +/// # Undefined Behavior +/// +/// UB ensues if +/// +/// * `detector` does not point to a detector obtained from +/// `shift_or_euc_detector_new` but not yet freed with +/// `shift_or_euc_detector_free`. +/// * `buffer` is `NULL`. +/// * `buffer` and `buffer_len` don't designate a range of memory +/// valid for reading. 
+ENCODING_RS_ENCODING const* shift_or_euc_detector_feed( + SHIFT_OR_EUC_DETECTOR* detector, + uint8_t const* buffer, + size_t buffer_len, + bool last +); + +#ifdef __cplusplus +} +#endif + +#endif // shift_or_euc_h \ No newline at end of file diff --git a/third_party/rust/shift_or_euc_c/src/lib.rs b/third_party/rust/shift_or_euc_c/src/lib.rs new file mode 100644 index 000000000000..3f168d631796 --- /dev/null +++ b/third_party/rust/shift_or_euc_c/src/lib.rs @@ -0,0 +1,94 @@ +// Copyright 2018 Mozilla Foundation. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![doc(html_root_url = "https://docs.rs/shift_or_euc_c/0.1.0")] + +//! C API for [`shift_or_euc`](https://docs.rs/shift_or_euc/) +//! +//! # Panics +//! +//! This crate is designed to be used only in a `panic=abort` scenario. +//! Panic propagation across FFI is not handled! +//! +//! # Licensing +//! +//! See the file named [COPYRIGHT](https://github.com/hsivonen/shift_or_euc/blob/master/COPYRIGHT). + +use encoding_rs::Encoding; +use shift_or_euc::*; + +/// Instantiates the detector. If `allow_2022` is `true` the possible +/// guesses are Shift_JIS, EUC-JP, ISO-2022-JP, and undecided. If +/// `allow_2022` is `false`, the possible guesses are Shift_JIS, EUC-JP, +/// and undecided. +/// +/// The instantiated detector must be freed after use using +/// `shift_or_euc_detector_free`. +#[no_mangle] +pub unsafe extern "C" fn shift_or_euc_detector_new(allow_2022: bool) -> *mut Detector { + Box::into_raw(Box::new(Detector::new(allow_2022))) +} + +/// Deallocates a detector obtained from `shift_or_euc_detector_new`. +#[no_mangle] +pub unsafe extern "C" fn shift_or_euc_detector_free(detector: *mut Detector) { + let _ = Box::from_raw(detector); +} + +/// Feeds bytes to the detector. 
If `last` is `true` the end of the stream +/// is considered to occur immediately after the end of `buffer`. +/// Otherwise, the stream is expected to continue. `buffer_len` may be zero. +/// `buffer` must not be `NULL` but may be undereferencable when +/// `buffer_len` is zero. +/// +/// If you're running the detector only on a prefix of a complete +/// document, _do not_ pass `last` as `true` after the prefix if the +/// stream as a whole still contains more content. +/// +/// Returns `SHIFT_JIS_ENCODING` if the detector guessed +/// Shift_JIS. Returns `EUC_JP_ENCODING` if the detector +/// guessed EUC-JP. Returns `ISO_2022_JP_ENCODING` if the +/// detector guessed ISO-2022-JP (only possible if `true` was passed as +/// `allow_2022` when instantiating the detector). Returns `NULL` if the +/// detector is undecided. If `NULL` is returned even when passing `true` +/// as `last`, falling back to Shift_JIS is the best guess for Web +/// purposes. +/// +/// Do not call again after the function has returned non-`NULL` or after +/// the function has been called with `true` as `last`. +/// +/// # Panics +/// +/// If called after the function has returned non-`NULL` or after the +/// function has been called with `true` as `last`. +/// +/// # Undefined Behavior +/// +/// UB ensues if +/// +/// * `detector` does not point to a detector obtained from +/// `shift_or_euc_detector_new` but not yet freed with +/// `shift_or_euc_detector_free`. +/// * `buffer` is `NULL`. +/// * `buffer` and `buffer_len` don't designate a range of memory +/// valid for reading. 
+#[no_mangle] +pub unsafe extern "C" fn shift_or_euc_detector_feed( + detector: *mut Detector, + buffer: *const u8, + buffer_len: usize, + last: bool, +) -> *const Encoding { + if let Some(encoding) = (*detector).feed(::std::slice::from_raw_parts(buffer, buffer_len), last) + { + encoding + } else { + ::std::ptr::null() + } +} diff --git a/toolkit/content/widgets/browser-custom-element.js b/toolkit/content/widgets/browser-custom-element.js index 722d37257be4..611ca02480cf 100644 --- a/toolkit/content/widgets/browser-custom-element.js +++ b/toolkit/content/widgets/browser-custom-element.js @@ -245,6 +245,8 @@ class MozBrowser extends MozElements.MozElementMixin(XULFrameElement) { this._mayEnableCharacterEncodingMenu = null; + this._charsetAutodetected = false; + this._contentPrincipal = null; this._csp = null; @@ -619,6 +621,16 @@ class MozBrowser extends MozElements.MozElementMixin(XULFrameElement) { } } + get charsetAutodetected() { + return this.isRemoteBrowser ? this._charsetAutodetected : this.docShell.charsetAutodetected; + } + + set charsetAutodetected(aAutodetected) { + if (this.isRemoteBrowser) { + this._charsetAutodetected = aAutodetected; + } + } + get contentPrincipal() { return this.isRemoteBrowser ? 
this._contentPrincipal : this.contentDocument.nodePrincipal; } @@ -1709,6 +1721,7 @@ class MozBrowser extends MozElements.MozElementMixin(XULFrameElement) { "_contentTitle", "_characterSet", "_mayEnableCharacterEncodingMenu", + "_charsetAutodetected", "_contentPrincipal", "_imageDocument", "_fullZoom", diff --git a/toolkit/library/rust/shared/Cargo.toml b/toolkit/library/rust/shared/Cargo.toml index a87155680f63..26a2403cd261 100644 --- a/toolkit/library/rust/shared/Cargo.toml +++ b/toolkit/library/rust/shared/Cargo.toml @@ -38,6 +38,7 @@ cert_storage = { path = "../../../../security/manager/ssl/cert_storage", optiona bitsdownload = { path = "../../../components/bitsdownload", optional = true } storage = { path = "../../../../storage/rust" } bookmark_sync = { path = "../../../components/places/bookmark_sync", optional = true } +shift_or_euc_c = "0.1.0" [build-dependencies] rustc_version = "0.2" diff --git a/toolkit/library/rust/shared/lib.rs b/toolkit/library/rust/shared/lib.rs index 1f6c8eec01c9..a648c7ea1361 100644 --- a/toolkit/library/rust/shared/lib.rs +++ b/toolkit/library/rust/shared/lib.rs @@ -44,6 +44,7 @@ extern crate bitsdownload; extern crate storage; #[cfg(feature = "moz_places")] extern crate bookmark_sync; +extern crate shift_or_euc_c; extern crate arrayvec; diff --git a/toolkit/locales/en-US/chrome/global/charsetMenu.properties b/toolkit/locales/en-US/chrome/global/charsetMenu.properties index 3868d32a8ec6..3dfba32569c0 100644 --- a/toolkit/locales/en-US/chrome/global/charsetMenu.properties +++ b/toolkit/locales/en-US/chrome/global/charsetMenu.properties @@ -31,8 +31,6 @@ charsetMenuAutodet = Auto-Detect charsetMenuAutodet.key = D charsetMenuAutodet.off = (off) charsetMenuAutodet.off.key = o -charsetMenuAutodet.ja = Japanese -charsetMenuAutodet.ja.key = J charsetMenuAutodet.ru = Russian charsetMenuAutodet.ru.key = R charsetMenuAutodet.uk = Ukrainian @@ -104,25 +102,8 @@ windows-1255 = Hebrew # sorts right after that one in the collation order for 
your locale. ISO-8859-8 = Hebrew, Visual -# Japanese -Shift_JIS.key = J -Shift_JIS = Japanese (Shift_JIS) -EUC-JP.key = p -EUC-JP = Japanese (EUC-JP) -ISO-2022-JP.key = n -ISO-2022-JP = Japanese (ISO-2022-JP) - -# UI string in anticipation of bug 1543077; deliberately not in use yet - -# LOCALIZATION NOTE (Japanese.key): If taken into use, this string will appear -# instead of the string for Shift_JIS.key, so the use of the same -# accelerator is deliberate. +# Japanese (NOT AN ENCODING NAME) Japanese.key = J -# LOCALIZATION NOTE (Japanese): If taken into use, this string will appear -# as a single item in place of the strings for the three items Shift_JIS, -# EUC-JP, and ISO-2022-JP, so this string does not need to make sense together -# with those strings and should be translated the way those were -# but omitting the part in parentheses. Japanese = Japanese # Korean diff --git a/toolkit/modules/CharsetMenu.jsm b/toolkit/modules/CharsetMenu.jsm index 42bb73f31588..a8a00e70ae82 100644 --- a/toolkit/modules/CharsetMenu.jsm +++ b/toolkit/modules/CharsetMenu.jsm @@ -16,18 +16,16 @@ ChromeUtils.defineModuleGetter(this, "Deprecated", const kAutoDetectors = [ ["off", ""], - ["ja", "ja_parallel_state_machine"], ["ru", "ruprob"], ["uk", "ukprob"], ]; /** * This set contains encodings that are in the Encoding Standard, except: - * - XSS-dangerous encodings (except ISO-2022-JP which is assumed to be - * too common not to be included). + * - Japanese encodings are represented by one autodetection item * - x-user-defined, which practically never makes sense as an end-user-chosen * override. - * - Encodings that IE11 doesn't have in its correspoding menu. + * - Encodings that IE11 doesn't have in its corresponding menu. 
*/ const kEncodings = new Set([ // Globally relevant @@ -60,10 +58,8 @@ const kEncodings = new Set([ // Hebrew "windows-1255", "ISO-8859-8", - // Japanese - "Shift_JIS", - "EUC-JP", - "ISO-2022-JP", + // Japanese (NOT AN ENCODING NAME) + "Japanese", // Korean "EUC-KR", // Thai @@ -95,8 +91,7 @@ function CharsetComparator(a, b) { // happens to make the less frequently-used items first. let titleA = a.label.replace(/\(.*/, "") + b.value; let titleB = b.label.replace(/\(.*/, "") + a.value; - // Secondarily reverse sort by encoding name to sort "windows" or - // "shift_jis" first. + // Secondarily reverse sort by encoding name to sort "windows" return titleA.localeCompare(titleB) || b.value.localeCompare(a.value); } @@ -239,7 +234,17 @@ var CharsetMenu = { * For substantially similar encodings, treat two encodings as the same * for the purpose of the check mark. */ - foldCharset(charset) { + foldCharset(charset, isAutodetected) { + if (isAutodetected) { + switch (charset) { + case "Shift_JIS": + case "EUC-JP": + case "ISO-2022-JP": + return "Japanese"; + default: + // fall through + } + } switch (charset) { case "ISO-8859-8-I": return "windows-1255"; @@ -252,8 +257,11 @@ var CharsetMenu = { } }, + /** + * This method is for comm-central callers only. 
+ */ update(parent, charset) { - let menuitem = parent.getElementsByAttribute("charset", this.foldCharset(charset)).item(0); + let menuitem = parent.getElementsByAttribute("charset", this.foldCharset(charset, false)).item(0); if (menuitem) { menuitem.setAttribute("checked", "true"); } diff --git a/toolkit/modules/RemoteWebProgress.jsm b/toolkit/modules/RemoteWebProgress.jsm index fa10c8ac30e6..8b986a2b47fc 100644 --- a/toolkit/modules/RemoteWebProgress.jsm +++ b/toolkit/modules/RemoteWebProgress.jsm @@ -196,6 +196,7 @@ class RemoteWebProgressManager { if (json.charset) { this._browser._characterSet = json.charset; this._browser._mayEnableCharacterEncodingMenu = json.mayEnableCharacterEncodingMenu; + this._browser._charsetAutodetected = json.charsetAutodetected; } } diff --git a/toolkit/modules/WebProgressChild.jsm b/toolkit/modules/WebProgressChild.jsm index 0bf9469fe5f8..b4c4e05f8615 100644 --- a/toolkit/modules/WebProgressChild.jsm +++ b/toolkit/modules/WebProgressChild.jsm @@ -116,6 +116,7 @@ class WebProgressChild { json.title = this.mm.content.document.title; json.charset = this.mm.content.document.characterSet; json.mayEnableCharacterEncodingMenu = this.mm.docShell.mayEnableCharacterEncodingMenu; + json.charsetAutodetected = this.mm.docShell.charsetAutodetected; json.principal = this.mm.content.document.nodePrincipal; let csp = this.mm.content.document.csp; json.csp = E10SUtils.serializeCSP(csp);