Add docs for the individual encodings.

2026-06-30 21:17:58 -04:00 · 2018-05-23 15:43:32 +03:00
parent 06caa06e60
commit 35c6e946fe
51 changed files with 855 additions and 68 deletions
@@ -0,0 +1,16 @@
+/// This is Big5 with HKSCS with mappings to more recent Unicode assignments
+/// instead of the Private Use Area code points that have been used historically.
+/// It is believed to be able to decode existing Web content in a way that makes
+/// sense.
+///
+/// To avoid form submissions generating data that Web servers don't understand,
+/// the encoder doesn't use the HKSCS byte sequences that precede the unextended
+/// Big5 in the lexical order.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/big5.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/big5-bmp.html)
+///
+/// This encoding is designed to be suited for decoding the Windows code page 950
+/// and its HKSCS patched "951" variant such that the text makes sense, given
+/// assignments that Unicode has made after those encodings used Private Use
+/// Area characters.
@@ -0,0 +1,12 @@
+/// This is the legacy Unix encoding for Japanese.
+///
+/// For compatibility with Web servers that don't expect three-byte sequences
+/// in form submissions, the encoder doesn't generate three-byte sequences.
+/// That is, the JIS X 0212 support is decode-only.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/euc-jp.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/euc-jp-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 20932. There are error
+/// handling differences and a handful of 2-byte sequences that decode differently.
+/// Additionally, Windows doesn't support 3-byte sequences.
@@ -0,0 +1,10 @@
+/// This is the Korean encoding for Windows. It extends the Unix legacy encoding
+/// for Korean, based on KS X 1001 (which also formed the base of MacKorean on Mac OS
+/// Classic), with all the characters from the Hangul Syllables block of Unicode.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/euc-kr.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/euc-kr-bmp.html)
+///
+/// This encoding matches the Windows code page 949, except Windows decodes byte 0x80
+/// to U+0080 and some byte sequences that are error per the Encoding Standard to
+/// the question mark or the Private Use Area.
@@ -0,0 +1,16 @@
+/// The decoder for this encoding is the same as the decoder for gb18030.
+/// The encoder side of this encoding is GBK with Windows code page 936 euro
+/// sign behavior. GBK extends GB2312-80 to cover the CJK Unified Ideographs
+/// Unicode block as well as a handful of ideographs from the CJK Unified
+/// Ideographs Extension A and CJK Compatibility Ideographs blocks.
+///
+/// Unlike e.g. in the case of ISO-8859-1 and windows-1252, GBK encoder wasn't
+/// unified with the gb18030 encoder in the Encoding Standard out of concern
+/// that servers that expect GBK form submissions might not be able to handle
+/// the four-byte sequences.
+///
+/// [Index visualization for the two-byte sequences](https://encoding.spec.whatwg.org/gb18030.html),
+/// [Visualization of BMP coverage of the two-byte index](https://encoding.spec.whatwg.org/gb18030-bmp.html)
+///
+/// The encoder of this encoding roughly matches the Windows code page 936.
+/// The decoder side is a superset.
@@ -0,0 +1,8 @@
+/// This is the most notable one of the DOS Cyrillic code pages. It has the same
+/// box drawing characters as code page 437, so it can be used for decoding
+/// DOS-era ASCII + box drawing data.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/ibm866.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/ibm866-bmp.html)
+///
+/// This encoding matches the Windows code page 866.
@@ -0,0 +1,10 @@
+/// This is the primary pre-UTF-8 encoding for Japanese email. It uses the ASCII
+/// byte range to encode non-Basic Latin characters. It's the only encoding
+/// supported by this crate whose encoder is stateful.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/jis0208.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/jis0208-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 50220. Notably, Windows
+/// uses U+30FB in place of the REPLACEMENT CHARACTER and otherwise differs in
+/// error handling.
@@ -0,0 +1,8 @@
+/// This is the Nordic part of the ISO/IEC 8859 encoding family. This encoding
+/// is also known as Latin 6.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-10.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-10-bmp.html)
+///
+/// The Windows code page number for this encoding is 28600, but kernel32.dll
+/// does not support this encoding.
@@ -0,0 +1,8 @@
+/// This is the Baltic part of the ISO/IEC 8859 encoding family. This encoding
+/// is also known as Latin 7.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-13.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-13-bmp.html)
+///
+/// This encoding matches the Windows code page 28603, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
@@ -0,0 +1,8 @@
+/// This is the Celtic part of the ISO/IEC 8859 encoding family. This encoding
+/// is also known as Latin 8.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-14.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-14-bmp.html)
+///
+/// The Windows code page number for this encoding is 28604, but kernel32.dll
+/// does not support this encoding.
@@ -0,0 +1,7 @@
+/// This is the revised Western European part of the ISO/IEC 8859 encoding
+/// family. This encoding is also known as Latin 9.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-15.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-15-bmp.html)
+///
+/// This encoding matches the Windows code page 28605.
@@ -0,0 +1,8 @@
+/// This is the South-Eastern European part of the ISO/IEC 8859 encoding
+/// family. This encoding is also known as Latin 10.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-16.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-16-bmp.html)
+///
+/// The Windows code page number for this encoding is 28606, but kernel32.dll
+/// does not support this encoding.
@@ -0,0 +1,6 @@
+/// This is the Central European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 2.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-2.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-2-bmp.html)
+///
+/// This encoding matches the Windows code page 28592.
@@ -0,0 +1,6 @@
+/// This is the South European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 3.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-3.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-3-bmp.html)
+///
+/// This encoding matches the Windows code page 28593.
@@ -0,0 +1,6 @@
+/// This is the North European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 4.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-4.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-4-bmp.html)
+///
+/// This encoding matches the Windows code page 28594.
@@ -0,0 +1,6 @@
+/// This is the Cyrillic part of the ISO/IEC 8859 encoding family.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-5.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-5-bmp.html)
+///
+/// This encoding matches the Windows code page 28595.
@@ -0,0 +1,7 @@
+/// This is the Arabic part of the ISO/IEC 8859 encoding family.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-6.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-6-bmp.html)
+///
+/// This encoding matches the Windows code page 28596, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
@@ -0,0 +1,11 @@
+/// This is the Greek part of the ISO/IEC 8859 encoding family.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-7.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-7-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 28597. Windows decodes
+/// unassigned code points, the currency signs at 0xA4 and 0xA5 as well as
+/// 0xAA, which should be U+037A GREEK YPOGEGRAMMENI, to the Private Use Area
+/// of Unicode. Windows decodes 0xA1 to U+02BD MODIFIER LETTER REVERSED COMMA
+/// instead of U+2018 LEFT SINGLE QUOTATION MARK and 0xA2 to U+02BC MODIFIER
+/// LETTER APOSTROPHE instead of U+2019 RIGHT SINGLE QUOTATION MARK.
@@ -0,0 +1,9 @@
+/// This is the Hebrew part of the ISO/IEC 8859 encoding family in logical order.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-8.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-8-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 38598. Windows decodes
+/// 0xAF to OVERLINE instead of MACRON and 0xFE and 0xFD to the Private Use
+/// Area instead of LRM and RLM. Windows decodes unassigned code points to
+/// the Private Use Area.
@@ -0,0 +1,9 @@
+/// This is the Hebrew part of the ISO/IEC 8859 encoding family in visual order.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-8.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-8-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 28598. Windows decodes
+/// 0xAF to OVERLINE instead of MACRON and 0xFE and 0xFD to the Private Use
+/// Area instead of LRM and RLM. Windows decodes unassigned code points to
+/// the Private Use Area.
@@ -0,0 +1,6 @@
+/// This is an encoding for Russian from [RFC 1489](https://tools.ietf.org/html/rfc1489).
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/koi8-r.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/koi8-r-bmp.html)
+///
+/// This encoding matches the Windows code page 20866.
@@ -0,0 +1,6 @@
+/// This is an encoding for Ukrainian adapted from KOI8-R.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/koi8-u.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/koi8-u-bmp.html)
+///
+/// This encoding matches the Windows code page 21866.
@@ -0,0 +1,8 @@
+/// This is the Japanese encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/shift_jis.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/shift_jis-bmp.html)
+///
+/// This encoding matches the Windows code page 932, except Windows decodes some byte
+/// sequences that are error per the Encoding Standard to the question mark or the
+/// Private Use Area and generally uses U+30FB in place of the REPLACEMENT CHARACTER.
@@ -0,0 +1,8 @@
+/// This decode-only encoding uses 16-bit code units due to Unicode originally
+/// having been designed as a 16-bit repertoire. In the absence of a byte order
+/// mark the big endian byte order is assumed.
+///
+/// There is no corresponding encoder in this crate or in the Encoding
+/// Standard. The output encoding of this encoding is UTF-8.
+///
+/// This encoding matches the Windows code page 1201.
@@ -0,0 +1,8 @@
+/// This decode-only encoding uses 16-bit code units due to Unicode originally
+/// having been designed as a 16-bit repertoire. In the absence of a byte order
+/// mark the little endian byte order is assumed.
+///
+/// There is no corresponding encoder in this crate or in the Encoding
+/// Standard. The output encoding of this encoding is UTF-8.
+///
+/// This encoding matches the Windows code page 1200.
@@ -0,0 +1,5 @@
+/// This is the encoding that should be used for all new development. It can
+/// represent all of Unicode.
+///
+/// This encoding matches the Windows code page 65001, except Windows differs
+/// in the number of errors generated for some erroneous byte sequences.
@@ -0,0 +1,9 @@
+/// This encoding matches GB18030-2005 except the two-byte sequence 0xA3 0xA0
+/// maps to U+3000 for compatibility with existing Web content. As a result,
+/// this encoding can represent all of Unicode except for the private-use
+/// character U+E5E5.
+///
+/// [Index visualization for the two-byte sequences](https://encoding.spec.whatwg.org/gb18030.html),
+/// [Visualization of BMP coverage of the two-byte index](https://encoding.spec.whatwg.org/gb18030-bmp.html)
+///
+/// This encoding matches the Windows code page 54936.
@@ -0,0 +1,7 @@
+/// This is the MacRoman encoding from Mac OS Classic.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/macintosh.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/macintosh-bmp.html)
+///
+/// This encoding matches the Windows code page 10000, except Windows decodes
+/// 0xBD to U+2126 OHM SIGN instead of U+03A9 GREEK CAPITAL LETTER OMEGA.
@@ -0,0 +1,10 @@
+/// This decode-only encoding decodes all non-zero-length streams to a single
+/// REPLACEMENT CHARACTER. Its purpose is to avoid the use of an
+/// ASCII-compatible fallback encoding (typically windows-1252) for some
+/// encodings that are no longer supported by the Web Platform and that
+/// would be dangerous to treat as ASCII-compatible.
+///
+/// There is no corresponding encoder. The output encoding of this encoding
+/// is UTF-8.
+///
+/// This encoding does not have a Windows code page number.
@@ -0,0 +1,6 @@
+/// This is the Central European encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1250.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1250-bmp.html)
+///
+/// This encoding matches the Windows code page 1250.
@@ -0,0 +1,6 @@
+/// This is the Cyrillic encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1251.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1251-bmp.html)
+///
+/// This encoding matches the Windows code page 1251.
@@ -0,0 +1,7 @@
+/// This is the Western encoding for Windows. It is an extension of ISO-8859-1,
+/// which is known as Latin 1.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1252.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1252-bmp.html)
+///
+/// This encoding matches the Windows code page 1252.
@@ -0,0 +1,8 @@
+/// This is the Greek encoding for Windows. It is mostly an extension of
+/// ISO-8859-7, but U+0386 is mapped to a different byte.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1253.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1253-bmp.html)
+///
+/// This encoding matches the Windows code page 1253, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
@@ -0,0 +1,7 @@
+/// This is the Turkish encoding for Windows. It is an extension of ISO-8859-9,
+/// which is known as Latin 5.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1254.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1254-bmp.html)
+///
+/// This encoding matches the Windows code page 1254.
@@ -0,0 +1,8 @@
+/// This is the Hebrew encoding for Windows. It is an extension of ISO-8859-8-I,
+/// except for a currency sign swap.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1255.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1255-bmp.html)
+///
+/// This encoding matches the Windows code page 1255, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
@@ -0,0 +1,6 @@
+/// This is the Arabic encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1256.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1256-bmp.html)
+///
+/// This encoding matches the Windows code page 1256.
@@ -0,0 +1,7 @@
+/// This is the Baltic encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1257.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1257-bmp.html)
+///
+/// This encoding matches the Windows code page 1257, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
@@ -0,0 +1,11 @@
+/// This is the Vietnamese encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1258.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1258-bmp.html)
+///
+/// This encoding matches the Windows code page 1258 when used in the
+/// non-normalizing mode. Unlike with the other single-byte encodings, the
+/// result of decoding is not necessarily in Normalization Form C. On the
+/// other hand, input in the Normalization Form C is not encoded without
+/// replacement. In general, it's a bad idea to encode to encodings other
+/// than UTF-8, but this encoding is especially hazardous to encode to.
@@ -0,0 +1,7 @@
+/// This is the Thai encoding for Windows. It is an extension of TIS-620 / ISO-8859-11.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-874.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-874-bmp.html)
+///
+/// This encoding matches the Windows code page 874, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
@@ -0,0 +1,6 @@
+/// This is the MacUkrainian encoding from Mac OS Classic.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/x-mac-cyrillic.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/x-mac-cyrillic-bmp.html)
+///
+/// This encoding matches the Windows code page 10017.
@@ -0,0 +1,6 @@
+/// This encoding offsets the non-ASCII bytes by `0xF700` thereby decoding
+/// them to the Private Use Area of Unicode. It was used for loading binary
+/// data into a JavaScript string using `XMLHttpRequest` before XHR supported
+/// the `"arraybuffer"` response type.
+///
+/// This encoding does not have a Windows code page number.
@@ -33,6 +33,13 @@ class Label:
  def __cmp__(self, other):
    return cmp_from_end(self.label, other.label)

+class CodePage:
+  # Pairs a Windows code page number with a `preferred` flag and orders
+  # instances by their code page number.
+  # NOTE(review): `preferred` presumably distinguishes an encoding's primary
+  # code page from an alias code page -- confirm against the callers.
+  def __init__(self, code_page, preferred):
+    self.code_page = code_page
+    self.preferred = preferred
+  def __cmp__(self, other):
+    # __cmp__ has to return a negative, zero or positive integer. Returning
+    # the tuple of both numbers (as before) is not a valid comparison result.
+    # The subtraction of booleans is the portable spelling of cmp(a, b).
+    return (self.code_page > other.code_page) - (self.code_page < other.code_page)
+
 def static_u16_table(name, data):
  data_file.write('''pub static %s: [u16; %d] = [
  ''' % (name, len(data)))
@@ -82,6 +89,8 @@ single_byte = []

 multi_byte = []

+code_pages = []
+
 def to_camel_name(name):
  if name == u"iso-8859-8-i":
    return u"Iso8I"
@@ -98,6 +107,66 @@ def to_snake_name(name):
 def to_dom_name(name):
  return name

+# Windows code page number -> encoding name, one entry per encoding.
+# These are the numbers quoted in the per-encoding documentation.
+encodings_by_code_page = {
+  932: "Shift_JIS",
+  936: "GBK",
+  949: "EUC-KR",
+  950: "Big5",
+  866: "IBM866",
+  874: "windows-874",
+  1200: "UTF-16LE",
+  1201: "UTF-16BE",
+  1250: "windows-1250",
+  1251: "windows-1251",
+  1252: "windows-1252",
+  1253: "windows-1253",
+  1254: "windows-1254",
+  1255: "windows-1255",
+  1256: "windows-1256",
+  1257: "windows-1257",
+  1258: "windows-1258",
+  10000: "macintosh",
+  10017: "x-mac-cyrillic",
+  20866: "KOI8-R",
+  20932: "EUC-JP",
+  21866: "KOI8-U",
+  28592: "ISO-8859-2",
+  28593: "ISO-8859-3",
+  28594: "ISO-8859-4",
+  28595: "ISO-8859-5",
+  28596: "ISO-8859-6",
+  28597: "ISO-8859-7",
+  28598: "ISO-8859-8",
+  28600: "ISO-8859-10",
+  28603: "ISO-8859-13",
+  28604: "ISO-8859-14",
+  28605: "ISO-8859-15",
+  28606: "ISO-8859-16",
+  38598: "ISO-8859-8-I",
+  # 50220 is the number the ISO-2022-JP documentation and the code page
+  # table refer to; 50221 and 50222 are listed as aliases below.
+  50220: "ISO-2022-JP",
+  54936: "gb18030",
+  65001: "UTF-8",
+}
+
+# Reverse lookup built from the table above: encoding name -> code page.
+code_pages_by_encoding = {}
+
+for code_page, encoding in encodings_by_code_page.items():
+  code_pages_by_encoding[encoding] = code_page
+
+# Additional code page numbers that map onto an encoding listed above.
+# Every key must be unique: a repeated key silently drops the earlier entry.
+encoding_by_alias_code_page = {
+  951: "Big5",
+  20936: "GBK",
+  20949: "EUC-KR",
+  28591: "windows-1252",
+  28599: "windows-1254",
+  28601: "windows-874",
+  50221: "ISO-2022-JP",
+  50222: "ISO-2022-JP",
+  51932: "EUC-JP",
+  51936: "GBK",
+  51949: "EUC-KR",
+}
+
+
 #

 for group in data:
@@ -177,7 +246,11 @@ for name in preferred:
  else:
    variant = to_camel_name(name)

-  label_file.write('''/// The initializer for the %s encoding.
+  docfile = open("doc/%s.txt" % name, "r")
+  doctext = docfile.read()
+  docfile.close()
+
+  label_file.write('''/// The initializer for the [%s](static.%s.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -196,13 +269,14 @@ pub static %s_INIT: Encoding = Encoding {

 /// The %s encoding.
 ///
+%s///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static %s: &'static Encoding = &%s_INIT;

-''' % (to_dom_name(name), to_constant_name(name), to_dom_name(name), variant, to_dom_name(name), to_constant_name(name), to_constant_name(name)))
+''' % (to_dom_name(name), to_constant_name(name), to_constant_name(name), to_dom_name(name), variant, to_dom_name(name), doctext, to_constant_name(name), to_constant_name(name)))

 label_file.write("""static LABELS_SORTED: [&'static str; %d] = [
 """ % len(labels))
@@ -286,7 +286,8 @@ impl EucJpEncoder {
                        let lead = (pointer / 94) + 0xA1;
                        let trail = (pointer % 94) + 0xA1;
                        handle.write_two(lead as u8, trail as u8)
-                    } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D) || bmp == 0xF929
+                    } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D)
+                        || bmp == 0xF929
                        || bmp == 0xF9DC
                    {
                        // Guaranteed to be found in IBM_KANJI
@@ -205,7 +205,8 @@ fn ksx1001_encode_misc(bmp: u16) -> Option<(usize, usize)> {
            return Some((0x81 + 0x25, 0xA1 + pos));
        }
    }
-    if in_inclusive_range16(bmp, 0x2015, 0x266D) || in_inclusive_range16(bmp, 0x321C, 0x33D8)
+    if in_inclusive_range16(bmp, 0x2015, 0x266D)
+        || in_inclusive_range16(bmp, 0x321C, 0x33D8)
        || in_inclusive_range16(bmp, 0xFF3C, 0xFFE5)
        || in_inclusive_range16(bmp, 0x00A1, 0x00F7)
        || in_inclusive_range16(bmp, 0x02C7, 0x02DD)
@@ -1477,12 +1477,14 @@ impl<'a> Utf8Source<'a> {
            return unsafe { ::std::mem::transmute(point) };
        }
        if unit < 0xF0u32 {
-            let point = ((unit & 0xFu32) << 12) | ((self.slice[self.pos + 1] as u32 & 0x3Fu32) << 6)
+            let point = ((unit & 0xFu32) << 12)
+                | ((self.slice[self.pos + 1] as u32 & 0x3Fu32) << 6)
                | (self.slice[self.pos + 2] as u32 & 0x3Fu32);
            self.pos += 3;
            return unsafe { ::std::mem::transmute(point) };
        }
-        let point = ((unit & 0x7u32) << 18) | ((self.slice[self.pos + 1] as u32 & 0x3Fu32) << 12)
+        let point = ((unit & 0x7u32) << 18)
+            | ((self.slice[self.pos + 1] as u32 & 0x3Fu32) << 12)
            | ((self.slice[self.pos + 2] as u32 & 0x3Fu32) << 6)
            | (self.slice[self.pos + 3] as u32 & 0x3Fu32);
        self.pos += 4;
@@ -667,7 +667,8 @@ impl Iso2022JpEncoder {
                                let trail = (pointer % 94) + 0x21;
                                handle.write_two(lead as u8, trail as u8);
                                continue;
-                            } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D) || bmp == 0xF929
+                            } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D)
+                                || bmp == 0xF929
                                || bmp == 0xF9DC
                            {
                                // Guaranteed to be found in IBM_KANJI
@@ -561,8 +561,14 @@
 //! <tr><th>Encoding</th><th>Code Page</th><th>PUA</th><th>Remarks</th></tr>
 //! </thead>
 //! <tbody>
+//! <tr><td>Shift_JIS</td><td>932</td><td></td><td></td></tr>
+//! <tr><td>GBK</td><td>936</td><td></td><td></td></tr>
+//! <tr><td>EUC-KR</td><td>949</td><td></td><td></td></tr>
+//! <tr><td>Big5</td><td>950</td><td></td><td></td></tr>
 //! <tr><td>IBM866</td><td>866</td><td></td><td></td></tr>
 //! <tr><td>windows-874</td><td>874</td><td>&bullet;</td><td></td></tr>
+//! <tr><td>UTF-16LE</td><td>1200</td><td></td><td></td></tr>
+//! <tr><td>UTF-16BE</td><td>1201</td><td></td><td></td></tr>
 //! <tr><td>windows-1250</td><td>1250</td><td></td><td></td></tr>
 //! <tr><td>windows-1251</td><td>1251</td><td></td><td></td></tr>
 //! <tr><td>windows-1252</td><td>1252</td><td></td><td></td></tr>
@@ -575,6 +581,7 @@
 //! <tr><td>macintosh</td><td>10000</td><td></td><td>1</td></tr>
 //! <tr><td>x-mac-cyrillic</td><td>10017</td><td></td><td>2</td></tr>
 //! <tr><td>KOI8-R</td><td>20866</td><td></td><td></td></tr>
+//! <tr><td>EUC-JP</td><td>20932</td><td></td><td></td></tr>
 //! <tr><td>KOI8-U</td><td>21866</td><td></td><td></td></tr>
 //! <tr><td>ISO-8859-2</td><td>28592</td><td></td><td></td></tr>
 //! <tr><td>ISO-8859-3</td><td>28593</td><td></td><td></td></tr>
@@ -586,6 +593,9 @@
 //! <tr><td>ISO-8859-13</td><td>28603</td><td>&bullet;</td><td></td></tr>
 //! <tr><td>ISO-8859-15</td><td>28605</td><td></td><td></td></tr>
 //! <tr><td>ISO-8859-8-I</td><td>38598</td><td></td><td>5</td></tr>
+//! <tr><td>ISO-2022-JP</td><td>50220</td><td></td><td></td></tr>
+//! <tr><td>gb18030</td><td>54936</td><td></td><td></td></tr>
+//! <tr><td>UTF-8</td><td>65001</td><td></td><td></td></tr>
 //! </tbody>
 //! </table>
 //!
@@ -739,7 +749,7 @@ const NCR_EXTRA: usize = 10; // &#1114111;

 const LONGEST_LABEL_LENGTH: usize = 19; // cseucpkdfmtjapanese

-/// The initializer for the Big5 encoding.
+/// The initializer for the [Big5](static.BIG5.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -758,13 +768,30 @@ pub static BIG5_INIT: Encoding = Encoding {

 /// The Big5 encoding.
 ///
+/// This is Big5 with HKSCS with mappings to more recent Unicode assignments
+/// instead of the Private Use Area code points that have been used historically.
+/// It is believed to be able to decode existing Web content in a way that makes
+/// sense.
+///
+/// To avoid form submissions generating data that Web servers don't understand,
+/// the encoder doesn't use the HKSCS byte sequences that precede the unextended
+/// Big5 in the lexical order.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/big5.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/big5-bmp.html)
+///
+/// This encoding is designed to be suited for decoding the Windows code page 950
+/// and its HKSCS patched "951" variant such that the text makes sense, given
+/// assignments that Unicode has made after those encodings used Private Use
+/// Area characters.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static BIG5: &'static Encoding = &BIG5_INIT;

-/// The initializer for the EUC-JP encoding.
+/// The initializer for the [EUC-JP](static.EUC_JP.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -783,13 +810,26 @@ pub static EUC_JP_INIT: Encoding = Encoding {

 /// The EUC-JP encoding.
 ///
+/// This is the legacy Unix encoding for Japanese.
+///
+/// For compatibility with Web servers that don't expect three-byte sequences
+/// in form submissions, the encoder doesn't generate three-byte sequences.
+/// That is, the JIS X 0212 support is decode-only.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/euc-jp.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/euc-jp-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 20932. There are error
+/// handling differences and a handful of 2-byte sequences that decode differently.
+/// Additionally, Windows doesn't support 3-byte sequences.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static EUC_JP: &'static Encoding = &EUC_JP_INIT;

-/// The initializer for the EUC-KR encoding.
+/// The initializer for the [EUC-KR](static.EUC_KR.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -808,13 +848,24 @@ pub static EUC_KR_INIT: Encoding = Encoding {

 /// The EUC-KR encoding.
 ///
+/// This is the Korean encoding for Windows. It extends the Unix legacy encoding
+/// for Korean, based on KS X 1001 (which also formed the base of MacKorean on Mac OS
+/// Classic), with all the characters from the Hangul Syllables block of Unicode.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/euc-kr.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/euc-kr-bmp.html)
+///
+/// This encoding matches the Windows code page 949, except Windows decodes byte 0x80
+/// to U+0080 and some byte sequences that are error per the Encoding Standard to
+/// the question mark or the Private Use Area.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static EUC_KR: &'static Encoding = &EUC_KR_INIT;

-/// The initializer for the GBK encoding.
+/// The initializer for the [GBK](static.GBK.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -833,13 +884,30 @@ pub static GBK_INIT: Encoding = Encoding {

 /// The GBK encoding.
 ///
+/// The decoder for this encoding is the same as the decoder for gb18030.
+/// The encoder side of this encoding is GBK with Windows code page 936 euro
+/// sign behavior. GBK extends GB2312-80 to cover the CJK Unified Ideographs
+/// Unicode block as well as a handful of ideographs from the CJK Unified
+/// Ideographs Extension A and CJK Compatibility Ideographs blocks.
+///
+/// Unlike e.g. in the case of ISO-8859-1 and windows-1252, GBK encoder wasn't
+/// unified with the gb18030 encoder in the Encoding Standard out of concern
+/// that servers that expect GBK form submissions might not be able to handle
+/// the four-byte sequences.
+///
+/// [Index visualization for the two-byte sequences](https://encoding.spec.whatwg.org/gb18030.html),
+/// [Visualization of BMP coverage of the two-byte index](https://encoding.spec.whatwg.org/gb18030-bmp.html)
+///
+/// The encoder of this encoding roughly matches the Windows code page 936.
+/// The decoder side is a superset.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static GBK: &'static Encoding = &GBK_INIT;

-/// The initializer for the IBM866 encoding.
+/// The initializer for the [IBM866](static.IBM866.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -858,13 +926,22 @@ pub static IBM866_INIT: Encoding = Encoding {

 /// The IBM866 encoding.
 ///
+/// This is the most notable one of the DOS Cyrillic code pages. It has the same
+/// box drawing characters as code page 437, so it can be used for decoding
+/// DOS-era ASCII + box drawing data.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/ibm866.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/ibm866-bmp.html)
+///
+/// This encoding matches the Windows code page 866.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static IBM866: &'static Encoding = &IBM866_INIT;

-/// The initializer for the ISO-2022-JP encoding.
+/// The initializer for the [ISO-2022-JP](static.ISO_2022_JP.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -883,13 +960,24 @@ pub static ISO_2022_JP_INIT: Encoding = Encoding {

 /// The ISO-2022-JP encoding.
 ///
+/// This is the primary pre-UTF-8 encoding for Japanese email. It uses the ASCII
+/// byte range to encode non-Basic Latin characters. It's the only encoding
+/// supported by this crate whose encoder is stateful.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/jis0208.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/jis0208-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 50220. Notably, Windows
+/// uses U+30FB in place of the REPLACEMENT CHARACTER and otherwise differs in
+/// error handling.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_2022_JP: &'static Encoding = &ISO_2022_JP_INIT;

-/// The initializer for the ISO-8859-10 encoding.
+/// The initializer for the [ISO-8859-10](static.ISO_8859_10.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -908,13 +996,22 @@ pub static ISO_8859_10_INIT: Encoding = Encoding {

 /// The ISO-8859-10 encoding.
 ///
+/// This is the Nordic part of the ISO/IEC 8859 encoding family. This encoding
+/// is also known as Latin 6.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-10.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-10-bmp.html)
+///
+/// The Windows code page number for this encoding is 28600, but kernel32.dll
+/// does not support this encoding.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_10: &'static Encoding = &ISO_8859_10_INIT;

-/// The initializer for the ISO-8859-13 encoding.
+/// The initializer for the [ISO-8859-13](static.ISO_8859_13.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -933,13 +1030,22 @@ pub static ISO_8859_13_INIT: Encoding = Encoding {

 /// The ISO-8859-13 encoding.
 ///
+/// This is the Baltic part of the ISO/IEC 8859 encoding family. This encoding
+/// is also known as Latin 7.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-13.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-13-bmp.html)
+///
+/// This encoding matches the Windows code page 28603, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_13: &'static Encoding = &ISO_8859_13_INIT;

-/// The initializer for the ISO-8859-14 encoding.
+/// The initializer for the [ISO-8859-14](static.ISO_8859_14.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -958,13 +1064,22 @@ pub static ISO_8859_14_INIT: Encoding = Encoding {

 /// The ISO-8859-14 encoding.
 ///
+/// This is the Celtic part of the ISO/IEC 8859 encoding family. This encoding
+/// is also known as Latin 8.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-14.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-14-bmp.html)
+///
+/// The Windows code page number for this encoding is 28604, but kernel32.dll
+/// does not support this encoding.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_14: &'static Encoding = &ISO_8859_14_INIT;

-/// The initializer for the ISO-8859-15 encoding.
+/// The initializer for the [ISO-8859-15](static.ISO_8859_15.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -983,13 +1098,21 @@ pub static ISO_8859_15_INIT: Encoding = Encoding {

 /// The ISO-8859-15 encoding.
 ///
+/// This is the revised Western European part of the ISO/IEC 8859 encoding
+/// family. This encoding is also known as Latin 9.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-15.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-15-bmp.html)
+///
+/// This encoding matches the Windows code page 28605.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_15: &'static Encoding = &ISO_8859_15_INIT;

-/// The initializer for the ISO-8859-16 encoding.
+/// The initializer for the [ISO-8859-16](static.ISO_8859_16.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1008,13 +1131,22 @@ pub static ISO_8859_16_INIT: Encoding = Encoding {

 /// The ISO-8859-16 encoding.
 ///
+/// This is the South-Eastern European part of the ISO/IEC 8859 encoding
+/// family. This encoding is also known as Latin 10.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-16.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-16-bmp.html)
+///
+/// The Windows code page number for this encoding is 28606, but kernel32.dll
+/// does not support this encoding.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_16: &'static Encoding = &ISO_8859_16_INIT;

-/// The initializer for the ISO-8859-2 encoding.
+/// The initializer for the [ISO-8859-2](static.ISO_8859_2.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1033,13 +1165,20 @@ pub static ISO_8859_2_INIT: Encoding = Encoding {

 /// The ISO-8859-2 encoding.
 ///
+/// This is the Central European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 2.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-2.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-2-bmp.html)
+///
+/// This encoding matches the Windows code page 28592.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_2: &'static Encoding = &ISO_8859_2_INIT;

-/// The initializer for the ISO-8859-3 encoding.
+/// The initializer for the [ISO-8859-3](static.ISO_8859_3.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1058,13 +1197,20 @@ pub static ISO_8859_3_INIT: Encoding = Encoding {

 /// The ISO-8859-3 encoding.
 ///
+/// This is the South European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 3.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-3.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-3-bmp.html)
+///
+/// This encoding matches the Windows code page 28593.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_3: &'static Encoding = &ISO_8859_3_INIT;

-/// The initializer for the ISO-8859-4 encoding.
+/// The initializer for the [ISO-8859-4](static.ISO_8859_4.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1083,13 +1229,20 @@ pub static ISO_8859_4_INIT: Encoding = Encoding {

 /// The ISO-8859-4 encoding.
 ///
+/// This is the North European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 4.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-4.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-4-bmp.html)
+///
+/// This encoding matches the Windows code page 28594.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_4: &'static Encoding = &ISO_8859_4_INIT;

-/// The initializer for the ISO-8859-5 encoding.
+/// The initializer for the [ISO-8859-5](static.ISO_8859_5.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1108,13 +1261,20 @@ pub static ISO_8859_5_INIT: Encoding = Encoding {

 /// The ISO-8859-5 encoding.
 ///
+/// This is the Cyrillic part of the ISO/IEC 8859 encoding family.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-5.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-5-bmp.html)
+///
+/// This encoding matches the Windows code page 28595.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_5: &'static Encoding = &ISO_8859_5_INIT;

-/// The initializer for the ISO-8859-6 encoding.
+/// The initializer for the [ISO-8859-6](static.ISO_8859_6.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1133,13 +1293,21 @@ pub static ISO_8859_6_INIT: Encoding = Encoding {

 /// The ISO-8859-6 encoding.
 ///
+/// This is the Arabic part of the ISO/IEC 8859 encoding family.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-6.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-6-bmp.html)
+///
+/// This encoding matches the Windows code page 28596, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_6: &'static Encoding = &ISO_8859_6_INIT;

-/// The initializer for the ISO-8859-7 encoding.
+/// The initializer for the [ISO-8859-7](static.ISO_8859_7.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1158,13 +1326,25 @@ pub static ISO_8859_7_INIT: Encoding = Encoding {

 /// The ISO-8859-7 encoding.
 ///
+/// This is the Greek part of the ISO/IEC 8859 encoding family.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-7.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-7-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 28597. Windows decodes
+/// unassigned code points, the currency signs at 0xA4 and 0xA5 as well as
+/// 0xAA, which should be U+037A GREEK YPOGEGRAMMENI, to the Private Use Area
+/// of Unicode. Windows decodes 0xA1 to U+02BD MODIFIER LETTER REVERSED COMMA
+/// instead of U+2018 LEFT SINGLE QUOTATION MARK and 0xA2 to U+02BC MODIFIER
+/// LETTER APOSTROPHE instead of U+2019 RIGHT SINGLE QUOTATION MARK.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_7: &'static Encoding = &ISO_8859_7_INIT;

-/// The initializer for the ISO-8859-8 encoding.
+/// The initializer for the [ISO-8859-8](static.ISO_8859_8.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1183,13 +1363,23 @@ pub static ISO_8859_8_INIT: Encoding = Encoding {

 /// The ISO-8859-8 encoding.
 ///
+/// This is the Hebrew part of the ISO/IEC 8859 encoding family in visual order.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-8.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-8-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 28598. Windows decodes
+/// 0xAF to OVERLINE instead of MACRON and 0xFD and 0xFE to the Private Use
+/// Area instead of LRM and RLM. Windows decodes unassigned code points to
+/// the Private Use Area.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_8: &'static Encoding = &ISO_8859_8_INIT;

-/// The initializer for the ISO-8859-8-I encoding.
+/// The initializer for the [ISO-8859-8-I](static.ISO_8859_8_I.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1208,13 +1398,23 @@ pub static ISO_8859_8_I_INIT: Encoding = Encoding {

 /// The ISO-8859-8-I encoding.
 ///
+/// This is the Hebrew part of the ISO/IEC 8859 encoding family in logical order.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-8.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-8-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 38598. Windows decodes
+/// 0xAF to OVERLINE instead of MACRON and 0xFD and 0xFE to the Private Use
+/// Area instead of LRM and RLM. Windows decodes unassigned code points to
+/// the Private Use Area.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_8_I: &'static Encoding = &ISO_8859_8_I_INIT;

-/// The initializer for the KOI8-R encoding.
+/// The initializer for the [KOI8-R](static.KOI8_R.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1233,13 +1433,20 @@ pub static KOI8_R_INIT: Encoding = Encoding {

 /// The KOI8-R encoding.
 ///
+/// This is an encoding for Russian from [RFC 1489](https://tools.ietf.org/html/rfc1489).
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/koi8-r.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/koi8-r-bmp.html)
+///
+/// This encoding matches the Windows code page 20866.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static KOI8_R: &'static Encoding = &KOI8_R_INIT;

-/// The initializer for the KOI8-U encoding.
+/// The initializer for the [KOI8-U](static.KOI8_U.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1258,13 +1465,20 @@ pub static KOI8_U_INIT: Encoding = Encoding {

 /// The KOI8-U encoding.
 ///
+/// This is an encoding for Ukrainian adapted from KOI8-R.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/koi8-u.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/koi8-u-bmp.html)
+///
+/// This encoding matches the Windows code page 21866.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static KOI8_U: &'static Encoding = &KOI8_U_INIT;

-/// The initializer for the Shift_JIS encoding.
+/// The initializer for the [Shift_JIS](static.SHIFT_JIS.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1283,13 +1497,22 @@ pub static SHIFT_JIS_INIT: Encoding = Encoding {

 /// The Shift_JIS encoding.
 ///
+/// This is the Japanese encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/shift_jis.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/shift_jis-bmp.html)
+///
+/// This encoding matches the Windows code page 932, except Windows decodes some byte
+/// sequences that are errors per the Encoding Standard to the question mark or the
+/// Private Use Area and generally uses U+30FB in place of the REPLACEMENT CHARACTER.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static SHIFT_JIS: &'static Encoding = &SHIFT_JIS_INIT;

-/// The initializer for the UTF-16BE encoding.
+/// The initializer for the [UTF-16BE](static.UTF_16BE.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1308,13 +1531,22 @@ pub static UTF_16BE_INIT: Encoding = Encoding {

 /// The UTF-16BE encoding.
 ///
+/// This decode-only encoding uses 16-bit code units due to Unicode originally
+/// having been designed as a 16-bit repertoire. In the absence of a byte order
+/// mark the big endian byte order is assumed.
+///
+/// There is no corresponding encoder in this crate or in the Encoding
+/// Standard. The output encoding of this encoding is UTF-8.
+///
+/// This encoding matches the Windows code page 1201.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static UTF_16BE: &'static Encoding = &UTF_16BE_INIT;

-/// The initializer for the UTF-16LE encoding.
+/// The initializer for the [UTF-16LE](static.UTF_16LE.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1333,13 +1565,22 @@ pub static UTF_16LE_INIT: Encoding = Encoding {

 /// The UTF-16LE encoding.
 ///
+/// This decode-only encoding uses 16-bit code units due to Unicode originally
+/// having been designed as a 16-bit repertoire. In the absence of a byte order
+/// mark the little endian byte order is assumed.
+///
+/// There is no corresponding encoder in this crate or in the Encoding
+/// Standard. The output encoding of this encoding is UTF-8.
+///
+/// This encoding matches the Windows code page 1200.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static UTF_16LE: &'static Encoding = &UTF_16LE_INIT;

-/// The initializer for the UTF-8 encoding.
+/// The initializer for the [UTF-8](static.UTF_8.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1358,13 +1599,19 @@ pub static UTF_8_INIT: Encoding = Encoding {

 /// The UTF-8 encoding.
 ///
+/// This is the encoding that should be used for all new development. It can
+/// represent all of Unicode.
+///
+/// This encoding matches the Windows code page 65001, except Windows differs
+/// in the number of errors generated for some erroneous byte sequences.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static UTF_8: &'static Encoding = &UTF_8_INIT;

-/// The initializer for the gb18030 encoding.
+/// The initializer for the [gb18030](static.GB18030.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1383,13 +1630,23 @@ pub static GB18030_INIT: Encoding = Encoding {

 /// The gb18030 encoding.
 ///
+/// This encoding matches GB18030-2005 except the two-byte sequence 0xA3 0xA0
+/// maps to U+3000 for compatibility with existing Web content. As a result,
+/// this encoding can represent all of Unicode except for the private-use
+/// character U+E5E5.
+///
+/// [Index visualization for the two-byte sequences](https://encoding.spec.whatwg.org/gb18030.html),
+/// [Visualization of BMP coverage of the two-byte index](https://encoding.spec.whatwg.org/gb18030-bmp.html)
+///
+/// This encoding matches the Windows code page 54936.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static GB18030: &'static Encoding = &GB18030_INIT;

-/// The initializer for the macintosh encoding.
+/// The initializer for the [macintosh](static.MACINTOSH.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1408,13 +1665,21 @@ pub static MACINTOSH_INIT: Encoding = Encoding {

 /// The macintosh encoding.
 ///
+/// This is the MacRoman encoding from Mac OS Classic.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/macintosh.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/macintosh-bmp.html)
+///
+/// This encoding matches the Windows code page 10000, except Windows decodes
+/// 0xBD to U+2126 OHM SIGN instead of U+03A9 GREEK CAPITAL LETTER OMEGA.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static MACINTOSH: &'static Encoding = &MACINTOSH_INIT;

-/// The initializer for the replacement encoding.
+/// The initializer for the [replacement](static.REPLACEMENT.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1433,13 +1698,24 @@ pub static REPLACEMENT_INIT: Encoding = Encoding {

 /// The replacement encoding.
 ///
+/// This decode-only encoding decodes all non-zero-length streams to a single
+/// REPLACEMENT CHARACTER. Its purpose is to avoid the use of an
+/// ASCII-compatible fallback encoding (typically windows-1252) for some
+/// encodings that are no longer supported by the Web Platform and that
+/// would be dangerous to treat as ASCII-compatible.
+///
+/// There is no corresponding encoder. The output encoding of this encoding
+/// is UTF-8.
+///
+/// This encoding does not have a Windows code page number.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static REPLACEMENT: &'static Encoding = &REPLACEMENT_INIT;

-/// The initializer for the windows-1250 encoding.
+/// The initializer for the [windows-1250](static.WINDOWS_1250.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1458,13 +1734,20 @@ pub static WINDOWS_1250_INIT: Encoding = Encoding {

 /// The windows-1250 encoding.
 ///
+/// This is the Central European encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1250.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1250-bmp.html)
+///
+/// This encoding matches the Windows code page 1250.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1250: &'static Encoding = &WINDOWS_1250_INIT;

-/// The initializer for the windows-1251 encoding.
+/// The initializer for the [windows-1251](static.WINDOWS_1251.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1483,13 +1766,20 @@ pub static WINDOWS_1251_INIT: Encoding = Encoding {

 /// The windows-1251 encoding.
 ///
+/// This is the Cyrillic encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1251.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1251-bmp.html)
+///
+/// This encoding matches the Windows code page 1251.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1251: &'static Encoding = &WINDOWS_1251_INIT;

-/// The initializer for the windows-1252 encoding.
+/// The initializer for the [windows-1252](static.WINDOWS_1252.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1508,13 +1798,21 @@ pub static WINDOWS_1252_INIT: Encoding = Encoding {

 /// The windows-1252 encoding.
 ///
+/// This is the Western encoding for Windows. It is an extension of ISO-8859-1,
+/// which is known as Latin 1.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1252.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1252-bmp.html)
+///
+/// This encoding matches the Windows code page 1252.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1252: &'static Encoding = &WINDOWS_1252_INIT;

-/// The initializer for the windows-1253 encoding.
+/// The initializer for the [windows-1253](static.WINDOWS_1253.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1533,13 +1831,22 @@ pub static WINDOWS_1253_INIT: Encoding = Encoding {

 /// The windows-1253 encoding.
 ///
+/// This is the Greek encoding for Windows. It is mostly an extension of
+/// ISO-8859-7, but U+0386 is mapped to a different byte.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1253.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1253-bmp.html)
+///
+/// This encoding matches the Windows code page 1253, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1253: &'static Encoding = &WINDOWS_1253_INIT;

-/// The initializer for the windows-1254 encoding.
+/// The initializer for the [windows-1254](static.WINDOWS_1254.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1558,13 +1865,21 @@ pub static WINDOWS_1254_INIT: Encoding = Encoding {

 /// The windows-1254 encoding.
 ///
+/// This is the Turkish encoding for Windows. It is an extension of ISO-8859-9,
+/// which is known as Latin 5.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1254.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1254-bmp.html)
+///
+/// This encoding matches the Windows code page 1254.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1254: &'static Encoding = &WINDOWS_1254_INIT;

-/// The initializer for the windows-1255 encoding.
+/// The initializer for the [windows-1255](static.WINDOWS_1255.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1583,13 +1898,22 @@ pub static WINDOWS_1255_INIT: Encoding = Encoding {

 /// The windows-1255 encoding.
 ///
+/// This is the Hebrew encoding for Windows. It is an extension of ISO-8859-8-I,
+/// except for a currency sign swap.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1255.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1255-bmp.html)
+///
+/// This encoding matches the Windows code page 1255, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1255: &'static Encoding = &WINDOWS_1255_INIT;

-/// The initializer for the windows-1256 encoding.
+/// The initializer for the [windows-1256](static.WINDOWS_1256.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1608,13 +1932,20 @@ pub static WINDOWS_1256_INIT: Encoding = Encoding {

 /// The windows-1256 encoding.
 ///
+/// This is the Arabic encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1256.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1256-bmp.html)
+///
+/// This encoding matches the Windows code page 1256.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1256: &'static Encoding = &WINDOWS_1256_INIT;

-/// The initializer for the windows-1257 encoding.
+/// The initializer for the [windows-1257](static.WINDOWS_1257.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1633,13 +1964,21 @@ pub static WINDOWS_1257_INIT: Encoding = Encoding {

 /// The windows-1257 encoding.
 ///
+/// This is the Baltic encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1257.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1257-bmp.html)
+///
+/// This encoding matches the Windows code page 1257, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1257: &'static Encoding = &WINDOWS_1257_INIT;

-/// The initializer for the windows-1258 encoding.
+/// The initializer for the [windows-1258](static.WINDOWS_1258.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1658,13 +1997,25 @@ pub static WINDOWS_1258_INIT: Encoding = Encoding {

 /// The windows-1258 encoding.
 ///
+/// This is the Vietnamese encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1258.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1258-bmp.html)
+///
+/// This encoding matches the Windows code page 1258 when used in the
+/// non-normalizing mode. Unlike with the other single-byte encodings, the
+/// result of decoding is not necessarily in Normalization Form C. On the
+/// other hand, input in Normalization Form C is not necessarily encoded without
+/// replacement. In general, it's a bad idea to encode to encodings other
+/// than UTF-8, but this encoding is especially hazardous to encode to.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1258: &'static Encoding = &WINDOWS_1258_INIT;

-/// The initializer for the windows-874 encoding.
+/// The initializer for the [windows-874](static.WINDOWS_874.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1683,13 +2034,21 @@ pub static WINDOWS_874_INIT: Encoding = Encoding {

 /// The windows-874 encoding.
 ///
+/// This is the Thai encoding for Windows. It is an extension of TIS-620 / ISO-8859-11.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-874.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-874-bmp.html)
+///
+/// This encoding matches the Windows code page 874, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_874: &'static Encoding = &WINDOWS_874_INIT;

-/// The initializer for the x-mac-cyrillic encoding.
+/// The initializer for the [x-mac-cyrillic](static.X_MAC_CYRILLIC.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1708,13 +2067,20 @@ pub static X_MAC_CYRILLIC_INIT: Encoding = Encoding {

 /// The x-mac-cyrillic encoding.
 ///
+/// This is the MacUkrainian encoding from Mac OS Classic.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/x-mac-cyrillic.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/x-mac-cyrillic-bmp.html)
+///
+/// This encoding matches the Windows code page 10017.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static X_MAC_CYRILLIC: &'static Encoding = &X_MAC_CYRILLIC_INIT;

-/// The initializer for the x-user-defined encoding.
+/// The initializer for the [x-user-defined](static.X_USER_DEFINED.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1733,6 +2099,13 @@ pub static X_USER_DEFINED_INIT: Encoding = Encoding {

 /// The x-user-defined encoding.
 ///
+/// This encoding offsets the non-ASCII bytes by `0xF700`, thereby decoding
+/// them to the Private Use Area of Unicode. It was used for loading binary
+/// data into a JavaScript string using `XMLHttpRequest` before XHR supported
+/// the `"arraybuffer"` response type.
+///
+/// This encoding does not have a Windows code page number.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
@@ -3347,7 +3720,8 @@ impl Decoder {
            | DecoderLifeCycle::AtUtf8Start
            | DecoderLifeCycle::AtUtf16LeStart
            | DecoderLifeCycle::AtUtf16BeStart => {
-                return self.variant
+                return self
+                    .variant
                    .max_utf8_buffer_length_without_replacement(byte_length)
            }
            DecoderLifeCycle::AtStart => {
@@ -3362,7 +3736,8 @@ impl Decoder {
                            // No need to consider the internal state of the underlying decoder,
                            // because it is at start, because no data has reached it yet.
                            return Some(utf_bom);
-                        } else if let Some(non_bom) = self.variant
+                        } else if let Some(non_bom) = self
+                            .variant
                            .max_utf8_buffer_length_without_replacement(byte_length)
                        {
                            return Some(std::cmp::max(utf_bom, non_bom));
@@ -195,9 +195,8 @@ macro_rules! by_unit_check_simd {
                    }
                    let mut simd_accu = $splat;
                    while offset <= len_minus_stride {
-                        simd_accu = simd_accu | unsafe {
-                            *(src.offset(offset as isize) as *const $simd_ty)
-                        };
+                        simd_accu = simd_accu
+                            | unsafe { *(src.offset(offset as isize) as *const $simd_ty) };
                        offset += SIMD_STRIDE_SIZE / unit_size;
                    }
                    if !$func(simd_accu) {
@@ -1279,7 +1278,9 @@ pub fn is_char_bidi(c: char) -> bool {
        // Above Arabic Extended-A and below Arabic Presentation Forms
        if in_inclusive_range32(code_point, 0x200F, 0x2067) {
            // In the range that contains the RTL controls
-            return code_point == 0x200F || code_point == 0x202B || code_point == 0x202E
+            return code_point == 0x200F
+                || code_point == 0x202B
+                || code_point == 0x202E
                || code_point == 0x2067;
        }
        return false;
@@ -1514,7 +1515,8 @@ pub fn convert_str_to_utf16(src: &str, dst: &mut [u16]) -> usize {
                // Three-byte
                let second = bytes[read + 1];
                let third = bytes[read + 2];
-                let point = (((byte as u32) & 0xFu32) << 12) | ((second as u32 & 0x3Fu32) << 6)
+                let point = (((byte as u32) & 0xFu32) << 12)
+                    | ((second as u32 & 0x3Fu32) << 6)
                    | (third as u32 & 0x3Fu32);
                dst[written] = point as u16;
                read += 3;
@@ -1524,7 +1526,8 @@ pub fn convert_str_to_utf16(src: &str, dst: &mut [u16]) -> usize {
                let second = bytes[read + 1];
                let third = bytes[read + 2];
                let fourth = bytes[read + 3];
-                let point = (((byte as u32) & 0x7u32) << 18) | ((second as u32 & 0x3Fu32) << 12)
+                let point = (((byte as u32) & 0x7u32) << 18)
+                    | ((second as u32 & 0x3Fu32) << 12)
                    | ((third as u32 & 0x3Fu32) << 6)
                    | (fourth as u32 & 0x3Fu32);
                dst[written] = (0xD7C0 + (point >> 10)) as u16;
@@ -248,7 +248,8 @@ impl ShiftJisEncoder {
                            10716 + bmp_minus_roman as usize
                        } else if let Some(pointer) = jis0208_range_encode(bmp) {
                            pointer
-                        } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D) || bmp == 0xF929
+                        } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D)
+                            || bmp == 0xF929
                            || bmp == 0xF9DC
                        {
                            // Guaranteed to be found in IBM_KANJI
@@ -277,10 +277,15 @@ pub fn is_u16x8_bidi(s: u16x8) -> bool {

    // Quick refutation failed. Let's do the full check.

-    (in_range16x8!(s, 0x0590, 0x0900) | in_range16x8!(s, 0xFB50, 0xFE00)
-        | in_range16x8!(s, 0xFE70, 0xFF00) | in_range16x8!(s, 0xD802, 0xD804)
-        | in_range16x8!(s, 0xD83A, 0xD83C) | s.eq(u16x8::splat(0x200F))
-        | s.eq(u16x8::splat(0x202B)) | s.eq(u16x8::splat(0x202E)) | s.eq(u16x8::splat(0x2067)))
+    (in_range16x8!(s, 0x0590, 0x0900)
+        | in_range16x8!(s, 0xFB50, 0xFE00)
+        | in_range16x8!(s, 0xFE70, 0xFF00)
+        | in_range16x8!(s, 0xD802, 0xD804)
+        | in_range16x8!(s, 0xD83A, 0xD83C)
+        | s.eq(u16x8::splat(0x200F))
+        | s.eq(u16x8::splat(0x202B))
+        | s.eq(u16x8::splat(0x202E))
+        | s.eq(u16x8::splat(0x2067)))
        .any()
 }

@@ -29,11 +29,9 @@ impl Utf16Decoder {
    }

    pub fn additional_from_state(&self) -> usize {
-        1 + if self.lead_byte.is_some() { 1 } else { 0 } + if self.lead_surrogate == 0 {
-            0
-        } else {
-            2
-        }
+        1
+            + if self.lead_byte.is_some() { 1 } else { 0 }
+            + if self.lead_surrogate == 0 { 0 } else { 2 }
    }

    pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
@@ -372,7 +372,8 @@ pub fn convert_utf8_to_utf16_up_to_invalid(src: &[u8], dst: &mut [u16]) -> (usiz
                {
                    break 'outer;
                }
-                let point = (((byte as u32) & 0xFu32) << 12) | ((second as u32 & 0x3Fu32) << 6)
+                let point = (((byte as u32) & 0xFu32) << 12)
+                    | ((second as u32 & 0x3Fu32) << 6)
                    | (third as u32 & 0x3Fu32);
                dst[written] = point as u16;
                read = new_read;
@@ -393,7 +394,8 @@ pub fn convert_utf8_to_utf16_up_to_invalid(src: &[u8], dst: &mut [u16]) -> (usiz
                {
                    break 'outer;
                }
-                let point = (((byte as u32) & 0xFu32) << 12) | ((second as u32 & 0x3Fu32) << 6)
+                let point = (((byte as u32) & 0xFu32) << 12)
+                    | ((second as u32 & 0x3Fu32) << 6)
                    | (third as u32 & 0x3Fu32);
                dst[written] = point as u16;
                read = new_read;
@@ -414,7 +416,8 @@ pub fn convert_utf8_to_utf16_up_to_invalid(src: &[u8], dst: &mut [u16]) -> (usiz
                {
                    break 'outer;
                }
-                let point = (((byte as u32) & 0xFu32) << 12) | ((second as u32 & 0x3Fu32) << 6)
+                let point = (((byte as u32) & 0xFu32) << 12)
+                    | ((second as u32 & 0x3Fu32) << 6)
                    | (third as u32 & 0x3Fu32);
                dst[written] = point as u16;
                read = new_read;