diff --git a/doc/Big5.txt b/doc/Big5.txt
new file mode 100644
index 0000000..61e8fd5
--- /dev/null
+++ b/doc/Big5.txt
@@ -0,0 +1,16 @@
+/// This is Big5 with HKSCS with mappings to more recent Unicode assignments
+/// instead of the Private Use Area code points that have been used historically.
+/// It is believed to be able to decode existing Web content in a way that makes
+/// sense.
+///
+/// To avoid form submissions generating data that Web servers don't understand,
+/// the encoder doesn't use the HKSCS byte sequences that precede the unextended
+/// Big5 in the lexical order.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/big5.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/big5-bmp.html)
+///
+/// This encoding is designed to be suited for decoding the Windows code page 950
+/// and its HKSCS patched "951" variant such that the text makes sense, given
+/// assignments that Unicode has made after those encodings used Private Use
+/// Area characters.
diff --git a/doc/EUC-JP.txt b/doc/EUC-JP.txt
new file mode 100644
index 0000000..f90a735
--- /dev/null
+++ b/doc/EUC-JP.txt
@@ -0,0 +1,12 @@
+/// This is the legacy Unix encoding for Japanese.
+///
+/// For compatibility with Web servers that don't expect three-byte sequences
+/// in form submissions, the encoder doesn't generate three-byte sequences.
+/// That is, the JIS X 0212 support is decode-only.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/euc-jp.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/euc-jp-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 20932. There are error
+/// handling differences and a handful of 2-byte sequences that decode differently.
+/// Additionally, Windows doesn't support 3-byte sequences.
diff --git a/doc/EUC-KR.txt b/doc/EUC-KR.txt
new file mode 100644
index 0000000..ef24c98
--- /dev/null
+++ b/doc/EUC-KR.txt
@@ -0,0 +1,10 @@
+/// This is the Korean encoding for Windows. It extends the Unix legacy encoding
+/// for Korean, based on KS X 1001 (which also formed the base of MacKorean on Mac OS
+/// Classic), with all the characters from the Hangul Syllables block of Unicode.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/euc-kr.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/euc-kr-bmp.html)
+///
+/// This encoding matches the Windows code page 949, except Windows decodes byte 0x80
+/// to U+0080 and some byte sequences that are errors per the Encoding Standard to
+/// the question mark or the Private Use Area.
diff --git a/doc/GBK.txt b/doc/GBK.txt
new file mode 100644
index 0000000..2faefff
--- /dev/null
+++ b/doc/GBK.txt
@@ -0,0 +1,16 @@
+/// The decoder for this encoding is the same as the decoder for gb18030.
+/// The encoder side of this encoding is GBK with Windows code page 936 euro
+/// sign behavior. GBK extends GB2312-80 to cover the CJK Unified Ideographs
+/// Unicode block as well as a handful of ideographs from the CJK Unified
+/// Ideographs Extension A and CJK Compatibility Ideographs blocks.
+///
+/// Unlike e.g. in the case of ISO-8859-1 and windows-1252, the GBK encoder wasn't
+/// unified with the gb18030 encoder in the Encoding Standard out of concern
+/// that servers that expect GBK form submissions might not be able to handle
+/// the four-byte sequences.
+///
+/// [Index visualization for the two-byte sequences](https://encoding.spec.whatwg.org/gb18030.html),
+/// [Visualization of BMP coverage of the two-byte index](https://encoding.spec.whatwg.org/gb18030-bmp.html)
+///
+/// The encoder of this encoding roughly matches the Windows code page 936.
+/// The decoder side is a superset.
diff --git a/doc/IBM866.txt b/doc/IBM866.txt
new file mode 100644
index 0000000..871ff42
--- /dev/null
+++ b/doc/IBM866.txt
@@ -0,0 +1,8 @@
+/// This is the most notable one of the DOS Cyrillic code pages. It has the same
+/// box drawing characters as code page 437, so it can be used for decoding
+/// DOS-era ASCII + box drawing data.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/ibm866.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/ibm866-bmp.html)
+///
+/// This encoding matches the Windows code page 866.
diff --git a/doc/ISO-2022-JP.txt b/doc/ISO-2022-JP.txt
new file mode 100644
index 0000000..65713a1
--- /dev/null
+++ b/doc/ISO-2022-JP.txt
@@ -0,0 +1,10 @@
+/// This is the primary pre-UTF-8 encoding for Japanese email. It uses the ASCII
+/// byte range to encode non-Basic Latin characters. It's the only encoding
+/// supported by this crate whose encoder is stateful.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/jis0208.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/jis0208-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 50220. Notably, Windows
+/// uses U+30FB in place of the REPLACEMENT CHARACTER and otherwise differs in
+/// error handling.
diff --git a/doc/ISO-8859-10.txt b/doc/ISO-8859-10.txt
new file mode 100644
index 0000000..8aca388
--- /dev/null
+++ b/doc/ISO-8859-10.txt
@@ -0,0 +1,8 @@
+/// This is the Nordic part of the ISO/IEC 8859 encoding family. This encoding
+/// is also known as Latin 6.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-10.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-10-bmp.html)
+///
+/// The Windows code page number for this encoding is 28600, but kernel32.dll
+/// does not support this encoding.
diff --git a/doc/ISO-8859-13.txt b/doc/ISO-8859-13.txt
new file mode 100644
index 0000000..20cd549
--- /dev/null
+++ b/doc/ISO-8859-13.txt
@@ -0,0 +1,8 @@
+/// This is the Baltic part of the ISO/IEC 8859 encoding family. This encoding
+/// is also known as Latin 7.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-13.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-13-bmp.html)
+///
+/// This encoding matches the Windows code page 28603, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
diff --git a/doc/ISO-8859-14.txt b/doc/ISO-8859-14.txt
new file mode 100644
index 0000000..3e4833b
--- /dev/null
+++ b/doc/ISO-8859-14.txt
@@ -0,0 +1,8 @@
+/// This is the Celtic part of the ISO/IEC 8859 encoding family. This encoding
+/// is also known as Latin 8.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-14.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-14-bmp.html)
+///
+/// The Windows code page number for this encoding is 28604, but kernel32.dll
+/// does not support this encoding.
diff --git a/doc/ISO-8859-15.txt b/doc/ISO-8859-15.txt
new file mode 100644
index 0000000..922896a
--- /dev/null
+++ b/doc/ISO-8859-15.txt
@@ -0,0 +1,7 @@
+/// This is the revised Western European part of the ISO/IEC 8859 encoding
+/// family. This encoding is also known as Latin 9.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-15.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-15-bmp.html)
+///
+/// This encoding matches the Windows code page 28605.
diff --git a/doc/ISO-8859-16.txt b/doc/ISO-8859-16.txt
new file mode 100644
index 0000000..d1ae50b
--- /dev/null
+++ b/doc/ISO-8859-16.txt
@@ -0,0 +1,8 @@
+/// This is the South-Eastern European part of the ISO/IEC 8859 encoding
+/// family. This encoding is also known as Latin 10.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-16.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-16-bmp.html)
+///
+/// The Windows code page number for this encoding is 28606, but kernel32.dll
+/// does not support this encoding.
diff --git a/doc/ISO-8859-2.txt b/doc/ISO-8859-2.txt
new file mode 100644
index 0000000..298df09
--- /dev/null
+++ b/doc/ISO-8859-2.txt
@@ -0,0 +1,6 @@
+/// This is the Central European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 2.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-2.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-2-bmp.html)
+///
+/// This encoding matches the Windows code page 28592.
diff --git a/doc/ISO-8859-3.txt b/doc/ISO-8859-3.txt
new file mode 100644
index 0000000..c462ce8
--- /dev/null
+++ b/doc/ISO-8859-3.txt
@@ -0,0 +1,6 @@
+/// This is the South European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 3.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-3.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-3-bmp.html)
+///
+/// This encoding matches the Windows code page 28593.
diff --git a/doc/ISO-8859-4.txt b/doc/ISO-8859-4.txt
new file mode 100644
index 0000000..40449c4
--- /dev/null
+++ b/doc/ISO-8859-4.txt
@@ -0,0 +1,6 @@
+/// This is the North European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 4.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-4.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-4-bmp.html)
+///
+/// This encoding matches the Windows code page 28594.
diff --git a/doc/ISO-8859-5.txt b/doc/ISO-8859-5.txt
new file mode 100644
index 0000000..41774ec
--- /dev/null
+++ b/doc/ISO-8859-5.txt
@@ -0,0 +1,6 @@
+/// This is the Cyrillic part of the ISO/IEC 8859 encoding family.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-5.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-5-bmp.html)
+///
+/// This encoding matches the Windows code page 28595.
diff --git a/doc/ISO-8859-6.txt b/doc/ISO-8859-6.txt
new file mode 100644
index 0000000..4c70c22
--- /dev/null
+++ b/doc/ISO-8859-6.txt
@@ -0,0 +1,7 @@
+/// This is the Arabic part of the ISO/IEC 8859 encoding family.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-6.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-6-bmp.html)
+///
+/// This encoding matches the Windows code page 28596, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
diff --git a/doc/ISO-8859-7.txt b/doc/ISO-8859-7.txt
new file mode 100644
index 0000000..b78ed38
--- /dev/null
+++ b/doc/ISO-8859-7.txt
@@ -0,0 +1,11 @@
+/// This is the Greek part of the ISO/IEC 8859 encoding family.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-7.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-7-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 28597. Windows decodes
+/// unassigned code points, the currency signs at 0xA4 and 0xA5 as well as
+/// 0xAA, which should be U+037A GREEK YPOGEGRAMMENI, to the Private Use Area
+/// of Unicode. Windows decodes 0xA1 to U+02BD MODIFIER LETTER REVERSED COMMA
+/// instead of U+2018 LEFT SINGLE QUOTATION MARK and 0xA2 to U+02BC MODIFIER
+/// LETTER APOSTROPHE instead of U+2019 RIGHT SINGLE QUOTATION MARK.
diff --git a/doc/ISO-8859-8-I.txt b/doc/ISO-8859-8-I.txt
new file mode 100644
index 0000000..b73e572
--- /dev/null
+++ b/doc/ISO-8859-8-I.txt
@@ -0,0 +1,9 @@
+/// This is the Hebrew part of the ISO/IEC 8859 encoding family in logical order.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-8.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-8-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 38598. Windows decodes
+/// 0xAF to OVERLINE instead of MACRON and 0xFD and 0xFE to the Private Use
+/// Area instead of LRM and RLM. Windows decodes unassigned code points to
+/// the Private Use Area.
diff --git a/doc/ISO-8859-8.txt b/doc/ISO-8859-8.txt
new file mode 100644
index 0000000..c5600e3
--- /dev/null
+++ b/doc/ISO-8859-8.txt
@@ -0,0 +1,9 @@
+/// This is the Hebrew part of the ISO/IEC 8859 encoding family in visual order.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-8.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-8-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 28598. Windows decodes
+/// 0xAF to OVERLINE instead of MACRON and 0xFD and 0xFE to the Private Use
+/// Area instead of LRM and RLM. Windows decodes unassigned code points to
+/// the Private Use Area.
diff --git a/doc/KOI8-R.txt b/doc/KOI8-R.txt
new file mode 100644
index 0000000..46dcfe7
--- /dev/null
+++ b/doc/KOI8-R.txt
@@ -0,0 +1,6 @@
+/// This is an encoding for Russian from [RFC 1489](https://tools.ietf.org/html/rfc1489).
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/koi8-r.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/koi8-r-bmp.html)
+///
+/// This encoding matches the Windows code page 20866.
diff --git a/doc/KOI8-U.txt b/doc/KOI8-U.txt
new file mode 100644
index 0000000..a263745
--- /dev/null
+++ b/doc/KOI8-U.txt
@@ -0,0 +1,6 @@
+/// This is an encoding for Ukrainian adapted from KOI8-R.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/koi8-u.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/koi8-u-bmp.html)
+///
+/// This encoding matches the Windows code page 21866.
diff --git a/doc/Shift_JIS.txt b/doc/Shift_JIS.txt
new file mode 100644
index 0000000..b982ab5
--- /dev/null
+++ b/doc/Shift_JIS.txt
@@ -0,0 +1,8 @@
+/// This is the Japanese encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/shift_jis.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/shift_jis-bmp.html)
+///
+/// This encoding matches the Windows code page 932, except Windows decodes some byte
+/// sequences that are errors per the Encoding Standard to the question mark or the
+/// Private Use Area and generally uses U+30FB in place of the REPLACEMENT CHARACTER.
diff --git a/doc/UTF-16BE.txt b/doc/UTF-16BE.txt
new file mode 100644
index 0000000..0a7df99
--- /dev/null
+++ b/doc/UTF-16BE.txt
@@ -0,0 +1,8 @@
+/// This decode-only encoding uses 16-bit code units due to Unicode originally
+/// having been designed as a 16-bit repertoire. In the absence of a byte order
+/// mark the big endian byte order is assumed.
+///
+/// There is no corresponding encoder in this crate or in the Encoding
+/// Standard. The output encoding of this encoding is UTF-8.
+///
+/// This encoding matches the Windows code page 1201.
diff --git a/doc/UTF-16LE.txt b/doc/UTF-16LE.txt
new file mode 100644
index 0000000..3a98e8b
--- /dev/null
+++ b/doc/UTF-16LE.txt
@@ -0,0 +1,8 @@
+/// This decode-only encoding uses 16-bit code units due to Unicode originally
+/// having been designed as a 16-bit repertoire. In the absence of a byte order
+/// mark the little endian byte order is assumed.
+///
+/// There is no corresponding encoder in this crate or in the Encoding
+/// Standard. The output encoding of this encoding is UTF-8.
+///
+/// This encoding matches the Windows code page 1200.
diff --git a/doc/UTF-8.txt b/doc/UTF-8.txt
new file mode 100644
index 0000000..3a93e67
--- /dev/null
+++ b/doc/UTF-8.txt
@@ -0,0 +1,5 @@
+/// This is the encoding that should be used for all new development. It can
+/// represent all of Unicode.
+///
+/// This encoding matches the Windows code page 65001, except Windows differs
+/// in the number of errors generated for some erroneous byte sequences.
diff --git a/doc/gb18030.txt b/doc/gb18030.txt
new file mode 100644
index 0000000..572a593
--- /dev/null
+++ b/doc/gb18030.txt
@@ -0,0 +1,9 @@
+/// This encoding matches GB18030-2005 except the two-byte sequence 0xA3 0xA0
+/// maps to U+3000 for compatibility with existing Web content. As a result,
+/// this encoding can represent all of Unicode except for the private-use
+/// character U+E5E5.
+///
+/// [Index visualization for the two-byte sequences](https://encoding.spec.whatwg.org/gb18030.html),
+/// [Visualization of BMP coverage of the two-byte index](https://encoding.spec.whatwg.org/gb18030-bmp.html)
+///
+/// This encoding matches the Windows code page 54936.
diff --git a/doc/macintosh.txt b/doc/macintosh.txt
new file mode 100644
index 0000000..d00fece
--- /dev/null
+++ b/doc/macintosh.txt
@@ -0,0 +1,7 @@
+/// This is the MacRoman encoding from Mac OS Classic.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/macintosh.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/macintosh-bmp.html)
+///
+/// This encoding matches the Windows code page 10000, except Windows decodes
+/// 0xBD to U+2126 OHM SIGN instead of U+03A9 GREEK CAPITAL LETTER OMEGA.
diff --git a/doc/replacement.txt b/doc/replacement.txt
new file mode 100644
index 0000000..2398df0
--- /dev/null
+++ b/doc/replacement.txt
@@ -0,0 +1,10 @@
+/// This decode-only encoding decodes all non-zero-length streams to a single
+/// REPLACEMENT CHARACTER. Its purpose is to avoid the use of an
+/// ASCII-compatible fallback encoding (typically windows-1252) for some
+/// encodings that are no longer supported by the Web Platform and that
+/// would be dangerous to treat as ASCII-compatible.
+///
+/// There is no corresponding encoder. The output encoding of this encoding
+/// is UTF-8.
+///
+/// This encoding does not have a Windows code page number.
diff --git a/doc/windows-1250.txt b/doc/windows-1250.txt
new file mode 100644
index 0000000..96e38ef
--- /dev/null
+++ b/doc/windows-1250.txt
@@ -0,0 +1,6 @@
+/// This is the Central European encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1250.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1250-bmp.html)
+///
+/// This encoding matches the Windows code page 1250.
diff --git a/doc/windows-1251.txt b/doc/windows-1251.txt
new file mode 100644
index 0000000..9645611
--- /dev/null
+++ b/doc/windows-1251.txt
@@ -0,0 +1,6 @@
+/// This is the Cyrillic encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1251.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1251-bmp.html)
+///
+/// This encoding matches the Windows code page 1251.
diff --git a/doc/windows-1252.txt b/doc/windows-1252.txt
new file mode 100644
index 0000000..d613fbe
--- /dev/null
+++ b/doc/windows-1252.txt
@@ -0,0 +1,7 @@
+/// This is the Western encoding for Windows. It is an extension of ISO-8859-1,
+/// which is known as Latin 1.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1252.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1252-bmp.html)
+///
+/// This encoding matches the Windows code page 1252.
diff --git a/doc/windows-1253.txt b/doc/windows-1253.txt
new file mode 100644
index 0000000..edcacd9
--- /dev/null
+++ b/doc/windows-1253.txt
@@ -0,0 +1,8 @@
+/// This is the Greek encoding for Windows. It is mostly an extension of
+/// ISO-8859-7, but U+0386 is mapped to a different byte.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1253.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1253-bmp.html)
+///
+/// This encoding matches the Windows code page 1253, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
diff --git a/doc/windows-1254.txt b/doc/windows-1254.txt
new file mode 100644
index 0000000..26491a9
--- /dev/null
+++ b/doc/windows-1254.txt
@@ -0,0 +1,7 @@
+/// This is the Turkish encoding for Windows. It is an extension of ISO-8859-9,
+/// which is known as Latin 5.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1254.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1254-bmp.html)
+///
+/// This encoding matches the Windows code page 1254.
diff --git a/doc/windows-1255.txt b/doc/windows-1255.txt
new file mode 100644
index 0000000..cbcf86d
--- /dev/null
+++ b/doc/windows-1255.txt
@@ -0,0 +1,8 @@
+/// This is the Hebrew encoding for Windows. It is an extension of ISO-8859-8-I,
+/// except for a currency sign swap.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1255.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1255-bmp.html)
+///
+/// This encoding matches the Windows code page 1255, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
diff --git a/doc/windows-1256.txt b/doc/windows-1256.txt
new file mode 100644
index 0000000..38bf2ef
--- /dev/null
+++ b/doc/windows-1256.txt
@@ -0,0 +1,6 @@
+/// This is the Arabic encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1256.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1256-bmp.html)
+///
+/// This encoding matches the Windows code page 1256.
diff --git a/doc/windows-1257.txt b/doc/windows-1257.txt
new file mode 100644
index 0000000..fc3fad2
--- /dev/null
+++ b/doc/windows-1257.txt
@@ -0,0 +1,7 @@
+/// This is the Baltic encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1257.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1257-bmp.html)
+///
+/// This encoding matches the Windows code page 1257, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
diff --git a/doc/windows-1258.txt b/doc/windows-1258.txt
new file mode 100644
index 0000000..1ae5bbb
--- /dev/null
+++ b/doc/windows-1258.txt
@@ -0,0 +1,11 @@
+/// This is the Vietnamese encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1258.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1258-bmp.html)
+///
+/// This encoding matches the Windows code page 1258 when used in the
+/// non-normalizing mode. Unlike with the other single-byte encodings, the
+/// result of decoding is not necessarily in Normalization Form C. On the
+/// other hand, input in the Normalization Form C is not encoded without
+/// replacement. In general, it's a bad idea to encode to encodings other
+/// than UTF-8, but this encoding is especially hazardous to encode to.
diff --git a/doc/windows-874.txt b/doc/windows-874.txt
new file mode 100644
index 0000000..ddbc711
--- /dev/null
+++ b/doc/windows-874.txt
@@ -0,0 +1,7 @@
+/// This is the Thai encoding for Windows. It is an extension of TIS-620 / ISO-8859-11.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-874.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-874-bmp.html)
+///
+/// This encoding matches the Windows code page 874, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
diff --git a/doc/x-mac-cyrillic.txt b/doc/x-mac-cyrillic.txt
new file mode 100644
index 0000000..b5519a1
--- /dev/null
+++ b/doc/x-mac-cyrillic.txt
@@ -0,0 +1,6 @@
+/// This is the MacUkrainian encoding from Mac OS Classic.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/x-mac-cyrillic.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/x-mac-cyrillic-bmp.html)
+///
+/// This encoding matches the Windows code page 10017.
diff --git a/doc/x-user-defined.txt b/doc/x-user-defined.txt
new file mode 100644
index 0000000..e00ddc6
--- /dev/null
+++ b/doc/x-user-defined.txt
@@ -0,0 +1,6 @@
+/// This encoding offsets the non-ASCII bytes by `0xF700` thereby decoding
+/// them to the Private Use Area of Unicode. It was used for loading binary
+/// data into a JavaScript string using `XMLHttpRequest` before XHR supported
+/// the `"arraybuffer"` response type.
+///
+/// This encoding does not have a Windows code page number.
diff --git a/generate-encoding-data.py b/generate-encoding-data.py
index 0b38e90..7c17b18 100644
--- a/generate-encoding-data.py
+++ b/generate-encoding-data.py
@@ -33,6 +33,13 @@ class Label:
   def __cmp__(self, other):
     return cmp_from_end(self.label, other.label)
 
+class CodePage:
+  def __init__(self, code_page, preferred):
+    self.code_page = code_page
+    self.preferred = preferred
+  def __cmp__(self, other):
+    return self.code_page, other.code_page
+
 def static_u16_table(name, data):
   data_file.write('''pub static %s: [u16; %d] = [
   ''' % (name, len(data)))
@@ -82,6 +89,8 @@ single_byte = []
 
 multi_byte = []
 
+code_pages = []
+
 def to_camel_name(name):
   if name == u"iso-8859-8-i":
     return u"Iso8I"
@@ -98,6 +107,66 @@ def to_snake_name(name):
 def to_dom_name(name):
   return name
 
+encodings_by_code_page = {  # Primary mapping: code page number -> encoding name (names match the doc/<name>.txt files)
+  932: "Shift_JIS",
+  936: "GBK",
+  949: "EUC-KR",
+  950: "Big5",
+  866: "IBM866",
+  874: "windows-874",
+  1200: "UTF-16LE",
+  1201: "UTF-16BE",
+  1250: "windows-1250",
+  1251: "windows-1251",
+  1252: "windows-1252",
+  1253: "windows-1253",
+  1254: "windows-1254",
+  1255: "windows-1255",
+  1256: "windows-1256",
+  1257: "windows-1257",
+  1258: "windows-1258",
+  10000: "macintosh",
+  10017: "x-mac-cyrillic",
+  20866: "KOI8-R",
+  20932: "EUC-JP",
+  21866: "KOI8-U",
+  28592: "ISO-8859-2",
+  28593: "ISO-8859-3",
+  28594: "ISO-8859-4",
+  28595: "ISO-8859-5",
+  28596: "ISO-8859-6",
+  28597: "ISO-8859-7",
+  28598: "ISO-8859-8",
+  28600: "ISO-8859-10",
+  28603: "ISO-8859-13",
+  28604: "ISO-8859-14",
+  28605: "ISO-8859-15",
+  28606: "ISO-8859-16",
+  38598: "ISO-8859-8-I",
+  50221: "ISO-2022-JP",  # NOTE(review): the ISO-2022-JP docs and the lib.rs table cite 50220, which is only an alias here -- confirm
+  54936: "gb18030",
+  65001: "UTF-8",
+}
+
+code_pages_by_encoding = {}  # Reverse lookup: encoding name -> code page number, filled by the loop below
+
+for code_page, encoding in encodings_by_code_page.iteritems():  # invert the primary mapping (each name appears once above)
+  code_pages_by_encoding[encoding] = code_page
+
+encoding_by_alias_code_page = {
+  951: "Big5",
+  20936: "GBK",
+  20949: "EUC-KR",
+  28591: "windows-1252",
+  28599: "windows-1254",
+  28601: "windows-847",
+  50220: "ISO-2022-JP",
+  50222: "ISO-2022-JP",
+  51949: "EUC-JP",
+  51936: "GBK",
+  51949: "EUC-KR",
+}
+
 #
 
 for group in data:
@@ -177,7 +246,11 @@ for name in preferred:
   else:
     variant = to_camel_name(name)
 
-  label_file.write('''/// The initializer for the %s encoding.
+  docfile = open("doc/%s.txt" % name, "r")
+  doctext = docfile.read()
+  docfile.close()
+
+  label_file.write('''/// The initializer for the [%s](static.%s.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -196,13 +269,14 @@ pub static %s_INIT: Encoding = Encoding {
 
 /// The %s encoding.
 ///
+%s///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static %s: &'static Encoding = &%s_INIT;
 
-''' % (to_dom_name(name), to_constant_name(name), to_dom_name(name), variant, to_dom_name(name), to_constant_name(name), to_constant_name(name)))
+''' % (to_dom_name(name), to_constant_name(name), to_constant_name(name), to_dom_name(name), variant, to_dom_name(name), doctext, to_constant_name(name), to_constant_name(name)))
 
 label_file.write("""static LABELS_SORTED: [&'static str; %d] = [
 """ % len(labels))
diff --git a/src/euc_jp.rs b/src/euc_jp.rs
index 9857989..ea9d515 100644
--- a/src/euc_jp.rs
+++ b/src/euc_jp.rs
@@ -286,7 +286,8 @@ impl EucJpEncoder {
                         let lead = (pointer / 94) + 0xA1;
                         let trail = (pointer % 94) + 0xA1;
                         handle.write_two(lead as u8, trail as u8)
-                    } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D) || bmp == 0xF929
+                    } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D)
+                        || bmp == 0xF929
                         || bmp == 0xF9DC
                     {
                         // Guaranteed to be found in IBM_KANJI
diff --git a/src/euc_kr.rs b/src/euc_kr.rs
index d27a1ef..51939d1 100644
--- a/src/euc_kr.rs
+++ b/src/euc_kr.rs
@@ -205,7 +205,8 @@ fn ksx1001_encode_misc(bmp: u16) -> Option<(usize, usize)> {
             return Some((0x81 + 0x25, 0xA1 + pos));
         }
     }
-    if in_inclusive_range16(bmp, 0x2015, 0x266D) || in_inclusive_range16(bmp, 0x321C, 0x33D8)
+    if in_inclusive_range16(bmp, 0x2015, 0x266D)
+        || in_inclusive_range16(bmp, 0x321C, 0x33D8)
         || in_inclusive_range16(bmp, 0xFF3C, 0xFFE5)
         || in_inclusive_range16(bmp, 0x00A1, 0x00F7)
         || in_inclusive_range16(bmp, 0x02C7, 0x02DD)
diff --git a/src/handles.rs b/src/handles.rs
index be481c5..5b46d14 100644
--- a/src/handles.rs
+++ b/src/handles.rs
@@ -1477,12 +1477,14 @@ impl<'a> Utf8Source<'a> {
             return unsafe { ::std::mem::transmute(point) };
         }
         if unit < 0xF0u32 {
-            let point = ((unit & 0xFu32) << 12) | ((self.slice[self.pos + 1] as u32 & 0x3Fu32) << 6)
+            let point = ((unit & 0xFu32) << 12)
+                | ((self.slice[self.pos + 1] as u32 & 0x3Fu32) << 6)
                 | (self.slice[self.pos + 2] as u32 & 0x3Fu32);
             self.pos += 3;
             return unsafe { ::std::mem::transmute(point) };
         }
-        let point = ((unit & 0x7u32) << 18) | ((self.slice[self.pos + 1] as u32 & 0x3Fu32) << 12)
+        let point = ((unit & 0x7u32) << 18)
+            | ((self.slice[self.pos + 1] as u32 & 0x3Fu32) << 12)
             | ((self.slice[self.pos + 2] as u32 & 0x3Fu32) << 6)
             | (self.slice[self.pos + 3] as u32 & 0x3Fu32);
         self.pos += 4;
diff --git a/src/iso_2022_jp.rs b/src/iso_2022_jp.rs
index 32a088a..23c53ff 100644
--- a/src/iso_2022_jp.rs
+++ b/src/iso_2022_jp.rs
@@ -667,7 +667,8 @@ impl Iso2022JpEncoder {
                                 let trail = (pointer % 94) + 0x21;
                                 handle.write_two(lead as u8, trail as u8);
                                 continue;
-                            } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D) || bmp == 0xF929
+                            } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D)
+                                || bmp == 0xF929
                                 || bmp == 0xF9DC
                             {
                                 // Guaranteed to be found in IBM_KANJI
diff --git a/src/lib.rs b/src/lib.rs
index dd608c7..e2e9af2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -561,8 +561,14 @@
 //! <tr><th>Encoding</th><th>Code Page</th><th>PUA</th><th>Remarks</th></tr>
 //! </thead>
 //! <tbody>
+//! <tr><td>Shift_JIS</td><td>932</td><td></td><td></td></tr>
+//! <tr><td>GBK</td><td>936</td><td></td><td></td></tr>
+//! <tr><td>EUC-KR</td><td>949</td><td></td><td></td></tr>
+//! <tr><td>Big5</td><td>950</td><td></td><td></td></tr>
 //! <tr><td>IBM866</td><td>866</td><td></td><td></td></tr>
 //! <tr><td>windows-874</td><td>874</td><td>&bullet;</td><td></td></tr>
+//! <tr><td>UTF-16LE</td><td>1200</td><td></td><td></td></tr>
+//! <tr><td>UTF-16BE</td><td>1201</td><td></td><td></td></tr>
 //! <tr><td>windows-1250</td><td>1250</td><td></td><td></td></tr>
 //! <tr><td>windows-1251</td><td>1251</td><td></td><td></td></tr>
 //! <tr><td>windows-1252</td><td>1252</td><td></td><td></td></tr>
@@ -575,6 +581,7 @@
 //! <tr><td>macintosh</td><td>10000</td><td></td><td>1</td></tr>
 //! <tr><td>x-mac-cyrillic</td><td>10017</td><td></td><td>2</td></tr>
 //! <tr><td>KOI8-R</td><td>20866</td><td></td><td></td></tr>
+//! <tr><td>EUC-JP</td><td>20932</td><td></td><td></td></tr>
 //! <tr><td>KOI8-U</td><td>21866</td><td></td><td></td></tr>
 //! <tr><td>ISO-8859-2</td><td>28592</td><td></td><td></td></tr>
 //! <tr><td>ISO-8859-3</td><td>28593</td><td></td><td></td></tr>
@@ -586,6 +593,9 @@
 //! <tr><td>ISO-8859-13</td><td>28603</td><td>&bullet;</td><td></td></tr>
 //! <tr><td>ISO-8859-15</td><td>28605</td><td></td><td></td></tr>
 //! <tr><td>ISO-8859-8-I</td><td>38598</td><td></td><td>5</td></tr>
+//! <tr><td>ISO-2022-JP</td><td>50220</td><td></td><td></td></tr>
+//! <tr><td>gb18030</td><td>54936</td><td></td><td></td></tr>
+//! <tr><td>UTF-8</td><td>65001</td><td></td><td></td></tr>
 //! </tbody>
 //! </table>
 //!
@@ -739,7 +749,7 @@ const NCR_EXTRA: usize = 10; // &#1114111;
 
 const LONGEST_LABEL_LENGTH: usize = 19; // cseucpkdfmtjapanese
 
-/// The initializer for the Big5 encoding.
+/// The initializer for the [Big5](static.BIG5.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -758,13 +768,30 @@ pub static BIG5_INIT: Encoding = Encoding {
 
 /// The Big5 encoding.
 ///
+/// This is Big5 with HKSCS with mappings to more recent Unicode assignments
+/// instead of the Private Use Area code points that have been used historically.
+/// It is believed to be able to decode existing Web content in a way that makes
+/// sense.
+///
+/// To avoid form submissions generating data that Web servers don't understand,
+/// the encoder doesn't use the HKSCS byte sequences that precede the unextended
+/// Big5 in the lexical order.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/big5.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/big5-bmp.html)
+///
+/// This encoding is designed to be suited for decoding the Windows code page 950
+/// and its HKSCS patched "951" variant such that the text makes sense, given
+/// assignments that Unicode has made after those encodings used Private Use
+/// Area characters.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static BIG5: &'static Encoding = &BIG5_INIT;
 
-/// The initializer for the EUC-JP encoding.
+/// The initializer for the [EUC-JP](static.EUC_JP.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -783,13 +810,26 @@ pub static EUC_JP_INIT: Encoding = Encoding {
 
 /// The EUC-JP encoding.
 ///
+/// This is the legacy Unix encoding for Japanese.
+///
+/// For compatibility with Web servers that don't expect three-byte sequences
+/// in form submissions, the encoder doesn't generate three-byte sequences.
+/// That is, the JIS X 0212 support is decode-only.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/euc-jp.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/euc-jp-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 20932. There are error
+/// handling differences and a handful of 2-byte sequences that decode differently.
+/// Additionally, Windows doesn't support 3-byte sequences.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static EUC_JP: &'static Encoding = &EUC_JP_INIT;
 
-/// The initializer for the EUC-KR encoding.
+/// The initializer for the [EUC-KR](static.EUC_KR.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -808,13 +848,24 @@ pub static EUC_KR_INIT: Encoding = Encoding {
 
 /// The EUC-KR encoding.
 ///
+/// This is the Korean encoding for Windows. It extends the Unix legacy encoding
+/// for Korean, based on KS X 1001 (which also formed the base of MacKorean on Mac OS
+/// Classic), with all the characters from the Hangul Syllables block of Unicode.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/euc-kr.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/euc-kr-bmp.html)
+///
+/// This encoding matches the Windows code page 949, except Windows decodes byte 0x80
+/// to U+0080 and some byte sequences that are errors per the Encoding Standard to
+/// the question mark or the Private Use Area.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static EUC_KR: &'static Encoding = &EUC_KR_INIT;
 
-/// The initializer for the GBK encoding.
+/// The initializer for the [GBK](static.GBK.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -833,13 +884,30 @@ pub static GBK_INIT: Encoding = Encoding {
 
 /// The GBK encoding.
 ///
+/// The decoder for this encoding is the same as the decoder for gb18030.
+/// The encoder side of this encoding is GBK with Windows code page 936 euro
+/// sign behavior. GBK extends GB2312-80 to cover the CJK Unified Ideographs
+/// Unicode block as well as a handful of ideographs from the CJK Unified
+/// Ideographs Extension A and CJK Compatibility Ideographs blocks.
+///
+/// Unlike e.g. in the case of ISO-8859-1 and windows-1252, the GBK encoder wasn't
+/// unified with the gb18030 encoder in the Encoding Standard out of concern
+/// that servers that expect GBK form submissions might not be able to handle
+/// the four-byte sequences.
+///
+/// [Index visualization for the two-byte sequences](https://encoding.spec.whatwg.org/gb18030.html),
+/// [Visualization of BMP coverage of the two-byte index](https://encoding.spec.whatwg.org/gb18030-bmp.html)
+///
+/// The encoder of this encoding roughly matches the Windows code page 936.
+/// The decoder side is a superset.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static GBK: &'static Encoding = &GBK_INIT;
 
-/// The initializer for the IBM866 encoding.
+/// The initializer for the [IBM866](static.IBM866.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -858,13 +926,22 @@ pub static IBM866_INIT: Encoding = Encoding {
 
 /// The IBM866 encoding.
 ///
+/// This is the most notable one of the DOS Cyrillic code pages. It has the same
+/// box drawing characters as code page 437, so it can be used for decoding
+/// DOS-era ASCII + box drawing data.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/ibm866.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/ibm866-bmp.html)
+///
+/// This encoding matches the Windows code page 866.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static IBM866: &'static Encoding = &IBM866_INIT;
 
-/// The initializer for the ISO-2022-JP encoding.
+/// The initializer for the [ISO-2022-JP](static.ISO_2022_JP.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -883,13 +960,24 @@ pub static ISO_2022_JP_INIT: Encoding = Encoding {
 
 /// The ISO-2022-JP encoding.
 ///
+/// This is the primary pre-UTF-8 encoding for Japanese email. It uses the ASCII
+/// byte range to encode non-Basic Latin characters. It's the only encoding
+/// supported by this crate whose encoder is stateful.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/jis0208.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/jis0208-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 50220. Notably, Windows
+/// uses U+30FB in place of the REPLACEMENT CHARACTER and otherwise differs in
+/// error handling.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_2022_JP: &'static Encoding = &ISO_2022_JP_INIT;
 
-/// The initializer for the ISO-8859-10 encoding.
+/// The initializer for the [ISO-8859-10](static.ISO_8859_10.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -908,13 +996,22 @@ pub static ISO_8859_10_INIT: Encoding = Encoding {
 
 /// The ISO-8859-10 encoding.
 ///
+/// This is the Nordic part of the ISO/IEC 8859 encoding family. This encoding
+/// is also known as Latin 6.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-10.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-10-bmp.html)
+///
+/// The Windows code page number for this encoding is 28600, but kernel32.dll
+/// does not support this encoding.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_10: &'static Encoding = &ISO_8859_10_INIT;
 
-/// The initializer for the ISO-8859-13 encoding.
+/// The initializer for the [ISO-8859-13](static.ISO_8859_13.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -933,13 +1030,22 @@ pub static ISO_8859_13_INIT: Encoding = Encoding {
 
 /// The ISO-8859-13 encoding.
 ///
+/// This is the Baltic part of the ISO/IEC 8859 encoding family. This encoding
+/// is also known as Latin 7.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-13.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-13-bmp.html)
+///
+/// This encoding matches the Windows code page 28603, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_13: &'static Encoding = &ISO_8859_13_INIT;
 
-/// The initializer for the ISO-8859-14 encoding.
+/// The initializer for the [ISO-8859-14](static.ISO_8859_14.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -958,13 +1064,22 @@ pub static ISO_8859_14_INIT: Encoding = Encoding {
 
 /// The ISO-8859-14 encoding.
 ///
+/// This is the Celtic part of the ISO/IEC 8859 encoding family. This encoding
+/// is also known as Latin 8.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-14.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-14-bmp.html)
+///
+/// The Windows code page number for this encoding is 28604, but kernel32.dll
+/// does not support this encoding.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_14: &'static Encoding = &ISO_8859_14_INIT;
 
-/// The initializer for the ISO-8859-15 encoding.
+/// The initializer for the [ISO-8859-15](static.ISO_8859_15.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -983,13 +1098,21 @@ pub static ISO_8859_15_INIT: Encoding = Encoding {
 
 /// The ISO-8859-15 encoding.
 ///
+/// This is the revised Western European part of the ISO/IEC 8859 encoding
+/// family. This encoding is also known as Latin 9.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-15.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-15-bmp.html)
+///
+/// This encoding matches the Windows code page 28605.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_15: &'static Encoding = &ISO_8859_15_INIT;
 
-/// The initializer for the ISO-8859-16 encoding.
+/// The initializer for the [ISO-8859-16](static.ISO_8859_16.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1008,13 +1131,22 @@ pub static ISO_8859_16_INIT: Encoding = Encoding {
 
 /// The ISO-8859-16 encoding.
 ///
+/// This is the South-Eastern European part of the ISO/IEC 8859 encoding
+/// family. This encoding is also known as Latin 10.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-16.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-16-bmp.html)
+///
+/// The Windows code page number for this encoding is 28606, but kernel32.dll
+/// does not support this encoding.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_16: &'static Encoding = &ISO_8859_16_INIT;
 
-/// The initializer for the ISO-8859-2 encoding.
+/// The initializer for the [ISO-8859-2](static.ISO_8859_2.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1033,13 +1165,20 @@ pub static ISO_8859_2_INIT: Encoding = Encoding {
 
 /// The ISO-8859-2 encoding.
 ///
+/// This is the Central European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 2.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-2.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-2-bmp.html)
+///
+/// This encoding matches the Windows code page 28592.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_2: &'static Encoding = &ISO_8859_2_INIT;
 
-/// The initializer for the ISO-8859-3 encoding.
+/// The initializer for the [ISO-8859-3](static.ISO_8859_3.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1058,13 +1197,20 @@ pub static ISO_8859_3_INIT: Encoding = Encoding {
 
 /// The ISO-8859-3 encoding.
 ///
+/// This is the South European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 3.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-3.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-3-bmp.html)
+///
+/// This encoding matches the Windows code page 28593.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_3: &'static Encoding = &ISO_8859_3_INIT;
 
-/// The initializer for the ISO-8859-4 encoding.
+/// The initializer for the [ISO-8859-4](static.ISO_8859_4.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1083,13 +1229,20 @@ pub static ISO_8859_4_INIT: Encoding = Encoding {
 
 /// The ISO-8859-4 encoding.
 ///
+/// This is the North European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 4.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-4.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-4-bmp.html)
+///
+/// This encoding matches the Windows code page 28594.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_4: &'static Encoding = &ISO_8859_4_INIT;
 
-/// The initializer for the ISO-8859-5 encoding.
+/// The initializer for the [ISO-8859-5](static.ISO_8859_5.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1108,13 +1261,20 @@ pub static ISO_8859_5_INIT: Encoding = Encoding {
 
 /// The ISO-8859-5 encoding.
 ///
+/// This is the Cyrillic part of the ISO/IEC 8859 encoding family.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-5.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-5-bmp.html)
+///
+/// This encoding matches the Windows code page 28595.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_5: &'static Encoding = &ISO_8859_5_INIT;
 
-/// The initializer for the ISO-8859-6 encoding.
+/// The initializer for the [ISO-8859-6](static.ISO_8859_6.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1133,13 +1293,21 @@ pub static ISO_8859_6_INIT: Encoding = Encoding {
 
 /// The ISO-8859-6 encoding.
 ///
+/// This is the Arabic part of the ISO/IEC 8859 encoding family.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-6.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-6-bmp.html)
+///
+/// This encoding matches the Windows code page 28596, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_6: &'static Encoding = &ISO_8859_6_INIT;
 
-/// The initializer for the ISO-8859-7 encoding.
+/// The initializer for the [ISO-8859-7](static.ISO_8859_7.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1158,13 +1326,25 @@ pub static ISO_8859_7_INIT: Encoding = Encoding {
 
 /// The ISO-8859-7 encoding.
 ///
+/// This is the Greek part of the ISO/IEC 8859 encoding family.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-7.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-7-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 28597. Windows decodes
+/// unassigned code points, the currency signs at 0xA4 and 0xA5 as well as
+/// 0xAA, which should be U+037A GREEK YPOGEGRAMMENI, to the Private Use Area
+/// of Unicode. Windows decodes 0xA1 to U+02BD MODIFIER LETTER REVERSED COMMA
+/// instead of U+2018 LEFT SINGLE QUOTATION MARK and 0xA2 to U+02BC MODIFIER
+/// LETTER APOSTROPHE instead of U+2019 RIGHT SINGLE QUOTATION MARK.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_7: &'static Encoding = &ISO_8859_7_INIT;
 
-/// The initializer for the ISO-8859-8 encoding.
+/// The initializer for the [ISO-8859-8](static.ISO_8859_8.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1183,13 +1363,23 @@ pub static ISO_8859_8_INIT: Encoding = Encoding {
 
 /// The ISO-8859-8 encoding.
 ///
+/// This is the Hebrew part of the ISO/IEC 8859 encoding family in visual order.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-8.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-8-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 28598. Windows decodes
+/// 0xAF to OVERLINE instead of MACRON and 0xFD and 0xFE to the Private Use
+/// Area instead of LRM and RLM. Windows decodes unassigned code points to
+/// the Private Use Area.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_8: &'static Encoding = &ISO_8859_8_INIT;
 
-/// The initializer for the ISO-8859-8-I encoding.
+/// The initializer for the [ISO-8859-8-I](static.ISO_8859_8_I.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1208,13 +1398,23 @@ pub static ISO_8859_8_I_INIT: Encoding = Encoding {
 
 /// The ISO-8859-8-I encoding.
 ///
+/// This is the Hebrew part of the ISO/IEC 8859 encoding family in logical order.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-8.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-8-bmp.html)
+///
+/// This encoding roughly matches the Windows code page 38598. Windows decodes
+/// 0xAF to OVERLINE instead of MACRON and 0xFD and 0xFE to the Private Use
+/// Area instead of LRM and RLM. Windows decodes unassigned code points to
+/// the Private Use Area.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static ISO_8859_8_I: &'static Encoding = &ISO_8859_8_I_INIT;
 
-/// The initializer for the KOI8-R encoding.
+/// The initializer for the [KOI8-R](static.KOI8_R.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1233,13 +1433,20 @@ pub static KOI8_R_INIT: Encoding = Encoding {
 
 /// The KOI8-R encoding.
 ///
+/// This is an encoding for Russian from [RFC 1489](https://tools.ietf.org/html/rfc1489).
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/koi8-r.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/koi8-r-bmp.html)
+///
+/// This encoding matches the Windows code page 20866.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static KOI8_R: &'static Encoding = &KOI8_R_INIT;
 
-/// The initializer for the KOI8-U encoding.
+/// The initializer for the [KOI8-U](static.KOI8_U.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1258,13 +1465,20 @@ pub static KOI8_U_INIT: Encoding = Encoding {
 
 /// The KOI8-U encoding.
 ///
+/// This is an encoding for Ukrainian adapted from KOI8-R.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/koi8-u.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/koi8-u-bmp.html)
+///
+/// This encoding matches the Windows code page 21866.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static KOI8_U: &'static Encoding = &KOI8_U_INIT;
 
-/// The initializer for the Shift_JIS encoding.
+/// The initializer for the [Shift_JIS](static.SHIFT_JIS.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1283,13 +1497,22 @@ pub static SHIFT_JIS_INIT: Encoding = Encoding {
 
 /// The Shift_JIS encoding.
 ///
+/// This is the Japanese encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/shift_jis.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/shift_jis-bmp.html)
+///
+/// This encoding matches the Windows code page 932, except Windows decodes some byte
+/// sequences that are errors per the Encoding Standard to the question mark or the
+/// Private Use Area and generally uses U+30FB in place of the REPLACEMENT CHARACTER.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static SHIFT_JIS: &'static Encoding = &SHIFT_JIS_INIT;
 
-/// The initializer for the UTF-16BE encoding.
+/// The initializer for the [UTF-16BE](static.UTF_16BE.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1308,13 +1531,22 @@ pub static UTF_16BE_INIT: Encoding = Encoding {
 
 /// The UTF-16BE encoding.
 ///
+/// This decode-only encoding uses 16-bit code units due to Unicode originally
+/// having been designed as a 16-bit repertoire. In the absence of a byte order
+/// mark the big endian byte order is assumed.
+///
+/// There is no corresponding encoder in this crate or in the Encoding
+/// Standard. The output encoding of this encoding is UTF-8.
+///
+/// This encoding matches the Windows code page 1201.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static UTF_16BE: &'static Encoding = &UTF_16BE_INIT;
 
-/// The initializer for the UTF-16LE encoding.
+/// The initializer for the [UTF-16LE](static.UTF_16LE.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1333,13 +1565,22 @@ pub static UTF_16LE_INIT: Encoding = Encoding {
 
 /// The UTF-16LE encoding.
 ///
+/// This decode-only encoding uses 16-bit code units due to Unicode originally
+/// having been designed as a 16-bit repertoire. In the absence of a byte order
+/// mark the little endian byte order is assumed.
+///
+/// There is no corresponding encoder in this crate or in the Encoding
+/// Standard. The output encoding of this encoding is UTF-8.
+///
+/// This encoding matches the Windows code page 1200.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static UTF_16LE: &'static Encoding = &UTF_16LE_INIT;
 
-/// The initializer for the UTF-8 encoding.
+/// The initializer for the [UTF-8](static.UTF_8.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1358,13 +1599,19 @@ pub static UTF_8_INIT: Encoding = Encoding {
 
 /// The UTF-8 encoding.
 ///
+/// This is the encoding that should be used for all new development. It can
+/// represent all of Unicode.
+///
+/// This encoding matches the Windows code page 65001, except Windows differs
+/// in the number of errors generated for some erroneous byte sequences.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static UTF_8: &'static Encoding = &UTF_8_INIT;
 
-/// The initializer for the gb18030 encoding.
+/// The initializer for the [gb18030](static.GB18030.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1383,13 +1630,23 @@ pub static GB18030_INIT: Encoding = Encoding {
 
 /// The gb18030 encoding.
 ///
+/// This encoding matches GB18030-2005 except the two-byte sequence 0xA3 0xA0
+/// maps to U+3000 for compatibility with existing Web content. As a result,
+/// this encoding can represent all of Unicode except for the private-use
+/// character U+E5E5.
+///
+/// [Index visualization for the two-byte sequences](https://encoding.spec.whatwg.org/gb18030.html),
+/// [Visualization of BMP coverage of the two-byte index](https://encoding.spec.whatwg.org/gb18030-bmp.html)
+///
+/// This encoding matches the Windows code page 54936.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static GB18030: &'static Encoding = &GB18030_INIT;
 
-/// The initializer for the macintosh encoding.
+/// The initializer for the [macintosh](static.MACINTOSH.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1408,13 +1665,21 @@ pub static MACINTOSH_INIT: Encoding = Encoding {
 
 /// The macintosh encoding.
 ///
+/// This is the MacRoman encoding from Mac OS Classic.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/macintosh.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/macintosh-bmp.html)
+///
+/// This encoding matches the Windows code page 10000, except Windows decodes
+/// 0xBD to U+2126 OHM SIGN instead of U+03A9 GREEK CAPITAL LETTER OMEGA.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static MACINTOSH: &'static Encoding = &MACINTOSH_INIT;
 
-/// The initializer for the replacement encoding.
+/// The initializer for the [replacement](static.REPLACEMENT.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1433,13 +1698,24 @@ pub static REPLACEMENT_INIT: Encoding = Encoding {
 
 /// The replacement encoding.
 ///
+/// This decode-only encoding decodes all non-zero-length streams to a single
+/// REPLACEMENT CHARACTER. Its purpose is to avoid the use of an
+/// ASCII-compatible fallback encoding (typically windows-1252) for some
+/// encodings that are no longer supported by the Web Platform and that
+/// would be dangerous to treat as ASCII-compatible.
+///
+/// There is no corresponding encoder. The output encoding of this encoding
+/// is UTF-8.
+///
+/// This encoding does not have a Windows code page number.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static REPLACEMENT: &'static Encoding = &REPLACEMENT_INIT;
 
-/// The initializer for the windows-1250 encoding.
+/// The initializer for the [windows-1250](static.WINDOWS_1250.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1458,13 +1734,20 @@ pub static WINDOWS_1250_INIT: Encoding = Encoding {
 
 /// The windows-1250 encoding.
 ///
+/// This is the Central European encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1250.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1250-bmp.html)
+///
+/// This encoding matches the Windows code page 1250.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1250: &'static Encoding = &WINDOWS_1250_INIT;
 
-/// The initializer for the windows-1251 encoding.
+/// The initializer for the [windows-1251](static.WINDOWS_1251.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1483,13 +1766,20 @@ pub static WINDOWS_1251_INIT: Encoding = Encoding {
 
 /// The windows-1251 encoding.
 ///
+/// This is the Cyrillic encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1251.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1251-bmp.html)
+///
+/// This encoding matches the Windows code page 1251.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1251: &'static Encoding = &WINDOWS_1251_INIT;
 
-/// The initializer for the windows-1252 encoding.
+/// The initializer for the [windows-1252](static.WINDOWS_1252.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1508,13 +1798,21 @@ pub static WINDOWS_1252_INIT: Encoding = Encoding {
 
 /// The windows-1252 encoding.
 ///
+/// This is the Western encoding for Windows. It is an extension of ISO-8859-1,
+/// which is known as Latin 1.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1252.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1252-bmp.html)
+///
+/// This encoding matches the Windows code page 1252.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1252: &'static Encoding = &WINDOWS_1252_INIT;
 
-/// The initializer for the windows-1253 encoding.
+/// The initializer for the [windows-1253](static.WINDOWS_1253.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1533,13 +1831,22 @@ pub static WINDOWS_1253_INIT: Encoding = Encoding {
 
 /// The windows-1253 encoding.
 ///
+/// This is the Greek encoding for Windows. It is mostly an extension of
+/// ISO-8859-7, but U+0386 is mapped to a different byte.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1253.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1253-bmp.html)
+///
+/// This encoding matches the Windows code page 1253, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1253: &'static Encoding = &WINDOWS_1253_INIT;
 
-/// The initializer for the windows-1254 encoding.
+/// The initializer for the [windows-1254](static.WINDOWS_1254.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1558,13 +1865,21 @@ pub static WINDOWS_1254_INIT: Encoding = Encoding {
 
 /// The windows-1254 encoding.
 ///
+/// This is the Turkish encoding for Windows. It is an extension of ISO-8859-9,
+/// which is known as Latin 5.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1254.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1254-bmp.html)
+///
+/// This encoding matches the Windows code page 1254.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1254: &'static Encoding = &WINDOWS_1254_INIT;
 
-/// The initializer for the windows-1255 encoding.
+/// The initializer for the [windows-1255](static.WINDOWS_1255.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1583,13 +1898,22 @@ pub static WINDOWS_1255_INIT: Encoding = Encoding {
 
 /// The windows-1255 encoding.
 ///
+/// This is the Hebrew encoding for Windows. It is an extension of ISO-8859-8-I,
+/// except for a currency sign swap.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1255.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1255-bmp.html)
+///
+/// This encoding matches the Windows code page 1255, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1255: &'static Encoding = &WINDOWS_1255_INIT;
 
-/// The initializer for the windows-1256 encoding.
+/// The initializer for the [windows-1256](static.WINDOWS_1256.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1608,13 +1932,20 @@ pub static WINDOWS_1256_INIT: Encoding = Encoding {
 
 /// The windows-1256 encoding.
 ///
+/// This is the Arabic encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1256.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1256-bmp.html)
+///
+/// This encoding matches the Windows code page 1256.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1256: &'static Encoding = &WINDOWS_1256_INIT;
 
-/// The initializer for the windows-1257 encoding.
+/// The initializer for the [windows-1257](static.WINDOWS_1257.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1633,13 +1964,21 @@ pub static WINDOWS_1257_INIT: Encoding = Encoding {
 
 /// The windows-1257 encoding.
 ///
+/// This is the Baltic encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1257.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1257-bmp.html)
+///
+/// This encoding matches the Windows code page 1257, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1257: &'static Encoding = &WINDOWS_1257_INIT;
 
-/// The initializer for the windows-1258 encoding.
+/// The initializer for the [windows-1258](static.WINDOWS_1258.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1658,13 +1997,25 @@ pub static WINDOWS_1258_INIT: Encoding = Encoding {
 
 /// The windows-1258 encoding.
 ///
+/// This is the Vietnamese encoding for Windows.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-1258.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1258-bmp.html)
+///
+/// This encoding matches the Windows code page 1258 when used in the
+/// non-normalizing mode. Unlike with the other single-byte encodings, the
+/// result of decoding is not necessarily in Normalization Form C. Conversely,
+/// input in Normalization Form C is not necessarily encodable without
+/// replacement. In general, it's a bad idea to encode to encodings other
+/// than UTF-8, but this encoding is especially hazardous to encode to.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_1258: &'static Encoding = &WINDOWS_1258_INIT;
 
-/// The initializer for the windows-874 encoding.
+/// The initializer for the [windows-874](static.WINDOWS_874.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1683,13 +2034,21 @@ pub static WINDOWS_874_INIT: Encoding = Encoding {
 
 /// The windows-874 encoding.
 ///
+/// This is the Thai encoding for Windows. It is an extension of TIS-620 / ISO-8859-11.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/windows-874.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-874-bmp.html)
+///
+/// This encoding matches the Windows code page 874, except Windows decodes
+/// unassigned code points to the Private Use Area of Unicode.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static WINDOWS_874: &'static Encoding = &WINDOWS_874_INIT;
 
-/// The initializer for the x-mac-cyrillic encoding.
+/// The initializer for the [x-mac-cyrillic](static.X_MAC_CYRILLIC.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1708,13 +2067,20 @@ pub static X_MAC_CYRILLIC_INIT: Encoding = Encoding {
 
 /// The x-mac-cyrillic encoding.
 ///
+/// This is the MacUkrainian encoding from Mac OS Classic.
+///
+/// [Index visualization](https://encoding.spec.whatwg.org/x-mac-cyrillic.html),
+/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/x-mac-cyrillic-bmp.html)
+///
+/// This encoding matches the Windows code page 10017.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
 /// `static`.
 pub static X_MAC_CYRILLIC: &'static Encoding = &X_MAC_CYRILLIC_INIT;
 
-/// The initializer for the x-user-defined encoding.
+/// The initializer for the [x-user-defined](static.X_USER_DEFINED.html) encoding.
 ///
 /// For use only for taking the address of this form when
 /// Rust prohibits the use of the non-`_INIT` form directly,
@@ -1733,6 +2099,13 @@ pub static X_USER_DEFINED_INIT: Encoding = Encoding {
 
 /// The x-user-defined encoding.
 ///
+/// This encoding offsets the non-ASCII bytes by `0xF700`, thereby decoding
+/// them to the Private Use Area of Unicode. It was used for loading binary
+/// data into a JavaScript string using `XMLHttpRequest` before XHR supported
+/// the `"arraybuffer"` response type.
+///
+/// This encoding does not have a Windows code page number.
+///
 /// This will change from `static` to `const` if Rust changes
 /// to make the referent of `pub const FOO: &'static Encoding`
 /// unique cross-crate, so don't take the address of this
@@ -3347,7 +3720,8 @@ impl Decoder {
             | DecoderLifeCycle::AtUtf8Start
             | DecoderLifeCycle::AtUtf16LeStart
             | DecoderLifeCycle::AtUtf16BeStart => {
-                return self.variant
+                return self
+                    .variant
                     .max_utf8_buffer_length_without_replacement(byte_length)
             }
             DecoderLifeCycle::AtStart => {
@@ -3362,7 +3736,8 @@ impl Decoder {
                             // No need to consider the internal state of the underlying decoder,
                             // because it is at start, because no data has reached it yet.
                             return Some(utf_bom);
-                        } else if let Some(non_bom) = self.variant
+                        } else if let Some(non_bom) = self
+                            .variant
                             .max_utf8_buffer_length_without_replacement(byte_length)
                         {
                             return Some(std::cmp::max(utf_bom, non_bom));
diff --git a/src/mem.rs b/src/mem.rs
index 7e84ecb..81c5b6e 100644
--- a/src/mem.rs
+++ b/src/mem.rs
@@ -195,9 +195,8 @@ macro_rules! by_unit_check_simd {
                     }
                     let mut simd_accu = $splat;
                     while offset <= len_minus_stride {
-                        simd_accu = simd_accu | unsafe {
-                            *(src.offset(offset as isize) as *const $simd_ty)
-                        };
+                        simd_accu = simd_accu
+                            | unsafe { *(src.offset(offset as isize) as *const $simd_ty) };
                         offset += SIMD_STRIDE_SIZE / unit_size;
                     }
                     if !$func(simd_accu) {
@@ -1279,7 +1278,9 @@ pub fn is_char_bidi(c: char) -> bool {
         // Above Arabic Extended-A and below Arabic Presentation Forms
         if in_inclusive_range32(code_point, 0x200F, 0x2067) {
             // In the range that contains the RTL controls
-            return code_point == 0x200F || code_point == 0x202B || code_point == 0x202E
+            return code_point == 0x200F
+                || code_point == 0x202B
+                || code_point == 0x202E
                 || code_point == 0x2067;
         }
         return false;
@@ -1514,7 +1515,8 @@ pub fn convert_str_to_utf16(src: &str, dst: &mut [u16]) -> usize {
                 // Three-byte
                 let second = bytes[read + 1];
                 let third = bytes[read + 2];
-                let point = (((byte as u32) & 0xFu32) << 12) | ((second as u32 & 0x3Fu32) << 6)
+                let point = (((byte as u32) & 0xFu32) << 12)
+                    | ((second as u32 & 0x3Fu32) << 6)
                     | (third as u32 & 0x3Fu32);
                 dst[written] = point as u16;
                 read += 3;
@@ -1524,7 +1526,8 @@ pub fn convert_str_to_utf16(src: &str, dst: &mut [u16]) -> usize {
                 let second = bytes[read + 1];
                 let third = bytes[read + 2];
                 let fourth = bytes[read + 3];
-                let point = (((byte as u32) & 0x7u32) << 18) | ((second as u32 & 0x3Fu32) << 12)
+                let point = (((byte as u32) & 0x7u32) << 18)
+                    | ((second as u32 & 0x3Fu32) << 12)
                     | ((third as u32 & 0x3Fu32) << 6)
                     | (fourth as u32 & 0x3Fu32);
                 dst[written] = (0xD7C0 + (point >> 10)) as u16;
diff --git a/src/shift_jis.rs b/src/shift_jis.rs
index e93ae2c..1aea7c3 100644
--- a/src/shift_jis.rs
+++ b/src/shift_jis.rs
@@ -248,7 +248,8 @@ impl ShiftJisEncoder {
                             10716 + bmp_minus_roman as usize
                         } else if let Some(pointer) = jis0208_range_encode(bmp) {
                             pointer
-                        } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D) || bmp == 0xF929
+                        } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D)
+                            || bmp == 0xF929
                             || bmp == 0xF9DC
                         {
                             // Guaranteed to be found in IBM_KANJI
diff --git a/src/simd_funcs.rs b/src/simd_funcs.rs
index 3c2ec34..e90343a 100644
--- a/src/simd_funcs.rs
+++ b/src/simd_funcs.rs
@@ -277,10 +277,15 @@ pub fn is_u16x8_bidi(s: u16x8) -> bool {
 
     // Quick refutation failed. Let's do the full check.
 
-    (in_range16x8!(s, 0x0590, 0x0900) | in_range16x8!(s, 0xFB50, 0xFE00)
-        | in_range16x8!(s, 0xFE70, 0xFF00) | in_range16x8!(s, 0xD802, 0xD804)
-        | in_range16x8!(s, 0xD83A, 0xD83C) | s.eq(u16x8::splat(0x200F))
-        | s.eq(u16x8::splat(0x202B)) | s.eq(u16x8::splat(0x202E)) | s.eq(u16x8::splat(0x2067)))
+    (in_range16x8!(s, 0x0590, 0x0900)
+        | in_range16x8!(s, 0xFB50, 0xFE00)
+        | in_range16x8!(s, 0xFE70, 0xFF00)
+        | in_range16x8!(s, 0xD802, 0xD804)
+        | in_range16x8!(s, 0xD83A, 0xD83C)
+        | s.eq(u16x8::splat(0x200F))
+        | s.eq(u16x8::splat(0x202B))
+        | s.eq(u16x8::splat(0x202E))
+        | s.eq(u16x8::splat(0x2067)))
         .any()
 }
 
diff --git a/src/utf_16.rs b/src/utf_16.rs
index f3ec16b..8f82010 100644
--- a/src/utf_16.rs
+++ b/src/utf_16.rs
@@ -29,11 +29,9 @@ impl Utf16Decoder {
     }
 
     pub fn additional_from_state(&self) -> usize {
-        1 + if self.lead_byte.is_some() { 1 } else { 0 } + if self.lead_surrogate == 0 {
-            0
-        } else {
-            2
-        }
+        1
+            + if self.lead_byte.is_some() { 1 } else { 0 }
+            + if self.lead_surrogate == 0 { 0 } else { 2 }
     }
 
     pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
diff --git a/src/utf_8.rs b/src/utf_8.rs
index f9c02d3..db5c62a 100644
--- a/src/utf_8.rs
+++ b/src/utf_8.rs
@@ -372,7 +372,8 @@ pub fn convert_utf8_to_utf16_up_to_invalid(src: &[u8], dst: &mut [u16]) -> (usiz
                 {
                     break 'outer;
                 }
-                let point = (((byte as u32) & 0xFu32) << 12) | ((second as u32 & 0x3Fu32) << 6)
+                let point = (((byte as u32) & 0xFu32) << 12)
+                    | ((second as u32 & 0x3Fu32) << 6)
                     | (third as u32 & 0x3Fu32);
                 dst[written] = point as u16;
                 read = new_read;
@@ -393,7 +394,8 @@ pub fn convert_utf8_to_utf16_up_to_invalid(src: &[u8], dst: &mut [u16]) -> (usiz
                 {
                     break 'outer;
                 }
-                let point = (((byte as u32) & 0xFu32) << 12) | ((second as u32 & 0x3Fu32) << 6)
+                let point = (((byte as u32) & 0xFu32) << 12)
+                    | ((second as u32 & 0x3Fu32) << 6)
                     | (third as u32 & 0x3Fu32);
                 dst[written] = point as u16;
                 read = new_read;
@@ -414,7 +416,8 @@ pub fn convert_utf8_to_utf16_up_to_invalid(src: &[u8], dst: &mut [u16]) -> (usiz
                 {
                     break 'outer;
                 }
-                let point = (((byte as u32) & 0xFu32) << 12) | ((second as u32 & 0x3Fu32) << 6)
+                let point = (((byte as u32) & 0xFu32) << 12)
+                    | ((second as u32 & 0x3Fu32) << 6)
                     | (third as u32 & 0x3Fu32);
                 dst[written] = point as u16;
                 read = new_read;