syntax: add emoji and break properties

This commit adds several emoji properties such as Emoji and
Extended_Pictographic. We also add support for the Grapheme_Cluster_Break,
Word_Break and Sentence_Break enumeration properties.
This commit is contained in:
Andrew Gallant
2018-11-21 09:04:55 -05:00
parent dd2ba63848
commit 1e3e67bcaf
10 changed files with 3567 additions and 1709 deletions
+9
View File
@@ -108,8 +108,15 @@ properties correspond to properties required by RL1.2):
* `Default_Ignorable_Code_Point` \*
* `Deprecated`
* `Diacritic`
* `Emoji`
* `Emoji_Presentation`
* `Emoji_Modifier`
* `Emoji_Modifier_Base`
* `Emoji_Component`
* `Extended_Pictographic`
* `Extender`
* `Grapheme_Base`
* `Grapheme_Cluster_Break`
* `Grapheme_Extend`
* `Hex_Digit`
* `IDS_Binary_Operator`
@@ -127,6 +134,7 @@ properties correspond to properties required by RL1.2):
* `Quotation_Mark`
* `Radical`
* `Regional_Indicator`
* `Sentence_Break`
* `Sentence_Terminal`
* `Soft_Dotted`
* `Terminal_Punctuation`
@@ -134,6 +142,7 @@ properties correspond to properties required by RL1.2):
* `Uppercase` \*
* `Variation_Selector`
* `White_Space` \*
* `Word_Break`
* `XID_Continue`
* `XID_Start`
+18
View File
@@ -7,11 +7,14 @@ use hir;
use unicode_tables::age;
use unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;
use unicode_tables::general_category;
use unicode_tables::grapheme_cluster_break;
use unicode_tables::property_bool;
use unicode_tables::property_names::PROPERTY_NAMES;
use unicode_tables::property_values::PROPERTY_VALUES;
use unicode_tables::script;
use unicode_tables::script_extension;
use unicode_tables::sentence_break;
use unicode_tables::word_break;
type Result<T> = result::Result<T, Error>;
@@ -250,6 +253,21 @@ pub fn class<'a>(query: ClassQuery<'a>) -> Result<hir::ClassUnicode> {
.map(hir_class)
.ok_or(Error::PropertyValueNotFound)
}
ByValue { property_name: "Grapheme_Cluster_Break", property_value } => {
property_set(grapheme_cluster_break::BY_NAME, property_value)
.map(hir_class)
.ok_or(Error::PropertyValueNotFound)
}
ByValue { property_name: "Sentence_Break", property_value } => {
property_set(sentence_break::BY_NAME, property_value)
.map(hir_class)
.ok_or(Error::PropertyValueNotFound)
}
ByValue { property_name: "Word_Break", property_value } => {
property_set(word_break::BY_NAME, property_value)
.map(hir_class)
.ok_or(Error::PropertyValueNotFound)
}
_ => {
// What else should we support?
Err(Error::PropertyNotFound)
@@ -0,0 +1,455 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate grapheme-cluster-break /home/andrew/tmp/ucd-11.0.0/ --chars
//
// ucd-generate is available on crates.io.
/// Pairs every `Grapheme_Cluster_Break` value name with the table of code
/// point ranges defined for it in this file. Entries are ordered by name.
pub const BY_NAME: &[(&str, &[(char, char)])] = &[
  ("CR", CR),
  ("Control", CONTROL),
  ("Extend", EXTEND),
  ("L", L),
  ("LF", LF),
  ("LV", LV),
  ("LVT", LVT),
  ("Prepend", PREPEND),
  ("Regional_Indicator", REGIONAL_INDICATOR),
  ("SpacingMark", SPACINGMARK),
  ("T", T),
  ("V", V),
  ("ZWJ", ZWJ),
];
/// Inclusive `(start, end)` ranges for the `Grapheme_Cluster_Break` value
/// `CR`: only the carriage return character.
pub const CR: &[(char, char)] = &[('\r', '\r')];
/// Inclusive `(start, end)` ranges for the `Grapheme_Cluster_Break` value
/// `Control`, listed in ascending order with one range per line.
pub const CONTROL: &[(char, char)] = &[
  ('\u{0}', '\t'),
  ('\u{b}', '\u{c}'),
  ('\u{e}', '\u{1f}'),
  ('\u{7f}', '\u{9f}'),
  ('\u{ad}', '\u{ad}'),
  ('\u{61c}', '\u{61c}'),
  ('\u{180e}', '\u{180e}'),
  ('\u{200b}', '\u{200b}'),
  ('\u{200e}', '\u{200f}'),
  ('\u{2028}', '\u{202e}'),
  ('\u{2060}', '\u{206f}'),
  ('\u{feff}', '\u{feff}'),
  ('\u{fff0}', '\u{fffb}'),
  ('\u{1bca0}', '\u{1bca3}'),
  ('\u{1d173}', '\u{1d17a}'),
  ('\u{e0000}', '\u{e001f}'),
  ('\u{e0080}', '\u{e00ff}'),
  ('\u{e01f0}', '\u{e0fff}'),
];
pub const EXTEND: &'static [(char, char)] = &[
('\u{300}', '\u{36f}'), ('\u{483}', '\u{489}'), ('\u{591}', '\u{5bd}'),
('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'), ('\u{5c4}', '\u{5c5}'),
('\u{5c7}', '\u{5c7}'), ('\u{610}', '\u{61a}'), ('\u{64b}', '\u{65f}'),
('\u{670}', '\u{670}'), ('\u{6d6}', '\u{6dc}'), ('\u{6df}', '\u{6e4}'),
('\u{6e7}', '\u{6e8}'), ('\u{6ea}', '\u{6ed}'), ('\u{711}', '\u{711}'),
('\u{730}', '\u{74a}'), ('\u{7a6}', '\u{7b0}'), ('\u{7eb}', '\u{7f3}'),
('\u{7fd}', '\u{7fd}'), ('\u{816}', '\u{819}'), ('\u{81b}', '\u{823}'),
('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'),
('\u{8d3}', '\u{8e1}'), ('\u{8e3}', '\u{902}'), ('\u{93a}', '\u{93a}'),
('\u{93c}', '\u{93c}'), ('\u{941}', '\u{948}'), ('\u{94d}', '\u{94d}'),
('\u{951}', '\u{957}'), ('\u{962}', '\u{963}'), ('\u{981}', '\u{981}'),
('\u{9bc}', '\u{9bc}'), ('\u{9be}', '\u{9be}'), ('\u{9c1}', '\u{9c4}'),
('\u{9cd}', '\u{9cd}'), ('\u{9d7}', '\u{9d7}'), ('\u{9e2}', '\u{9e3}'),
('\u{9fe}', '\u{9fe}'), ('\u{a01}', '\u{a02}'), ('\u{a3c}', '\u{a3c}'),
('\u{a41}', '\u{a42}'), ('\u{a47}', '\u{a48}'), ('\u{a4b}', '\u{a4d}'),
('\u{a51}', '\u{a51}'), ('\u{a70}', '\u{a71}'), ('\u{a75}', '\u{a75}'),
('\u{a81}', '\u{a82}'), ('\u{abc}', '\u{abc}'), ('\u{ac1}', '\u{ac5}'),
('\u{ac7}', '\u{ac8}'), ('\u{acd}', '\u{acd}'), ('\u{ae2}', '\u{ae3}'),
('\u{afa}', '\u{aff}'), ('\u{b01}', '\u{b01}'), ('\u{b3c}', '\u{b3c}'),
('\u{b3e}', '\u{b3f}'), ('\u{b41}', '\u{b44}'), ('\u{b4d}', '\u{b4d}'),
('\u{b56}', '\u{b57}'), ('\u{b62}', '\u{b63}'), ('\u{b82}', '\u{b82}'),
('\u{bbe}', '\u{bbe}'), ('\u{bc0}', '\u{bc0}'), ('\u{bcd}', '\u{bcd}'),
('\u{bd7}', '\u{bd7}'), ('\u{c00}', '\u{c00}'), ('\u{c04}', '\u{c04}'),
('\u{c3e}', '\u{c40}'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'),
('\u{c55}', '\u{c56}'), ('\u{c62}', '\u{c63}'), ('\u{c81}', '\u{c81}'),
('\u{cbc}', '\u{cbc}'), ('\u{cbf}', '\u{cbf}'), ('\u{cc2}', '\u{cc2}'),
('\u{cc6}', '\u{cc6}'), ('\u{ccc}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'),
('\u{ce2}', '\u{ce3}'), ('\u{d00}', '\u{d01}'), ('\u{d3b}', '\u{d3c}'),
('\u{d3e}', '\u{d3e}'), ('\u{d41}', '\u{d44}'), ('\u{d4d}', '\u{d4d}'),
('\u{d57}', '\u{d57}'), ('\u{d62}', '\u{d63}'), ('\u{dca}', '\u{dca}'),
('\u{dcf}', '\u{dcf}'), ('\u{dd2}', '\u{dd4}'), ('\u{dd6}', '\u{dd6}'),
('\u{ddf}', '\u{ddf}'), ('\u{e31}', '\u{e31}'), ('\u{e34}', '\u{e3a}'),
('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{eb9}'),
('\u{ebb}', '\u{ebc}'), ('\u{ec8}', '\u{ecd}'), ('\u{f18}', '\u{f19}'),
('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), ('\u{f39}', '\u{f39}'),
('\u{f71}', '\u{f7e}'), ('\u{f80}', '\u{f84}'), ('\u{f86}', '\u{f87}'),
('\u{f8d}', '\u{f97}'), ('\u{f99}', '\u{fbc}'), ('\u{fc6}', '\u{fc6}'),
('\u{102d}', '\u{1030}'), ('\u{1032}', '\u{1037}'),
('\u{1039}', '\u{103a}'), ('\u{103d}', '\u{103e}'),
('\u{1058}', '\u{1059}'), ('\u{105e}', '\u{1060}'),
('\u{1071}', '\u{1074}'), ('\u{1082}', '\u{1082}'),
('\u{1085}', '\u{1086}'), ('\u{108d}', '\u{108d}'),
('\u{109d}', '\u{109d}'), ('\u{135d}', '\u{135f}'),
('\u{1712}', '\u{1714}'), ('\u{1732}', '\u{1734}'),
('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'),
('\u{17b4}', '\u{17b5}'), ('\u{17b7}', '\u{17bd}'),
('\u{17c6}', '\u{17c6}'), ('\u{17c9}', '\u{17d3}'),
('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'),
('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'),
('\u{1920}', '\u{1922}'), ('\u{1927}', '\u{1928}'),
('\u{1932}', '\u{1932}'), ('\u{1939}', '\u{193b}'),
('\u{1a17}', '\u{1a18}'), ('\u{1a1b}', '\u{1a1b}'),
('\u{1a56}', '\u{1a56}'), ('\u{1a58}', '\u{1a5e}'),
('\u{1a60}', '\u{1a60}'), ('\u{1a62}', '\u{1a62}'),
('\u{1a65}', '\u{1a6c}'), ('\u{1a73}', '\u{1a7c}'),
('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1abe}'),
('\u{1b00}', '\u{1b03}'), ('\u{1b34}', '\u{1b34}'),
('\u{1b36}', '\u{1b3a}'), ('\u{1b3c}', '\u{1b3c}'),
('\u{1b42}', '\u{1b42}'), ('\u{1b6b}', '\u{1b73}'),
('\u{1b80}', '\u{1b81}'), ('\u{1ba2}', '\u{1ba5}'),
('\u{1ba8}', '\u{1ba9}'), ('\u{1bab}', '\u{1bad}'),
('\u{1be6}', '\u{1be6}'), ('\u{1be8}', '\u{1be9}'),
('\u{1bed}', '\u{1bed}'), ('\u{1bef}', '\u{1bf1}'),
('\u{1c2c}', '\u{1c33}'), ('\u{1c36}', '\u{1c37}'),
('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1ce0}'),
('\u{1ce2}', '\u{1ce8}'), ('\u{1ced}', '\u{1ced}'),
('\u{1cf4}', '\u{1cf4}'), ('\u{1cf8}', '\u{1cf9}'),
('\u{1dc0}', '\u{1df9}'), ('\u{1dfb}', '\u{1dff}'),
('\u{200c}', '\u{200c}'), ('\u{20d0}', '\u{20f0}'),
('\u{2cef}', '\u{2cf1}'), ('\u{2d7f}', '\u{2d7f}'),
('\u{2de0}', '\u{2dff}'), ('\u{302a}', '\u{302f}'),
('\u{3099}', '\u{309a}'), ('\u{a66f}', '\u{a672}'),
('\u{a674}', '\u{a67d}'), ('\u{a69e}', '\u{a69f}'),
('\u{a6f0}', '\u{a6f1}'), ('\u{a802}', '\u{a802}'),
('\u{a806}', '\u{a806}'), ('\u{a80b}', '\u{a80b}'),
('\u{a825}', '\u{a826}'), ('\u{a8c4}', '\u{a8c5}'),
('\u{a8e0}', '\u{a8f1}'), ('\u{a8ff}', '\u{a8ff}'),
('\u{a926}', '\u{a92d}'), ('\u{a947}', '\u{a951}'),
('\u{a980}', '\u{a982}'), ('\u{a9b3}', '\u{a9b3}'),
('\u{a9b6}', '\u{a9b9}'), ('\u{a9bc}', '\u{a9bc}'),
('\u{a9e5}', '\u{a9e5}'), ('\u{aa29}', '\u{aa2e}'),
('\u{aa31}', '\u{aa32}'), ('\u{aa35}', '\u{aa36}'),
('\u{aa43}', '\u{aa43}'), ('\u{aa4c}', '\u{aa4c}'),
('\u{aa7c}', '\u{aa7c}'), ('\u{aab0}', '\u{aab0}'),
('\u{aab2}', '\u{aab4}'), ('\u{aab7}', '\u{aab8}'),
('\u{aabe}', '\u{aabf}'), ('\u{aac1}', '\u{aac1}'),
('\u{aaec}', '\u{aaed}'), ('\u{aaf6}', '\u{aaf6}'),
('\u{abe5}', '\u{abe5}'), ('\u{abe8}', '\u{abe8}'),
('\u{abed}', '\u{abed}'), ('\u{fb1e}', '\u{fb1e}'),
('\u{fe00}', '\u{fe0f}'), ('\u{fe20}', '\u{fe2f}'),
('\u{ff9e}', '\u{ff9f}'), ('\u{101fd}', '\u{101fd}'),
('\u{102e0}', '\u{102e0}'), ('\u{10376}', '\u{1037a}'),
('\u{10a01}', '\u{10a03}'), ('\u{10a05}', '\u{10a06}'),
('\u{10a0c}', '\u{10a0f}'), ('\u{10a38}', '\u{10a3a}'),
('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'),
('\u{10d24}', '\u{10d27}'), ('\u{10f46}', '\u{10f50}'),
('\u{11001}', '\u{11001}'), ('\u{11038}', '\u{11046}'),
('\u{1107f}', '\u{11081}'), ('\u{110b3}', '\u{110b6}'),
('\u{110b9}', '\u{110ba}'), ('\u{11100}', '\u{11102}'),
('\u{11127}', '\u{1112b}'), ('\u{1112d}', '\u{11134}'),
('\u{11173}', '\u{11173}'), ('\u{11180}', '\u{11181}'),
('\u{111b6}', '\u{111be}'), ('\u{111c9}', '\u{111cc}'),
('\u{1122f}', '\u{11231}'), ('\u{11234}', '\u{11234}'),
('\u{11236}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'),
('\u{112df}', '\u{112df}'), ('\u{112e3}', '\u{112ea}'),
('\u{11300}', '\u{11301}'), ('\u{1133b}', '\u{1133c}'),
('\u{1133e}', '\u{1133e}'), ('\u{11340}', '\u{11340}'),
('\u{11357}', '\u{11357}'), ('\u{11366}', '\u{1136c}'),
('\u{11370}', '\u{11374}'), ('\u{11438}', '\u{1143f}'),
('\u{11442}', '\u{11444}'), ('\u{11446}', '\u{11446}'),
('\u{1145e}', '\u{1145e}'), ('\u{114b0}', '\u{114b0}'),
('\u{114b3}', '\u{114b8}'), ('\u{114ba}', '\u{114ba}'),
('\u{114bd}', '\u{114bd}'), ('\u{114bf}', '\u{114c0}'),
('\u{114c2}', '\u{114c3}'), ('\u{115af}', '\u{115af}'),
('\u{115b2}', '\u{115b5}'), ('\u{115bc}', '\u{115bd}'),
('\u{115bf}', '\u{115c0}'), ('\u{115dc}', '\u{115dd}'),
('\u{11633}', '\u{1163a}'), ('\u{1163d}', '\u{1163d}'),
('\u{1163f}', '\u{11640}'), ('\u{116ab}', '\u{116ab}'),
('\u{116ad}', '\u{116ad}'), ('\u{116b0}', '\u{116b5}'),
('\u{116b7}', '\u{116b7}'), ('\u{1171d}', '\u{1171f}'),
('\u{11722}', '\u{11725}'), ('\u{11727}', '\u{1172b}'),
('\u{1182f}', '\u{11837}'), ('\u{11839}', '\u{1183a}'),
('\u{11a01}', '\u{11a0a}'), ('\u{11a33}', '\u{11a38}'),
('\u{11a3b}', '\u{11a3e}'), ('\u{11a47}', '\u{11a47}'),
('\u{11a51}', '\u{11a56}'), ('\u{11a59}', '\u{11a5b}'),
('\u{11a8a}', '\u{11a96}'), ('\u{11a98}', '\u{11a99}'),
('\u{11c30}', '\u{11c36}'), ('\u{11c38}', '\u{11c3d}'),
('\u{11c3f}', '\u{11c3f}'), ('\u{11c92}', '\u{11ca7}'),
('\u{11caa}', '\u{11cb0}'), ('\u{11cb2}', '\u{11cb3}'),
('\u{11cb5}', '\u{11cb6}'), ('\u{11d31}', '\u{11d36}'),
('\u{11d3a}', '\u{11d3a}'), ('\u{11d3c}', '\u{11d3d}'),
('\u{11d3f}', '\u{11d45}'), ('\u{11d47}', '\u{11d47}'),
('\u{11d90}', '\u{11d91}'), ('\u{11d95}', '\u{11d95}'),
('\u{11d97}', '\u{11d97}'), ('\u{11ef3}', '\u{11ef4}'),
('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'),
('\u{16f8f}', '\u{16f92}'), ('\u{1bc9d}', '\u{1bc9e}'),
('\u{1d165}', '\u{1d165}'), ('\u{1d167}', '\u{1d169}'),
('\u{1d16e}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'),
('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'),
('\u{1d242}', '\u{1d244}'), ('\u{1da00}', '\u{1da36}'),
('\u{1da3b}', '\u{1da6c}'), ('\u{1da75}', '\u{1da75}'),
('\u{1da84}', '\u{1da84}'), ('\u{1da9b}', '\u{1da9f}'),
('\u{1daa1}', '\u{1daaf}'), ('\u{1e000}', '\u{1e006}'),
('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'),
('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'),
('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('🏻', '🏿'),
('\u{e0020}', '\u{e007f}'), ('\u{e0100}', '\u{e01ef}'),
];
/// Inclusive `(start, end)` ranges for the `Grapheme_Cluster_Break` value
/// `L`, in ascending order.
///
/// The end points are written as escapes on purpose: U+115F is an invisible
/// filler character, and a literal copy of it is easily lost or replaced
/// when the file passes through tools that rewrite text.
pub const L: &'static [(char, char)] = &[
  ('\u{1100}', '\u{115f}'), ('\u{a960}', '\u{a97c}'),
];
/// Inclusive `(start, end)` ranges for the `Grapheme_Cluster_Break` value
/// `LF`: only the line feed character.
pub const LF: &[(char, char)] = &[('\n', '\n')];
pub const LV: &'static [(char, char)] = &[
('가', '가'), ('개', '개'), ('갸', '갸'), ('걔', '걔'),
('거', '거'), ('게', '게'), ('겨', '겨'), ('계', '계'),
('고', '고'), ('과', '과'), ('괘', '괘'), ('괴', '괴'),
('교', '교'), ('구', '구'), ('궈', '궈'), ('궤', '궤'),
('귀', '귀'), ('규', '규'), ('그', '그'), ('긔', '긔'),
('기', '기'), ('까', '까'), ('깨', '깨'), ('꺄', '꺄'),
('꺠', '꺠'), ('꺼', '꺼'), ('께', '께'), ('껴', '껴'),
('꼐', '꼐'), ('꼬', '꼬'), ('꽈', '꽈'), ('꽤', '꽤'),
('꾀', '꾀'), ('꾜', '꾜'), ('꾸', '꾸'), ('꿔', '꿔'),
('꿰', '꿰'), ('뀌', '뀌'), ('뀨', '뀨'), ('끄', '끄'),
('끠', '끠'), ('끼', '끼'), ('나', '나'), ('내', '내'),
('냐', '냐'), ('냬', '냬'), ('너', '너'), ('네', '네'),
('녀', '녀'), ('녜', '녜'), ('노', '노'), ('놔', '놔'),
('놰', '놰'), ('뇌', '뇌'), ('뇨', '뇨'), ('누', '누'),
('눠', '눠'), ('눼', '눼'), ('뉘', '뉘'), ('뉴', '뉴'),
('느', '느'), ('늬', '늬'), ('니', '니'), ('다', '다'),
('대', '대'), ('댜', '댜'), ('댸', '댸'), ('더', '더'),
('데', '데'), ('뎌', '뎌'), ('뎨', '뎨'), ('도', '도'),
('돠', '돠'), ('돼', '돼'), ('되', '되'), ('됴', '됴'),
('두', '두'), ('둬', '둬'), ('뒈', '뒈'), ('뒤', '뒤'),
('듀', '듀'), ('드', '드'), ('듸', '듸'), ('디', '디'),
('따', '따'), ('때', '때'), ('땨', '땨'), ('떄', '떄'),
('떠', '떠'), ('떼', '떼'), ('뗘', '뗘'), ('뗴', '뗴'),
('또', '또'), ('똬', '똬'), ('뙈', '뙈'), ('뙤', '뙤'),
('뚀', '뚀'), ('뚜', '뚜'), ('뚸', '뚸'), ('뛔', '뛔'),
('뛰', '뛰'), ('뜌', '뜌'), ('뜨', '뜨'), ('띄', '띄'),
('띠', '띠'), ('라', '라'), ('래', '래'), ('랴', '랴'),
('럐', '럐'), ('러', '러'), ('레', '레'), ('려', '려'),
('례', '례'), ('로', '로'), ('롸', '롸'), ('뢔', '뢔'),
('뢰', '뢰'), ('료', '료'), ('루', '루'), ('뤄', '뤄'),
('뤠', '뤠'), ('뤼', '뤼'), ('류', '류'), ('르', '르'),
('릐', '릐'), ('리', '리'), ('마', '마'), ('매', '매'),
('먀', '먀'), ('먜', '먜'), ('머', '머'), ('메', '메'),
('며', '며'), ('몌', '몌'), ('모', '모'), ('뫄', '뫄'),
('뫠', '뫠'), ('뫼', '뫼'), ('묘', '묘'), ('무', '무'),
('뭐', '뭐'), ('뭬', '뭬'), ('뮈', '뮈'), ('뮤', '뮤'),
('므', '므'), ('믜', '믜'), ('미', '미'), ('바', '바'),
('배', '배'), ('뱌', '뱌'), ('뱨', '뱨'), ('버', '버'),
('베', '베'), ('벼', '벼'), ('볘', '볘'), ('보', '보'),
('봐', '봐'), ('봬', '봬'), ('뵈', '뵈'), ('뵤', '뵤'),
('부', '부'), ('붜', '붜'), ('붸', '붸'), ('뷔', '뷔'),
('뷰', '뷰'), ('브', '브'), ('븨', '븨'), ('비', '비'),
('빠', '빠'), ('빼', '빼'), ('뺘', '뺘'), ('뺴', '뺴'),
('뻐', '뻐'), ('뻬', '뻬'), ('뼈', '뼈'), ('뼤', '뼤'),
('뽀', '뽀'), ('뽜', '뽜'), ('뽸', '뽸'), ('뾔', '뾔'),
('뾰', '뾰'), ('뿌', '뿌'), ('뿨', '뿨'), ('쀄', '쀄'),
('쀠', '쀠'), ('쀼', '쀼'), ('쁘', '쁘'), ('쁴', '쁴'),
('삐', '삐'), ('사', '사'), ('새', '새'), ('샤', '샤'),
('섀', '섀'), ('서', '서'), ('세', '세'), ('셔', '셔'),
('셰', '셰'), ('소', '소'), ('솨', '솨'), ('쇄', '쇄'),
('쇠', '쇠'), ('쇼', '쇼'), ('수', '수'), ('숴', '숴'),
('쉐', '쉐'), ('쉬', '쉬'), ('슈', '슈'), ('스', '스'),
('싀', '싀'), ('시', '시'), ('싸', '싸'), ('쌔', '쌔'),
('쌰', '쌰'), ('썌', '썌'), ('써', '써'), ('쎄', '쎄'),
('쎠', '쎠'), ('쎼', '쎼'), ('쏘', '쏘'), ('쏴', '쏴'),
('쐐', '쐐'), ('쐬', '쐬'), ('쑈', '쑈'), ('쑤', '쑤'),
('쒀', '쒀'), ('쒜', '쒜'), ('쒸', '쒸'), ('쓔', '쓔'),
('쓰', '쓰'), ('씌', '씌'), ('씨', '씨'), ('아', '아'),
('애', '애'), ('야', '야'), ('얘', '얘'), ('어', '어'),
('에', '에'), ('여', '여'), ('예', '예'), ('오', '오'),
('와', '와'), ('왜', '왜'), ('외', '외'), ('요', '요'),
('우', '우'), ('워', '워'), ('웨', '웨'), ('위', '위'),
('유', '유'), ('으', '으'), ('의', '의'), ('이', '이'),
('자', '자'), ('재', '재'), ('쟈', '쟈'), ('쟤', '쟤'),
('저', '저'), ('제', '제'), ('져', '져'), ('졔', '졔'),
('조', '조'), ('좌', '좌'), ('좨', '좨'), ('죄', '죄'),
('죠', '죠'), ('주', '주'), ('줘', '줘'), ('줴', '줴'),
('쥐', '쥐'), ('쥬', '쥬'), ('즈', '즈'), ('즤', '즤'),
('지', '지'), ('짜', '짜'), ('째', '째'), ('쨔', '쨔'),
('쨰', '쨰'), ('쩌', '쩌'), ('쩨', '쩨'), ('쪄', '쪄'),
('쪠', '쪠'), ('쪼', '쪼'), ('쫘', '쫘'), ('쫴', '쫴'),
('쬐', '쬐'), ('쬬', '쬬'), ('쭈', '쭈'), ('쭤', '쭤'),
('쮀', '쮀'), ('쮜', '쮜'), ('쮸', '쮸'), ('쯔', '쯔'),
('쯰', '쯰'), ('찌', '찌'), ('차', '차'), ('채', '채'),
('챠', '챠'), ('챼', '챼'), ('처', '처'), ('체', '체'),
('쳐', '쳐'), ('쳬', '쳬'), ('초', '초'), ('촤', '촤'),
('쵀', '쵀'), ('최', '최'), ('쵸', '쵸'), ('추', '추'),
('춰', '춰'), ('췌', '췌'), ('취', '취'), ('츄', '츄'),
('츠', '츠'), ('츼', '츼'), ('치', '치'), ('카', '카'),
('캐', '캐'), ('캬', '캬'), ('컈', '컈'), ('커', '커'),
('케', '케'), ('켜', '켜'), ('켸', '켸'), ('코', '코'),
('콰', '콰'), ('쾌', '쾌'), ('쾨', '쾨'), ('쿄', '쿄'),
('쿠', '쿠'), ('쿼', '쿼'), ('퀘', '퀘'), ('퀴', '퀴'),
('큐', '큐'), ('크', '크'), ('킈', '킈'), ('키', '키'),
('타', '타'), ('태', '태'), ('탸', '탸'), ('턔', '턔'),
('터', '터'), ('테', '테'), ('텨', '텨'), ('톄', '톄'),
('토', '토'), ('톼', '톼'), ('퇘', '퇘'), ('퇴', '퇴'),
('툐', '툐'), ('투', '투'), ('퉈', '퉈'), ('퉤', '퉤'),
('튀', '튀'), ('튜', '튜'), ('트', '트'), ('틔', '틔'),
('티', '티'), ('파', '파'), ('패', '패'), ('퍄', '퍄'),
('퍠', '퍠'), ('퍼', '퍼'), ('페', '페'), ('펴', '펴'),
('폐', '폐'), ('포', '포'), ('퐈', '퐈'), ('퐤', '퐤'),
('푀', '푀'), ('표', '표'), ('푸', '푸'), ('풔', '풔'),
('풰', '풰'), ('퓌', '퓌'), ('퓨', '퓨'), ('프', '프'),
('픠', '픠'), ('피', '피'), ('하', '하'), ('해', '해'),
('햐', '햐'), ('햬', '햬'), ('허', '허'), ('헤', '헤'),
('혀', '혀'), ('혜', '혜'), ('호', '호'), ('화', '화'),
('홰', '홰'), ('회', '회'), ('효', '효'), ('후', '후'),
('훠', '훠'), ('훼', '훼'), ('휘', '휘'), ('휴', '휴'),
('흐', '흐'), ('희', '희'), ('히', '히'),
];
pub const LVT: &'static [(char, char)] = &[
('각', '갛'), ('객', '갷'), ('갹', '걓'), ('걕', '걯'),
('걱', '겋'), ('겍', '겧'), ('격', '곃'), ('곅', '곟'),
('곡', '곻'), ('곽', '괗'), ('괙', '괳'), ('괵', '굏'),
('굑', '굫'), ('국', '궇'), ('궉', '궣'), ('궥', '궿'),
('귁', '귛'), ('귝', '귷'), ('극', '긓'), ('긕', '긯'),
('긱', '깋'), ('깍', '깧'), ('깩', '꺃'), ('꺅', '꺟'),
('꺡', '꺻'), ('꺽', '껗'), ('껙', '껳'), ('껵', '꼏'),
('꼑', '꼫'), ('꼭', '꽇'), ('꽉', '꽣'), ('꽥', '꽿'),
('꾁', '꾛'), ('꾝', '꾷'), ('꾹', '꿓'), ('꿕', '꿯'),
('꿱', '뀋'), ('뀍', '뀧'), ('뀩', '끃'), ('끅', '끟'),
('끡', '끻'), ('끽', '낗'), ('낙', '낳'), ('낵', '냏'),
('냑', '냫'), ('냭', '넇'), ('넉', '넣'), ('넥', '넿'),
('녁', '녛'), ('녝', '녷'), ('녹', '놓'), ('놕', '놯'),
('놱', '뇋'), ('뇍', '뇧'), ('뇩', '눃'), ('눅', '눟'),
('눡', '눻'), ('눽', '뉗'), ('뉙', '뉳'), ('뉵', '늏'),
('늑', '늫'), ('늭', '닇'), ('닉', '닣'), ('닥', '닿'),
('댁', '댛'), ('댝', '댷'), ('댹', '덓'), ('덕', '덯'),
('덱', '뎋'), ('뎍', '뎧'), ('뎩', '돃'), ('독', '돟'),
('돡', '돻'), ('돽', '됗'), ('됙', '됳'), ('됵', '둏'),
('둑', '둫'), ('둭', '뒇'), ('뒉', '뒣'), ('뒥', '뒿'),
('듁', '듛'), ('득', '듷'), ('듹', '딓'), ('딕', '딯'),
('딱', '땋'), ('땍', '땧'), ('땩', '떃'), ('떅', '떟'),
('떡', '떻'), ('떽', '뗗'), ('뗙', '뗳'), ('뗵', '똏'),
('똑', '똫'), ('똭', '뙇'), ('뙉', '뙣'), ('뙥', '뙿'),
('뚁', '뚛'), ('뚝', '뚷'), ('뚹', '뛓'), ('뛕', '뛯'),
('뛱', '뜋'), ('뜍', '뜧'), ('뜩', '띃'), ('띅', '띟'),
('띡', '띻'), ('락', '랗'), ('랙', '랳'), ('략', '럏'),
('럑', '럫'), ('럭', '렇'), ('렉', '렣'), ('력', '렿'),
('롁', '롛'), ('록', '롷'), ('롹', '뢓'), ('뢕', '뢯'),
('뢱', '룋'), ('룍', '룧'), ('룩', '뤃'), ('뤅', '뤟'),
('뤡', '뤻'), ('뤽', '륗'), ('륙', '륳'), ('륵', '릏'),
('릑', '릫'), ('릭', '맇'), ('막', '맣'), ('맥', '맿'),
('먁', '먛'), ('먝', '먷'), ('먹', '멓'), ('멕', '멯'),
('멱', '몋'), ('몍', '몧'), ('목', '뫃'), ('뫅', '뫟'),
('뫡', '뫻'), ('뫽', '묗'), ('묙', '묳'), ('묵', '뭏'),
('뭑', '뭫'), ('뭭', '뮇'), ('뮉', '뮣'), ('뮥', '뮿'),
('믁', '믛'), ('믝', '믷'), ('믹', '밓'), ('박', '밯'),
('백', '뱋'), ('뱍', '뱧'), ('뱩', '벃'), ('벅', '벟'),
('벡', '벻'), ('벽', '볗'), ('볙', '볳'), ('복', '봏'),
('봑', '봫'), ('봭', '뵇'), ('뵉', '뵣'), ('뵥', '뵿'),
('북', '붛'), ('붝', '붷'), ('붹', '뷓'), ('뷕', '뷯'),
('뷱', '븋'), ('븍', '븧'), ('븩', '빃'), ('빅', '빟'),
('빡', '빻'), ('빽', '뺗'), ('뺙', '뺳'), ('뺵', '뻏'),
('뻑', '뻫'), ('뻭', '뼇'), ('뼉', '뼣'), ('뼥', '뼿'),
('뽁', '뽛'), ('뽝', '뽷'), ('뽹', '뾓'), ('뾕', '뾯'),
('뾱', '뿋'), ('뿍', '뿧'), ('뿩', '쀃'), ('쀅', '쀟'),
('쀡', '쀻'), ('쀽', '쁗'), ('쁙', '쁳'), ('쁵', '삏'),
('삑', '삫'), ('삭', '샇'), ('색', '샣'), ('샥', '샿'),
('섁', '섛'), ('석', '섷'), ('섹', '셓'), ('셕', '셯'),
('셱', '솋'), ('속', '솧'), ('솩', '쇃'), ('쇅', '쇟'),
('쇡', '쇻'), ('쇽', '숗'), ('숙', '숳'), ('숵', '쉏'),
('쉑', '쉫'), ('쉭', '슇'), ('슉', '슣'), ('슥', '슿'),
('싁', '싛'), ('식', '싷'), ('싹', '쌓'), ('쌕', '쌯'),
('쌱', '썋'), ('썍', '썧'), ('썩', '쎃'), ('쎅', '쎟'),
('쎡', '쎻'), ('쎽', '쏗'), ('쏙', '쏳'), ('쏵', '쐏'),
('쐑', '쐫'), ('쐭', '쑇'), ('쑉', '쑣'), ('쑥', '쑿'),
('쒁', '쒛'), ('쒝', '쒷'), ('쒹', '쓓'), ('쓕', '쓯'),
('쓱', '씋'), ('씍', '씧'), ('씩', '앃'), ('악', '앟'),
('액', '앻'), ('약', '얗'), ('얙', '얳'), ('억', '엏'),
('엑', '엫'), ('역', '옇'), ('옉', '옣'), ('옥', '옿'),
('왁', '왛'), ('왝', '왷'), ('왹', '욓'), ('욕', '욯'),
('욱', '웋'), ('웍', '웧'), ('웩', '윃'), ('윅', '윟'),
('육', '윻'), ('윽', '읗'), ('읙', '읳'), ('익', '잏'),
('작', '잫'), ('잭', '쟇'), ('쟉', '쟣'), ('쟥', '쟿'),
('적', '젛'), ('젝', '젷'), ('젹', '졓'), ('졕', '졯'),
('족', '좋'), ('좍', '좧'), ('좩', '죃'), ('죅', '죟'),
('죡', '죻'), ('죽', '줗'), ('줙', '줳'), ('줵', '쥏'),
('쥑', '쥫'), ('쥭', '즇'), ('즉', '즣'), ('즥', '즿'),
('직', '짛'), ('짝', '짷'), ('짹', '쨓'), ('쨕', '쨯'),
('쨱', '쩋'), ('쩍', '쩧'), ('쩩', '쪃'), ('쪅', '쪟'),
('쪡', '쪻'), ('쪽', '쫗'), ('쫙', '쫳'), ('쫵', '쬏'),
('쬑', '쬫'), ('쬭', '쭇'), ('쭉', '쭣'), ('쭥', '쭿'),
('쮁', '쮛'), ('쮝', '쮷'), ('쮹', '쯓'), ('쯕', '쯯'),
('쯱', '찋'), ('찍', '찧'), ('착', '챃'), ('책', '챟'),
('챡', '챻'), ('챽', '첗'), ('척', '첳'), ('첵', '쳏'),
('쳑', '쳫'), ('쳭', '촇'), ('촉', '촣'), ('촥', '촿'),
('쵁', '쵛'), ('쵝', '쵷'), ('쵹', '춓'), ('축', '춯'),
('춱', '췋'), ('췍', '췧'), ('췩', '츃'), ('츅', '츟'),
('측', '츻'), ('츽', '칗'), ('칙', '칳'), ('칵', '캏'),
('캑', '캫'), ('캭', '컇'), ('컉', '컣'), ('컥', '컿'),
('켁', '켛'), ('켝', '켷'), ('켹', '콓'), ('콕', '콯'),
('콱', '쾋'), ('쾍', '쾧'), ('쾩', '쿃'), ('쿅', '쿟'),
('쿡', '쿻'), ('쿽', '퀗'), ('퀙', '퀳'), ('퀵', '큏'),
('큑', '큫'), ('큭', '킇'), ('킉', '킣'), ('킥', '킿'),
('탁', '탛'), ('택', '탷'), ('탹', '턓'), ('턕', '턯'),
('턱', '텋'), ('텍', '텧'), ('텩', '톃'), ('톅', '톟'),
('톡', '톻'), ('톽', '퇗'), ('퇙', '퇳'), ('퇵', '툏'),
('툑', '툫'), ('툭', '퉇'), ('퉉', '퉣'), ('퉥', '퉿'),
('튁', '튛'), ('튝', '튷'), ('특', '틓'), ('틕', '틯'),
('틱', '팋'), ('팍', '팧'), ('팩', '퍃'), ('퍅', '퍟'),
('퍡', '퍻'), ('퍽', '펗'), ('펙', '펳'), ('펵', '폏'),
('폑', '폫'), ('폭', '퐇'), ('퐉', '퐣'), ('퐥', '퐿'),
('푁', '푛'), ('푝', '푷'), ('푹', '풓'), ('풕', '풯'),
('풱', '퓋'), ('퓍', '퓧'), ('퓩', '픃'), ('픅', '픟'),
('픡', '픻'), ('픽', '핗'), ('학', '핳'), ('핵', '햏'),
('햑', '햫'), ('햭', '헇'), ('헉', '헣'), ('헥', '헿'),
('혁', '혛'), ('혝', '혷'), ('혹', '홓'), ('확', '홯'),
('홱', '횋'), ('획', '횧'), ('횩', '훃'), ('훅', '훟'),
('훡', '훻'), ('훽', '휗'), ('휙', '휳'), ('휵', '흏'),
('흑', '흫'), ('흭', '힇'), ('힉', '힣'),
];
/// Inclusive `(start, end)` ranges for the `Grapheme_Cluster_Break` value
/// `Prepend`, in ascending order. Every end point is spelled as a `\u{..}`
/// escape so the table reads the same regardless of font support.
pub const PREPEND: &[(char, char)] = &[
  ('\u{600}', '\u{605}'),
  ('\u{6dd}', '\u{6dd}'),
  ('\u{70f}', '\u{70f}'),
  ('\u{8e2}', '\u{8e2}'),
  ('\u{d4e}', '\u{d4e}'),
  ('\u{110bd}', '\u{110bd}'),
  ('\u{110cd}', '\u{110cd}'),
  ('\u{111c2}', '\u{111c3}'),
  ('\u{11a3a}', '\u{11a3a}'),
  ('\u{11a86}', '\u{11a89}'),
  ('\u{11d46}', '\u{11d46}'),
];
/// Inclusive `(start, end)` ranges for the `Grapheme_Cluster_Break` value
/// `Regional_Indicator`: a single contiguous range.
pub const REGIONAL_INDICATOR: &[(char, char)] = &[('\u{1f1e6}', '\u{1f1ff}')];
/// Inclusive `(start, end)` ranges for the `Grapheme_Cluster_Break` value
/// `SpacingMark`, in ascending order.
///
/// A handful of end points (U+0903, U+0A83, U+0B02..U+0B03, U+0C82..U+0C83,
/// U+0D02..U+0D03, U+0D82..U+0D83) are written as `\u{..}` escapes rather
/// than literal characters: their literal forms do not survive every text
/// pipeline, and a missing end point makes the table invalid.
pub const SPACINGMARK: &'static [(char, char)] = &[
  ('\u{903}', '\u{903}'), ('ऻ', 'ऻ'), ('ा', 'ी'), ('ॉ', 'ौ'),
  ('ॎ', 'ॏ'), ('ং', 'ঃ'), ('ি', 'ী'), ('ে', 'ৈ'),
  ('ো', 'ৌ'), ('ਃ', 'ਃ'), ('ਾ', 'ੀ'), ('\u{a83}', '\u{a83}'),
  ('ા', 'ી'), ('ૉ', 'ૉ'), ('ો', 'ૌ'), ('\u{b02}', '\u{b03}'),
  ('ୀ', 'ୀ'), ('େ', 'ୈ'), ('ୋ', 'ୌ'), ('ி', 'ி'),
  ('ு', 'ூ'), ('ெ', 'ை'), ('ொ', 'ௌ'), ('ఁ', 'ః'),
  ('ు', 'ౄ'), ('\u{c82}', '\u{c83}'), ('ಾ', 'ಾ'), ('ೀ', 'ು'),
  ('ೃ', 'ೄ'), ('ೇ', 'ೈ'), ('ೊ', 'ೋ'), ('\u{d02}', '\u{d03}'),
  ('ി', 'ീ'), ('െ', 'ൈ'), ('ൊ', 'ൌ'), ('\u{d82}', '\u{d83}'),
  ('ැ', 'ෑ'), ('ෘ', 'ෞ'), ('ෲ', 'ෳ'), ('ำ', 'ำ'),
  ('ຳ', 'ຳ'), ('༾', '༿'), ('ཿ', 'ཿ'), ('ေ', 'ေ'),
  ('ျ', 'ြ'), ('ၖ', 'ၗ'), ('ႄ', 'ႄ'), ('ា', 'ា'),
  ('ើ', 'ៅ'), ('ះ', 'ៈ'), ('ᤣ', 'ᤦ'), ('ᤩ', 'ᤫ'),
  ('ᤰ', 'ᤱ'), ('ᤳ', 'ᤸ'), ('ᨙ', 'ᨚ'), ('ᩕ', 'ᩕ'),
  ('ᩗ', 'ᩗ'), ('ᩭ', 'ᩲ'), ('ᬄ', 'ᬄ'), ('ᬵ', 'ᬵ'),
  ('ᬻ', 'ᬻ'), ('ᬽ', 'ᭁ'), ('ᭃ', '᭄'), ('ᮂ', 'ᮂ'),
  ('ᮡ', 'ᮡ'), ('ᮦ', 'ᮧ'), ('᮪', '᮪'), ('ᯧ', 'ᯧ'),
  ('ᯪ', 'ᯬ'), ('ᯮ', 'ᯮ'), ('᯲', '᯳'), ('ᰤ', 'ᰫ'),
  ('ᰴ', 'ᰵ'), ('᳡', '᳡'), ('ᳲ', 'ᳳ'), ('᳷', '᳷'),
  ('ꠣ', 'ꠤ'), ('ꠧ', 'ꠧ'), ('ꢀ', 'ꢁ'), ('ꢴ', 'ꣃ'),
  ('ꥒ', '꥓'), ('ꦃ', 'ꦃ'), ('ꦴ', 'ꦵ'), ('ꦺ', 'ꦻ'),
  ('ꦽ', '꧀'), ('ꨯ', 'ꨰ'), ('ꨳ', 'ꨴ'), ('ꩍ', 'ꩍ'),
  ('ꫫ', 'ꫫ'), ('ꫮ', 'ꫯ'), ('ꫵ', 'ꫵ'), ('ꯣ', 'ꯤ'),
  ('ꯦ', 'ꯧ'), ('ꯩ', 'ꯪ'), ('꯬', '꯬'), ('𑀀', '𑀀'),
  ('𑀂', '𑀂'), ('𑂂', '𑂂'), ('𑂰', '𑂲'), ('𑂷', '𑂸'),
  ('𑄬', '𑄬'), ('𑅅', '𑅆'), ('𑆂', '𑆂'), ('𑆳', '𑆵'),
  ('𑆿', '𑇀'), ('𑈬', '𑈮'), ('𑈲', '𑈳'), ('𑈵', '𑈵'),
  ('𑋠', '𑋢'), ('𑌂', '𑌃'), ('𑌿', '𑌿'), ('𑍁', '𑍄'),
  ('𑍇', '𑍈'), ('𑍋', '𑍍'), ('𑍢', '𑍣'), ('𑐵', '𑐷'),
  ('𑑀', '𑑁'), ('𑑅', '𑑅'), ('𑒱', '𑒲'), ('𑒹', '𑒹'),
  ('𑒻', '𑒼'), ('𑒾', '𑒾'), ('𑓁', '𑓁'), ('𑖰', '𑖱'),
  ('𑖸', '𑖻'), ('𑖾', '𑖾'), ('𑘰', '𑘲'), ('𑘻', '𑘼'),
  ('𑘾', '𑘾'), ('𑚬', '𑚬'), ('𑚮', '𑚯'), ('𑚶', '𑚶'),
  ('𑜠', '𑜡'), ('𑜦', '𑜦'), ('𑠬', '𑠮'), ('𑠸', '𑠸'),
  ('𑨹', '𑨹'), ('𑩗', '𑩘'), ('𑪗', '𑪗'), ('𑰯', '𑰯'),
  ('𑰾', '𑰾'), ('𑲩', '𑲩'), ('𑲱', '𑲱'), ('𑲴', '𑲴'),
  ('𑶊', '𑶎'), ('𑶓', '𑶔'), ('𑶖', '𑶖'), ('𑻵', '𑻶'),
  ('𖽑', '𖽾'), ('𝅦', '𝅦'), ('𝅭', '𝅭'),
];
/// Inclusive `(start, end)` ranges for the `Grapheme_Cluster_Break` value
/// `T`, in ascending order.
pub const T: &[(char, char)] = &[
  ('\u{11a8}', '\u{11ff}'),
  ('\u{d7cb}', '\u{d7fb}'),
];
/// Inclusive `(start, end)` ranges for the `Grapheme_Cluster_Break` value
/// `V`, in ascending order.
///
/// The end points are written as escapes on purpose: U+1160 is an invisible
/// filler character, and a literal copy of it is easily lost or replaced
/// when the file passes through tools that rewrite text.
pub const V: &'static [(char, char)] = &[
  ('\u{1160}', '\u{11a7}'), ('\u{d7b0}', '\u{d7c6}'),
];
/// Inclusive `(start, end)` ranges for the `Grapheme_Cluster_Break` value
/// `ZWJ`: only U+200D.
pub const ZWJ: &[(char, char)] = &[('\u{200d}', '\u{200d}')];
+3
View File
@@ -1,9 +1,12 @@
pub mod age;
pub mod case_folding_simple;
pub mod general_category;
pub mod grapheme_cluster_break;
pub mod perl_word;
pub mod property_bool;
pub mod property_names;
pub mod property_values;
pub mod script_extension;
pub mod script;
pub mod sentence_break;
pub mod word_break;
File diff suppressed because it is too large Load Diff
@@ -1,6 +1,6 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate property-names tmp/ucd-11.0.0/
// ucd-generate property-names /home/andrew/tmp/ucd-11.0.0/
//
// ucd-generate is available on crates.io.
@@ -47,13 +47,17 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[
("di", "Default_Ignorable_Code_Point"), ("dia", "Diacritic"),
("diacritic", "Diacritic"), ("dm", "Decomposition_Mapping"),
("dt", "Decomposition_Type"), ("ea", "East_Asian_Width"),
("eastasianwidth", "East_Asian_Width"),
("eastasianwidth", "East_Asian_Width"), ("emoji", "Emoji"),
("emojicomponent", "Emoji_Component"), ("emojimodifier", "Emoji_Modifier"),
("emojimodifierbase", "Emoji_Modifier_Base"),
("emojipresentation", "Emoji_Presentation"),
("equideo", "Equivalent_Unified_Ideograph"),
("equivalentunifiedideograph", "Equivalent_Unified_Ideograph"),
("expandsonnfc", "Expands_On_NFC"), ("expandsonnfd", "Expands_On_NFD"),
("expandsonnfkc", "Expands_On_NFKC"), ("expandsonnfkd", "Expands_On_NFKD"),
("ext", "Extender"), ("extender", "Extender"),
("fcnfkc", "FC_NFKC_Closure"), ("fcnfkcclosure", "FC_NFKC_Closure"),
("ext", "Extender"), ("extendedpictographic", "Extended_Pictographic"),
("extender", "Extender"), ("fcnfkc", "FC_NFKC_Closure"),
("fcnfkcclosure", "FC_NFKC_Closure"),
("fullcompositionexclusion", "Full_Composition_Exclusion"),
("gc", "General_Category"), ("gcb", "Grapheme_Cluster_Break"),
("generalcategory", "General_Category"), ("graphemebase", "Grapheme_Base"),
@@ -1,6 +1,6 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate property-values tmp/ucd-11.0.0/ --include gc,script,scx,age
// ucd-generate property-values /home/andrew/tmp/ucd-11.0.0/ --include gc,script,scx,age,gcb,wb,sb
//
// ucd-generate is available on crates.io.
@@ -57,6 +57,17 @@ pub const PROPERTY_VALUES: &'static [(&'static str, &'static [(&'static str, &'s
("zl", "Line_Separator"), ("zp", "Paragraph_Separator"),
("zs", "Space_Separator"), ]),
("Grapheme_Cluster_Break", &[("cn", "Control"), ("control", "Control"),
("cr", "CR"), ("eb", "E_Base"), ("ebase", "E_Base"),
("ebasegaz", "E_Base_GAZ"), ("ebg", "E_Base_GAZ"), ("em", "E_Modifier"),
("emodifier", "E_Modifier"), ("ex", "Extend"), ("extend", "Extend"),
("gaz", "Glue_After_Zwj"), ("glueafterzwj", "Glue_After_Zwj"), ("l", "L"),
("lf", "LF"), ("lv", "LV"), ("lvt", "LVT"), ("other", "Other"),
("pp", "Prepend"), ("prepend", "Prepend"),
("regionalindicator", "Regional_Indicator"), ("ri", "Regional_Indicator"),
("sm", "SpacingMark"), ("spacingmark", "SpacingMark"), ("t", "T"),
("v", "V"), ("xx", "Other"), ("zwj", "ZWJ"), ]),
("Script", &[("adlam", "Adlam"), ("adlm", "Adlam"),
("aghb", "Caucasian_Albanian"), ("ahom", "Ahom"),
("anatolianhieroglyphs", "Anatolian_Hieroglyphs"), ("arab", "Arabic"),
@@ -286,4 +297,30 @@ pub const PROPERTY_VALUES: &'static [(&'static str, &'static [(&'static str, &'s
("yiii", "Yi"), ("zanabazarsquare", "Zanabazar_Square"),
("zanb", "Zanabazar_Square"), ("zinh", "Inherited"), ("zyyy", "Common"),
("zzzz", "Unknown"), ]),
("Sentence_Break", &[("at", "ATerm"), ("aterm", "ATerm"), ("cl", "Close"),
("close", "Close"), ("cr", "CR"), ("ex", "Extend"), ("extend", "Extend"),
("fo", "Format"), ("format", "Format"), ("le", "OLetter"), ("lf", "LF"),
("lo", "Lower"), ("lower", "Lower"), ("nu", "Numeric"),
("numeric", "Numeric"), ("oletter", "OLetter"), ("other", "Other"),
("sc", "SContinue"), ("scontinue", "SContinue"), ("se", "Sep"),
("sep", "Sep"), ("sp", "Sp"), ("st", "STerm"), ("sterm", "STerm"),
("up", "Upper"), ("upper", "Upper"), ("xx", "Other"), ]),
("Word_Break", &[("aletter", "ALetter"), ("cr", "CR"),
("doublequote", "Double_Quote"), ("dq", "Double_Quote"), ("eb", "E_Base"),
("ebase", "E_Base"), ("ebasegaz", "E_Base_GAZ"), ("ebg", "E_Base_GAZ"),
("em", "E_Modifier"), ("emodifier", "E_Modifier"), ("ex", "ExtendNumLet"),
("extend", "Extend"), ("extendnumlet", "ExtendNumLet"), ("fo", "Format"),
("format", "Format"), ("gaz", "Glue_After_Zwj"),
("glueafterzwj", "Glue_After_Zwj"), ("hebrewletter", "Hebrew_Letter"),
("hl", "Hebrew_Letter"), ("ka", "Katakana"), ("katakana", "Katakana"),
("le", "ALetter"), ("lf", "LF"), ("mb", "MidNumLet"),
("midletter", "MidLetter"), ("midnum", "MidNum"),
("midnumlet", "MidNumLet"), ("ml", "MidLetter"), ("mn", "MidNum"),
("newline", "Newline"), ("nl", "Newline"), ("nu", "Numeric"),
("numeric", "Numeric"), ("other", "Other"),
("regionalindicator", "Regional_Indicator"), ("ri", "Regional_Indicator"),
("singlequote", "Single_Quote"), ("sq", "Single_Quote"),
("wsegspace", "WSegSpace"), ("xx", "Other"), ("zwj", "ZWJ"), ]),
];
@@ -0,0 +1,642 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate sentence-break /home/andrew/tmp/ucd-11.0.0/ --chars
//
// ucd-generate is available on crates.io.
pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
("ATerm", ATERM), ("CR", CR), ("Close", CLOSE), ("Extend", EXTEND),
("Format", FORMAT), ("LF", LF), ("Lower", LOWER), ("Numeric", NUMERIC),
("OLetter", OLETTER), ("SContinue", SCONTINUE), ("STerm", STERM),
("Sep", SEP), ("Sp", SP), ("Upper", UPPER),
];
pub const ATERM: &'static [(char, char)] = &[
('.', '.'), ('', ''), ('﹒', '﹒'), ('', ''),
];
pub const CR: &'static [(char, char)] = &[
('\r', '\r'),
];
pub const CLOSE: &'static [(char, char)] = &[
('\"', '\"'), ('\'', ')'), ('[', '['), (']', ']'), ('{', '{'), ('}', '}'),
('«', '«'), ('»', '»'), ('༺', '༽'), ('᚛', '᚜'), ('', '‟'),
('', ''), ('⁅', '⁆'), ('⁽', '⁾'), ('₍', '₎'),
('⌈', '⌋'), ('〈', '〉'), ('❛', '❠'), ('', ''),
('⟅', '⟆'), ('⟦', '⟯'), ('⦃', '⦘'), ('⧘', '⧛'),
('⧼', '⧽'), ('⸀', '⸍'), ('⸜', '⸝'), ('⸠', '⸩'),
('⹂', '⹂'), ('〈', '】'), ('', '〛'), ('〝', '〟'),
('', '﴿'), ('︗', '︘'), ('︵', '﹄'), ('﹇', '﹈'),
('﹙', '﹞'), ('', ''), ('', ''), ('', ''),
('', ''), ('', ''), ('⦅', '⦆'), ('「', '」'),
('🙶', '🙸'),
];
pub const EXTEND: &'static [(char, char)] = &[
('\u{300}', '\u{36f}'), ('\u{483}', '\u{489}'), ('\u{591}', '\u{5bd}'),
('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'), ('\u{5c4}', '\u{5c5}'),
('\u{5c7}', '\u{5c7}'), ('\u{610}', '\u{61a}'), ('\u{64b}', '\u{65f}'),
('\u{670}', '\u{670}'), ('\u{6d6}', '\u{6dc}'), ('\u{6df}', '\u{6e4}'),
('\u{6e7}', '\u{6e8}'), ('\u{6ea}', '\u{6ed}'), ('\u{711}', '\u{711}'),
('\u{730}', '\u{74a}'), ('\u{7a6}', '\u{7b0}'), ('\u{7eb}', '\u{7f3}'),
('\u{7fd}', '\u{7fd}'), ('\u{816}', '\u{819}'), ('\u{81b}', '\u{823}'),
('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'),
('\u{8d3}', '\u{8e1}'), ('\u{8e3}', ''), ('\u{93a}', '\u{93c}'),
('ा', 'ॏ'), ('\u{951}', '\u{957}'), ('\u{962}', '\u{963}'),
('\u{981}', 'ঃ'), ('\u{9bc}', '\u{9bc}'), ('\u{9be}', '\u{9c4}'),
('ে', 'ৈ'), ('ো', '\u{9cd}'), ('\u{9d7}', '\u{9d7}'),
('\u{9e2}', '\u{9e3}'), ('\u{9fe}', '\u{9fe}'), ('\u{a01}', 'ਃ'),
('\u{a3c}', '\u{a3c}'), ('ਾ', '\u{a42}'), ('\u{a47}', '\u{a48}'),
('\u{a4b}', '\u{a4d}'), ('\u{a51}', '\u{a51}'), ('\u{a70}', '\u{a71}'),
('\u{a75}', '\u{a75}'), ('\u{a81}', ''), ('\u{abc}', '\u{abc}'),
('ા', '\u{ac5}'), ('\u{ac7}', 'ૉ'), ('ો', '\u{acd}'),
('\u{ae2}', '\u{ae3}'), ('\u{afa}', '\u{aff}'), ('\u{b01}', ''),
('\u{b3c}', '\u{b3c}'), ('\u{b3e}', '\u{b44}'), ('େ', 'ୈ'),
('ୋ', '\u{b4d}'), ('\u{b56}', '\u{b57}'), ('\u{b62}', '\u{b63}'),
('\u{b82}', '\u{b82}'), ('\u{bbe}', 'ூ'), ('ெ', 'ை'),
('ொ', '\u{bcd}'), ('\u{bd7}', '\u{bd7}'), ('\u{c00}', '\u{c04}'),
('\u{c3e}', 'ౄ'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'),
('\u{c55}', '\u{c56}'), ('\u{c62}', '\u{c63}'), ('\u{c81}', 'ಃ'),
('\u{cbc}', '\u{cbc}'), ('ಾ', 'ೄ'), ('\u{cc6}', 'ೈ'),
('ೊ', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'),
('\u{d00}', 'ഃ'), ('\u{d3b}', '\u{d3c}'), ('\u{d3e}', '\u{d44}'),
('െ', 'ൈ'), ('ൊ', '\u{d4d}'), ('\u{d57}', '\u{d57}'),
('\u{d62}', '\u{d63}'), ('', 'ඃ'), ('\u{dca}', '\u{dca}'),
('\u{dcf}', '\u{dd4}'), ('\u{dd6}', '\u{dd6}'), ('ෘ', '\u{ddf}'),
('ෲ', 'ෳ'), ('\u{e31}', '\u{e31}'), ('\u{e34}', '\u{e3a}'),
('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{eb9}'),
('\u{ebb}', '\u{ebc}'), ('\u{ec8}', '\u{ecd}'), ('\u{f18}', '\u{f19}'),
('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), ('\u{f39}', '\u{f39}'),
('༾', '༿'), ('\u{f71}', '\u{f84}'), ('\u{f86}', '\u{f87}'),
('\u{f8d}', '\u{f97}'), ('\u{f99}', '\u{fbc}'), ('\u{fc6}', '\u{fc6}'),
('ါ', '\u{103e}'), ('ၖ', '\u{1059}'), ('\u{105e}', '\u{1060}'),
('ၢ', 'ၤ'), ('ၧ', 'ၭ'), ('\u{1071}', '\u{1074}'),
('\u{1082}', '\u{108d}'), ('ႏ', 'ႏ'), ('ႚ', '\u{109d}'),
('\u{135d}', '\u{135f}'), ('\u{1712}', '\u{1714}'),
('\u{1732}', '\u{1734}'), ('\u{1752}', '\u{1753}'),
('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17d3}'),
('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'),
('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'), ('\u{1920}', 'ᤫ'),
('ᤰ', '\u{193b}'), ('\u{1a17}', '\u{1a1b}'), ('ᩕ', '\u{1a5e}'),
('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'),
('\u{1ab0}', '\u{1abe}'), ('\u{1b00}', 'ᬄ'), ('\u{1b34}', '᭄'),
('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', 'ᮂ'), ('ᮡ', '\u{1bad}'),
('\u{1be6}', '᯳'), ('ᰤ', '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'),
('\u{1cd4}', '\u{1ce8}'), ('\u{1ced}', '\u{1ced}'), ('ᳲ', '\u{1cf4}'),
('᳷', '\u{1cf9}'), ('\u{1dc0}', '\u{1df9}'), ('\u{1dfb}', '\u{1dff}'),
('\u{200c}', '\u{200d}'), ('\u{20d0}', '\u{20f0}'),
('\u{2cef}', '\u{2cf1}'), ('\u{2d7f}', '\u{2d7f}'),
('\u{2de0}', '\u{2dff}'), ('\u{302a}', '\u{302f}'),
('\u{3099}', '\u{309a}'), ('\u{a66f}', '\u{a672}'),
('\u{a674}', '\u{a67d}'), ('\u{a69e}', '\u{a69f}'),
('\u{a6f0}', '\u{a6f1}'), ('\u{a802}', '\u{a802}'),
('\u{a806}', '\u{a806}'), ('\u{a80b}', '\u{a80b}'), ('ꠣ', 'ꠧ'),
('ꢀ', 'ꢁ'), ('ꢴ', '\u{a8c5}'), ('\u{a8e0}', '\u{a8f1}'),
('\u{a8ff}', '\u{a8ff}'), ('\u{a926}', '\u{a92d}'), ('\u{a947}', '꥓'),
('\u{a980}', 'ꦃ'), ('\u{a9b3}', '꧀'), ('\u{a9e5}', '\u{a9e5}'),
('\u{aa29}', '\u{aa36}'), ('\u{aa43}', '\u{aa43}'), ('\u{aa4c}', 'ꩍ'),
('ꩻ', 'ꩽ'), ('\u{aab0}', '\u{aab0}'), ('\u{aab2}', '\u{aab4}'),
('\u{aab7}', '\u{aab8}'), ('\u{aabe}', '\u{aabf}'),
('\u{aac1}', '\u{aac1}'), ('ꫫ', 'ꫯ'), ('ꫵ', '\u{aaf6}'),
('ꯣ', 'ꯪ'), ('꯬', '\u{abed}'), ('\u{fb1e}', '\u{fb1e}'),
('\u{fe00}', '\u{fe0f}'), ('\u{fe20}', '\u{fe2f}'),
('\u{ff9e}', '\u{ff9f}'), ('\u{101fd}', '\u{101fd}'),
('\u{102e0}', '\u{102e0}'), ('\u{10376}', '\u{1037a}'),
('\u{10a01}', '\u{10a03}'), ('\u{10a05}', '\u{10a06}'),
('\u{10a0c}', '\u{10a0f}'), ('\u{10a38}', '\u{10a3a}'),
('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'),
('\u{10d24}', '\u{10d27}'), ('\u{10f46}', '\u{10f50}'), ('𑀀', '𑀂'),
('\u{11038}', '\u{11046}'), ('\u{1107f}', '𑂂'), ('𑂰', '\u{110ba}'),
('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{11134}'), ('𑅅', '𑅆'),
('\u{11173}', '\u{11173}'), ('\u{11180}', '𑆂'), ('𑆳', '𑇀'),
('\u{111c9}', '\u{111cc}'), ('𑈬', '\u{11237}'),
('\u{1123e}', '\u{1123e}'), ('\u{112df}', '\u{112ea}'),
('\u{11300}', '𑌃'), ('\u{1133b}', '\u{1133c}'), ('\u{1133e}', '𑍄'),
('𑍇', '𑍈'), ('𑍋', '𑍍'), ('\u{11357}', '\u{11357}'),
('𑍢', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'),
('𑐵', '\u{11446}'), ('\u{1145e}', '\u{1145e}'),
('\u{114b0}', '\u{114c3}'), ('\u{115af}', '\u{115b5}'),
('𑖸', '\u{115c0}'), ('\u{115dc}', '\u{115dd}'), ('𑘰', '\u{11640}'),
('\u{116ab}', '\u{116b7}'), ('\u{1171d}', '\u{1172b}'),
('𑠬', '\u{1183a}'), ('\u{11a01}', '\u{11a0a}'), ('\u{11a33}', '𑨹'),
('\u{11a3b}', '\u{11a3e}'), ('\u{11a47}', '\u{11a47}'),
('\u{11a51}', '\u{11a5b}'), ('\u{11a8a}', '\u{11a99}'),
('𑰯', '\u{11c36}'), ('\u{11c38}', '\u{11c3f}'),
('\u{11c92}', '\u{11ca7}'), ('𑲩', '\u{11cb6}'),
('\u{11d31}', '\u{11d36}'), ('\u{11d3a}', '\u{11d3a}'),
('\u{11d3c}', '\u{11d3d}'), ('\u{11d3f}', '\u{11d45}'),
('\u{11d47}', '\u{11d47}'), ('𑶊', '𑶎'), ('\u{11d90}', '\u{11d91}'),
('𑶓', '\u{11d97}'), ('\u{11ef3}', '𑻶'), ('\u{16af0}', '\u{16af4}'),
('\u{16b30}', '\u{16b36}'), ('𖽑', '𖽾'), ('\u{16f8f}', '\u{16f92}'),
('\u{1bc9d}', '\u{1bc9e}'), ('\u{1d165}', '\u{1d169}'),
('𝅭', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'),
('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'),
('\u{1d242}', '\u{1d244}'), ('\u{1da00}', '\u{1da36}'),
('\u{1da3b}', '\u{1da6c}'), ('\u{1da75}', '\u{1da75}'),
('\u{1da84}', '\u{1da84}'), ('\u{1da9b}', '\u{1da9f}'),
('\u{1daa1}', '\u{1daaf}'), ('\u{1e000}', '\u{1e006}'),
('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'),
('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'),
('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'),
('\u{e0020}', '\u{e007f}'), ('\u{e0100}', '\u{e01ef}'),
];
pub const FORMAT: &'static [(char, char)] = &[
('\u{ad}', '\u{ad}'), ('\u{600}', '\u{605}'), ('\u{61c}', '\u{61c}'),
('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), ('\u{8e2}', '\u{8e2}'),
('\u{180e}', '\u{180e}'), ('\u{200b}', '\u{200b}'),
('\u{200e}', '\u{200f}'), ('\u{202a}', '\u{202e}'),
('\u{2060}', '\u{2064}'), ('\u{2066}', '\u{206f}'),
('\u{feff}', '\u{feff}'), ('\u{fff9}', '\u{fffb}'),
('\u{110bd}', '\u{110bd}'), ('\u{110cd}', '\u{110cd}'),
('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'),
('\u{e0001}', '\u{e0001}'),
];
pub const LF: &'static [(char, char)] = &[
('\n', '\n'),
];
pub const LOWER: &'static [(char, char)] = &[
('a', 'z'), ('ª', 'ª'), ('µ', 'µ'), ('º', 'º'), ('ß', 'ö'),
('ø', 'ÿ'), ('ā', 'ā'), ('ă', 'ă'), ('ą', 'ą'), ('ć', 'ć'),
('ĉ', 'ĉ'), ('ċ', 'ċ'), ('č', 'č'), ('ď', 'ď'), ('đ', 'đ'),
('ē', 'ē'), ('ĕ', 'ĕ'), ('ė', 'ė'), ('ę', 'ę'), ('ě', 'ě'),
('ĝ', 'ĝ'), ('ğ', 'ğ'), ('ġ', 'ġ'), ('ģ', 'ģ'), ('ĥ', 'ĥ'),
('ħ', 'ħ'), ('ĩ', 'ĩ'), ('ī', 'ī'), ('ĭ', 'ĭ'), ('į', 'į'),
('ı', 'ı'), ('ij', 'ij'), ('ĵ', 'ĵ'), ('ķ', 'ĸ'), ('ĺ', 'ĺ'),
('ļ', 'ļ'), ('ľ', 'ľ'), ('ŀ', 'ŀ'), ('ł', 'ł'), ('ń', 'ń'),
('ņ', 'ņ'), ('ň', 'ʼn'), ('ŋ', 'ŋ'), ('ō', 'ō'), ('ŏ', 'ŏ'),
('ő', 'ő'), ('œ', 'œ'), ('ŕ', 'ŕ'), ('ŗ', 'ŗ'), ('ř', 'ř'),
('ś', 'ś'), ('ŝ', 'ŝ'), ('ş', 'ş'), ('š', 'š'), ('ţ', 'ţ'),
('ť', 'ť'), ('ŧ', 'ŧ'), ('ũ', 'ũ'), ('ū', 'ū'), ('ŭ', 'ŭ'),
('ů', 'ů'), ('ű', 'ű'), ('ų', 'ų'), ('ŵ', 'ŵ'), ('ŷ', 'ŷ'),
('ź', 'ź'), ('ż', 'ż'), ('ž', 'ƀ'), ('ƃ', 'ƃ'), ('ƅ', 'ƅ'),
('ƈ', 'ƈ'), ('ƌ', 'ƍ'), ('ƒ', 'ƒ'), ('ƕ', 'ƕ'), ('ƙ', 'ƛ'),
('ƞ', 'ƞ'), ('ơ', 'ơ'), ('ƣ', 'ƣ'), ('ƥ', 'ƥ'), ('ƨ', 'ƨ'),
('ƪ', 'ƫ'), ('ƭ', 'ƭ'), ('ư', 'ư'), ('ƴ', 'ƴ'), ('ƶ', 'ƶ'),
('ƹ', 'ƺ'), ('ƽ', 'ƿ'), ('dž', 'dž'), ('lj', 'lj'), ('nj', 'nj'),
('ǎ', 'ǎ'), ('ǐ', 'ǐ'), ('ǒ', 'ǒ'), ('ǔ', 'ǔ'), ('ǖ', 'ǖ'),
('ǘ', 'ǘ'), ('ǚ', 'ǚ'), ('ǜ', 'ǝ'), ('ǟ', 'ǟ'), ('ǡ', 'ǡ'),
('ǣ', 'ǣ'), ('ǥ', 'ǥ'), ('ǧ', 'ǧ'), ('ǩ', 'ǩ'), ('ǫ', 'ǫ'),
('ǭ', 'ǭ'), ('ǯ', 'ǰ'), ('dz', 'dz'), ('ǵ', 'ǵ'), ('ǹ', 'ǹ'),
('ǻ', 'ǻ'), ('ǽ', 'ǽ'), ('ǿ', 'ǿ'), ('ȁ', 'ȁ'), ('ȃ', 'ȃ'),
('ȅ', 'ȅ'), ('ȇ', 'ȇ'), ('ȉ', 'ȉ'), ('ȋ', 'ȋ'), ('ȍ', 'ȍ'),
('ȏ', 'ȏ'), ('ȑ', 'ȑ'), ('ȓ', 'ȓ'), ('ȕ', 'ȕ'), ('ȗ', 'ȗ'),
('ș', 'ș'), ('ț', 'ț'), ('ȝ', 'ȝ'), ('ȟ', 'ȟ'), ('ȡ', 'ȡ'),
('ȣ', 'ȣ'), ('ȥ', 'ȥ'), ('ȧ', 'ȧ'), ('ȩ', 'ȩ'), ('ȫ', 'ȫ'),
('ȭ', 'ȭ'), ('ȯ', 'ȯ'), ('ȱ', 'ȱ'), ('ȳ', 'ȹ'), ('ȼ', 'ȼ'),
('ȿ', 'ɀ'), ('ɂ', 'ɂ'), ('ɇ', 'ɇ'), ('ɉ', 'ɉ'), ('ɋ', 'ɋ'),
('ɍ', 'ɍ'), ('ɏ', 'ʓ'), ('ʕ', 'ʸ'), ('ˀ', 'ˁ'), ('ˠ', 'ˤ'),
('ͱ', 'ͱ'), ('ͳ', 'ͳ'), ('ͷ', 'ͷ'), ('ͺ', 'ͽ'), ('ΐ', 'ΐ'),
('ά', 'ώ'), ('ϐ', 'ϑ'), ('ϕ', 'ϗ'), ('ϙ', 'ϙ'), ('ϛ', 'ϛ'),
('ϝ', 'ϝ'), ('ϟ', 'ϟ'), ('ϡ', 'ϡ'), ('ϣ', 'ϣ'), ('ϥ', 'ϥ'),
('ϧ', 'ϧ'), ('ϩ', 'ϩ'), ('ϫ', 'ϫ'), ('ϭ', 'ϭ'), ('ϯ', 'ϳ'),
('ϵ', 'ϵ'), ('ϸ', 'ϸ'), ('ϻ', 'ϼ'), ('а', 'џ'), ('ѡ', 'ѡ'),
('ѣ', 'ѣ'), ('ѥ', 'ѥ'), ('ѧ', 'ѧ'), ('ѩ', 'ѩ'), ('ѫ', 'ѫ'),
('ѭ', 'ѭ'), ('ѯ', 'ѯ'), ('ѱ', 'ѱ'), ('ѳ', 'ѳ'), ('ѵ', 'ѵ'),
('ѷ', 'ѷ'), ('ѹ', 'ѹ'), ('ѻ', 'ѻ'), ('ѽ', 'ѽ'), ('ѿ', 'ѿ'),
('ҁ', 'ҁ'), ('ҋ', 'ҋ'), ('ҍ', 'ҍ'), ('ҏ', 'ҏ'), ('ґ', 'ґ'),
('ғ', 'ғ'), ('ҕ', 'ҕ'), ('җ', 'җ'), ('ҙ', 'ҙ'), ('қ', 'қ'),
('ҝ', 'ҝ'), ('ҟ', 'ҟ'), ('ҡ', 'ҡ'), ('ң', 'ң'), ('ҥ', 'ҥ'),
('ҧ', 'ҧ'), ('ҩ', 'ҩ'), ('ҫ', 'ҫ'), ('ҭ', 'ҭ'), ('ү', 'ү'),
('ұ', 'ұ'), ('ҳ', 'ҳ'), ('ҵ', 'ҵ'), ('ҷ', 'ҷ'), ('ҹ', 'ҹ'),
('һ', 'һ'), ('ҽ', 'ҽ'), ('ҿ', 'ҿ'), ('ӂ', 'ӂ'), ('ӄ', 'ӄ'),
('ӆ', 'ӆ'), ('ӈ', 'ӈ'), ('ӊ', 'ӊ'), ('ӌ', 'ӌ'), ('ӎ', 'ӏ'),
('ӑ', 'ӑ'), ('ӓ', 'ӓ'), ('ӕ', 'ӕ'), ('ӗ', 'ӗ'), ('ә', 'ә'),
('ӛ', 'ӛ'), ('ӝ', 'ӝ'), ('ӟ', 'ӟ'), ('ӡ', 'ӡ'), ('ӣ', 'ӣ'),
('ӥ', 'ӥ'), ('ӧ', 'ӧ'), ('ө', 'ө'), ('ӫ', 'ӫ'), ('ӭ', 'ӭ'),
('ӯ', 'ӯ'), ('ӱ', 'ӱ'), ('ӳ', 'ӳ'), ('ӵ', 'ӵ'), ('ӷ', 'ӷ'),
('ӹ', 'ӹ'), ('ӻ', 'ӻ'), ('ӽ', 'ӽ'), ('ӿ', 'ӿ'), ('ԁ', 'ԁ'),
('ԃ', 'ԃ'), ('ԅ', 'ԅ'), ('ԇ', 'ԇ'), ('ԉ', 'ԉ'), ('ԋ', 'ԋ'),
('ԍ', 'ԍ'), ('ԏ', 'ԏ'), ('ԑ', 'ԑ'), ('ԓ', 'ԓ'), ('ԕ', 'ԕ'),
('ԗ', 'ԗ'), ('ԙ', 'ԙ'), ('ԛ', 'ԛ'), ('ԝ', 'ԝ'), ('ԟ', 'ԟ'),
('ԡ', 'ԡ'), ('ԣ', 'ԣ'), ('ԥ', 'ԥ'), ('ԧ', 'ԧ'), ('ԩ', 'ԩ'),
('ԫ', 'ԫ'), ('ԭ', 'ԭ'), ('ԯ', 'ԯ'), ('ՠ', 'ֈ'), ('ა', 'ჺ'),
('ჽ', ''), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᴀ', 'ᶿ'),
('ḁ', 'ḁ'), ('ḃ', 'ḃ'), ('ḅ', 'ḅ'), ('ḇ', 'ḇ'),
('ḉ', 'ḉ'), ('ḋ', 'ḋ'), ('ḍ', 'ḍ'), ('ḏ', 'ḏ'),
('ḑ', 'ḑ'), ('ḓ', 'ḓ'), ('ḕ', 'ḕ'), ('ḗ', 'ḗ'),
('ḙ', 'ḙ'), ('ḛ', 'ḛ'), ('ḝ', 'ḝ'), ('ḟ', 'ḟ'),
('ḡ', 'ḡ'), ('ḣ', 'ḣ'), ('ḥ', 'ḥ'), ('ḧ', 'ḧ'),
('ḩ', 'ḩ'), ('ḫ', 'ḫ'), ('ḭ', 'ḭ'), ('ḯ', 'ḯ'),
('ḱ', 'ḱ'), ('ḳ', 'ḳ'), ('ḵ', 'ḵ'), ('ḷ', 'ḷ'),
('ḹ', 'ḹ'), ('ḻ', 'ḻ'), ('ḽ', 'ḽ'), ('ḿ', 'ḿ'),
('ṁ', 'ṁ'), ('ṃ', 'ṃ'), ('ṅ', 'ṅ'), ('ṇ', 'ṇ'),
('ṉ', 'ṉ'), ('ṋ', 'ṋ'), ('ṍ', 'ṍ'), ('ṏ', 'ṏ'),
('ṑ', 'ṑ'), ('ṓ', 'ṓ'), ('ṕ', 'ṕ'), ('ṗ', 'ṗ'),
('ṙ', 'ṙ'), ('ṛ', 'ṛ'), ('ṝ', 'ṝ'), ('ṟ', 'ṟ'),
('ṡ', 'ṡ'), ('ṣ', 'ṣ'), ('ṥ', 'ṥ'), ('ṧ', 'ṧ'),
('ṩ', 'ṩ'), ('ṫ', 'ṫ'), ('ṭ', 'ṭ'), ('ṯ', 'ṯ'),
('ṱ', 'ṱ'), ('ṳ', 'ṳ'), ('ṵ', 'ṵ'), ('ṷ', 'ṷ'),
('ṹ', 'ṹ'), ('ṻ', 'ṻ'), ('ṽ', 'ṽ'), ('ṿ', 'ṿ'),
('ẁ', 'ẁ'), ('ẃ', 'ẃ'), ('ẅ', 'ẅ'), ('ẇ', 'ẇ'),
('ẉ', 'ẉ'), ('ẋ', 'ẋ'), ('ẍ', 'ẍ'), ('ẏ', 'ẏ'),
('ẑ', 'ẑ'), ('ẓ', 'ẓ'), ('ẕ', ''), ('ẟ', 'ẟ'),
('ạ', 'ạ'), ('ả', 'ả'), ('ấ', 'ấ'), ('ầ', 'ầ'),
('ẩ', 'ẩ'), ('ẫ', 'ẫ'), ('ậ', 'ậ'), ('ắ', 'ắ'),
('ằ', 'ằ'), ('ẳ', 'ẳ'), ('ẵ', 'ẵ'), ('ặ', 'ặ'),
('ẹ', 'ẹ'), ('ẻ', 'ẻ'), ('ẽ', 'ẽ'), ('ế', 'ế'),
('ề', 'ề'), ('ể', 'ể'), ('ễ', 'ễ'), ('ệ', 'ệ'),
('ỉ', 'ỉ'), ('ị', 'ị'), ('ọ', 'ọ'), ('ỏ', 'ỏ'),
('ố', 'ố'), ('ồ', 'ồ'), ('ổ', 'ổ'), ('ỗ', 'ỗ'),
('ộ', 'ộ'), ('ớ', 'ớ'), ('ờ', 'ờ'), ('ở', 'ở'),
('ỡ', 'ỡ'), ('ợ', 'ợ'), ('ụ', 'ụ'), ('ủ', 'ủ'),
('ứ', 'ứ'), ('ừ', 'ừ'), ('ử', 'ử'), ('ữ', 'ữ'),
('ự', 'ự'), ('ỳ', 'ỳ'), ('ỵ', 'ỵ'), ('ỷ', 'ỷ'),
('ỹ', 'ỹ'), ('ỻ', 'ỻ'), ('ỽ', 'ỽ'), ('ỿ', 'ἇ'),
('ἐ', 'ἕ'), ('ἠ', 'ἧ'), ('ἰ', 'ἷ'), ('ὀ', 'ὅ'),
('ὐ', 'ὗ'), ('ὠ', 'ὧ'), ('ὰ', 'ώ'), ('ᾀ', 'ᾇ'),
('ᾐ', 'ᾗ'), ('ᾠ', 'ᾧ'), ('ᾰ', 'ᾴ'), ('ᾶ', 'ᾷ'),
('', ''), ('ῂ', 'ῄ'), ('ῆ', 'ῇ'), ('ῐ', 'ΐ'),
('ῖ', 'ῗ'), ('ῠ', 'ῧ'), ('ῲ', 'ῴ'), ('ῶ', 'ῷ'),
('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('', ''),
('', 'ℏ'), ('', ''), ('', ''), ('', ''),
('', ''), ('ℼ', ''), ('', ''), ('ⅎ', 'ⅎ'),
('', 'ⅿ'), ('ↄ', 'ↄ'), ('ⓐ', 'ⓩ'), ('ⰰ', 'ⱞ'),
('ⱡ', 'ⱡ'), ('ⱥ', 'ⱦ'), ('ⱨ', 'ⱨ'), ('ⱪ', 'ⱪ'),
('ⱬ', 'ⱬ'), ('ⱱ', 'ⱱ'), ('ⱳ', 'ⱴ'), ('ⱶ', 'ⱽ'),
('ⲁ', 'ⲁ'), ('ⲃ', 'ⲃ'), ('', ''), ('ⲇ', 'ⲇ'),
('ⲉ', 'ⲉ'), ('ⲋ', 'ⲋ'), ('ⲍ', 'ⲍ'), ('ⲏ', 'ⲏ'),
('ⲑ', 'ⲑ'), ('ⲓ', 'ⲓ'), ('ⲕ', 'ⲕ'), ('ⲗ', 'ⲗ'),
('ⲙ', 'ⲙ'), ('ⲛ', 'ⲛ'), ('ⲝ', 'ⲝ'), ('', ''),
('ⲡ', 'ⲡ'), ('', ''), ('', ''), ('ⲧ', 'ⲧ'),
('ⲩ', 'ⲩ'), ('ⲫ', 'ⲫ'), ('ⲭ', 'ⲭ'), ('ⲯ', 'ⲯ'),
('ⲱ', 'ⲱ'), ('ⲳ', 'ⲳ'), ('ⲵ', 'ⲵ'), ('ⲷ', 'ⲷ'),
('ⲹ', 'ⲹ'), ('ⲻ', 'ⲻ'), ('ⲽ', 'ⲽ'), ('ⲿ', 'ⲿ'),
('ⳁ', 'ⳁ'), ('ⳃ', 'ⳃ'), ('ⳅ', 'ⳅ'), ('ⳇ', 'ⳇ'),
('ⳉ', 'ⳉ'), ('ⳋ', 'ⳋ'), ('ⳍ', 'ⳍ'), ('ⳏ', 'ⳏ'),
('ⳑ', 'ⳑ'), ('ⳓ', 'ⳓ'), ('ⳕ', 'ⳕ'), ('ⳗ', 'ⳗ'),
('ⳙ', 'ⳙ'), ('ⳛ', 'ⳛ'), ('ⳝ', 'ⳝ'), ('ⳟ', 'ⳟ'),
('ⳡ', 'ⳡ'), ('ⳣ', 'ⳤ'), ('ⳬ', 'ⳬ'), ('ⳮ', 'ⳮ'),
('ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
('ꙁ', 'ꙁ'), ('ꙃ', 'ꙃ'), ('ꙅ', 'ꙅ'), ('', ''),
('ꙉ', 'ꙉ'), ('ꙋ', 'ꙋ'), ('ꙍ', 'ꙍ'), ('ꙏ', 'ꙏ'),
('ꙑ', 'ꙑ'), ('ꙓ', 'ꙓ'), ('ꙕ', 'ꙕ'), ('ꙗ', 'ꙗ'),
('ꙙ', 'ꙙ'), ('ꙛ', 'ꙛ'), ('ꙝ', 'ꙝ'), ('ꙟ', 'ꙟ'),
('ꙡ', 'ꙡ'), ('ꙣ', 'ꙣ'), ('ꙥ', 'ꙥ'), ('ꙧ', 'ꙧ'),
('ꙩ', 'ꙩ'), ('ꙫ', 'ꙫ'), ('ꙭ', 'ꙭ'), ('ꚁ', 'ꚁ'),
('ꚃ', 'ꚃ'), ('ꚅ', 'ꚅ'), ('ꚇ', 'ꚇ'), ('ꚉ', 'ꚉ'),
('ꚋ', 'ꚋ'), ('ꚍ', 'ꚍ'), ('ꚏ', 'ꚏ'), ('ꚑ', 'ꚑ'),
('ꚓ', 'ꚓ'), ('ꚕ', 'ꚕ'), ('ꚗ', 'ꚗ'), ('ꚙ', 'ꚙ'),
('ꚛ', 'ꚝ'), ('ꜣ', 'ꜣ'), ('ꜥ', 'ꜥ'), ('ꜧ', 'ꜧ'),
('ꜩ', 'ꜩ'), ('ꜫ', 'ꜫ'), ('ꜭ', 'ꜭ'), ('ꜯ', ''),
('ꜳ', 'ꜳ'), ('ꜵ', 'ꜵ'), ('ꜷ', 'ꜷ'), ('ꜹ', 'ꜹ'),
('ꜻ', 'ꜻ'), ('ꜽ', 'ꜽ'), ('ꜿ', 'ꜿ'), ('ꝁ', 'ꝁ'),
('ꝃ', 'ꝃ'), ('ꝅ', 'ꝅ'), ('ꝇ', 'ꝇ'), ('ꝉ', 'ꝉ'),
('ꝋ', 'ꝋ'), ('ꝍ', 'ꝍ'), ('ꝏ', 'ꝏ'), ('ꝑ', 'ꝑ'),
('ꝓ', 'ꝓ'), ('ꝕ', 'ꝕ'), ('ꝗ', 'ꝗ'), ('ꝙ', 'ꝙ'),
('ꝛ', 'ꝛ'), ('ꝝ', 'ꝝ'), ('ꝟ', 'ꝟ'), ('ꝡ', 'ꝡ'),
('ꝣ', 'ꝣ'), ('ꝥ', 'ꝥ'), ('ꝧ', 'ꝧ'), ('ꝩ', 'ꝩ'),
('ꝫ', 'ꝫ'), ('ꝭ', 'ꝭ'), ('ꝯ', ''), ('ꝺ', 'ꝺ'),
('ꝼ', 'ꝼ'), ('ꝿ', 'ꝿ'), ('ꞁ', 'ꞁ'), ('ꞃ', 'ꞃ'),
('ꞅ', 'ꞅ'), ('ꞇ', 'ꞇ'), ('', ''), ('ꞎ', 'ꞎ'),
('ꞑ', 'ꞑ'), ('ꞓ', 'ꞕ'), ('ꞗ', 'ꞗ'), ('', ''),
('ꞛ', 'ꞛ'), ('ꞝ', 'ꞝ'), ('', ''), ('ꞡ', 'ꞡ'),
('ꞣ', 'ꞣ'), ('ꞥ', 'ꞥ'), ('ꞧ', 'ꞧ'), ('ꞩ', 'ꞩ'),
('ꞯ', 'ꞯ'), ('ꞵ', 'ꞵ'), ('ꞷ', 'ꞷ'), ('ꞹ', 'ꞹ'),
('ꟸ', 'ꟺ'), ('ꬰ', ''), ('ꭜ', 'ꭥ'), ('ꭰ', 'ꮿ'),
('ff', 'st'), ('ﬓ', 'ﬗ'), ('', ''), ('𐐨', '𐑏'),
('𐓘', '𐓻'), ('𐳀', '𐳲'), ('𑣀', '𑣟'), ('𖹠', '𖹿'),
('𝐚', '𝐳'), ('𝑎', '𝑔'), ('𝑖', '𝑧'), ('𝒂', '𝒛'),
('𝒶', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝓏'),
('𝓪', '𝔃'), ('𝔞', '𝔷'), ('𝕒', '𝕫'), ('𝖆', '𝖟'),
('𝖺', '𝗓'), ('𝗮', '𝘇'), ('𝘢', '𝘻'), ('𝙖', '𝙯'),
('𝚊', '𝚥'), ('𝛂', '𝛚'), ('𝛜', '𝛡'), ('𝛼', '𝜔'),
('𝜖', '𝜛'), ('𝜶', '𝝎'), ('𝝐', '𝝕'), ('𝝰', '𝞈'),
('𝞊', '𝞏'), ('𝞪', '𝟂'), ('𝟄', '𝟉'), ('𝟋', '𝟋'),
('𞤢', '𞥃'),
];
pub const NUMERIC: &'static [(char, char)] = &[
('0', '9'), ('٠', '٩'), ('٫', '٬'), ('۰', '۹'), ('߀', '߉'),
('', '९'), ('', '৯'), ('', '੯'), ('', '૯'),
('', '୯'), ('', '௯'), ('', '౯'), ('', '೯'),
('', '൯'), ('෦', '෯'), ('', '๙'), ('', '໙'),
('༠', '༩'), ('', '၉'), ('႐', '႙'), ('០', '៩'),
('᠐', '᠙'), ('᥆', '᥏'), ('᧐', '᧙'), ('᪀', '᪉'),
('᪐', '᪙'), ('᭐', '᭙'), ('᮰', '᮹'), ('᱀', '᱉'),
('᱐', '᱙'), ('꘠', '꘩'), ('꣐', '꣙'), ('꤀', '꤉'),
('꧐', '꧙'), ('꧰', '꧹'), ('꩐', '꩙'), ('꯰', '꯹'),
('𐒠', '𐒩'), ('𐴰', '𐴹'), ('𑁦', '𑁯'), ('𑃰', '𑃹'),
('𑄶', '𑄿'), ('𑇐', '𑇙'), ('𑋰', '𑋹'), ('𑑐', '𑑙'),
('𑓐', '𑓙'), ('𑙐', '𑙙'), ('𑛀', '𑛉'), ('𑜰', '𑜹'),
('𑣠', '𑣩'), ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𑶠', '𑶩'),
('𖩠', '𖩩'), ('𖭐', '𖭙'), ('𝟎', '𝟿'), ('𞥐', '𞥙'),
];
pub const OLETTER: &'static [(char, char)] = &[
('ƻ', 'ƻ'), ('ǀ', 'ǃ'), ('ʔ', 'ʔ'), ('ʹ', 'ʿ'), ('ˆ', 'ˑ'),
('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('ʹ', 'ʹ'), ('ՙ', 'ՙ'), ('א', 'ת'),
('ׯ', '׳'), ('ؠ', 'ي'), ('ٮ', 'ٯ'), ('ٱ', 'ۓ'), ('ە', 'ە'),
('ۥ', 'ۦ'), ('ۮ', 'ۯ'), ('ۺ', 'ۼ'), ('ۿ', 'ۿ'), ('ܐ', 'ܐ'),
('ܒ', 'ܯ'), ('ݍ', 'ޥ'), ('ޱ', 'ޱ'), ('ߊ', 'ߪ'), ('ߴ', 'ߵ'),
('ߺ', 'ߺ'), ('ࠀ', 'ࠕ'), ('ࠚ', 'ࠚ'), ('ࠤ', 'ࠤ'),
('ࠨ', 'ࠨ'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'), ('ࢠ', 'ࢴ'),
('ࢶ', 'ࢽ'), ('ऄ', 'ह'), ('ऽ', 'ऽ'), ('ॐ', 'ॐ'),
('क़', 'ॡ'), ('ॱ', 'ঀ'), ('অ', 'ঌ'), ('এ', 'ঐ'),
('ও', 'ন'), ('প', 'র'), ('ল', 'ল'), ('শ', 'হ'),
('ঽ', 'ঽ'), ('ৎ', 'ৎ'), ('ড়', 'ঢ়'), ('য়', 'ৡ'),
('ৰ', 'ৱ'), ('ৼ', 'ৼ'), ('ਅ', 'ਊ'), ('ਏ', 'ਐ'),
('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'), ('ਵ', 'ਸ਼'),
('ਸ', 'ਹ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'), ('ੲ', 'ੴ'),
('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'), ('પ', 'ર'),
('લ', 'ળ'), ('વ', 'હ'), ('ઽ', 'ઽ'), ('ૐ', 'ૐ'),
('ૠ', 'ૡ'), ('ૹ', 'ૹ'), ('ଅ', 'ଌ'), ('ଏ', 'ଐ'),
('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'), ('ଵ', 'ହ'),
('ଽ', 'ଽ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୡ'), ('ୱ', 'ୱ'),
('ஃ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'), ('ஒ', 'க'),
('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'), ('ண', 'த'),
('ந', 'ப'), ('ம', 'ஹ'), ('ௐ', 'ௐ'), ('అ', 'ఌ'),
('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'), ('ఽ', 'ఽ'),
('ౘ', 'ౚ'), ('ౠ', 'ౡ'), ('ಀ', 'ಀ'), ('ಅ', 'ಌ'),
('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'),
('ಽ', 'ಽ'), ('ೞ', 'ೞ'), ('ೠ', 'ೡ'), ('ೱ', 'ೲ'),
('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'), ('ഽ', 'ഽ'),
('ൎ', 'ൎ'), ('ൔ', 'ൖ'), ('ൟ', 'ൡ'), ('ൺ', 'ൿ'),
('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'), ('ල', 'ල'),
('ව', 'ෆ'), ('ก', 'ะ'), ('า', 'ำ'), ('เ', 'ๆ'),
('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'),
('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'),
('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ະ'),
('າ', 'ຳ'), ('ຽ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'),
('ໜ', 'ໟ'), ('ༀ', 'ༀ'), ('ཀ', 'ཇ'), ('ཉ', 'ཬ'),
('ྈ', 'ྌ'), ('က', 'ဪ'), ('ဿ', 'ဿ'), ('ၐ', 'ၕ'),
('ၚ', 'ၝ'), ('ၡ', 'ၡ'), ('ၥ', 'ၦ'), ('ၮ', 'ၰ'),
('ၵ', 'ႁ'), ('ႎ', 'ႎ'), ('ჼ', 'ჼ'), ('ᄀ', 'ቈ'),
('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'),
('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'), ('ኲ', 'ኵ'),
('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'), ('ወ', 'ዖ'),
('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'), ('ᎀ', 'ᎏ'),
('ᐁ', 'ᙬ'), ('ᙯ', 'ᙿ'), ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'),
('ᛮ', 'ᛸ'), ('ᜀ', 'ᜌ'), ('ᜎ', 'ᜑ'), ('ᜠ', 'ᜱ'),
('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('ក', 'ឳ'),
('ៗ', 'ៗ'), ('ៜ', 'ៜ'), ('ᠠ', 'ᡸ'), ('ᢀ', 'ᢄ'),
('ᢇ', 'ᢨ'), ('ᢪ', 'ᢪ'), ('ᢰ', 'ᣵ'), ('ᤀ', 'ᤞ'),
('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'),
('ᨀ', 'ᨖ'), ('ᨠ', 'ᩔ'), ('ᪧ', 'ᪧ'), ('ᬅ', 'ᬳ'),
('ᭅ', 'ᭋ'), ('ᮃ', 'ᮠ'), ('ᮮ', 'ᮯ'), ('ᮺ', 'ᯥ'),
('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'), ('ᳩ', 'ᳬ'),
('ᳮ', 'ᳱ'), ('ᳵ', 'ᳶ'), ('ℵ', 'ℸ'), ('ↀ', 'ↂ'),
('ↅ', 'ↈ'), ('ⴰ', 'ⵧ'), ('ⵯ', 'ⵯ'), ('ⶀ', 'ⶖ'),
('ⶠ', 'ⶦ'), ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'),
('ⷀ', 'ⷆ'), ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'),
('ⸯ', 'ⸯ'), ('々', ''), ('〡', '〩'), ('〱', '〵'),
('〸', '〼'), ('ぁ', 'ゖ'), ('ゝ', 'ゟ'), ('ァ', 'ヺ'),
('ー', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆺ'),
('ㇰ', 'ㇿ'), ('㐀', '䶵'), ('一', '鿯'), ('ꀀ', 'ꒌ'),
('', ''), ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘟ'), ('ꘪ', 'ꘫ'),
('ꙮ', 'ꙮ'), ('ꙿ', 'ꙿ'), ('ꚠ', ''), ('ꜗ', 'ꜟ'),
('ꞈ', 'ꞈ'), ('ꞏ', 'ꞏ'), ('ꟷ', 'ꟷ'), ('ꟻ', 'ꠁ'),
('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), ('ꠌ', 'ꠢ'), ('ꡀ', 'ꡳ'),
('ꢂ', 'ꢳ'), ('ꣲ', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣾ'),
('ꤊ', 'ꤥ'), ('ꤰ', 'ꥆ'), ('ꥠ', 'ꥼ'), ('ꦄ', 'ꦲ'),
('ꧏ', 'ꧏ'), ('ꧠ', 'ꧤ'), ('ꧦ', 'ꧯ'), ('ꧺ', 'ꧾ'),
('ꨀ', 'ꨨ'), ('ꩀ', 'ꩂ'), ('ꩄ', 'ꩋ'), ('ꩠ', 'ꩶ'),
('ꩺ', 'ꩺ'), ('ꩾ', 'ꪯ'), ('ꪱ', 'ꪱ'), ('ꪵ', 'ꪶ'),
('ꪹ', 'ꪽ'), ('ꫀ', 'ꫀ'), ('ꫂ', 'ꫂ'), ('ꫛ', 'ꫝ'),
('ꫠ', 'ꫪ'), ('ꫲ', 'ꫴ'), ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'),
('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꯀ', 'ꯢ'),
('가', '힣'), ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'), ('豈', '舘'),
('並', '龎'), ('יִ', 'יִ'), ('ײַ', 'ﬨ'), ('שׁ', 'זּ'),
('טּ', 'לּ'), ('מּ', 'מּ'), ('נּ', 'סּ'), ('ףּ', 'פּ'),
('צּ', 'ﮱ'), ('ﯓ', 'ﴽ'), ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'),
('ﷰ', 'ﷻ'), ('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'), ('ヲ', 'ン'),
('', 'ᄒ'), ('ᅡ', 'ᅦ'), ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'),
('ᅳ', 'ᅵ'), ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'),
('𐀼', '𐀽'), ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'),
('𐅀', '𐅴'), ('𐊀', '𐊜'), ('𐊠', '𐋐'), ('𐌀', '𐌟'),
('𐌭', '𐍊'), ('𐍐', '𐍵'), ('𐎀', '𐎝'), ('𐎠', '𐏃'),
('𐏈', '𐏏'), ('𐏑', '𐏕'), ('𐑐', '𐒝'), ('𐔀', '𐔧'),
('𐔰', '𐕣'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'),
('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'),
('𐠼', '𐠼'), ('𐠿', '𐡕'), ('𐡠', '𐡶'), ('𐢀', '𐢞'),
('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐤀', '𐤕'), ('𐤠', '𐤹'),
('𐦀', '𐦷'), ('𐦾', '𐦿'), ('𐨀', '𐨀'), ('𐨐', '𐨓'),
('𐨕', '𐨗'), ('𐨙', '𐨵'), ('𐩠', '𐩼'), ('𐪀', '𐪜'),
('𐫀', '𐫇'), ('𐫉', '𐫤'), ('𐬀', '𐬵'), ('𐭀', '𐭕'),
('𐭠', '𐭲'), ('𐮀', '𐮑'), ('𐰀', '𐱈'), ('𐴀', '𐴣'),
('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), ('𑀃', '𑀷'),
('𑂃', '𑂯'), ('𑃐', '𑃨'), ('𑄃', '𑄦'), ('𑅄', '𑅄'),
('𑅐', '𑅲'), ('𑅶', '𑅶'), ('𑆃', '𑆲'), ('𑇁', '𑇄'),
('𑇚', '𑇚'), ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈫'),
('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'),
('𑊟', '𑊨'), ('𑊰', '𑋞'), ('𑌅', '𑌌'), ('𑌏', '𑌐'),
('𑌓', '𑌨'), ('𑌪', '𑌰'), ('𑌲', '𑌳'), ('𑌵', '𑌹'),
('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'), ('𑐀', '𑐴'),
('𑑇', '𑑊'), ('𑒀', '𑒯'), ('𑓄', '𑓅'), ('𑓇', '𑓇'),
('𑖀', '𑖮'), ('𑗘', '𑗛'), ('𑘀', '𑘯'), ('𑙄', '𑙄'),
('𑚀', '𑚪'), ('𑜀', '𑜚'), ('𑠀', '𑠫'), ('𑣿', '𑣿'),
('𑨀', '𑨀'), ('𑨋', '𑨲'), ('𑨺', '𑨺'), ('𑩐', '𑩐'),
('𑩜', '𑪃'), ('𑪆', '𑪉'), ('𑪝', '𑪝'), ('𑫀', '𑫸'),
('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), ('𑱲', '𑲏'),
('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '𑴰'), ('𑵆', '𑵆'),
('𑵠', '𑵥'), ('𑵧', '𑵨'), ('𑵪', '𑶉'), ('𑶘', '𑶘'),
('𑻠', '𑻲'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'),
('𓀀', '𓐮'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'),
('𖫐', '𖫭'), ('𖬀', '𖬯'), ('𖭀', '𖭃'), ('𖭣', '𖭷'),
('𖭽', '𖮏'), ('𖼀', '𖽄'), ('𖽐', '𖽐'), ('𖾓', '𖾟'),
('𖿠', '𖿡'), ('𗀀', '𘟱'), ('𘠀', '𘫲'), ('𛀀', '𛄞'),
('𛅰', '𛋻'), ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'),
('𛲐', '𛲙'), ('𞠀', '𞣄'), ('𞸀', '𞸃'), ('𞸅', '𞸟'),
('𞸡', '𞸢'), ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'),
('𞸴', '𞸷'), ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'),
('𞹇', '𞹇'), ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'),
('𞹑', '𞹒'), ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'),
('𞹛', '𞹛'), ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'),
('𞹤', '𞹤'), ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'),
('𞹹', '𞹼'), ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'),
('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('𠀀', '𪛖'),
('𪜀', '𫜴'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'),
('丽', '𪘀'),
];
pub const SCONTINUE: &'static [(char, char)] = &[
(',', '-'), (':', ':'), ('՝', '՝'), ('،', '؍'), ('߸', '߸'),
('᠂', '᠂'), ('᠈', '᠈'), ('', '—'), ('、', '、'),
('︐', '︑'), ('︓', '︓'), ('︱', '︲'), ('﹐', '﹑'),
('﹕', '﹕'), ('', ''), ('﹣', '﹣'), ('', ''),
('', ''), ('、', '、'),
];
pub const STERM: &'static [(char, char)] = &[
('!', '!'), ('?', '?'), ('։', '։'), ('؞', '؟'), ('۔', '۔'),
('܀', '܂'), ('߹', '߹'), ('࠷', '࠷'), ('࠹', '࠹'), ('࠽', '࠾'),
('।', '॥'), ('၊', '။'), ('።', '።'), ('፧', '፨'),
('', ''), ('', '᜶'), ('', ''), ('', ''),
('᥄', '᥅'), ('᪨', '᪫'), ('᭚', '᭛'), ('᭞', '᭟'),
('᰻', '᰼'), ('᱾', '᱿'), ('‼', '‽'), ('⁇', '⁉'),
('⸮', '⸮'), ('⸼', '⸼'), ('。', '。'), ('', ''),
('', '꘏'), ('꛳', '꛳'), ('꛷', '꛷'), ('꡶', '꡷'),
('꣎', '꣏'), ('꤯', '꤯'), ('꧈', '꧉'), ('꩝', '꩟'),
('꫰', '꫱'), ('꯫', '꯫'), ('﹖', '﹗'), ('', ''),
('', ''), ('。', '。'), ('𐩖', '𐩗'), ('𐽕', '𐽙'),
('𑁇', '𑁈'), ('𑂾', '𑃁'), ('𑅁', '𑅃'), ('𑇅', '𑇆'),
('𑇍', '𑇍'), ('𑇞', '𑇟'), ('𑈸', '𑈹'), ('𑈻', '𑈼'),
('𑊩', '𑊩'), ('𑑋', '𑑌'), ('𑗂', '𑗃'), ('𑗉', '𑗗'),
('𑙁', '𑙂'), ('𑜼', '𑜾'), ('𑩂', '𑩃'), ('𑪛', '𑪜'),
('𑱁', '𑱂'), ('𑻷', '𑻸'), ('𖩮', '𖩯'), ('𖫵', '𖫵'),
('𖬷', '𖬸'), ('𖭄', '𖭄'), ('𖺘', '𖺘'), ('𛲟', '𛲟'),
('𝪈', '𝪈'),
];
pub const SEP: &'static [(char, char)] = &[
('\u{85}', '\u{85}'), ('\u{2028}', '\u{2029}'),
];
pub const SP: &'static [(char, char)] = &[
('\t', '\t'), ('\u{b}', '\u{c}'), (' ', ' '), ('\u{a0}', '\u{a0}'),
('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
('\u{202f}', '\u{202f}'), ('\u{205f}', '\u{205f}'),
('\u{3000}', '\u{3000}'),
];
pub const UPPER: &'static [(char, char)] = &[
('A', 'Z'), ('À', 'Ö'), ('Ø', 'Þ'), ('Ā', 'Ā'), ('Ă', 'Ă'),
('Ą', 'Ą'), ('Ć', 'Ć'), ('Ĉ', 'Ĉ'), ('Ċ', 'Ċ'), ('Č', 'Č'),
('Ď', 'Ď'), ('Đ', 'Đ'), ('Ē', 'Ē'), ('Ĕ', 'Ĕ'), ('Ė', 'Ė'),
('Ę', 'Ę'), ('Ě', 'Ě'), ('Ĝ', 'Ĝ'), ('Ğ', 'Ğ'), ('Ġ', 'Ġ'),
('Ģ', 'Ģ'), ('Ĥ', 'Ĥ'), ('Ħ', 'Ħ'), ('Ĩ', 'Ĩ'), ('Ī', 'Ī'),
('Ĭ', 'Ĭ'), ('Į', 'Į'), ('İ', 'İ'), ('IJ', 'IJ'), ('Ĵ', 'Ĵ'),
('Ķ', 'Ķ'), ('Ĺ', 'Ĺ'), ('Ļ', 'Ļ'), ('Ľ', 'Ľ'), ('Ŀ', 'Ŀ'),
('Ł', 'Ł'), ('Ń', 'Ń'), ('Ņ', 'Ņ'), ('Ň', 'Ň'), ('Ŋ', 'Ŋ'),
('Ō', 'Ō'), ('Ŏ', 'Ŏ'), ('Ő', 'Ő'), ('Œ', 'Œ'), ('Ŕ', 'Ŕ'),
('Ŗ', 'Ŗ'), ('Ř', 'Ř'), ('Ś', 'Ś'), ('Ŝ', 'Ŝ'), ('Ş', 'Ş'),
('Š', 'Š'), ('Ţ', 'Ţ'), ('Ť', 'Ť'), ('Ŧ', 'Ŧ'), ('Ũ', 'Ũ'),
('Ū', 'Ū'), ('Ŭ', 'Ŭ'), ('Ů', 'Ů'), ('Ű', 'Ű'), ('Ų', 'Ų'),
('Ŵ', 'Ŵ'), ('Ŷ', 'Ŷ'), ('Ÿ', 'Ź'), ('Ż', 'Ż'), ('Ž', 'Ž'),
('Ɓ', 'Ƃ'), ('Ƅ', 'Ƅ'), ('Ɔ', 'Ƈ'), ('Ɖ', 'Ƌ'), ('Ǝ', 'Ƒ'),
('Ɠ', 'Ɣ'), ('Ɩ', 'Ƙ'), ('Ɯ', 'Ɲ'), ('Ɵ', 'Ơ'), ('Ƣ', 'Ƣ'),
('Ƥ', 'Ƥ'), ('Ʀ', 'Ƨ'), ('Ʃ', 'Ʃ'), ('Ƭ', 'Ƭ'), ('Ʈ', 'Ư'),
('Ʊ', 'Ƴ'), ('Ƶ', 'Ƶ'), ('Ʒ', 'Ƹ'), ('Ƽ', 'Ƽ'), ('DŽ', 'Dž'),
('LJ', 'Lj'), ('NJ', 'Nj'), ('Ǎ', 'Ǎ'), ('Ǐ', 'Ǐ'), ('Ǒ', 'Ǒ'),
('Ǔ', 'Ǔ'), ('Ǖ', 'Ǖ'), ('Ǘ', 'Ǘ'), ('Ǚ', 'Ǚ'), ('Ǜ', 'Ǜ'),
('Ǟ', 'Ǟ'), ('Ǡ', 'Ǡ'), ('Ǣ', 'Ǣ'), ('Ǥ', 'Ǥ'), ('Ǧ', 'Ǧ'),
('Ǩ', 'Ǩ'), ('Ǫ', 'Ǫ'), ('Ǭ', 'Ǭ'), ('Ǯ', 'Ǯ'), ('DZ', 'Dz'),
('Ǵ', 'Ǵ'), ('Ƕ', 'Ǹ'), ('Ǻ', 'Ǻ'), ('Ǽ', 'Ǽ'), ('Ǿ', 'Ǿ'),
('Ȁ', 'Ȁ'), ('Ȃ', 'Ȃ'), ('Ȅ', 'Ȅ'), ('Ȇ', 'Ȇ'), ('Ȉ', 'Ȉ'),
('Ȋ', 'Ȋ'), ('Ȍ', 'Ȍ'), ('Ȏ', 'Ȏ'), ('Ȑ', 'Ȑ'), ('Ȓ', 'Ȓ'),
('Ȕ', 'Ȕ'), ('Ȗ', 'Ȗ'), ('Ș', 'Ș'), ('Ț', 'Ț'), ('Ȝ', 'Ȝ'),
('Ȟ', 'Ȟ'), ('Ƞ', 'Ƞ'), ('Ȣ', 'Ȣ'), ('Ȥ', 'Ȥ'), ('Ȧ', 'Ȧ'),
('Ȩ', 'Ȩ'), ('Ȫ', 'Ȫ'), ('Ȭ', 'Ȭ'), ('Ȯ', 'Ȯ'), ('Ȱ', 'Ȱ'),
('Ȳ', 'Ȳ'), ('Ⱥ', 'Ȼ'), ('Ƚ', 'Ⱦ'), ('Ɂ', 'Ɂ'), ('Ƀ', 'Ɇ'),
('Ɉ', 'Ɉ'), ('Ɋ', 'Ɋ'), ('Ɍ', 'Ɍ'), ('Ɏ', 'Ɏ'), ('Ͱ', 'Ͱ'),
('Ͳ', 'Ͳ'), ('Ͷ', 'Ͷ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'),
('Ό', 'Ό'), ('Ύ', 'Ώ'), ('Α', 'Ρ'), ('Σ', 'Ϋ'), ('Ϗ', 'Ϗ'),
('ϒ', 'ϔ'), ('Ϙ', 'Ϙ'), ('Ϛ', 'Ϛ'), ('Ϝ', 'Ϝ'), ('Ϟ', 'Ϟ'),
('Ϡ', 'Ϡ'), ('Ϣ', 'Ϣ'), ('Ϥ', 'Ϥ'), ('Ϧ', 'Ϧ'), ('Ϩ', 'Ϩ'),
('Ϫ', 'Ϫ'), ('Ϭ', 'Ϭ'), ('Ϯ', 'Ϯ'), ('ϴ', 'ϴ'), ('Ϸ', 'Ϸ'),
('Ϲ', 'Ϻ'), ('Ͻ', 'Я'), ('Ѡ', 'Ѡ'), ('Ѣ', 'Ѣ'), ('Ѥ', 'Ѥ'),
('Ѧ', 'Ѧ'), ('Ѩ', 'Ѩ'), ('Ѫ', 'Ѫ'), ('Ѭ', 'Ѭ'), ('Ѯ', 'Ѯ'),
('Ѱ', 'Ѱ'), ('Ѳ', 'Ѳ'), ('Ѵ', 'Ѵ'), ('Ѷ', 'Ѷ'), ('Ѹ', 'Ѹ'),
('Ѻ', 'Ѻ'), ('Ѽ', 'Ѽ'), ('Ѿ', 'Ѿ'), ('Ҁ', 'Ҁ'), ('Ҋ', 'Ҋ'),
('Ҍ', 'Ҍ'), ('Ҏ', 'Ҏ'), ('Ґ', 'Ґ'), ('Ғ', 'Ғ'), ('Ҕ', 'Ҕ'),
('Җ', 'Җ'), ('Ҙ', 'Ҙ'), ('Қ', 'Қ'), ('Ҝ', 'Ҝ'), ('Ҟ', 'Ҟ'),
('Ҡ', 'Ҡ'), ('Ң', 'Ң'), ('Ҥ', 'Ҥ'), ('Ҧ', 'Ҧ'), ('Ҩ', 'Ҩ'),
('Ҫ', 'Ҫ'), ('Ҭ', 'Ҭ'), ('Ү', 'Ү'), ('Ұ', 'Ұ'), ('Ҳ', 'Ҳ'),
('Ҵ', 'Ҵ'), ('Ҷ', 'Ҷ'), ('Ҹ', 'Ҹ'), ('Һ', 'Һ'), ('Ҽ', 'Ҽ'),
('Ҿ', 'Ҿ'), ('Ӏ', 'Ӂ'), ('Ӄ', 'Ӄ'), ('Ӆ', 'Ӆ'), ('Ӈ', 'Ӈ'),
('Ӊ', 'Ӊ'), ('Ӌ', 'Ӌ'), ('Ӎ', 'Ӎ'), ('Ӑ', 'Ӑ'), ('Ӓ', 'Ӓ'),
('Ӕ', 'Ӕ'), ('Ӗ', 'Ӗ'), ('Ә', 'Ә'), ('Ӛ', 'Ӛ'), ('Ӝ', 'Ӝ'),
('Ӟ', 'Ӟ'), ('Ӡ', 'Ӡ'), ('Ӣ', 'Ӣ'), ('Ӥ', 'Ӥ'), ('Ӧ', 'Ӧ'),
('Ө', 'Ө'), ('Ӫ', 'Ӫ'), ('Ӭ', 'Ӭ'), ('Ӯ', 'Ӯ'), ('Ӱ', 'Ӱ'),
('Ӳ', 'Ӳ'), ('Ӵ', 'Ӵ'), ('Ӷ', 'Ӷ'), ('Ӹ', 'Ӹ'), ('Ӻ', 'Ӻ'),
('Ӽ', 'Ӽ'), ('Ӿ', 'Ӿ'), ('Ԁ', 'Ԁ'), ('Ԃ', 'Ԃ'), ('Ԅ', 'Ԅ'),
('Ԇ', 'Ԇ'), ('Ԉ', 'Ԉ'), ('Ԋ', 'Ԋ'), ('Ԍ', 'Ԍ'), ('Ԏ', 'Ԏ'),
('Ԑ', 'Ԑ'), ('Ԓ', 'Ԓ'), ('Ԕ', 'Ԕ'), ('Ԗ', 'Ԗ'), ('Ԙ', 'Ԙ'),
('Ԛ', 'Ԛ'), ('Ԝ', 'Ԝ'), ('Ԟ', 'Ԟ'), ('Ԡ', 'Ԡ'), ('Ԣ', 'Ԣ'),
('Ԥ', 'Ԥ'), ('Ԧ', 'Ԧ'), ('Ԩ', 'Ԩ'), ('Ԫ', 'Ԫ'), ('Ԭ', 'Ԭ'),
('Ԯ', 'Ԯ'), ('Ա', 'Ֆ'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'),
('', 'Ᏽ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('Ḁ', 'Ḁ'),
('Ḃ', 'Ḃ'), ('Ḅ', 'Ḅ'), ('Ḇ', 'Ḇ'), ('Ḉ', 'Ḉ'),
('Ḋ', 'Ḋ'), ('Ḍ', 'Ḍ'), ('Ḏ', 'Ḏ'), ('Ḑ', 'Ḑ'),
('Ḓ', 'Ḓ'), ('Ḕ', 'Ḕ'), ('Ḗ', 'Ḗ'), ('Ḙ', 'Ḙ'),
('Ḛ', 'Ḛ'), ('Ḝ', 'Ḝ'), ('Ḟ', 'Ḟ'), ('Ḡ', 'Ḡ'),
('Ḣ', 'Ḣ'), ('Ḥ', 'Ḥ'), ('Ḧ', 'Ḧ'), ('Ḩ', 'Ḩ'),
('Ḫ', 'Ḫ'), ('Ḭ', 'Ḭ'), ('Ḯ', 'Ḯ'), ('Ḱ', 'Ḱ'),
('Ḳ', 'Ḳ'), ('Ḵ', 'Ḵ'), ('Ḷ', 'Ḷ'), ('Ḹ', 'Ḹ'),
('Ḻ', 'Ḻ'), ('Ḽ', 'Ḽ'), ('Ḿ', 'Ḿ'), ('Ṁ', 'Ṁ'),
('Ṃ', 'Ṃ'), ('Ṅ', 'Ṅ'), ('Ṇ', 'Ṇ'), ('Ṉ', 'Ṉ'),
('Ṋ', 'Ṋ'), ('Ṍ', 'Ṍ'), ('Ṏ', 'Ṏ'), ('Ṑ', 'Ṑ'),
('Ṓ', 'Ṓ'), ('Ṕ', 'Ṕ'), ('Ṗ', 'Ṗ'), ('Ṙ', 'Ṙ'),
('Ṛ', 'Ṛ'), ('Ṝ', 'Ṝ'), ('Ṟ', 'Ṟ'), ('Ṡ', 'Ṡ'),
('Ṣ', 'Ṣ'), ('Ṥ', 'Ṥ'), ('Ṧ', 'Ṧ'), ('Ṩ', 'Ṩ'),
('Ṫ', 'Ṫ'), ('Ṭ', 'Ṭ'), ('Ṯ', 'Ṯ'), ('Ṱ', 'Ṱ'),
('Ṳ', 'Ṳ'), ('Ṵ', 'Ṵ'), ('Ṷ', 'Ṷ'), ('Ṹ', 'Ṹ'),
('Ṻ', 'Ṻ'), ('Ṽ', 'Ṽ'), ('Ṿ', 'Ṿ'), ('Ẁ', 'Ẁ'),
('Ẃ', 'Ẃ'), ('Ẅ', 'Ẅ'), ('Ẇ', 'Ẇ'), ('Ẉ', 'Ẉ'),
('Ẋ', 'Ẋ'), ('Ẍ', 'Ẍ'), ('Ẏ', 'Ẏ'), ('Ẑ', 'Ẑ'),
('Ẓ', 'Ẓ'), ('Ẕ', 'Ẕ'), ('ẞ', 'ẞ'), ('Ạ', 'Ạ'),
('Ả', 'Ả'), ('Ấ', 'Ấ'), ('Ầ', 'Ầ'), ('Ẩ', 'Ẩ'),
('Ẫ', 'Ẫ'), ('Ậ', 'Ậ'), ('Ắ', 'Ắ'), ('Ằ', 'Ằ'),
('Ẳ', 'Ẳ'), ('Ẵ', 'Ẵ'), ('Ặ', 'Ặ'), ('Ẹ', 'Ẹ'),
('Ẻ', 'Ẻ'), ('Ẽ', 'Ẽ'), ('Ế', 'Ế'), ('Ề', 'Ề'),
('Ể', 'Ể'), ('Ễ', 'Ễ'), ('Ệ', 'Ệ'), ('Ỉ', 'Ỉ'),
('Ị', 'Ị'), ('Ọ', 'Ọ'), ('Ỏ', 'Ỏ'), ('Ố', 'Ố'),
('Ồ', 'Ồ'), ('Ổ', 'Ổ'), ('Ỗ', 'Ỗ'), ('Ộ', 'Ộ'),
('Ớ', 'Ớ'), ('Ờ', 'Ờ'), ('Ở', 'Ở'), ('Ỡ', 'Ỡ'),
('Ợ', 'Ợ'), ('Ụ', 'Ụ'), ('Ủ', 'Ủ'), ('Ứ', 'Ứ'),
('Ừ', 'Ừ'), ('Ử', 'Ử'), ('Ữ', 'Ữ'), ('Ự', 'Ự'),
('Ỳ', 'Ỳ'), ('Ỵ', 'Ỵ'), ('Ỷ', 'Ỷ'), ('Ỹ', 'Ỹ'),
('Ỻ', 'Ỻ'), ('Ỽ', 'Ỽ'), ('Ỿ', 'Ỿ'), ('Ἀ', 'Ἇ'),
('Ἐ', 'Ἕ'), ('Ἠ', 'Ἧ'), ('Ἰ', 'Ἷ'), ('Ὀ', 'Ὅ'),
('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'), ('Ὗ', 'Ὗ'),
('Ὠ', 'Ὧ'), ('ᾈ', 'ᾏ'), ('ᾘ', 'ᾟ'), ('ᾨ', 'ᾯ'),
('Ᾰ', 'ᾼ'), ('Ὲ', 'ῌ'), ('Ῐ', 'Ί'), ('Ῠ', 'Ῥ'),
('Ὸ', 'ῼ'), ('', ''), ('ℇ', 'ℇ'), ('', ''),
('', ''), ('', ''), ('', ''), ('', ''),
('Ω', 'Ω'), ('', ''), ('', ''), ('', ''),
('ℾ', 'ℿ'), ('', ''), ('', ''), ('Ↄ', 'Ↄ'),
('Ⓐ', 'Ⓩ'), ('Ⰰ', 'Ⱞ'), ('Ⱡ', 'Ⱡ'), ('Ɫ', 'Ɽ'),
('Ⱨ', 'Ⱨ'), ('Ⱪ', 'Ⱪ'), ('Ⱬ', 'Ⱬ'), ('Ɑ', 'Ɒ'),
('Ⱳ', 'Ⱳ'), ('Ⱶ', 'Ⱶ'), ('Ȿ', 'Ⲁ'), ('Ⲃ', 'Ⲃ'),
('Ⲅ', 'Ⲅ'), ('Ⲇ', 'Ⲇ'), ('Ⲉ', 'Ⲉ'), ('Ⲋ', 'Ⲋ'),
('Ⲍ', 'Ⲍ'), ('', ''), ('Ⲑ', 'Ⲑ'), ('', ''),
('', ''), ('Ⲗ', 'Ⲗ'), ('', ''), ('', ''),
('Ⲝ', 'Ⲝ'), ('', ''), ('Ⲡ', 'Ⲡ'), ('', ''),
('', ''), ('', ''), ('', ''), ('Ⲫ', 'Ⲫ'),
('', ''), ('Ⲯ', 'Ⲯ'), ('Ⲱ', 'Ⲱ'), ('Ⲳ', 'Ⲳ'),
('Ⲵ', 'Ⲵ'), ('Ⲷ', 'Ⲷ'), ('Ⲹ', 'Ⲹ'), ('', ''),
('Ⲽ', 'Ⲽ'), ('Ⲿ', 'Ⲿ'), ('Ⳁ', 'Ⳁ'), ('Ⳃ', 'Ⳃ'),
('Ⳅ', 'Ⳅ'), ('', ''), ('Ⳉ', 'Ⳉ'), ('', ''),
('', ''), ('Ⳏ', 'Ⳏ'), ('', ''), ('', ''),
('Ⳕ', 'Ⳕ'), ('Ⳗ', 'Ⳗ'), ('Ⳙ', 'Ⳙ'), ('Ⳛ', 'Ⳛ'),
('Ⳝ', 'Ⳝ'), ('Ⳟ', 'Ⳟ'), ('Ⳡ', 'Ⳡ'), ('Ⳣ', 'Ⳣ'),
('Ⳬ', 'Ⳬ'), ('Ⳮ', 'Ⳮ'), ('Ⳳ', 'Ⳳ'), ('Ꙁ', 'Ꙁ'),
('Ꙃ', 'Ꙃ'), ('', ''), ('Ꙇ', 'Ꙇ'), ('Ꙉ', 'Ꙉ'),
('Ꙋ', 'Ꙋ'), ('Ꙍ', 'Ꙍ'), ('Ꙏ', 'Ꙏ'), ('Ꙑ', 'Ꙑ'),
('Ꙓ', 'Ꙓ'), ('Ꙕ', 'Ꙕ'), ('Ꙗ', 'Ꙗ'), ('Ꙙ', 'Ꙙ'),
('Ꙛ', 'Ꙛ'), ('Ꙝ', 'Ꙝ'), ('Ꙟ', 'Ꙟ'), ('Ꙡ', 'Ꙡ'),
('Ꙣ', 'Ꙣ'), ('Ꙥ', 'Ꙥ'), ('Ꙧ', 'Ꙧ'), ('Ꙩ', 'Ꙩ'),
('Ꙫ', 'Ꙫ'), ('Ꙭ', 'Ꙭ'), ('Ꚁ', 'Ꚁ'), ('Ꚃ', 'Ꚃ'),
('Ꚅ', 'Ꚅ'), ('Ꚇ', 'Ꚇ'), ('Ꚉ', 'Ꚉ'), ('Ꚋ', 'Ꚋ'),
('Ꚍ', 'Ꚍ'), ('Ꚏ', 'Ꚏ'), ('Ꚑ', 'Ꚑ'), ('Ꚓ', 'Ꚓ'),
('Ꚕ', 'Ꚕ'), ('Ꚗ', 'Ꚗ'), ('Ꚙ', 'Ꚙ'), ('Ꚛ', 'Ꚛ'),
('Ꜣ', 'Ꜣ'), ('Ꜥ', 'Ꜥ'), ('Ꜧ', 'Ꜧ'), ('Ꜩ', 'Ꜩ'),
('Ꜫ', 'Ꜫ'), ('Ꜭ', 'Ꜭ'), ('Ꜯ', 'Ꜯ'), ('Ꜳ', 'Ꜳ'),
('Ꜵ', 'Ꜵ'), ('Ꜷ', 'Ꜷ'), ('Ꜹ', 'Ꜹ'), ('Ꜻ', 'Ꜻ'),
('Ꜽ', 'Ꜽ'), ('Ꜿ', 'Ꜿ'), ('Ꝁ', 'Ꝁ'), ('Ꝃ', 'Ꝃ'),
('Ꝅ', 'Ꝅ'), ('Ꝇ', 'Ꝇ'), ('Ꝉ', 'Ꝉ'), ('Ꝋ', 'Ꝋ'),
('Ꝍ', 'Ꝍ'), ('Ꝏ', 'Ꝏ'), ('Ꝑ', 'Ꝑ'), ('Ꝓ', 'Ꝓ'),
('Ꝕ', 'Ꝕ'), ('Ꝗ', 'Ꝗ'), ('Ꝙ', 'Ꝙ'), ('', ''),
('Ꝝ', 'Ꝝ'), ('Ꝟ', 'Ꝟ'), ('Ꝡ', 'Ꝡ'), ('Ꝣ', 'Ꝣ'),
('Ꝥ', 'Ꝥ'), ('Ꝧ', 'Ꝧ'), ('Ꝩ', 'Ꝩ'), ('', ''),
('Ꝭ', 'Ꝭ'), ('', ''), ('Ꝺ', 'Ꝺ'), ('Ꝼ', 'Ꝼ'),
('Ᵹ', 'Ꝿ'), ('Ꞁ', 'Ꞁ'), ('Ꞃ', 'Ꞃ'), ('Ꞅ', 'Ꞅ'),
('Ꞇ', 'Ꞇ'), ('Ꞌ', 'Ꞌ'), ('Ɥ', 'Ɥ'), ('Ꞑ', 'Ꞑ'),
('Ꞓ', 'Ꞓ'), ('Ꞗ', 'Ꞗ'), ('', ''), ('Ꞛ', 'Ꞛ'),
('Ꞝ', 'Ꞝ'), ('Ꞟ', 'Ꞟ'), ('Ꞡ', 'Ꞡ'), ('Ꞣ', 'Ꞣ'),
('Ꞥ', 'Ꞥ'), ('Ꞧ', 'Ꞧ'), ('Ꞩ', 'Ꞩ'), ('Ɦ', 'Ɪ'),
('Ʞ', ''), ('Ꞷ', 'Ꞷ'), ('Ꞹ', 'Ꞹ'), ('', ''),
('𐐀', '𐐧'), ('𐒰', '𐓓'), ('𐲀', '𐲲'), ('𑢠', '𑢿'),
('𖹀', '𖹟'), ('𝐀', '𝐙'), ('𝐴', '𝑍'), ('𝑨', '𝒁'),
('𝒜', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'),
('𝒩', '𝒬'), ('𝒮', '𝒵'), ('𝓐', '𝓩'), ('𝔄', '𝔅'),
('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔸', '𝔹'),
('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'),
('𝕬', '𝖅'), ('𝖠', '𝖹'), ('𝗔', '𝗭'), ('𝘈', '𝘡'),
('𝘼', '𝙕'), ('𝙰', '𝚉'), ('𝚨', '𝛀'), ('𝛢', '𝛺'),
('𝜜', '𝜴'), ('𝝖', '𝝮'), ('𝞐', '𝞨'), ('𝟊', '𝟊'),
('𞤀', '𞤡'), ('🄰', '🅉'), ('🅐', '🅩'), ('🅰', '🆉'),
];
@@ -0,0 +1,351 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate word-break /home/andrew/tmp/ucd-11.0.0/ --chars
//
// ucd-generate is available on crates.io.
pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
("ALetter", ALETTER), ("CR", CR), ("Double_Quote", DOUBLE_QUOTE),
("Extend", EXTEND), ("ExtendNumLet", EXTENDNUMLET), ("Format", FORMAT),
("Hebrew_Letter", HEBREW_LETTER), ("Katakana", KATAKANA), ("LF", LF),
("MidLetter", MIDLETTER), ("MidNum", MIDNUM), ("MidNumLet", MIDNUMLET),
("Newline", NEWLINE), ("Numeric", NUMERIC),
("Regional_Indicator", REGIONAL_INDICATOR), ("Single_Quote", SINGLE_QUOTE),
("WSegSpace", WSEGSPACE), ("ZWJ", ZWJ),
];
pub const ALETTER: &'static [(char, char)] = &[
('A', 'Z'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'), ('º', 'º'),
('À', 'Ö'), ('Ø', 'ö'), ('ø', '˗'), ('˞', 'ˤ'), ('ˬ', '˿'),
('Ͱ', 'ʹ'), ('Ͷ', 'ͷ'), ('ͺ', 'ͽ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'),
('Έ', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ρ'), ('Σ', 'ϵ'), ('Ϸ', 'ҁ'),
('Ҋ', 'ԯ'), ('Ա', 'Ֆ'), ('ՙ', 'ՙ'), ('՛', '՜'), ('՞', '՞'),
('ՠ', 'ֈ'), ('׳', '׳'), ('ؠ', 'ي'), ('ٮ', 'ٯ'), ('ٱ', 'ۓ'),
('ە', 'ە'), ('ۥ', 'ۦ'), ('ۮ', 'ۯ'), ('ۺ', 'ۼ'), ('ۿ', 'ۿ'),
('ܐ', 'ܐ'), ('ܒ', 'ܯ'), ('ݍ', 'ޥ'), ('ޱ', 'ޱ'), ('ߊ', 'ߪ'),
('ߴ', 'ߵ'), ('ߺ', 'ߺ'), ('ࠀ', 'ࠕ'), ('ࠚ', 'ࠚ'), ('ࠤ', 'ࠤ'),
('ࠨ', 'ࠨ'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'), ('ࢠ', 'ࢴ'),
('ࢶ', 'ࢽ'), ('ऄ', 'ह'), ('ऽ', 'ऽ'), ('ॐ', 'ॐ'),
('क़', 'ॡ'), ('ॱ', 'ঀ'), ('অ', 'ঌ'), ('এ', 'ঐ'),
('ও', 'ন'), ('প', 'র'), ('ল', 'ল'), ('শ', 'হ'),
('ঽ', 'ঽ'), ('ৎ', 'ৎ'), ('ড়', 'ঢ়'), ('য়', 'ৡ'),
('ৰ', 'ৱ'), ('ৼ', 'ৼ'), ('ਅ', 'ਊ'), ('ਏ', 'ਐ'),
('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'), ('ਵ', 'ਸ਼'),
('ਸ', 'ਹ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'), ('ੲ', 'ੴ'),
('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'), ('પ', 'ર'),
('લ', 'ળ'), ('વ', 'હ'), ('ઽ', 'ઽ'), ('ૐ', 'ૐ'),
('ૠ', 'ૡ'), ('ૹ', 'ૹ'), ('ଅ', 'ଌ'), ('ଏ', 'ଐ'),
('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'), ('ଵ', 'ହ'),
('ଽ', 'ଽ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୡ'), ('ୱ', 'ୱ'),
('ஃ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'), ('ஒ', 'க'),
('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'), ('ண', 'த'),
('ந', 'ப'), ('ம', 'ஹ'), ('ௐ', 'ௐ'), ('అ', 'ఌ'),
('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'), ('ఽ', 'ఽ'),
('ౘ', 'ౚ'), ('ౠ', 'ౡ'), ('ಀ', 'ಀ'), ('ಅ', 'ಌ'),
('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'),
('ಽ', 'ಽ'), ('ೞ', 'ೞ'), ('ೠ', 'ೡ'), ('ೱ', 'ೲ'),
('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'), ('ഽ', 'ഽ'),
('ൎ', 'ൎ'), ('ൔ', 'ൖ'), ('ൟ', 'ൡ'), ('ൺ', 'ൿ'),
('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'), ('ල', 'ල'),
('ව', 'ෆ'), ('ༀ', 'ༀ'), ('ཀ', 'ཇ'), ('ཉ', 'ཬ'),
('ྈ', 'ྌ'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'),
('ა', 'ჺ'), ('ჼ', 'ቈ'), ('ቊ', 'ቍ'), ('ቐ', 'ቖ'),
('ቘ', 'ቘ'), ('ቚ', 'ቝ'), ('በ', 'ኈ'), ('ኊ', 'ኍ'),
('ነ', 'ኰ'), ('ኲ', 'ኵ'), ('ኸ', 'ኾ'), ('ዀ', 'ዀ'),
('ዂ', 'ዅ'), ('ወ', 'ዖ'), ('ዘ', 'ጐ'), ('ጒ', 'ጕ'),
('ጘ', 'ፚ'), ('ᎀ', 'ᎏ'), ('', 'Ᏽ'), ('ᏸ', 'ᏽ'),
('ᐁ', 'ᙬ'), ('ᙯ', 'ᙿ'), ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'),
('ᛮ', 'ᛸ'), ('ᜀ', 'ᜌ'), ('ᜎ', 'ᜑ'), ('ᜠ', 'ᜱ'),
('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('ᠠ', 'ᡸ'),
('ᢀ', 'ᢄ'), ('ᢇ', 'ᢨ'), ('ᢪ', 'ᢪ'), ('ᢰ', 'ᣵ'),
('ᤀ', 'ᤞ'), ('ᨀ', 'ᨖ'), ('ᬅ', 'ᬳ'), ('ᭅ', 'ᭋ'),
('ᮃ', 'ᮠ'), ('ᮮ', 'ᮯ'), ('ᮺ', 'ᯥ'), ('ᰀ', 'ᰣ'),
('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'), ('ᲀ', 'ᲈ'), ('Ა', 'Ჺ'),
('Ჽ', 'Ჿ'), ('ᳩ', 'ᳬ'), ('ᳮ', 'ᳱ'), ('ᳵ', 'ᳶ'),
('ᴀ', 'ᶿ'), ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'),
('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'),
('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾼ'),
('', ''), ('ῂ', 'ῄ'), ('ῆ', 'ῌ'), ('ῐ', 'ΐ'),
('ῖ', 'Ί'), ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'),
('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('', ''),
('ℇ', 'ℇ'), ('', ''), ('', ''), ('', ''),
('', ''), ('Ω', 'Ω'), ('', ''), ('', ''),
('', ''), ('ℼ', 'ℿ'), ('', ''), ('ⅎ', 'ⅎ'),
('', 'ↈ'), ('Ⓐ', 'ⓩ'), ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'),
('Ⱡ', 'ⳤ'), ('Ⳬ', 'ⳮ'), ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'),
('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('ⴰ', 'ⵧ'), ('ⵯ', 'ⵯ'),
('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'), ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'),
('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'), ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'),
('ⷘ', 'ⷞ'), ('ⸯ', 'ⸯ'), ('々', '々'), ('〻', '〼'),
('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆺ'), ('ꀀ', 'ꒌ'),
('', ''), ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘟ'), ('ꘪ', 'ꘫ'),
('Ꙁ', 'ꙮ'), ('ꙿ', 'ꚝ'), ('ꚠ', ''), ('ꜗ', 'ꞹ'),
('ꟷ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), ('ꠌ', 'ꠢ'),
('ꡀ', 'ꡳ'), ('ꢂ', 'ꢳ'), ('ꣲ', 'ꣷ'), ('ꣻ', 'ꣻ'),
('ꣽ', 'ꣾ'), ('ꤊ', 'ꤥ'), ('ꤰ', 'ꥆ'), ('ꥠ', 'ꥼ'),
('ꦄ', 'ꦲ'), ('ꧏ', 'ꧏ'), ('ꨀ', 'ꨨ'), ('ꩀ', 'ꩂ'),
('ꩄ', 'ꩋ'), ('ꫠ', 'ꫪ'), ('ꫲ', 'ꫴ'), ('ꬁ', 'ꬆ'),
('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'),
('ꬰ', 'ꭥ'), ('ꭰ', 'ꯢ'), ('가', '힣'), ('ힰ', 'ퟆ'),
('ퟋ', 'ퟻ'), ('ff', 'st'), ('ﬓ', 'ﬗ'), ('ﭐ', 'ﮱ'),
('ﯓ', 'ﴽ'), ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'), ('ﷰ', 'ﷻ'),
('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'), ('', ''), ('', ''),
('', 'ᄒ'), ('ᅡ', 'ᅦ'), ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'),
('ᅳ', 'ᅵ'), ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'),
('𐀼', '𐀽'), ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'),
('𐅀', '𐅴'), ('𐊀', '𐊜'), ('𐊠', '𐋐'), ('𐌀', '𐌟'),
('𐌭', '𐍊'), ('𐍐', '𐍵'), ('𐎀', '𐎝'), ('𐎠', '𐏃'),
('𐏈', '𐏏'), ('𐏑', '𐏕'), ('𐐀', '𐒝'), ('𐒰', '𐓓'),
('𐓘', '𐓻'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), ('𐘀', '𐜶'),
('𐝀', '𐝕'), ('𐝠', '𐝧'), ('𐠀', '𐠅'), ('𐠈', '𐠈'),
('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐡕'),
('𐡠', '𐡶'), ('𐢀', '𐢞'), ('𐣠', '𐣲'), ('𐣴', '𐣵'),
('𐤀', '𐤕'), ('𐤠', '𐤹'), ('𐦀', '𐦷'), ('𐦾', '𐦿'),
('𐨀', '𐨀'), ('𐨐', '𐨓'), ('𐨕', '𐨗'), ('𐨙', '𐨵'),
('𐩠', '𐩼'), ('𐪀', '𐪜'), ('𐫀', '𐫇'), ('𐫉', '𐫤'),
('𐬀', '𐬵'), ('𐭀', '𐭕'), ('𐭠', '𐭲'), ('𐮀', '𐮑'),
('𐰀', '𐱈'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐴀', '𐴣'),
('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), ('𑀃', '𑀷'),
('𑂃', '𑂯'), ('𑃐', '𑃨'), ('𑄃', '𑄦'), ('𑅄', '𑅄'),
('𑅐', '𑅲'), ('𑅶', '𑅶'), ('𑆃', '𑆲'), ('𑇁', '𑇄'),
('𑇚', '𑇚'), ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈫'),
('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'),
('𑊟', '𑊨'), ('𑊰', '𑋞'), ('𑌅', '𑌌'), ('𑌏', '𑌐'),
('𑌓', '𑌨'), ('𑌪', '𑌰'), ('𑌲', '𑌳'), ('𑌵', '𑌹'),
('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'), ('𑐀', '𑐴'),
('𑑇', '𑑊'), ('𑒀', '𑒯'), ('𑓄', '𑓅'), ('𑓇', '𑓇'),
('𑖀', '𑖮'), ('𑗘', '𑗛'), ('𑘀', '𑘯'), ('𑙄', '𑙄'),
('𑚀', '𑚪'), ('𑠀', '𑠫'), ('𑢠', '𑣟'), ('𑣿', '𑣿'),
('𑨀', '𑨀'), ('𑨋', '𑨲'), ('𑨺', '𑨺'), ('𑩐', '𑩐'),
('𑩜', '𑪃'), ('𑪆', '𑪉'), ('𑪝', '𑪝'), ('𑫀', '𑫸'),
('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), ('𑱲', '𑲏'),
('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '𑴰'), ('𑵆', '𑵆'),
('𑵠', '𑵥'), ('𑵧', '𑵨'), ('𑵪', '𑶉'), ('𑶘', '𑶘'),
('𑻠', '𑻲'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'),
('𓀀', '𓐮'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'),
('𖫐', '𖫭'), ('𖬀', '𖬯'), ('𖭀', '𖭃'), ('𖭣', '𖭷'),
('𖭽', '𖮏'), ('𖹀', '𖹿'), ('𖼀', '𖽄'), ('𖽐', '𖽐'),
('𖾓', '𖾟'), ('𖿠', '𖿡'), ('𛰀', '𛱪'), ('𛱰', '𛱼'),
('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𝐀', '𝑔'), ('𝑖', '𝒜'),
('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'),
('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝔅'),
('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔞', '𝔹'),
('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'),
('𝕒', '𝚥'), ('𝚨', '𝛀'), ('𝛂', '𝛚'), ('𝛜', '𝛺'),
('𝛼', '𝜔'), ('𝜖', '𝜴'), ('𝜶', '𝝎'), ('𝝐', '𝝮'),
('𝝰', '𝞈'), ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'),
('𞠀', '𞣄'), ('𞤀', '𞥃'), ('𞸀', '𞸃'), ('𞸅', '𞸟'),
('𞸡', '𞸢'), ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'),
('𞸴', '𞸷'), ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'),
('𞹇', '𞹇'), ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'),
('𞹑', '𞹒'), ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'),
('𞹛', '𞹛'), ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'),
('𞹤', '𞹤'), ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'),
('𞹹', '𞹼'), ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'),
('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('🄰', '🅉'),
('🅐', '🅩'), ('🅰', '🆉'),
];
pub const CR: &'static [(char, char)] = &[
('\r', '\r'),
];
pub const DOUBLE_QUOTE: &'static [(char, char)] = &[
('\"', '\"'),
];
pub const EXTEND: &'static [(char, char)] = &[
('\u{300}', '\u{36f}'), ('\u{483}', '\u{489}'), ('\u{591}', '\u{5bd}'),
('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'), ('\u{5c4}', '\u{5c5}'),
('\u{5c7}', '\u{5c7}'), ('\u{610}', '\u{61a}'), ('\u{64b}', '\u{65f}'),
('\u{670}', '\u{670}'), ('\u{6d6}', '\u{6dc}'), ('\u{6df}', '\u{6e4}'),
('\u{6e7}', '\u{6e8}'), ('\u{6ea}', '\u{6ed}'), ('\u{711}', '\u{711}'),
('\u{730}', '\u{74a}'), ('\u{7a6}', '\u{7b0}'), ('\u{7eb}', '\u{7f3}'),
('\u{7fd}', '\u{7fd}'), ('\u{816}', '\u{819}'), ('\u{81b}', '\u{823}'),
('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'),
('\u{8d3}', '\u{8e1}'), ('\u{8e3}', ''), ('\u{93a}', '\u{93c}'),
('ा', 'ॏ'), ('\u{951}', '\u{957}'), ('\u{962}', '\u{963}'),
('\u{981}', 'ঃ'), ('\u{9bc}', '\u{9bc}'), ('\u{9be}', '\u{9c4}'),
('ে', 'ৈ'), ('ো', '\u{9cd}'), ('\u{9d7}', '\u{9d7}'),
('\u{9e2}', '\u{9e3}'), ('\u{9fe}', '\u{9fe}'), ('\u{a01}', 'ਃ'),
('\u{a3c}', '\u{a3c}'), ('ਾ', '\u{a42}'), ('\u{a47}', '\u{a48}'),
('\u{a4b}', '\u{a4d}'), ('\u{a51}', '\u{a51}'), ('\u{a70}', '\u{a71}'),
('\u{a75}', '\u{a75}'), ('\u{a81}', ''), ('\u{abc}', '\u{abc}'),
('ા', '\u{ac5}'), ('\u{ac7}', 'ૉ'), ('ો', '\u{acd}'),
('\u{ae2}', '\u{ae3}'), ('\u{afa}', '\u{aff}'), ('\u{b01}', ''),
('\u{b3c}', '\u{b3c}'), ('\u{b3e}', '\u{b44}'), ('େ', 'ୈ'),
('ୋ', '\u{b4d}'), ('\u{b56}', '\u{b57}'), ('\u{b62}', '\u{b63}'),
('\u{b82}', '\u{b82}'), ('\u{bbe}', 'ூ'), ('ெ', 'ை'),
('ொ', '\u{bcd}'), ('\u{bd7}', '\u{bd7}'), ('\u{c00}', '\u{c04}'),
('\u{c3e}', 'ౄ'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'),
('\u{c55}', '\u{c56}'), ('\u{c62}', '\u{c63}'), ('\u{c81}', 'ಃ'),
('\u{cbc}', '\u{cbc}'), ('ಾ', 'ೄ'), ('\u{cc6}', 'ೈ'),
('ೊ', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'),
('\u{d00}', 'ഃ'), ('\u{d3b}', '\u{d3c}'), ('\u{d3e}', '\u{d44}'),
('െ', 'ൈ'), ('ൊ', '\u{d4d}'), ('\u{d57}', '\u{d57}'),
('\u{d62}', '\u{d63}'), ('', 'ඃ'), ('\u{dca}', '\u{dca}'),
('\u{dcf}', '\u{dd4}'), ('\u{dd6}', '\u{dd6}'), ('ෘ', '\u{ddf}'),
('ෲ', 'ෳ'), ('\u{e31}', '\u{e31}'), ('\u{e34}', '\u{e3a}'),
('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{eb9}'),
('\u{ebb}', '\u{ebc}'), ('\u{ec8}', '\u{ecd}'), ('\u{f18}', '\u{f19}'),
('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), ('\u{f39}', '\u{f39}'),
('༾', '༿'), ('\u{f71}', '\u{f84}'), ('\u{f86}', '\u{f87}'),
('\u{f8d}', '\u{f97}'), ('\u{f99}', '\u{fbc}'), ('\u{fc6}', '\u{fc6}'),
('ါ', '\u{103e}'), ('ၖ', '\u{1059}'), ('\u{105e}', '\u{1060}'),
('ၢ', 'ၤ'), ('ၧ', 'ၭ'), ('\u{1071}', '\u{1074}'),
('\u{1082}', '\u{108d}'), ('ႏ', 'ႏ'), ('ႚ', '\u{109d}'),
('\u{135d}', '\u{135f}'), ('\u{1712}', '\u{1714}'),
('\u{1732}', '\u{1734}'), ('\u{1752}', '\u{1753}'),
('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17d3}'),
('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'),
('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'), ('\u{1920}', 'ᤫ'),
('ᤰ', '\u{193b}'), ('\u{1a17}', '\u{1a1b}'), ('ᩕ', '\u{1a5e}'),
('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'),
('\u{1ab0}', '\u{1abe}'), ('\u{1b00}', 'ᬄ'), ('\u{1b34}', '᭄'),
('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', 'ᮂ'), ('ᮡ', '\u{1bad}'),
('\u{1be6}', '᯳'), ('ᰤ', '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'),
('\u{1cd4}', '\u{1ce8}'), ('\u{1ced}', '\u{1ced}'), ('ᳲ', '\u{1cf4}'),
('᳷', '\u{1cf9}'), ('\u{1dc0}', '\u{1df9}'), ('\u{1dfb}', '\u{1dff}'),
('\u{200c}', '\u{200c}'), ('\u{20d0}', '\u{20f0}'),
('\u{2cef}', '\u{2cf1}'), ('\u{2d7f}', '\u{2d7f}'),
('\u{2de0}', '\u{2dff}'), ('\u{302a}', '\u{302f}'),
('\u{3099}', '\u{309a}'), ('\u{a66f}', '\u{a672}'),
('\u{a674}', '\u{a67d}'), ('\u{a69e}', '\u{a69f}'),
('\u{a6f0}', '\u{a6f1}'), ('\u{a802}', '\u{a802}'),
('\u{a806}', '\u{a806}'), ('\u{a80b}', '\u{a80b}'), ('ꠣ', 'ꠧ'),
('ꢀ', 'ꢁ'), ('ꢴ', '\u{a8c5}'), ('\u{a8e0}', '\u{a8f1}'),
('\u{a8ff}', '\u{a8ff}'), ('\u{a926}', '\u{a92d}'), ('\u{a947}', '꥓'),
('\u{a980}', 'ꦃ'), ('\u{a9b3}', '꧀'), ('\u{a9e5}', '\u{a9e5}'),
('\u{aa29}', '\u{aa36}'), ('\u{aa43}', '\u{aa43}'), ('\u{aa4c}', 'ꩍ'),
('ꩻ', 'ꩽ'), ('\u{aab0}', '\u{aab0}'), ('\u{aab2}', '\u{aab4}'),
('\u{aab7}', '\u{aab8}'), ('\u{aabe}', '\u{aabf}'),
('\u{aac1}', '\u{aac1}'), ('ꫫ', 'ꫯ'), ('ꫵ', '\u{aaf6}'),
('ꯣ', 'ꯪ'), ('꯬', '\u{abed}'), ('\u{fb1e}', '\u{fb1e}'),
('\u{fe00}', '\u{fe0f}'), ('\u{fe20}', '\u{fe2f}'),
('\u{ff9e}', '\u{ff9f}'), ('\u{101fd}', '\u{101fd}'),
('\u{102e0}', '\u{102e0}'), ('\u{10376}', '\u{1037a}'),
('\u{10a01}', '\u{10a03}'), ('\u{10a05}', '\u{10a06}'),
('\u{10a0c}', '\u{10a0f}'), ('\u{10a38}', '\u{10a3a}'),
('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'),
('\u{10d24}', '\u{10d27}'), ('\u{10f46}', '\u{10f50}'), ('𑀀', '𑀂'),
('\u{11038}', '\u{11046}'), ('\u{1107f}', '𑂂'), ('𑂰', '\u{110ba}'),
('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{11134}'), ('𑅅', '𑅆'),
('\u{11173}', '\u{11173}'), ('\u{11180}', '𑆂'), ('𑆳', '𑇀'),
('\u{111c9}', '\u{111cc}'), ('𑈬', '\u{11237}'),
('\u{1123e}', '\u{1123e}'), ('\u{112df}', '\u{112ea}'),
('\u{11300}', '𑌃'), ('\u{1133b}', '\u{1133c}'), ('\u{1133e}', '𑍄'),
('𑍇', '𑍈'), ('𑍋', '𑍍'), ('\u{11357}', '\u{11357}'),
('𑍢', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'),
('𑐵', '\u{11446}'), ('\u{1145e}', '\u{1145e}'),
('\u{114b0}', '\u{114c3}'), ('\u{115af}', '\u{115b5}'),
('𑖸', '\u{115c0}'), ('\u{115dc}', '\u{115dd}'), ('𑘰', '\u{11640}'),
('\u{116ab}', '\u{116b7}'), ('\u{1171d}', '\u{1172b}'),
('𑠬', '\u{1183a}'), ('\u{11a01}', '\u{11a0a}'), ('\u{11a33}', '𑨹'),
('\u{11a3b}', '\u{11a3e}'), ('\u{11a47}', '\u{11a47}'),
('\u{11a51}', '\u{11a5b}'), ('\u{11a8a}', '\u{11a99}'),
('𑰯', '\u{11c36}'), ('\u{11c38}', '\u{11c3f}'),
('\u{11c92}', '\u{11ca7}'), ('𑲩', '\u{11cb6}'),
('\u{11d31}', '\u{11d36}'), ('\u{11d3a}', '\u{11d3a}'),
('\u{11d3c}', '\u{11d3d}'), ('\u{11d3f}', '\u{11d45}'),
('\u{11d47}', '\u{11d47}'), ('𑶊', '𑶎'), ('\u{11d90}', '\u{11d91}'),
('𑶓', '\u{11d97}'), ('\u{11ef3}', '𑻶'), ('\u{16af0}', '\u{16af4}'),
('\u{16b30}', '\u{16b36}'), ('𖽑', '𖽾'), ('\u{16f8f}', '\u{16f92}'),
('\u{1bc9d}', '\u{1bc9e}'), ('\u{1d165}', '\u{1d169}'),
('𝅭', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'),
('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'),
('\u{1d242}', '\u{1d244}'), ('\u{1da00}', '\u{1da36}'),
('\u{1da3b}', '\u{1da6c}'), ('\u{1da75}', '\u{1da75}'),
('\u{1da84}', '\u{1da84}'), ('\u{1da9b}', '\u{1da9f}'),
('\u{1daa1}', '\u{1daaf}'), ('\u{1e000}', '\u{1e006}'),
('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'),
('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'),
('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('🏻', '🏿'),
('\u{e0020}', '\u{e007f}'), ('\u{e0100}', '\u{e01ef}'),
];
pub const EXTENDNUMLET: &'static [(char, char)] = &[
('_', '_'), ('\u{202f}', '\u{202f}'), ('‿', '⁀'), ('⁔', '⁔'),
('︳', '︴'), ('', ''), ('_', '_'),
];
pub const FORMAT: &'static [(char, char)] = &[
('\u{ad}', '\u{ad}'), ('\u{600}', '\u{605}'), ('\u{61c}', '\u{61c}'),
('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), ('\u{8e2}', '\u{8e2}'),
('\u{180e}', '\u{180e}'), ('\u{200e}', '\u{200f}'),
('\u{202a}', '\u{202e}'), ('\u{2060}', '\u{2064}'),
('\u{2066}', '\u{206f}'), ('\u{feff}', '\u{feff}'),
('\u{fff9}', '\u{fffb}'), ('\u{110bd}', '\u{110bd}'),
('\u{110cd}', '\u{110cd}'), ('\u{1bca0}', '\u{1bca3}'),
('\u{1d173}', '\u{1d17a}'), ('\u{e0001}', '\u{e0001}'),
];
pub const HEBREW_LETTER: &'static [(char, char)] = &[
('א', 'ת'), ('ׯ', 'ײ'), ('יִ', 'יִ'), ('ײַ', 'ﬨ'), ('שׁ', 'זּ'),
('טּ', 'לּ'), ('מּ', 'מּ'), ('נּ', 'סּ'), ('ףּ', 'פּ'),
('צּ', 'ﭏ'),
];
pub const KATAKANA: &'static [(char, char)] = &[
('〱', '〵'), ('゛', '゜'), ('', 'ヺ'), ('ー', 'ヿ'),
('ㇰ', 'ㇿ'), ('㋐', '㋾'), ('㌀', '㍗'), ('ヲ', 'ン'),
('𛀀', '𛀀'),
];
pub const LF: &'static [(char, char)] = &[
('\n', '\n'),
];
pub const MIDLETTER: &'static [(char, char)] = &[
(':', ':'), ('·', '·'), ('·', '·'), ('״', '״'), ('‧', '‧'),
('︓', '︓'), ('﹕', '﹕'), ('', ''),
];
pub const MIDNUM: &'static [(char, char)] = &[
(',', ','), (';', ';'), (';', ';'), ('։', '։'), ('،', '؍'),
('٬', '٬'), ('߸', '߸'), ('', ''), ('︐', '︐'), ('︔', '︔'),
('﹐', '﹐'), ('﹔', '﹔'), ('', ''), ('', ''),
];
pub const MIDNUMLET: &'static [(char, char)] = &[
('.', '.'), ('', ''), ('', ''), ('﹒', '﹒'), ('', ''),
('', ''),
];
pub const NEWLINE: &'static [(char, char)] = &[
('\u{b}', '\u{c}'), ('\u{85}', '\u{85}'), ('\u{2028}', '\u{2029}'),
];
pub const NUMERIC: &'static [(char, char)] = &[
('0', '9'), ('٠', '٩'), ('٫', '٫'), ('۰', '۹'), ('߀', '߉'),
('', '९'), ('', '৯'), ('', '੯'), ('', '૯'),
('', '୯'), ('', '௯'), ('', '౯'), ('', '೯'),
('', '൯'), ('෦', '෯'), ('', '๙'), ('', '໙'),
('༠', '༩'), ('', '၉'), ('႐', '႙'), ('០', '៩'),
('᠐', '᠙'), ('᥆', '᥏'), ('᧐', '᧙'), ('᪀', '᪉'),
('᪐', '᪙'), ('᭐', '᭙'), ('᮰', '᮹'), ('᱀', '᱉'),
('᱐', '᱙'), ('꘠', '꘩'), ('꣐', '꣙'), ('꤀', '꤉'),
('꧐', '꧙'), ('꧰', '꧹'), ('꩐', '꩙'), ('꯰', '꯹'),
('𐒠', '𐒩'), ('𐴰', '𐴹'), ('𑁦', '𑁯'), ('𑃰', '𑃹'),
('𑄶', '𑄿'), ('𑇐', '𑇙'), ('𑋰', '𑋹'), ('𑑐', '𑑙'),
('𑓐', '𑓙'), ('𑙐', '𑙙'), ('𑛀', '𑛉'), ('𑜰', '𑜹'),
('𑣠', '𑣩'), ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𑶠', '𑶩'),
('𖩠', '𖩩'), ('𖭐', '𖭙'), ('𝟎', '𝟿'), ('𞥐', '𞥙'),
];
pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[
('🇦', '🇿'),
];
pub const SINGLE_QUOTE: &'static [(char, char)] = &[
('\'', '\''),
];
pub const WSEGSPACE: &'static [(char, char)] = &[
(' ', ' '), ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{2006}'),
('\u{2008}', '\u{200a}'), ('\u{205f}', '\u{205f}'),
('\u{3000}', '\u{3000}'),
];
pub const ZWJ: &'static [(char, char)] = &[
('\u{200d}', '\u{200d}'),
];
+37
View File
@@ -108,3 +108,40 @@ mat!(uni_class_gencat_unassigned,
r"\p{Unassigned}", "\u{10FFFF}", Some((0, 4)));
mat!(uni_class_gencat_uppercase_letter,
r"\p{Uppercase_Letter}", "", Some((0, 3)));
// Test a smattering of properties.
mat!(uni_class_prop_emoji1, r"\p{Emoji}", "\u{23E9}", Some((0, 3)));
mat!(uni_class_prop_emoji2, r"\p{emoji}", "\u{1F21A}", Some((0, 4)));
mat!(uni_class_prop_picto1,
r"\p{extendedpictographic}", "\u{1FA6E}", Some((0, 4)));
mat!(uni_class_prop_picto2,
r"\p{extendedpictographic}", "\u{1FFFD}", Some((0, 4)));
// grapheme_cluster_break
mat!(uni_class_gcb_prepend,
r"\p{grapheme_cluster_break=prepend}", "\u{11D46}", Some((0, 4)));
mat!(uni_class_gcb_ri1,
r"\p{gcb=regional_indicator}", "\u{1F1E6}", Some((0, 4)));
mat!(uni_class_gcb_ri2,
r"\p{gcb=ri}", "\u{1F1E7}", Some((0, 4)));
mat!(uni_class_gcb_ri3,
r"\p{gcb=regionalindicator}", "\u{1F1FF}", Some((0, 4)));
mat!(uni_class_gcb_lvt,
r"\p{gcb=lvt}", "\u{C989}", Some((0, 3)));
mat!(uni_class_gcb_zwj,
r"\p{gcb=zwj}", "\u{200D}", Some((0, 3)));
// word_break
mat!(uni_class_wb1,
r"\p{word_break=Hebrew_Letter}", "\u{FB46}", Some((0, 3)));
mat!(uni_class_wb2, r"\p{wb=hebrewletter}", "\u{FB46}", Some((0, 3)));
mat!(uni_class_wb3, r"\p{wb=ExtendNumLet}", "\u{FF3F}", Some((0, 3)));
mat!(uni_class_wb4, r"\p{wb=WSegSpace}", "\u{3000}", Some((0, 3)));
mat!(uni_class_wb5, r"\p{wb=numeric}", "\u{1E950}", Some((0, 4)));
// sentence_break
mat!(uni_class_sb1, r"\p{sentence_break=Lower}", "\u{0469}", Some((0, 2)));
mat!(uni_class_sb2, r"\p{sb=lower}", "\u{0469}", Some((0, 2)));
mat!(uni_class_sb3, r"\p{sb=Close}", "\u{FF60}", Some((0, 3)));
mat!(uni_class_sb4, r"\p{sb=Close}", "\u{1F677}", Some((0, 4)));
mat!(uni_class_sb5, r"\p{sb=SContinue}", "\u{FF64}", Some((0, 3)));