mirror of
https://github.com/openharmony/third_party_rust_unicode-bidi.git
synced 2026-06-30 21:27:57 -04:00
Handle canonical equivalence
This commit is contained in:
@@ -42,14 +42,14 @@ pub fn bidi_class(c: char) -> BidiClass {
|
||||
bsearch_range_value_table(c, bidi_class_table)
|
||||
}
|
||||
|
||||
/// If this character is an opening bracket according to BidiBrackets.txt,
|
||||
/// return its corresponding closing bracket.
|
||||
pub(crate) fn bidi_matched_bracket(c: char) -> Option<(char, bool)> {
|
||||
/// If this character is a bracket according to BidiBrackets.txt,
|
||||
/// return the corresponding *normalized* *opening bracket* of the pair,
|
||||
/// and whether or not it itself is an opening bracket.
|
||||
pub(crate) fn bidi_matched_opening_bracket(c: char) -> Option<(char, bool)> {
|
||||
for pair in self::tables::bidi_pairs_table {
|
||||
if pair.0 == c {
|
||||
return Some((pair.1, true));
|
||||
} else if pair.1 == c {
|
||||
return Some((pair.0, false));
|
||||
if pair.0 == c || pair.1 == c {
|
||||
let skeleton = pair.2.unwrap_or(pair.0);
|
||||
return Some((skeleton, pair.0 == c));
|
||||
}
|
||||
}
|
||||
None
|
||||
|
||||
+23
-20
@@ -508,25 +508,28 @@ pub const bidi_class_table: &'static [(char, char, BidiClass)] = &[
|
||||
('\u{f0000}', '\u{ffffd}', L), ('\u{100000}', '\u{10fffd}', L)
|
||||
];
|
||||
|
||||
pub const bidi_pairs_table: &'static [(char, char)] = &[
|
||||
('\u{28}', '\u{29}'), ('\u{5b}', '\u{5d}'), ('\u{7b}', '\u{7d}'), ('\u{f3a}', '\u{f3b}'),
|
||||
('\u{f3c}', '\u{f3d}'), ('\u{169b}', '\u{169c}'), ('\u{2045}', '\u{2046}'), ('\u{207d}',
|
||||
'\u{207e}'), ('\u{208d}', '\u{208e}'), ('\u{2308}', '\u{2309}'), ('\u{230a}', '\u{230b}'),
|
||||
('\u{2329}', '\u{232a}'), ('\u{2768}', '\u{2769}'), ('\u{276a}', '\u{276b}'), ('\u{276c}',
|
||||
'\u{276d}'), ('\u{276e}', '\u{276f}'), ('\u{2770}', '\u{2771}'), ('\u{2772}', '\u{2773}'),
|
||||
('\u{2774}', '\u{2775}'), ('\u{27c5}', '\u{27c6}'), ('\u{27e6}', '\u{27e7}'), ('\u{27e8}',
|
||||
'\u{27e9}'), ('\u{27ea}', '\u{27eb}'), ('\u{27ec}', '\u{27ed}'), ('\u{27ee}', '\u{27ef}'),
|
||||
('\u{2983}', '\u{2984}'), ('\u{2985}', '\u{2986}'), ('\u{2987}', '\u{2988}'), ('\u{2989}',
|
||||
'\u{298a}'), ('\u{298b}', '\u{298c}'), ('\u{298d}', '\u{2990}'), ('\u{298f}', '\u{298e}'),
|
||||
('\u{2991}', '\u{2992}'), ('\u{2993}', '\u{2994}'), ('\u{2995}', '\u{2996}'), ('\u{2997}',
|
||||
'\u{2998}'), ('\u{29d8}', '\u{29d9}'), ('\u{29da}', '\u{29db}'), ('\u{29fc}', '\u{29fd}'),
|
||||
('\u{2e22}', '\u{2e23}'), ('\u{2e24}', '\u{2e25}'), ('\u{2e26}', '\u{2e27}'), ('\u{2e28}',
|
||||
'\u{2e29}'), ('\u{2e55}', '\u{2e56}'), ('\u{2e57}', '\u{2e58}'), ('\u{2e59}', '\u{2e5a}'),
|
||||
('\u{2e5b}', '\u{2e5c}'), ('\u{3008}', '\u{3009}'), ('\u{300a}', '\u{300b}'), ('\u{300c}',
|
||||
'\u{300d}'), ('\u{300e}', '\u{300f}'), ('\u{3010}', '\u{3011}'), ('\u{3014}', '\u{3015}'),
|
||||
('\u{3016}', '\u{3017}'), ('\u{3018}', '\u{3019}'), ('\u{301a}', '\u{301b}'), ('\u{fe59}',
|
||||
'\u{fe5a}'), ('\u{fe5b}', '\u{fe5c}'), ('\u{fe5d}', '\u{fe5e}'), ('\u{ff08}', '\u{ff09}'),
|
||||
('\u{ff3b}', '\u{ff3d}'), ('\u{ff5b}', '\u{ff5d}'), ('\u{ff5f}', '\u{ff60}'), ('\u{ff62}',
|
||||
'\u{ff63}')
|
||||
pub const bidi_pairs_table: &'static [(char, char, Option<char>)] = &[
|
||||
('\u{28}', '\u{29}', None), ('\u{5b}', '\u{5d}', None), ('\u{7b}', '\u{7d}', None), ('\u{f3a}',
|
||||
'\u{f3b}', None), ('\u{f3c}', '\u{f3d}', None), ('\u{169b}', '\u{169c}', None), ('\u{2045}',
|
||||
'\u{2046}', None), ('\u{207d}', '\u{207e}', None), ('\u{208d}', '\u{208e}', None), ('\u{2308}',
|
||||
'\u{2309}', None), ('\u{230a}', '\u{230b}', None), ('\u{2329}', '\u{232a}', Some('\u{3008}')),
|
||||
('\u{2768}', '\u{2769}', None), ('\u{276a}', '\u{276b}', None), ('\u{276c}', '\u{276d}', None),
|
||||
('\u{276e}', '\u{276f}', None), ('\u{2770}', '\u{2771}', None), ('\u{2772}', '\u{2773}', None),
|
||||
('\u{2774}', '\u{2775}', None), ('\u{27c5}', '\u{27c6}', None), ('\u{27e6}', '\u{27e7}', None),
|
||||
('\u{27e8}', '\u{27e9}', None), ('\u{27ea}', '\u{27eb}', None), ('\u{27ec}', '\u{27ed}', None),
|
||||
('\u{27ee}', '\u{27ef}', None), ('\u{2983}', '\u{2984}', None), ('\u{2985}', '\u{2986}', None),
|
||||
('\u{2987}', '\u{2988}', None), ('\u{2989}', '\u{298a}', None), ('\u{298b}', '\u{298c}', None),
|
||||
('\u{298d}', '\u{2990}', None), ('\u{298f}', '\u{298e}', None), ('\u{2991}', '\u{2992}', None),
|
||||
('\u{2993}', '\u{2994}', None), ('\u{2995}', '\u{2996}', None), ('\u{2997}', '\u{2998}', None),
|
||||
('\u{29d8}', '\u{29d9}', None), ('\u{29da}', '\u{29db}', None), ('\u{29fc}', '\u{29fd}', None),
|
||||
('\u{2e22}', '\u{2e23}', None), ('\u{2e24}', '\u{2e25}', None), ('\u{2e26}', '\u{2e27}', None),
|
||||
('\u{2e28}', '\u{2e29}', None), ('\u{2e55}', '\u{2e56}', None), ('\u{2e57}', '\u{2e58}', None),
|
||||
('\u{2e59}', '\u{2e5a}', None), ('\u{2e5b}', '\u{2e5c}', None), ('\u{3008}', '\u{3009}', None),
|
||||
('\u{300a}', '\u{300b}', None), ('\u{300c}', '\u{300d}', None), ('\u{300e}', '\u{300f}', None),
|
||||
('\u{3010}', '\u{3011}', None), ('\u{3014}', '\u{3015}', None), ('\u{3016}', '\u{3017}', None),
|
||||
('\u{3018}', '\u{3019}', None), ('\u{301a}', '\u{301b}', None), ('\u{fe59}', '\u{fe5a}', None),
|
||||
('\u{fe5b}', '\u{fe5c}', None), ('\u{fe5d}', '\u{fe5e}', None), ('\u{ff08}', '\u{ff09}', None),
|
||||
('\u{ff3b}', '\u{ff3d}', None), ('\u{ff5b}', '\u{ff5d}', None), ('\u{ff5f}', '\u{ff60}', None),
|
||||
('\u{ff62}', '\u{ff63}', None)
|
||||
];
|
||||
|
||||
|
||||
+7
-4
@@ -14,15 +14,18 @@ use crate::BidiClass;
|
||||
pub trait BidiDataSource {
|
||||
fn bidi_class(&self, c: char) -> BidiClass;
|
||||
/// If this character is a bracket according to BidiBrackets.txt,
|
||||
/// return its corresponding matched bracket, and whether or not it is an
|
||||
/// opening bracket
|
||||
/// return the corresponding *normalized* *opening bracket* of the pair,
|
||||
/// and whether or not it itself is an opening bracket.
|
||||
///
|
||||
/// This effectively buckets brackets into equivalence classes keyed on the
|
||||
/// normalized opening bracket.
|
||||
///
|
||||
/// The default implementation will pull in a small amount of hardcoded data,
|
||||
/// regardless of the `hardcoded-data` feature. This is in part for convenience
|
||||
/// (since this data is small and changes less often), and in part so that this method can be
|
||||
/// added without needing a breaking version bump.
|
||||
/// Override this method in your custom data source to prevent the use of hardcoded data.
|
||||
fn bidi_matched_bracket(&self, c: char) -> Option<(char, bool)> {
|
||||
crate::char_data::bidi_matched_bracket(c)
|
||||
fn bidi_matched_opening_bracket(&self, c: char) -> Option<(char, bool)> {
|
||||
crate::char_data::bidi_matched_opening_bracket(c)
|
||||
}
|
||||
}
|
||||
|
||||
+3
-3
@@ -371,7 +371,7 @@ fn identify_bracket_pairs<D: BidiDataSource>(
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some((matched, is_open)) = data_source.bidi_matched_bracket(ch) {
|
||||
if let Some((opening, is_open)) = data_source.bidi_matched_opening_bracket(ch) {
|
||||
if is_open {
|
||||
// If an opening paired bracket is found ...
|
||||
|
||||
@@ -381,7 +381,7 @@ fn identify_bracket_pairs<D: BidiDataSource>(
|
||||
break;
|
||||
}
|
||||
// ... push its Bidi_Paired_Bracket property value and its text position onto the stack
|
||||
stack.push((matched, i))
|
||||
stack.push((opening, i))
|
||||
} else {
|
||||
// If a closing paired bracket is found, do the following
|
||||
|
||||
@@ -392,7 +392,7 @@ fn identify_bracket_pairs<D: BidiDataSource>(
|
||||
for (stack_index, element) in stack.iter().enumerate().rev() {
|
||||
// Compare the closing paired bracket being inspected or its canonical
|
||||
// equivalent to the bracket in the current stack element.
|
||||
if element.0 == ch {
|
||||
if element.0 == opening {
|
||||
// If the values match, meaning the two characters form a bracket pair, then
|
||||
|
||||
// Append the text position in the current stack element together with the
|
||||
|
||||
@@ -138,7 +138,7 @@ fn gen_base_levels_for_base_tests(bitset: u8) -> Vec<Option<Level>> {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "69 test cases failed! (91638 passed)")]
|
||||
#[should_panic(expected = "65 test cases failed! (91642 passed)")]
|
||||
fn test_character_conformance() {
|
||||
let test_data = include_str!("data/BidiCharacterTest.txt");
|
||||
|
||||
|
||||
+18
-7
@@ -56,7 +56,7 @@ def open_data(name):
|
||||
def is_surrogate(n):
|
||||
return surrogate_codepoints[0] <= n <= surrogate_codepoints[1]
|
||||
|
||||
def load_bidi_pairs():
|
||||
def load_bidi_pairs(on_decomps):
|
||||
fetch_data(BIDI_BRACKETS_NAME)
|
||||
arr = []
|
||||
for line in fileinput.input(os.path.join(DATA_DIR, BIDI_BRACKETS_NAME)):
|
||||
@@ -69,12 +69,21 @@ def load_bidi_pairs():
|
||||
continue
|
||||
cp1 = int(data[0], 16);
|
||||
cp2 = int(data[1], 16);
|
||||
arr += [(cp1, cp2)]
|
||||
decomp = None
|
||||
if cp1 in on_decomps:
|
||||
decomp = int(on_decomps[cp1], 16)
|
||||
arr += [(cp1, cp2, decomp)]
|
||||
return arr
|
||||
|
||||
# Returns (group_categories, on_decomps),
|
||||
# where on_decomps is a map containing canonical equivalents for
|
||||
# ON characters only, and group_categories is the result of group_categories()
|
||||
# on bidi properties
|
||||
def load_unicode_data():
|
||||
fetch_data(UNICODE_DATA_NAME)
|
||||
udict = {};
|
||||
# Decompositions of all ON characters that have them
|
||||
on_decomps = {}
|
||||
|
||||
range_start = -1;
|
||||
for line in fileinput.input(os.path.join(DATA_DIR, UNICODE_DATA_NAME)):
|
||||
@@ -103,6 +112,8 @@ def load_unicode_data():
|
||||
|
||||
if bidi not in bidi_class:
|
||||
bidi_class[bidi] = []
|
||||
if len(decomp) != 0 and " " not in decomp:
|
||||
on_decomps[code] = decomp
|
||||
bidi_class[bidi].append(code)
|
||||
|
||||
# Default Bidi_Class for unassigned codepoints.
|
||||
@@ -124,7 +135,7 @@ def load_unicode_data():
|
||||
if not code in udict:
|
||||
bidi_class[default].append(code)
|
||||
|
||||
return group_categories(bidi_class)
|
||||
return (group_categories(bidi_class), on_decomps)
|
||||
|
||||
def group_categories(cats):
|
||||
cats_out = []
|
||||
@@ -223,8 +234,8 @@ use self::BidiClass::*;
|
||||
file_,
|
||||
"bidi_pairs_table",
|
||||
bidi_pairs_table,
|
||||
"&'static [(char, char)]",
|
||||
pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])),
|
||||
"&'static [(char, char, Option<char>)]",
|
||||
pfun=lambda x: "(%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), "Some(%s)" % escape_char(x[2]) if x[2] else "None"),
|
||||
)
|
||||
|
||||
def get_unicode_version():
|
||||
@@ -249,8 +260,8 @@ if __name__ == "__main__":
|
||||
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
|
||||
""" % unicode_version)
|
||||
|
||||
(bidi_categories, bidi_class_table) = load_unicode_data()
|
||||
bidi_pairs_table = load_bidi_pairs()
|
||||
((bidi_categories, bidi_class_table), on_decomps) = load_unicode_data()
|
||||
bidi_pairs_table = load_bidi_pairs(on_decomps)
|
||||
emit_bidi_module(file_, bidi_class_table, bidi_categories, bidi_pairs_table)
|
||||
|
||||
# Fetch test data files
|
||||
|
||||
Reference in New Issue
Block a user