Rewrite printable.py codegen to emit C++

This commit is contained in:
Victor Zverovich 2021-08-22 09:10:10 -07:00
parent 6cf90d7cee
commit 7df2c82a8a

View File

@ -1,8 +1,11 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# This script is based on
# https://github.com/rust-lang/rust/blob/master/library/core/src/unicode/printable.py
# distributed under https://github.com/rust-lang/rust/blob/master/LICENSE-MIT.
# This script uses the following Unicode tables: # This script uses the following Unicode tables:
# - UnicodeData.txt # - UnicodeData.txt
# Script license: https://github.com/rust-lang/rust/blob/master/LICENSE-MIT
from collections import namedtuple from collections import namedtuple
@ -112,23 +115,20 @@ def compress_normal(normal):
return compressed return compressed
def print_singletons(uppers, lowers, uppersname, lowersname): def print_singletons(uppers, lowers, uppersname, lowersname):
print("#[rustfmt::skip]") print(" static constexpr singleton {}[] = {{".format(uppersname))
print("const {}: &[(u8, u8)] = &[".format(uppersname))
for u, c in uppers: for u, c in uppers:
print(" ({:#04x}, {}),".format(u, c)) print(" {{{:#04x}, {}}},".format(u, c))
print("];") print(" };")
print("#[rustfmt::skip]") print(" static constexpr unsigned char {}[] = {{".format(lowersname))
print("const {}: &[u8] = &[".format(lowersname))
for i in range(0, len(lowers), 8): for i in range(0, len(lowers), 8):
print(" {}".format(" ".join("{:#04x},".format(l) for l in lowers[i:i+8]))) print(" {}".format(" ".join("{:#04x},".format(l) for l in lowers[i:i+8])))
print("];") print(" };")
def print_normal(normal, normalname): def print_normal(normal, normalname):
print("#[rustfmt::skip]") print(" static constexpr unsigned char {}[] = {{".format(normalname))
print("const {}: &[u8] = &[".format(normalname))
for v in normal: for v in normal:
print(" {}".format(" ".join("{:#04x},".format(i) for i in v))) print(" {}".format(" ".join("{:#04x},".format(i) for i in v)))
print("];") print(" };")
def main(): def main():
file = get_file("https://www.unicode.org/Public/UNIDATA/UnicodeData.txt") file = get_file("https://www.unicode.org/Public/UNIDATA/UnicodeData.txt")
@ -171,67 +171,66 @@ def main():
normal1 = compress_normal(normal1) normal1 = compress_normal(normal1)
print("""\ print("""\
// NOTE: The following code was generated by "src/libcore/unicode/printable.py", struct singleton {
// do not edit directly! unsigned char upper;
unsigned char lowercount;
};
fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], normal: &[u8]) -> bool { inline auto check(uint16_t x, const singleton* singletonuppers,
let xupper = (x >> 8) as u8; size_t singletonuppers_size,
let mut lowerstart = 0; const unsigned char* singletonlowers,
for &(upper, lowercount) in singletonuppers { const unsigned char* normal, size_t normal_size) -> bool {
let lowerend = lowerstart + lowercount as usize; auto xupper = x >> 8;
if xupper == upper { auto lowerstart = 0;
for &lower in &singletonlowers[lowerstart..lowerend] { for (size_t i = 0; i < singletonuppers_size; ++i) {
if lower == x as u8 { auto su = singletonuppers[i];
return false; auto lowerend = lowerstart + su.lowercount;
} if (xupper < su.upper) break;
} if (xupper == su.upper) {
} else if xupper < upper { for (auto j = lowerstart; j < lowerend; ++j) {
break; if (singletonlowers[j] == (x & 0xff)) return false;
} }
lowerstart = lowerend;
} }
lowerstart = lowerend;
}
let mut x = x as i32; auto xsigned = static_cast<int>(x);
let mut normal = normal.iter().cloned(); auto current = true;
let mut current = true; for (size_t i = 0; i < normal_size; ++i) {
while let Some(v) = normal.next() { auto v = static_cast<int>(normal[i]);
let len = if v & 0x80 != 0 { auto len = (v & 0x80) != 0 ? (v & 0x7f) << 8 | normal[++i] : v;
((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32 xsigned -= len;
} else { if (xsigned < 0) break;
v as i32 current = !current;
}; }
x -= len; return current;
if x < 0 {
break;
}
current = !current;
}
current
} }
pub(crate) fn is_printable(x: char) -> bool { inline auto is_printable(uint32_t cp) -> bool {\
let x = x as u32; """)
let lower = x as u16; print_singletons(singletons0u, singletons0l, 'singletons0u', 'singletons0l')
if x < 0x10000 { print_singletons(singletons1u, singletons1l, 'singletons1u', 'singletons1l')
check(lower, SINGLETONS0U, SINGLETONS0L, NORMAL0) print_normal(normal0, 'normal0')
} else if x < 0x20000 { print_normal(normal1, 'normal1')
check(lower, SINGLETONS1U, SINGLETONS1L, NORMAL1) print("""\
} else {\ auto lower = static_cast<uint16_t>(cp);
if (cp < 0x10000) {
return check(lower, singletons0u,
sizeof(singletons0u) / sizeof(*singletons0u), singletons0l,
normal0, sizeof(normal0));
}
if (cp < 0x20000) {
return check(lower, singletons1u,
sizeof(singletons1u) / sizeof(*singletons1u), singletons1l,
normal1, sizeof(normal1));
}\
""") """)
for a, b in extra: for a, b in extra:
print(" if 0x{:x} <= x && x < 0x{:x} {{".format(a, a + b)) print(" if (0x{:x} <= cp && cp < 0x{:x}) return false;".format(a, a + b))
print(" return false;")
print(" }")
print("""\ print("""\
true return true;
}
}\ }\
""") """)
print()
print_singletons(singletons0u, singletons0l, 'SINGLETONS0U', 'SINGLETONS0L')
print_singletons(singletons1u, singletons1l, 'SINGLETONS1U', 'SINGLETONS1L')
print_normal(normal0, 'NORMAL0')
print_normal(normal1, 'NORMAL1')
if __name__ == '__main__': if __name__ == '__main__':
main() main()