mirror of
https://gitee.com/openharmony/third_party_rust_unicode-ident
synced 2024-11-22 23:19:43 +00:00
Replace ucd-generate dependency with ucd-parse
This commit is contained in:
parent
c384237743
commit
834e16aba1
2
.gitattributes
vendored
2
.gitattributes
vendored
@ -1,5 +1,5 @@
|
||||
generate/src/ucd.rs linguist-generated
|
||||
src/tables.rs linguist-generated
|
||||
tests/fst/xid_continue.fst linguist-generated
|
||||
tests/fst/xid_start.fst linguist-generated
|
||||
tests/tables/tables.rs linguist-generated
|
||||
tests/trie/trie.rs linguist-generated
|
||||
|
6
.github/workflows/ci.yml
vendored
6
.github/workflows/ci.yml
vendored
@ -20,15 +20,15 @@ jobs:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@stable
|
||||
- id: ucd-generate
|
||||
run: echo "version=$(grep 'ucd-generate [0-9]\+\.[0-9]\+\.[0-9]\+' generate/src/ucd.rs --only-matching)" >> $GITHUB_OUTPUT
|
||||
run: echo "version=$(grep 'ucd-generate [0-9]\+\.[0-9]\+\.[0-9]\+' tests/tables/tables.rs --only-matching)" >> $GITHUB_OUTPUT
|
||||
- run: cargo install ucd-generate
|
||||
- run: curl -LO https://www.unicode.org/Public/zipped/latest/UCD.zip
|
||||
- run: unzip UCD.zip -d UCD
|
||||
- run: ucd-generate property-bool UCD --include XID_Start,XID_Continue > generate/src/ucd.rs
|
||||
- run: ucd-generate property-bool UCD --include XID_Start,XID_Continue > tests/tables/tables.rs
|
||||
- run: ucd-generate property-bool UCD --include XID_Start,XID_Continue --fst-dir tests/fst
|
||||
- run: ucd-generate property-bool UCD --include XID_Start,XID_Continue --trie-set > tests/trie/trie.rs
|
||||
- run: cargo run --manifest-path generate/Cargo.toml
|
||||
- run: sed --in-place 's/ucd-generate [0-9]\+\.[0-9]\+\.[0-9]\+/${{steps.ucd-generate.outputs.version}}/' generate/src/ucd.rs tests/trie/trie.rs
|
||||
- run: sed --in-place 's/ucd-generate [0-9]\+\.[0-9]\+\.[0-9]\+/${{steps.ucd-generate.outputs.version}}/' tests/tables/tables.rs tests/trie/trie.rs
|
||||
- run: git diff --exit-code
|
||||
|
||||
test:
|
||||
|
@ -4,3 +4,7 @@ version = "0.0.0"
|
||||
authors = ["David Tolnay <dtolnay@gmail.com>"]
|
||||
edition = "2018"
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1"
|
||||
ucd-parse = "0.1.10"
|
||||
|
@ -3,7 +3,7 @@
|
||||
// $ cargo install ucd-generate
|
||||
// $ curl -LO https://www.unicode.org/Public/zipped/15.0.0/UCD.zip
|
||||
// $ unzip UCD.zip -d UCD
|
||||
// $ ucd-generate property-bool UCD --include XID_Start,XID_Continue > generate/src/ucd.rs
|
||||
// $ ucd-generate property-bool UCD --include XID_Start,XID_Continue > tests/tables/tables.rs
|
||||
// $ ucd-generate property-bool UCD --include XID_Start,XID_Continue --fst-dir tests/fst
|
||||
// $ ucd-generate property-bool UCD --include XID_Start,XID_Continue --trie-set > tests/trie/trie.rs
|
||||
// $ cargo run --manifest-path generate/Cargo.toml
|
||||
@ -12,50 +12,43 @@
|
||||
clippy::cast_lossless,
|
||||
clippy::cast_possible_truncation, // https://github.com/rust-lang/rust-clippy/issues/9613
|
||||
clippy::match_wild_err_arm,
|
||||
clippy::module_name_repetitions,
|
||||
clippy::too_many_lines,
|
||||
clippy::uninlined_format_args
|
||||
)]
|
||||
|
||||
#[rustfmt::skip]
|
||||
#[allow(dead_code, clippy::all, clippy::pedantic)]
|
||||
mod ucd;
|
||||
|
||||
mod output;
|
||||
mod parse;
|
||||
mod write;
|
||||
|
||||
use std::cmp::Ordering;
|
||||
use crate::parse::parse_xid_properties;
|
||||
use anyhow::Result;
|
||||
use std::collections::{BTreeMap as Map, VecDeque};
|
||||
use std::convert::TryFrom;
|
||||
use std::fs;
|
||||
use std::io;
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
use std::process;
|
||||
|
||||
const CHUNK: usize = 64;
|
||||
const PATH: &str = "../src/tables.rs";
|
||||
const UCD: &str = "UCD";
|
||||
const TABLES: &str = "src/tables.rs";
|
||||
|
||||
fn is_xid_start(ch: char) -> bool {
|
||||
search(ch, ucd::XID_START)
|
||||
}
|
||||
fn main() -> Result<()> {
|
||||
let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
|
||||
let unicode_ident_dir = manifest_dir.parent().unwrap();
|
||||
let ucd_dir = unicode_ident_dir.join(UCD);
|
||||
if !ucd_dir.exists() {
|
||||
writeln!(
|
||||
io::stderr(),
|
||||
"Not found: {}\nDownload from https://www.unicode.org/Public/zipped/15.0.0/UCD.zip and unzip.",
|
||||
ucd_dir.display(),
|
||||
)?;
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
fn is_xid_continue(ch: char) -> bool {
|
||||
search(ch, ucd::XID_CONTINUE)
|
||||
}
|
||||
let properties = parse_xid_properties(&ucd_dir)?;
|
||||
|
||||
fn search(ch: char, table: &[(u32, u32)]) -> bool {
|
||||
table
|
||||
.binary_search_by(|&(lo, hi)| {
|
||||
if lo > ch as u32 {
|
||||
Ordering::Greater
|
||||
} else if hi < ch as u32 {
|
||||
Ordering::Less
|
||||
} else {
|
||||
Ordering::Equal
|
||||
}
|
||||
})
|
||||
.is_ok()
|
||||
}
|
||||
|
||||
fn main() -> io::Result<()> {
|
||||
let mut chunkmap = Map::<[u8; CHUNK], u8>::new();
|
||||
let mut dense = Vec::<[u8; CHUNK]>::new();
|
||||
let mut new_chunk = |chunk| {
|
||||
@ -87,8 +80,8 @@ fn main() -> io::Result<()> {
|
||||
let code = (i * CHUNK as u32 + j) * 8 + k;
|
||||
if code >= 0x80 {
|
||||
if let Some(ch) = char::from_u32(code) {
|
||||
*this_start |= (is_xid_start(ch) as u8) << k;
|
||||
*this_continue |= (is_xid_continue(ch) as u8) << k;
|
||||
*this_start |= (properties.is_xid_start(ch) as u8) << k;
|
||||
*this_continue |= (properties.is_xid_continue(ch) as u8) << k;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -163,7 +156,8 @@ fn main() -> io::Result<()> {
|
||||
*index = dense_to_halfdense[index];
|
||||
}
|
||||
|
||||
let out = write::output(&index_start, &index_continue, &halfdense);
|
||||
let path = Path::new(env!("CARGO_MANIFEST_DIR")).join(PATH);
|
||||
fs::write(path, out)
|
||||
let out = write::output(&properties, &index_start, &index_continue, &halfdense);
|
||||
let path = unicode_ident_dir.join(TABLES);
|
||||
fs::write(path, out)?;
|
||||
Ok(())
|
||||
}
|
||||
|
40
generate/src/parse.rs
Normal file
40
generate/src/parse.rs
Normal file
@ -0,0 +1,40 @@
|
||||
use anyhow::Result;
|
||||
use std::collections::BTreeSet as Set;
|
||||
use std::path::Path;
|
||||
use ucd_parse::CoreProperty;
|
||||
|
||||
/// Code point sets for the two identifier properties handled by this
/// generator. `parse_xid_properties` creates both sets empty and fills them
/// from the parsed property records.
pub struct Properties {
|
||||
/// Numeric values of the code points recorded under "XID_Start".
xid_start: Set<u32>,
|
||||
/// Numeric values of the code points recorded under "XID_Continue".
xid_continue: Set<u32>,
|
||||
}
|
||||
|
||||
// Membership queries over the two code point sets held by `Properties`.
impl Properties {
|
||||
/// Returns `true` when the numeric value of `ch` is present in the
/// `xid_start` set.
pub fn is_xid_start(&self, ch: char) -> bool {
|
||||
self.xid_start.contains(&(ch as u32))
|
||||
}
|
||||
|
||||
/// Returns `true` when the numeric value of `ch` is present in the
/// `xid_continue` set.
pub fn is_xid_continue(&self, ch: char) -> bool {
|
||||
self.xid_continue.contains(&(ch as u32))
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a `Properties` value from the Unicode data found under `ucd_dir`.
///
/// Asks `ucd_parse::parse` for every `CoreProperty` record in `ucd_dir`, keeps
/// the records whose property name is "XID_Start" or "XID_Continue", and
/// inserts the value of each code point they cover into the matching set.
/// Records for any other property are ignored.
///
/// # Errors
///
/// Propagates the error returned by `ucd_parse::parse`.
pub fn parse_xid_properties(ucd_dir: &Path) -> Result<Properties> {
|
||||
// Start with both sets empty; they are filled in the loop below.
let mut properties = Properties {
|
||||
xid_start: Set::new(),
|
||||
xid_continue: Set::new(),
|
||||
};
|
||||
|
||||
let prop_list: Vec<CoreProperty> = ucd_parse::parse(ucd_dir)?;
|
||||
for core in prop_list {
|
||||
// Select the destination set by property name; skip unrelated records.
let set = match core.property.as_str() {
|
||||
"XID_Start" => &mut properties.xid_start,
|
||||
"XID_Continue" => &mut properties.xid_continue,
|
||||
_ => continue,
|
||||
};
|
||||
// A record's `codepoints` may yield more than one code point; store each value.
for codepoint in core.codepoints {
|
||||
set.insert(codepoint.value());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(properties)
|
||||
}
|
@ -1,5 +1,6 @@
|
||||
use crate::output::Output;
|
||||
use crate::{is_xid_continue, is_xid_start, CHUNK};
|
||||
use crate::parse::Properties;
|
||||
use crate::CHUNK;
|
||||
|
||||
const HEAD: &str = "\
|
||||
// \x40generated by ../generate. To regenerate, run the following in the repo root:
|
||||
@ -17,7 +18,12 @@ pub(crate) struct Align8<T>(pub(crate) T);
|
||||
pub(crate) struct Align64<T>(pub(crate) T);
|
||||
";
|
||||
|
||||
pub fn output(index_start: &[u8], index_continue: &[u8], halfdense: &[u8]) -> Output {
|
||||
pub fn output(
|
||||
properties: &Properties,
|
||||
index_start: &[u8],
|
||||
index_continue: &[u8],
|
||||
halfdense: &[u8],
|
||||
) -> Output {
|
||||
let mut out = Output::new();
|
||||
writeln!(out, "{}", HEAD);
|
||||
|
||||
@ -29,7 +35,8 @@ pub fn output(index_start: &[u8], index_continue: &[u8], halfdense: &[u8]) -> Ou
|
||||
write!(out, " ");
|
||||
for j in 0..32 {
|
||||
let ch = (i * 32 + j) as char;
|
||||
write!(out, " {},", if is_xid_start(ch) { 'T' } else { 'F' });
|
||||
let is_xid_start = properties.is_xid_start(ch);
|
||||
write!(out, " {},", if is_xid_start { 'T' } else { 'F' });
|
||||
}
|
||||
writeln!(out);
|
||||
}
|
||||
@ -44,7 +51,8 @@ pub fn output(index_start: &[u8], index_continue: &[u8], halfdense: &[u8]) -> Ou
|
||||
write!(out, " ");
|
||||
for j in 0..32 {
|
||||
let ch = (i * 32 + j) as char;
|
||||
write!(out, " {},", if is_xid_continue(ch) { 'T' } else { 'F' });
|
||||
let is_xid_continue = properties.is_xid_continue(ch);
|
||||
write!(out, " {},", if is_xid_continue { 'T' } else { 'F' });
|
||||
}
|
||||
writeln!(out);
|
||||
}
|
||||
|
@ -19,14 +19,13 @@ fn test_size() {
|
||||
#[test]
|
||||
fn test_xid_size() {
|
||||
#[deny(dead_code)]
|
||||
#[allow(clippy::redundant_static_lifetimes)]
|
||||
#[path = "../generate/src/ucd.rs"]
|
||||
mod ucd;
|
||||
#[path = "tables/mod.rs"]
|
||||
mod tables;
|
||||
|
||||
let size = size_of_val(ucd::XID_START) + size_of_val(ucd::XID_CONTINUE);
|
||||
let size = size_of_val(tables::XID_START) + size_of_val(tables::XID_CONTINUE);
|
||||
assert_eq!(11528, size);
|
||||
|
||||
let _ = ucd::BY_NAME;
|
||||
let _ = tables::BY_NAME;
|
||||
}
|
||||
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
|
7
tests/tables/mod.rs
Normal file
7
tests/tables/mod.rs
Normal file
@ -0,0 +1,7 @@
|
||||
#![allow(clippy::module_inception)]
|
||||
|
||||
#[allow(clippy::redundant_static_lifetimes)]
|
||||
#[rustfmt::skip]
|
||||
mod tables;
|
||||
|
||||
pub(crate) use self::tables::*;
|
Loading…
Reference in New Issue
Block a user