diff --git a/.gitattributes b/.gitattributes index 696f4e6..735b3ee 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,5 +1,5 @@ -generate/src/ucd.rs linguist-generated src/tables.rs linguist-generated tests/fst/xid_continue.fst linguist-generated tests/fst/xid_start.fst linguist-generated +tests/tables/tables.rs linguist-generated tests/trie/trie.rs linguist-generated diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 55dc860..9b2507c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,15 +20,15 @@ jobs: - uses: actions/checkout@v3 - uses: dtolnay/rust-toolchain@stable - id: ucd-generate - run: echo "version=$(grep 'ucd-generate [0-9]\+\.[0-9]\+\.[0-9]\+' generate/src/ucd.rs --only-matching)" >> $GITHUB_OUTPUT + run: echo "version=$(grep 'ucd-generate [0-9]\+\.[0-9]\+\.[0-9]\+' tests/tables/tables.rs --only-matching)" >> $GITHUB_OUTPUT - run: cargo install ucd-generate - run: curl -LO https://www.unicode.org/Public/zipped/latest/UCD.zip - run: unzip UCD.zip -d UCD - - run: ucd-generate property-bool UCD --include XID_Start,XID_Continue > generate/src/ucd.rs + - run: ucd-generate property-bool UCD --include XID_Start,XID_Continue > tests/tables/tables.rs - run: ucd-generate property-bool UCD --include XID_Start,XID_Continue --fst-dir tests/fst - run: ucd-generate property-bool UCD --include XID_Start,XID_Continue --trie-set > tests/trie/trie.rs - run: cargo run --manifest-path generate/Cargo.toml - - run: sed --in-place 's/ucd-generate [0-9]\+\.[0-9]\+\.[0-9]\+/${{steps.ucd-generate.outputs.version}}/' generate/src/ucd.rs tests/trie/trie.rs + - run: sed --in-place 's/ucd-generate [0-9]\+\.[0-9]\+\.[0-9]\+/${{steps.ucd-generate.outputs.version}}/' tests/tables/tables.rs tests/trie/trie.rs - run: git diff --exit-code test: diff --git a/generate/Cargo.toml b/generate/Cargo.toml index b58b361..f7e8455 100644 --- a/generate/Cargo.toml +++ b/generate/Cargo.toml @@ -4,3 +4,7 @@ version = "0.0.0" authors = ["David Tolnay "] 
edition = "2018" publish = false + +[dependencies] +anyhow = "1" +ucd-parse = "0.1.10" diff --git a/generate/src/main.rs b/generate/src/main.rs index e4b7f21..f1db656 100644 --- a/generate/src/main.rs +++ b/generate/src/main.rs @@ -3,7 +3,7 @@ // $ cargo install ucd-generate // $ curl -LO https://www.unicode.org/Public/zipped/15.0.0/UCD.zip // $ unzip UCD.zip -d UCD -// $ ucd-generate property-bool UCD --include XID_Start,XID_Continue > generate/src/ucd.rs +// $ ucd-generate property-bool UCD --include XID_Start,XID_Continue > tests/tables/tables.rs // $ ucd-generate property-bool UCD --include XID_Start,XID_Continue --fst-dir tests/fst // $ ucd-generate property-bool UCD --include XID_Start,XID_Continue --trie-set > tests/trie/trie.rs // $ cargo run --manifest-path generate/Cargo.toml @@ -12,50 +12,43 @@ clippy::cast_lossless, clippy::cast_possible_truncation, // https://github.com/rust-lang/rust-clippy/issues/9613 clippy::match_wild_err_arm, + clippy::module_name_repetitions, clippy::too_many_lines, clippy::uninlined_format_args )] -#[rustfmt::skip] -#[allow(dead_code, clippy::all, clippy::pedantic)] -mod ucd; - mod output; +mod parse; mod write; -use std::cmp::Ordering; +use crate::parse::parse_xid_properties; +use anyhow::Result; use std::collections::{BTreeMap as Map, VecDeque}; use std::convert::TryFrom; use std::fs; -use std::io; +use std::io::{self, Write}; use std::path::Path; +use std::process; const CHUNK: usize = 64; -const PATH: &str = "../src/tables.rs"; +const UCD: &str = "UCD"; +const TABLES: &str = "src/tables.rs"; -fn is_xid_start(ch: char) -> bool { - search(ch, ucd::XID_START) -} +fn main() -> Result<()> { + let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); + let unicode_ident_dir = manifest_dir.parent().unwrap(); + let ucd_dir = unicode_ident_dir.join(UCD); + if !ucd_dir.exists() { + writeln!( + io::stderr(), + "Not found: {}\nDownload from https://www.unicode.org/Public/zipped/15.0.0/UCD.zip and unzip.", + ucd_dir.display(), + )?; + 
process::exit(1); + } -fn is_xid_continue(ch: char) -> bool { - search(ch, ucd::XID_CONTINUE) -} + let properties = parse_xid_properties(&ucd_dir)?; -fn search(ch: char, table: &[(u32, u32)]) -> bool { - table - .binary_search_by(|&(lo, hi)| { - if lo > ch as u32 { - Ordering::Greater - } else if hi < ch as u32 { - Ordering::Less - } else { - Ordering::Equal - } - }) - .is_ok() -} - -fn main() -> io::Result<()> { let mut chunkmap = Map::<[u8; CHUNK], u8>::new(); let mut dense = Vec::<[u8; CHUNK]>::new(); let mut new_chunk = |chunk| { @@ -87,8 +80,8 @@ fn main() -> io::Result<()> { let code = (i * CHUNK as u32 + j) * 8 + k; if code >= 0x80 { if let Some(ch) = char::from_u32(code) { - *this_start |= (is_xid_start(ch) as u8) << k; - *this_continue |= (is_xid_continue(ch) as u8) << k; + *this_start |= (properties.is_xid_start(ch) as u8) << k; + *this_continue |= (properties.is_xid_continue(ch) as u8) << k; } } } @@ -163,7 +156,8 @@ fn main() -> io::Result<()> { *index = dense_to_halfdense[index]; } - let out = write::output(&index_start, &index_continue, &halfdense); - let path = Path::new(env!("CARGO_MANIFEST_DIR")).join(PATH); - fs::write(path, out) + let out = write::output(&properties, &index_start, &index_continue, &halfdense); + let path = unicode_ident_dir.join(TABLES); + fs::write(path, out)?; + Ok(()) } diff --git a/generate/src/parse.rs b/generate/src/parse.rs new file mode 100644 index 0000000..39109aa --- /dev/null +++ b/generate/src/parse.rs @@ -0,0 +1,40 @@ +use anyhow::Result; +use std::collections::BTreeSet as Set; +use std::path::Path; +use ucd_parse::CoreProperty; + +pub struct Properties { + xid_start: Set<u32>, + xid_continue: Set<u32>, +} + +impl Properties { + pub fn is_xid_start(&self, ch: char) -> bool { + self.xid_start.contains(&(ch as u32)) + } + + pub fn is_xid_continue(&self, ch: char) -> bool { + self.xid_continue.contains(&(ch as u32)) + } +} + +pub fn parse_xid_properties(ucd_dir: &Path) -> Result<Properties> { + let mut properties = Properties { + xid_start: 
Set::new(), + xid_continue: Set::new(), + }; + + let prop_list: Vec<CoreProperty> = ucd_parse::parse(ucd_dir)?; + for core in prop_list { + let set = match core.property.as_str() { + "XID_Start" => &mut properties.xid_start, + "XID_Continue" => &mut properties.xid_continue, + _ => continue, + }; + for codepoint in core.codepoints { + set.insert(codepoint.value()); + } + } + + Ok(properties) +} diff --git a/generate/src/write.rs b/generate/src/write.rs index b52e8e1..89df1e8 100644 --- a/generate/src/write.rs +++ b/generate/src/write.rs @@ -1,5 +1,6 @@ use crate::output::Output; -use crate::{is_xid_continue, is_xid_start, CHUNK}; +use crate::parse::Properties; +use crate::CHUNK; const HEAD: &str = "\ // \x40generated by ../generate. To regenerate, run the following in the repo root: @@ -17,7 +18,12 @@ pub(crate) struct Align8<T>(pub(crate) T); pub(crate) struct Align64<T>(pub(crate) T); "; -pub fn output(index_start: &[u8], index_continue: &[u8], halfdense: &[u8]) -> Output { +pub fn output( + properties: &Properties, + index_start: &[u8], + index_continue: &[u8], + halfdense: &[u8], +) -> Output { let mut out = Output::new(); writeln!(out, "{}", HEAD); @@ -29,7 +35,8 @@ pub fn output(index_start: &[u8], index_continue: &[u8], halfdense: &[u8]) -> Ou write!(out, " "); for j in 0..32 { let ch = (i * 32 + j) as char; - write!(out, " {},", if is_xid_start(ch) { 'T' } else { 'F' }); + let is_xid_start = properties.is_xid_start(ch); + write!(out, " {},", if is_xid_start { 'T' } else { 'F' }); } writeln!(out); } @@ -44,7 +51,8 @@ pub fn output(index_start: &[u8], index_continue: &[u8], halfdense: &[u8]) -> Ou write!(out, " "); for j in 0..32 { let ch = (i * 32 + j) as char; - write!(out, " {},", if is_xid_continue(ch) { 'T' } else { 'F' }); + let is_xid_continue = properties.is_xid_continue(ch); + write!(out, " {},", if is_xid_continue { 'T' } else { 'F' }); } writeln!(out); } diff --git a/tests/static_size.rs b/tests/static_size.rs index df65f45..24effb4 100644 --- a/tests/static_size.rs 
+++ b/tests/static_size.rs @@ -19,14 +19,13 @@ fn test_size() { #[test] fn test_xid_size() { #[deny(dead_code)] - #[allow(clippy::redundant_static_lifetimes)] - #[path = "../generate/src/ucd.rs"] - mod ucd; + #[path = "tables/mod.rs"] + mod tables; - let size = size_of_val(ucd::XID_START) + size_of_val(ucd::XID_CONTINUE); + let size = size_of_val(tables::XID_START) + size_of_val(tables::XID_CONTINUE); assert_eq!(11528, size); - let _ = ucd::BY_NAME; + let _ = tables::BY_NAME; } #[cfg(target_pointer_width = "64")] diff --git a/tests/tables/mod.rs b/tests/tables/mod.rs new file mode 100644 index 0000000..72bfd8b --- /dev/null +++ b/tests/tables/mod.rs @@ -0,0 +1,7 @@ +#![allow(clippy::module_inception)] + +#[allow(clippy::redundant_static_lifetimes)] +#[rustfmt::skip] +mod tables; + +pub(crate) use self::tables::*; diff --git a/generate/src/ucd.rs b/tests/tables/tables.rs similarity index 100% rename from generate/src/ucd.rs rename to tests/tables/tables.rs