mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-09 11:25:00 +00:00
Bug 1669162 - Update mapped_hyph to 0.4.2 so that .dic parse errors are non-fatal. r=heycam
Differential Revision: https://phabricator.services.mozilla.com/D92435
This commit is contained in:
parent
6259946a20
commit
1c54e5ffd7
@ -40,7 +40,7 @@ rev = "21c26326f5f45f415c49eac4ba5bc41a2f961321"
|
||||
[source."https://github.com/jfkthame/mapped_hyph.git"]
|
||||
git = "https://github.com/jfkthame/mapped_hyph.git"
|
||||
replace-with = "vendored-sources"
|
||||
tag = "v0.4.0"
|
||||
tag = "v0.4.2"
|
||||
|
||||
[source."https://github.com/hsivonen/packed_simd"]
|
||||
git = "https://github.com/hsivonen/packed_simd"
|
||||
|
6
Cargo.lock
generated
6
Cargo.lock
generated
@ -2890,10 +2890,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mapped_hyph"
|
||||
version = "0.4.0"
|
||||
source = "git+https://github.com/jfkthame/mapped_hyph.git?tag=v0.4.0#c7737af5ebe9b404c6b7eed6006785ea41337ca1"
|
||||
version = "0.4.2"
|
||||
source = "git+https://github.com/jfkthame/mapped_hyph.git?tag=v0.4.2#d0d2e862cea33c262d8173a2dddbe0b50fdd6775"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"env_logger 0.7.1",
|
||||
"log",
|
||||
"memmap",
|
||||
]
|
||||
|
||||
|
0
third_party/rust/ahash/smhasher/clone_smhasher.sh
vendored
Normal file → Executable file
0
third_party/rust/ahash/smhasher/clone_smhasher.sh
vendored
Normal file → Executable file
@ -1 +1 @@
|
||||
{"files":{".travis.yml":"4d1af7257c9619f7ae66fc271ba2c1be5f063640ae8ceaa235c8c8aaf32f44ea","COPYRIGHT":"4df931055b82b96e13ad475c4cee3de5afa69a54a4c611c9d7dc6252d858d9c8","Cargo.toml":"16e0ee523f5c3b1bc1f85771d2102b8bb1cffa3bde3631293cfd441387e9f881","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"4ad721b5b6a3d39ca3e2202f403d897c4a1d42896486dd58963a81f8e64ef61d","README.md":"2c91137faee83f0805a9b9123e105670bf60c2fe45ce6536fb92df7ef85017a5","benches/bench.rs":"ed7143e66ecf8bfb12c87d1f9344157d97696b8194de9132d061129bc80d8d52","cbindgen.toml":"452e79bea00e2a0c16a03ac04e454a0c5955becf2d0306ccce7d1c13d3bcc51a","doc/mapped_hyph_format.md":"2f2487cf536fe4b03db6e4b384be06744ec30b3f299519492288306a93127fbb","hyph_en_US.hyf":"6262b4c5118fe277ab4add8689d9524ca72097564652baec67a8fcd5029ec9b0","src/bin/hyf_compile.rs":"04f8d4d9e47cbc1793d3b23a6cf840b37f3989d3817846ea0e45be3a08cafb29","src/builder.rs":"b6200c19ea24c1b3defbf3b6b4ded350b4d45e170a7b8798d9063c47cfd45cc3","src/ffi.rs":"652ad1b1f450af6afa4b04e3e3e73da1ada294d1c82eda117db87c9e0b9b73ac","src/lib.rs":"d9fc9daad71cda70570ed61538001d46ac204a62a72d4d4faa43be70c62d6faa","src/main.rs":"666befeb39cb1a7dfb66c6b9218d5f7b6c4ed09dbbbc8cfff6b749a33a99ebcf","tests/base.hyf":"d8bf57c6280cfa1d357d3fdba156ce64afbd9df58e28eeb084dfe3f80972b73f","tests/base.hyph":"a3f1fab24c101701fdf21e8359685d80611ab970304e2bd89ef024768b3700c8","tests/base.word":"1136c9a421b242262661b9a65723f87a5ecf77ae38eabcea057832d036d567fd","tests/compound.hyf":"929c1ba6676e4c43bc649d0abf4275ea9e8b02bffaa5acdf704a710813a7a13c","tests/compound4.hyf":"2093287bc41ee30ff9bdbf278f1f8209cb1d1a78236b46e9060af2a881572b8e","tests/compound5.hyf":"0942a5dfbb8d0ef3a937ab9da0418abb41300357cde49f4c477a59a11b2cb6bd","tests/compound6.hyf":"ebad958c2692a5b439b31e324020ed27c42dc05bd5b8c6a6dea4669e6ccf76b4","tests/hyphen.hyf":"92b8a5c86aac6a0b9f0eb7330a057065d6985fd047e851cae47039995c682d4d","tests/lhmin.hyf":"23c886704fafee7d9c54b2478029cf69a5fa946c2f2442bd86697bca5933c88d","tests/num.hyf":"4834fabe78b5c81815434d4562ce3322541649e1ea1edc555a498574bc8b237e","tests/rhmin.hyf":"239cb3d4d7f904abb43b57241e12cc1396e636220c3806e64666aca7ca46cc42","tests/settings2.hyf":"9fc4855e0b952a3593db1efef080b93ce7f1c6fe6798db0440e2bf0cc986ffa2","tests/settings3.hyf":"867db207b485a06e7d60ad10735c9111f10516ee3a5afd6306c683ace3454491","tests/test.rs":"5c81ae59b9384b70d9461407999dac1fde9214398876c4433fbbde9571cc1d94"},"package":null}
|
||||
{"files":{".travis.yml":"4d1af7257c9619f7ae66fc271ba2c1be5f063640ae8ceaa235c8c8aaf32f44ea","COPYRIGHT":"4df931055b82b96e13ad475c4cee3de5afa69a54a4c611c9d7dc6252d858d9c8","Cargo.toml":"1ae148acc03da96f02dd7ed1b0c5757056df59f47af1cdb0ec261a1ca859637e","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"4ad721b5b6a3d39ca3e2202f403d897c4a1d42896486dd58963a81f8e64ef61d","README.md":"2c91137faee83f0805a9b9123e105670bf60c2fe45ce6536fb92df7ef85017a5","benches/bench.rs":"ed7143e66ecf8bfb12c87d1f9344157d97696b8194de9132d061129bc80d8d52","cbindgen.toml":"452e79bea00e2a0c16a03ac04e454a0c5955becf2d0306ccce7d1c13d3bcc51a","doc/mapped_hyph_format.md":"2f2487cf536fe4b03db6e4b384be06744ec30b3f299519492288306a93127fbb","hyph_en_US.hyf":"6262b4c5118fe277ab4add8689d9524ca72097564652baec67a8fcd5029ec9b0","src/bin/hyf_compile.rs":"69a1c9c9124d4c4d6e8bb2fe3946547a1395723b247f7f7234e1b60941f202bd","src/builder.rs":"4169a89fb3a5025b06edeb8a6435a18814d58799d15861c3639a2ed9c63c628b","src/ffi.rs":"652ad1b1f450af6afa4b04e3e3e73da1ada294d1c82eda117db87c9e0b9b73ac","src/lib.rs":"bfee464e22d4e13057a9eebe968847195c528b73c229047ef67dfd084c45f6b7","src/main.rs":"666befeb39cb1a7dfb66c6b9218d5f7b6c4ed09dbbbc8cfff6b749a33a99ebcf","tests/base.hyf":"d8bf57c6280cfa1d357d3fdba156ce64afbd9df58e28eeb084dfe3f80972b73f","tests/base.hyph":"a3f1fab24c101701fdf21e8359685d80611ab970304e2bd89ef024768b3700c8","tests/base.word":"1136c9a421b242262661b9a65723f87a5ecf77ae38eabcea057832d036d567fd","tests/compound.hyf":"929c1ba6676e4c43bc649d0abf4275ea9e8b02bffaa5acdf704a710813a7a13c","tests/compound4.hyf":"2093287bc41ee30ff9bdbf278f1f8209cb1d1a78236b46e9060af2a881572b8e","tests/compound5.hyf":"0942a5dfbb8d0ef3a937ab9da0418abb41300357cde49f4c477a59a11b2cb6bd","tests/compound6.hyf":"ebad958c2692a5b439b31e324020ed27c42dc05bd5b8c6a6dea4669e6ccf76b4","tests/hyphen.hyf":"92b8a5c86aac6a0b9f0eb7330a057065d6985fd047e851cae47039995c682d4d","tests/lhmin.hyf":"23c886704fafee7d9c54b2478029cf69a5fa946c2f2442bd86697bca5933c88d","tests/num.hyf":"4834fabe78b5c81815434d4562ce3322541649e1ea1edc555a498574bc8b237e","tests/rhmin.hyf":"239cb3d4d7f904abb43b57241e12cc1396e636220c3806e64666aca7ca46cc42","tests/settings2.hyf":"9fc4855e0b952a3593db1efef080b93ce7f1c6fe6798db0440e2bf0cc986ffa2","tests/settings3.hyf":"867db207b485a06e7d60ad10735c9111f10516ee3a5afd6306c683ace3454491","tests/test.rs":"5c81ae59b9384b70d9461407999dac1fde9214398876c4433fbbde9571cc1d94"},"package":null}
|
4
third_party/rust/mapped_hyph/Cargo.toml
vendored
4
third_party/rust/mapped_hyph/Cargo.toml
vendored
@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "mapped_hyph"
|
||||
description = "Hyphenation using precompiled memory-mapped tables"
|
||||
version = "0.4.0"
|
||||
version = "0.4.2"
|
||||
authors = ["Jonathan Kew <jfkthame@gmail.com>"]
|
||||
license = "MIT/Apache-2.0"
|
||||
edition = "2018"
|
||||
@ -9,6 +9,8 @@ edition = "2018"
|
||||
[dependencies]
|
||||
memmap = "0.7.0"
|
||||
arrayref = "0.3.5"
|
||||
log = "0.4"
|
||||
env_logger = "0.7.1"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3"
|
||||
|
@ -8,11 +8,13 @@
|
||||
// except according to those terms.
|
||||
|
||||
extern crate mapped_hyph;
|
||||
extern crate env_logger;
|
||||
|
||||
use std::env;
|
||||
use std::fs::File;
|
||||
|
||||
fn main() -> std::io::Result<()> {
|
||||
env_logger::init();
|
||||
let args: Vec<String> = env::args().collect();
|
||||
if args.len() == 3 {
|
||||
let in_file = File::open(&args[1])?;
|
||||
|
50
third_party/rust/mapped_hyph/src/builder.rs
vendored
50
third_party/rust/mapped_hyph/src/builder.rs
vendored
@ -132,7 +132,10 @@ impl LevelBuilder {
|
||||
let mut got_digit = false;
|
||||
for byte in bytes {
|
||||
if *byte <= b'9' && *byte >= b'0' {
|
||||
assert!(!got_digit, "invalid pattern \"{}\": consecutive digits", pattern);
|
||||
if got_digit {
|
||||
warn!("invalid pattern \"{}\": consecutive digits", pattern);
|
||||
return;
|
||||
}
|
||||
digits.push(*byte);
|
||||
got_digit = true;
|
||||
} else {
|
||||
@ -157,7 +160,10 @@ impl LevelBuilder {
|
||||
// Convert repl_index and repl_cut from Unicode char to byte indexing.
|
||||
let start = if text[0] == b'.' { 1 } else { 0 };
|
||||
if start == 1 {
|
||||
assert_eq!(digits[0], b'0', "unexpected digit before start of word");
|
||||
if digits[0] != b'0' {
|
||||
warn!("invalid pattern \"{}\": unexpected digit before start of word", pattern);
|
||||
return;
|
||||
}
|
||||
digits.remove(0);
|
||||
}
|
||||
let word = std::str::from_utf8(&text[start..]).unwrap();
|
||||
@ -171,7 +177,10 @@ impl LevelBuilder {
|
||||
// (which should not already have a match_string).
|
||||
let mut state_num = self.find_state_number_for(&text);
|
||||
let mut state = &mut self.states[state_num as usize];
|
||||
assert!(state.match_string.is_none(), "duplicate pattern?");
|
||||
if state.match_string.is_some() {
|
||||
warn!("duplicate pattern \"{}\" discarded", pattern);
|
||||
return;
|
||||
}
|
||||
if !digits.is_empty() {
|
||||
state.match_string = Some(digits);
|
||||
}
|
||||
@ -188,7 +197,7 @@ impl LevelBuilder {
|
||||
text.truncate(text.len() - 1);
|
||||
state_num = self.find_state_number_for(&text);
|
||||
if let Some(exists) = self.states[state_num as usize].transitions.0.insert(ch, last_state) {
|
||||
assert_eq!(exists, last_state, "overwriting existing transition?");
|
||||
assert_eq!(exists, last_state, "overwriting existing transition at pattern \"{}\"", pattern);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -349,7 +358,7 @@ impl LevelBuilder {
|
||||
/// machine transitions, etc.
|
||||
/// The returned Vec can be passed to write_hyf_file() to generate a flattened
|
||||
/// representation of the state machine in mapped_hyph's binary format.
|
||||
fn read_dic_file<T: Read>(dic_file: T, compress: bool) -> Vec<LevelBuilder> {
|
||||
fn read_dic_file<T: Read>(dic_file: T, compress: bool) -> Result<Vec<LevelBuilder>, &'static str> {
|
||||
let reader = BufReader::new(dic_file);
|
||||
|
||||
let mut builders = Vec::<LevelBuilder>::new();
|
||||
@ -370,14 +379,19 @@ fn read_dic_file<T: Read>(dic_file: T, compress: bool) -> Vec<LevelBuilder> {
|
||||
if trimmed.as_bytes()[0] >= b'A' && trimmed.as_bytes()[0] <= b'Z' {
|
||||
// First line is encoding; we only support UTF-8.
|
||||
if builder.encoding.is_none() {
|
||||
assert_eq!(trimmed, "UTF-8", "Only UTF-8 patterns are accepted!");
|
||||
if trimmed != "UTF-8" {
|
||||
return Err("Only UTF-8 patterns are accepted!");
|
||||
};
|
||||
builder.encoding = Some(trimmed);
|
||||
continue;
|
||||
}
|
||||
// Check for valid keyword-value pairs.
|
||||
if trimmed.contains(' ') {
|
||||
let parts: Vec<&str> = trimmed.split(' ').collect();
|
||||
assert!(parts.len() == 2);
|
||||
if parts.len() != 2 {
|
||||
warn!("unrecognized keyword/values: {}", trimmed);
|
||||
continue;
|
||||
}
|
||||
let keyword = parts[0];
|
||||
let value = parts[1];
|
||||
match keyword {
|
||||
@ -386,7 +400,7 @@ fn read_dic_file<T: Read>(dic_file: T, compress: bool) -> Vec<LevelBuilder> {
|
||||
"COMPOUNDLEFTHYPHENMIN" => builder.clh_min = value.parse::<u8>().unwrap(),
|
||||
"COMPOUNDRIGHTHYPHENMIN" => builder.crh_min = value.parse::<u8>().unwrap(),
|
||||
"NOHYPHEN" => builder.nohyphen = Some(trimmed),
|
||||
_ => println!("unknown keyword: {}", trimmed),
|
||||
_ => warn!("unknown keyword: {}", trimmed),
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@ -396,11 +410,15 @@ fn read_dic_file<T: Read>(dic_file: T, compress: bool) -> Vec<LevelBuilder> {
|
||||
builder = builders.last_mut().unwrap();
|
||||
continue;
|
||||
}
|
||||
println!("unknown keyword: {}", trimmed);
|
||||
warn!("unknown keyword: {}", trimmed);
|
||||
continue;
|
||||
}
|
||||
// Patterns should always be provided in lowercase; complain if not, and discard
|
||||
// the bad pattern.
|
||||
if trimmed != trimmed.to_lowercase() {
|
||||
warn!("pattern \"{}\" not lowercased at line {}", trimmed, index);
|
||||
continue;
|
||||
}
|
||||
// Patterns should always be provided in lowercase; complain if not.
|
||||
assert_eq!(trimmed, trimmed.to_lowercase(), "pattern \"{}\" not lowercased at line {}", trimmed, index);
|
||||
builder.add_pattern(&trimmed);
|
||||
}
|
||||
|
||||
@ -446,7 +464,7 @@ fn read_dic_file<T: Read>(dic_file: T, compress: bool) -> Vec<LevelBuilder> {
|
||||
}
|
||||
}
|
||||
|
||||
builders
|
||||
Ok(builders)
|
||||
}
|
||||
|
||||
/// Write out the state machines representing a set of hyphenation rules
|
||||
@ -481,5 +499,11 @@ fn write_hyf_file<T: Write>(hyf_file: &mut T, levels: Vec<LevelBuilder>) -> std:
|
||||
/// to `hyf_file`. The `compress` param determines whether extra processing to reduce the
|
||||
/// size of the output is performed.
|
||||
pub fn compile<T1: Read, T2: Write>(dic_file: T1, hyf_file: &mut T2, compress: bool) -> std::io::Result<()> {
|
||||
write_hyf_file(hyf_file, read_dic_file(dic_file, compress))
|
||||
match read_dic_file(dic_file, compress) {
|
||||
Ok(dic) => write_hyf_file(hyf_file, dic),
|
||||
Err(e) => {
|
||||
warn!("parse error: {}", e);
|
||||
return Err(Error::from(ErrorKind::InvalidData))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
2
third_party/rust/mapped_hyph/src/lib.rs
vendored
2
third_party/rust/mapped_hyph/src/lib.rs
vendored
@ -10,6 +10,8 @@
|
||||
#[macro_use]
|
||||
extern crate arrayref;
|
||||
extern crate memmap;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
|
||||
use std::slice;
|
||||
use std::str;
|
||||
|
@ -46,7 +46,7 @@ mdns_service = { path="../../../../dom/media/webrtc/transport/mdns_service", opt
|
||||
neqo_glue = { path = "../../../../netwerk/socket/neqo_glue" }
|
||||
rlbox_lucet_sandbox = { version = "0.1.0", optional = true }
|
||||
wgpu_bindings = { path = "../../../../gfx/wgpu_bindings", optional = true }
|
||||
mapped_hyph = { git = "https://github.com/jfkthame/mapped_hyph.git", tag = "v0.4.0" }
|
||||
mapped_hyph = { git = "https://github.com/jfkthame/mapped_hyph.git", tag = "v0.4.2" }
|
||||
remote = { path = "../../../../remote", optional = true }
|
||||
fog_control = { path = "../../../components/glean", optional = true }
|
||||
app_services_logger = { path = "../../../../services/common/app_services_logger" }
|
||||
|
Loading…
Reference in New Issue
Block a user