From 0fd6f30ea7f70313fd09cebacb0e1f9c1db0d579 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Thu, 4 Jul 2019 15:02:20 -0400 Subject: [PATCH] style: switch to rustfmt And enable a CI check for it. --- aho-corasick-debug/main.rs | 48 +-- bench/src/bench.rs | 23 +- bench/src/build.rs | 34 ++- bench/src/random.rs | 130 +++++++-- bench/src/same.rs | 7 +- bench/src/sherlock.rs | 67 +++-- ci/script.sh | 4 + rustfmt.toml | 2 + src/ahocorasick.rs | 135 ++++----- src/automaton.rs | 28 +- src/buffer.rs | 11 +- src/dfa.rs | 75 ++--- src/error.rs | 18 +- src/lib.rs | 14 +- src/nfa.rs | 75 ++--- src/prefilter.rs | 38 +-- src/state_id.rs | 71 +++-- src/tests.rs | 578 ++++++++++++++++++++----------------- 18 files changed, 764 insertions(+), 594 deletions(-) create mode 100644 rustfmt.toml diff --git a/aho-corasick-debug/main.rs b/aho-corasick-debug/main.rs index 1ddd089..be22f1c 100644 --- a/aho-corasick-debug/main.rs +++ b/aho-corasick-debug/main.rs @@ -58,7 +58,7 @@ struct Args { impl Args { fn parse() -> Result { - use clap::{App, Arg, crate_authors, crate_version}; + use clap::{crate_authors, crate_version, App, Arg}; let parsed = App::new("Search using aho-corasick") .author(crate_authors!()) @@ -66,18 +66,26 @@ impl Args { .max_term_width(100) .arg(Arg::with_name("dictionary").required(true)) .arg(Arg::with_name("haystack").required(true)) - .arg(Arg::with_name("kind") - .long("kind") - .possible_values(&[ - "standard", "leftmost-first", "leftmost-longest", - ]) - .default_value("standard")) - .arg(Arg::with_name("ascii-case-insensitive") - .long("ascii-case-insensitive") - .short("i")) - .arg(Arg::with_name("dense-depth") - .long("dense-depth") - .default_value("2")) + .arg( + Arg::with_name("kind") + .long("kind") + .possible_values(&[ + "standard", + "leftmost-first", + "leftmost-longest", + ]) + .default_value("standard"), + ) + .arg( + Arg::with_name("ascii-case-insensitive") + .long("ascii-case-insensitive") + .short("i"), + ) + .arg( + Arg::with_name("dense-depth") + .long("dense-depth") + .default_value("2"), + ) .arg(Arg::with_name("dfa").long("dfa").short("d")) .arg(Arg::with_name("prefilter").long("prefilter").short("f")) .arg(Arg::with_name("classes").long("classes").short("c")) @@ -85,12 +93,9 @@ impl Args { .arg(Arg::with_name("no-search").long("no-search")) .get_matches(); - let dictionary = PathBuf::from( - parsed.value_of_os("dictionary").unwrap() - ); - let haystack = PathBuf::from( - parsed.value_of_os("haystack").unwrap() - ); + let dictionary = + PathBuf::from(parsed.value_of_os("dictionary").unwrap()); + let haystack = PathBuf::from(parsed.value_of_os("haystack").unwrap()); let match_kind = match parsed.value_of("kind").unwrap() { "standard" => MatchKind::Standard, "leftmost-first" => MatchKind::LeftmostFirst, @@ -100,7 +105,10 @@ impl Args { let dense_depth = parsed.value_of("dense-depth").unwrap().parse()?; Ok(Args { - dictionary, haystack, match_kind, dense_depth, + dictionary, + haystack, + match_kind, + dense_depth, ascii_casei: parsed.is_present("ascii-case-insensitive"), dfa: parsed.is_present("dfa"), prefilter: parsed.is_present("prefilter"), diff --git a/bench/src/bench.rs b/bench/src/bench.rs index 23cbb8f..0b47b8e 100644 --- a/bench/src/bench.rs +++ b/bench/src/bench.rs @@ -30,10 +30,8 @@ fn define_aho_corasick>( count: usize, patterns: Vec, ) { - let patterns: Vec> = patterns - .into_iter() - .map(|b| b.as_ref().to_vec()) - .collect(); + let patterns: Vec> = + patterns.into_iter().map(|b| b.as_ref().to_vec()).collect(); let haystack = corpus.to_vec(); let name = format!("nfa/{}", bench_name); @@ -44,9 +42,7 @@ fn define_aho_corasick>( let haystack = corpus.to_vec(); let name = format!("dfa/{}", bench_name); - let aut = AhoCorasickBuilder::new() - .dfa(true) - .build(patterns.clone()); + let aut = AhoCorasickBuilder::new().dfa(true).build(patterns.clone()); define(c, group_name, &name, corpus, move |b| { b.iter(|| assert_eq!(count, aut.find_iter(&haystack).count())); }); @@ -63,13 +59,12 @@ fn define_aho_corasick_dfa( count: usize, patterns: Vec, find_count: F, -) where B: AsRef<[u8]>, - F: 'static + Clone + Fn(&AhoCorasick, &[u8]) -> usize +) where + B: AsRef<[u8]>, + F: 'static + Clone + Fn(&AhoCorasick, &[u8]) -> usize, { - let patterns: Vec> = patterns - .into_iter() - .map(|b| b.as_ref().to_vec()) - .collect(); + let patterns: Vec> = + patterns.into_iter().map(|b| b.as_ref().to_vec()).collect(); let counter = find_count.clone(); let haystack = corpus.to_vec(); @@ -134,7 +129,6 @@ fn define( corpus: &[u8], bench: impl FnMut(&mut Bencher) + 'static, ) { - let tput = Throughput::Bytes(corpus.len() as u32); let benchmark = Benchmark::new(bench_name, bench) .throughput(tput) @@ -152,7 +146,6 @@ fn define_long( corpus: &[u8], bench: impl FnMut(&mut Bencher) + 'static, ) { - let tput = Throughput::Bytes(corpus.len() as u32); let benchmark = Benchmark::new(bench_name, bench) .throughput(tput) diff --git a/bench/src/build.rs b/bench/src/build.rs index 791aac1..c64a8f4 100644 --- a/bench/src/build.rs +++ b/bench/src/build.rs @@ -1,7 +1,7 @@ use aho_corasick::{AhoCorasick, AhoCorasickBuilder}; -use criterion::{Criterion, black_box}; +use criterion::{black_box, Criterion}; -use input::{words_5000, words_15000}; +use input::{words_15000, words_5000}; use {define, define_long}; /// Benchmarks that measure the performance of constructing an Aho-Corasick @@ -10,15 +10,21 @@ pub fn all(c: &mut Criterion) { define_build::(c, false, "empty", vec![]); define_build(c, false, "onebyte", vec!["a"]); define_build(c, false, "twobytes", vec!["a", "b"]); - define_build(c, false, "many-short", vec![ - "ADL", "ADl", "AdL", "Adl", "BAK", "BAk", "BAK", "BaK", "Bak", "BaK", - "HOL", "HOl", "HoL", "Hol", "IRE", "IRe", "IrE", "Ire", "JOH", "JOh", - "JoH", "Joh", "SHE", "SHe", "ShE", "She", "WAT", "WAt", "WaT", "Wat", - "aDL", "aDl", "adL", "adl", "bAK", "bAk", "bAK", "baK", "bak", "baK", - "hOL", "hOl", "hoL", "hol", "iRE", "iRe", "irE", "ire", "jOH", "jOh", - "joH", "joh", "sHE", "sHe", "shE", "she", "wAT", "wAt", "waT", "wat", - "ſHE", "ſHe", "ſhE", "ſhe", - ]); + define_build( + c, + false, + "many-short", + vec![ + "ADL", "ADl", "AdL", "Adl", "BAK", "BAk", "BAK", "BaK", "Bak", + "BaK", "HOL", "HOl", "HoL", "Hol", "IRE", "IRe", "IrE", "Ire", + "JOH", "JOh", "JoH", "Joh", "SHE", "SHe", "ShE", "She", "WAT", + "WAt", "WaT", "Wat", "aDL", "aDl", "adL", "adl", "bAK", "bAk", + "bAK", "baK", "bak", "baK", "hOL", "hOl", "hoL", "hol", "iRE", + "iRe", "irE", "ire", "jOH", "jOh", "joH", "joh", "sHE", "sHe", + "shE", "she", "wAT", "wAt", "waT", "wat", "ſHE", "ſHe", "ſhE", + "ſhe", + ], + ); define_build(c, true, "5000words", words_5000()); define_build(c, true, "15000words", words_15000()); } @@ -29,10 +35,8 @@ fn define_build>( bench_name: &str, patterns: Vec, ) { - let patterns: Vec> = patterns - .into_iter() - .map(|b| b.as_ref().to_vec()) - .collect(); + let patterns: Vec> = + patterns.into_iter().map(|b| b.as_ref().to_vec()).collect(); let pats = patterns.clone(); let name = format!("nfa/{}", bench_name); diff --git a/bench/src/random.rs b/bench/src/random.rs index a40c79f..6ce3965 100644 --- a/bench/src/random.rs +++ b/bench/src/random.rs @@ -26,30 +26,52 @@ fn memchr_optimizations(c: &mut Criterion) { define_random(c, "twobytes/nomatch", 0, vec!["\x00", "\x01"]); define_random(c, "threebytes/match", 352, vec!["a", "\x00", "\x01"]); define_random(c, "threebytes/nomatch", 0, vec!["\x00", "\x01", "\x02"]); - define_random(c, "fourbytes/match", 352, vec![ - "a", "\x00", "\x01", "\x02", - ]); - define_random(c, "fourbytes/nomatch", 0, vec![ - "\x00", "\x01", "\x02", "\x03", - ]); - define_random(c, "fivebytes/match", 352, vec![ - "a", "\x00", "\x01", "\x02", "\x03", - ]); - define_random(c, "fivebytes/nomatch", 0, vec![ - "\x00", "\x01", "\x02", "\x03", "\x04", - ]); + define_random( + c, + "fourbytes/match", + 352, + vec!["a", "\x00", "\x01", "\x02"], + ); + define_random( + c, + "fourbytes/nomatch", + 0, + vec!["\x00", "\x01", "\x02", "\x03"], + ); + define_random( + c, + "fivebytes/match", + 352, + vec!["a", "\x00", "\x01", "\x02", "\x03"], + ); + define_random( + c, + "fivebytes/nomatch", + 0, + vec!["\x00", "\x01", "\x02", "\x03", "\x04"], + ); } /// Some miscellaneous benchmarks on random data. fn misc(c: &mut Criterion) { - define_random(c, "ten-one-prefix", 0, vec![ - "zacdef", "zbcdef", "zccdef", "zdcdef", "zecdef", - "zfcdef", "zgcdef", "zhcdef", "zicdef", "zjcdef", - ]); - define_random(c, "ten-diff-prefix", 0, vec![ - "abcdef", "bcdefg", "cdefgh", "defghi", "efghij", - "fghijk", "ghijkl", "hijklm", "ijklmn", "jklmno", - ]); + define_random( + c, + "ten-one-prefix", + 0, + vec![ + "zacdef", "zbcdef", "zccdef", "zdcdef", "zecdef", "zfcdef", + "zgcdef", "zhcdef", "zicdef", "zjcdef", + ], + ); + define_random( + c, + "ten-diff-prefix", + 0, + vec![ + "abcdef", "bcdefg", "cdefgh", "defghi", "efghij", "fghijk", + "ghijkl", "hijklm", "ijklmn", "jklmno", + ], + ); } /// Various benchmarks using a large pattern set. @@ -60,23 +82,47 @@ fn many_patterns(c: &mut Criterion) { let group = "random10x/standard"; define_aho_corasick_dfa( - c, group, name, RANDOM10X, Standard, 0, words_5000(), + c, + group, + name, + RANDOM10X, + Standard, + 0, + words_5000(), |ac, haystack| ac.find_iter(haystack).count(), ); let group = "random10x/leftmost-first"; define_aho_corasick_dfa( - c, group, name, RANDOM10X, LeftmostFirst, 0, words_5000(), + c, + group, + name, + RANDOM10X, + LeftmostFirst, + 0, + words_5000(), |ac, haystack| ac.find_iter(haystack).count(), ); let group = "random10x/leftmost-longest"; define_aho_corasick_dfa( - c, group, name, RANDOM10X, LeftmostLongest, 0, words_5000(), + c, + group, + name, + RANDOM10X, + LeftmostLongest, + 0, + words_5000(), |ac, haystack| ac.find_iter(haystack).count(), ); let group = "random10x/overlapping"; define_aho_corasick_dfa( - c, group, name, RANDOM10X, Standard, 0, words_5000(), + c, + group, + name, + RANDOM10X, + Standard, + 0, + words_5000(), |ac, haystack| ac.find_overlapping_iter(haystack).count(), ); @@ -84,23 +130,47 @@ fn many_patterns(c: &mut Criterion) { let group = "random10x/standard"; define_aho_corasick_dfa( - c, group, name, RANDOM10X, Standard, 0, words_100(), + c, + group, + name, + RANDOM10X, + Standard, + 0, + words_100(), |ac, haystack| ac.find_iter(haystack).count(), ); let group = "random10x/leftmost-first"; define_aho_corasick_dfa( - c, group, name, RANDOM10X, LeftmostFirst, 0, words_100(), + c, + group, + name, + RANDOM10X, + LeftmostFirst, + 0, + words_100(), |ac, haystack| ac.find_iter(haystack).count(), ); let group = "random10x/leftmost-longest"; define_aho_corasick_dfa( - c, group, name, RANDOM10X, LeftmostLongest, 0, words_100(), + c, + group, + name, + RANDOM10X, + LeftmostLongest, + 0, + words_100(), |ac, haystack| ac.find_iter(haystack).count(), ); let group = "random10x/overlapping"; define_aho_corasick_dfa( - c, group, name, RANDOM10X, Standard, 0, words_100(), + c, + group, + name, + RANDOM10X, + Standard, + 0, + words_100(), |ac, haystack| ac.find_overlapping_iter(haystack).count(), ); } @@ -111,7 +181,5 @@ fn define_random>( count: usize, patterns: Vec, ) { - define_aho_corasick( - c, "random", bench_name, RANDOM, count, patterns, - ); + define_aho_corasick(c, "random", bench_name, RANDOM, count, patterns); } diff --git a/bench/src/same.rs b/bench/src/same.rs index aae7089..c55b9ad 100644 --- a/bench/src/same.rs +++ b/bench/src/same.rs @@ -34,6 +34,11 @@ fn define_same>( ) { let corpus = "z".repeat(10_000); define_aho_corasick( - c, "same", bench_name, corpus.as_bytes(), count, patterns, + c, + "same", + bench_name, + corpus.as_bytes(), + count, + patterns, ); } diff --git a/bench/src/sherlock.rs b/bench/src/sherlock.rs index fff147a..1767eaa 100644 --- a/bench/src/sherlock.rs +++ b/bench/src/sherlock.rs @@ -1,7 +1,7 @@ use criterion::Criterion; -use input::*; use define_aho_corasick; +use input::*; /// These benchmarks test various words on natural language text. /// @@ -11,30 +11,53 @@ use define_aho_corasick; pub fn all(c: &mut Criterion) { define_sherlock(c, "name/alt1", 158, vec!["Sherlock", "Street"]); define_sherlock(c, "name/alt2", 558, vec!["Sherlock", "Holmes"]); - define_sherlock(c, "name/alt3", 740, vec![ - "Sherlock", "Holmes", "Watson", "Irene", "Adler", "John", "Baker", - ]); + define_sherlock( + c, + "name/alt3", + 740, + vec![ + "Sherlock", "Holmes", "Watson", "Irene", "Adler", "John", "Baker", + ], + ); define_sherlock(c, "name/alt4", 582, vec!["Sher", "Hol"]); define_sherlock(c, "name/alt5", 639, vec!["Sherlock", "Holmes", "Watson"]); - define_sherlock(c, "name/nocase1", 1764, vec![ - "ADL", "ADl", "AdL", "Adl", "BAK", "BAk", "BAK", "BaK", "Bak", "BaK", - "HOL", "HOl", "HoL", "Hol", "IRE", "IRe", "IrE", "Ire", "JOH", "JOh", - "JoH", "Joh", "SHE", "SHe", "ShE", "She", "WAT", "WAt", "WaT", "Wat", - "aDL", "aDl", "adL", "adl", "bAK", "bAk", "bAK", "baK", "bak", "baK", - "hOL", "hOl", "hoL", "hol", "iRE", "iRe", "irE", "ire", "jOH", "jOh", - "joH", "joh", "sHE", "sHe", "shE", "she", "wAT", "wAt", "waT", "wat", - "ſHE", "ſHe", "ſhE", "ſhe", - ]); - define_sherlock(c, "name/nocase2", 1307, vec![ - "HOL", "HOl", "HoL", "Hol", "SHE", "SHe", "ShE", "She", "hOL", "hOl", - "hoL", "hol", "sHE", "sHe", "shE", "she", "ſHE", "ſHe", "ſhE", "ſhe", - ]); - define_sherlock(c, "name/nocase3", 1442, vec![ - "HOL", "HOl", "HoL", "Hol", "SHE", "SHe", "ShE", "She", "WAT", "WAt", - "WaT", "Wat", "hOL", "hOl", "hoL", "hol", "sHE", "sHe", "shE", "she", - "wAT", "wAt", "waT", "wat", "ſHE", "ſHe", "ſhE", "ſhe", - ]); + define_sherlock( + c, + "name/nocase1", + 1764, + vec![ + "ADL", "ADl", "AdL", "Adl", "BAK", "BAk", "BAK", "BaK", "Bak", + "BaK", "HOL", "HOl", "HoL", "Hol", "IRE", "IRe", "IrE", "Ire", + "JOH", "JOh", "JoH", "Joh", "SHE", "SHe", "ShE", "She", "WAT", + "WAt", "WaT", "Wat", "aDL", "aDl", "adL", "adl", "bAK", "bAk", + "bAK", "baK", "bak", "baK", "hOL", "hOl", "hoL", "hol", "iRE", + "iRe", "irE", "ire", "jOH", "jOh", "joH", "joh", "sHE", "sHe", + "shE", "she", "wAT", "wAt", "waT", "wat", "ſHE", "ſHe", "ſhE", + "ſhe", + ], + ); + define_sherlock( + c, + "name/nocase2", + 1307, + vec![ + "HOL", "HOl", "HoL", "Hol", "SHE", "SHe", "ShE", "She", "hOL", + "hOl", "hoL", "hol", "sHE", "sHe", "shE", "she", "ſHE", "ſHe", + "ſhE", "ſhe", + ], + ); + define_sherlock( + c, + "name/nocase3", + 1442, + vec![ + "HOL", "HOl", "HoL", "Hol", "SHE", "SHe", "ShE", "She", "WAT", + "WAt", "WaT", "Wat", "hOL", "hOl", "hoL", "hol", "sHE", "sHe", + "shE", "she", "wAT", "wAt", "waT", "wat", "ſHE", "ſHe", "ſhE", + "ſhe", + ], + ); define_sherlock(c, "5000words", 567, words_5000()); } diff --git a/ci/script.sh b/ci/script.sh index 82df5fc..0424c3f 100755 --- a/ci/script.sh +++ b/ci/script.sh @@ -5,6 +5,10 @@ set -ex cargo build --verbose cargo doc --verbose cargo test --verbose +if [ "$TRAVIS_RUST_VERSION" = "stable" ]; then + rustup component add rustfmt + cargo fmt -- --check +fi if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then cargo bench --verbose --manifest-path bench/Cargo.toml -- --test fi diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..aa37a21 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,2 @@ +max_width = 79 +use_small_heuristics = "max" diff --git a/src/ahocorasick.rs b/src/ahocorasick.rs index fccd2b8..9d6e4b6 100644 --- a/src/ahocorasick.rs +++ b/src/ahocorasick.rs @@ -115,11 +115,10 @@ impl AhoCorasick { /// ]); /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern())); /// ``` - pub fn new( - patterns: I, - ) -> AhoCorasick - where I: IntoIterator, - P: AsRef<[u8]> + pub fn new(patterns: I) -> AhoCorasick + where + I: IntoIterator, + P: AsRef<[u8]>, { AhoCorasickBuilder::new().build(patterns) } @@ -151,10 +150,9 @@ impl AhoCorasick { /// ]); /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern())); /// ``` - pub fn new_auto_configured( - patterns: &[B], - ) -> AhoCorasick - where B: AsRef<[u8]> + pub fn new_auto_configured(patterns: &[B]) -> AhoCorasick + where + B: AsRef<[u8]>, { AhoCorasickBuilder::new().auto_configure(patterns).build(patterns) } @@ -213,7 +211,10 @@ impl AhoCorasick { let mut prestate = PrefilterState::new(self.max_pattern_len()); let mut start = self.imp.start_state(); self.imp.earliest_find_at( - &mut prestate, haystack.as_ref(), 0, &mut start, + &mut prestate, + haystack.as_ref(), + 0, + &mut start, ) } @@ -425,15 +426,13 @@ impl AhoCorasick { /// let result = ac.replace_all(haystack, &["x", "y", "z"]); /// assert_eq!("x the z to the xage", result); /// ``` - pub fn replace_all( - &self, - haystack: &str, - replace_with: &[B], - ) -> String - where B: AsRef + pub fn replace_all(&self, haystack: &str, replace_with: &[B]) -> String + where + B: AsRef, { assert_eq!( - replace_with.len(), self.pattern_count(), + replace_with.len(), + self.pattern_count(), "replace_all requires a replacement for every pattern \ in the automaton" ); @@ -479,10 +478,12 @@ impl AhoCorasick { haystack: &[u8], replace_with: &[B], ) -> Vec - where B: AsRef<[u8]> + where + B: AsRef<[u8]>, { assert_eq!( - replace_with.len(), self.pattern_count(), + replace_with.len(), + self.pattern_count(), "replace_all_bytes requires a replacement for every pattern \ in the automaton" ); @@ -528,7 +529,8 @@ impl AhoCorasick { haystack: &str, dst: &mut String, mut replace_with: F, - ) where F: FnMut(&Match, &str, &mut String) -> bool + ) where + F: FnMut(&Match, &str, &mut String) -> bool, { let mut last_match = 0; for mat in self.find_iter(haystack) { @@ -573,7 +575,8 @@ impl AhoCorasick { haystack: &[u8], dst: &mut Vec, mut replace_with: F, - ) where F: FnMut(&Match, &[u8], &mut Vec) -> bool + ) where + F: FnMut(&Match, &[u8], &mut Vec) -> bool, { let mut last_match = 0; for mat in self.find_iter(haystack) { @@ -710,12 +713,14 @@ impl AhoCorasick { wtr: W, replace_with: &[B], ) -> io::Result<()> - where R: io::Read, - W: io::Write, - B: AsRef<[u8]> + where + R: io::Read, + W: io::Write, + B: AsRef<[u8]>, { assert_eq!( - replace_with.len(), self.pattern_count(), + replace_with.len(), + self.pattern_count(), "stream_replace_all requires a replacement for every pattern \ in the automaton" ); @@ -795,9 +800,10 @@ impl AhoCorasick { mut wtr: W, mut replace_with: F, ) -> io::Result<()> - where R: io::Read, - W: io::Write, - F: FnMut(&Match, &[u8], &mut W) -> io::Result<()> + where + R: io::Read, + W: io::Write, + F: FnMut(&Match, &[u8], &mut W) -> io::Result<()>, { let mut it = StreamChunkIter::new(self, rdr); while let Some(result) = it.next() { @@ -1041,16 +1047,20 @@ impl Imp { match_index: &mut usize, ) -> Option { match *self { - Imp::NFA(ref nfa) => { - nfa.overlapping_find_at( - prestate, haystack, at, state_id, match_index, - ) - } - Imp::DFA(ref dfa) => { - dfa.overlapping_find_at( - prestate, haystack, at, state_id, match_index, - ) - } + Imp::NFA(ref nfa) => nfa.overlapping_find_at( + prestate, + haystack, + at, + state_id, + match_index, + ), + Imp::DFA(ref dfa) => dfa.overlapping_find_at( + prestate, + haystack, + at, + state_id, + match_index, + ), } } @@ -1081,12 +1091,8 @@ impl Imp { state_id: &mut S, ) -> Option { match *self { - Imp::NFA(ref nfa) => { - nfa.find_at(prestate, haystack, at, state_id) - } - Imp::DFA(ref dfa) => { - dfa.find_at(prestate, haystack, at, state_id) - } + Imp::NFA(ref nfa) => nfa.find_at(prestate, haystack, at, state_id), + Imp::DFA(ref dfa) => dfa.find_at(prestate, haystack, at, state_id), } } } @@ -1133,7 +1139,10 @@ impl<'a, 'b, S: StateID> Iterator for FindIter<'a, 'b, S> { } let mut start = self.start; let result = self.fsm.find_at( - &mut self.prestate, self.haystack, self.pos, &mut start, + &mut self.prestate, + self.haystack, + self.pos, + &mut start, ); let mat = match result { None => return None, @@ -1243,9 +1252,7 @@ pub struct StreamFindIter<'a, R, S: 'a + StateID> { impl<'a, R: io::Read, S: StateID> StreamFindIter<'a, R, S> { fn new(ac: &'a AhoCorasick, rdr: R) -> StreamFindIter<'a, R, S> { - StreamFindIter { - it: StreamChunkIter::new(ac, rdr), - } + StreamFindIter { it: StreamChunkIter::new(ac, rdr) } } } @@ -1492,12 +1499,10 @@ impl AhoCorasickBuilder { /// .build(patterns); /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern())); /// ``` - pub fn build( - &self, - patterns: I, - ) -> AhoCorasick - where I: IntoIterator, - P: AsRef<[u8]> + pub fn build(&self, patterns: I) -> AhoCorasick + where + I: IntoIterator, + P: AsRef<[u8]>, { // The builder only returns an error if the chosen state ID // representation is too small to fit all of the given patterns. In @@ -1560,19 +1565,19 @@ impl AhoCorasickBuilder { &self, patterns: I, ) -> Result> - where S: StateID, - I: IntoIterator, - P: AsRef<[u8]> + where + S: StateID, + I: IntoIterator, + P: AsRef<[u8]>, { let nfa = self.nfa_builder.build(patterns)?; let match_kind = nfa.match_kind().clone(); - let imp = - if self.dfa { - let dfa = self.dfa_builder.build(&nfa)?; - Imp::DFA(dfa) - } else { - Imp::NFA(nfa) - }; + let imp = if self.dfa { + let dfa = self.dfa_builder.build(&nfa)?; + Imp::DFA(dfa) + } else { + Imp::NFA(nfa) + }; Ok(AhoCorasick { imp, match_kind }) } @@ -2002,7 +2007,7 @@ impl MatchKind { pub(crate) fn is_leftmost(&self) -> bool { *self == MatchKind::LeftmostFirst - || *self == MatchKind::LeftmostLongest + || *self == MatchKind::LeftmostLongest } pub(crate) fn is_leftmost_first(&self) -> bool { diff --git a/src/automaton.rs b/src/automaton.rs index d5935b5..ddfcbd2 100644 --- a/src/automaton.rs +++ b/src/automaton.rs @@ -1,6 +1,6 @@ use ahocorasick::MatchKind; use prefilter::{Prefilter, PrefilterState}; -use state_id::{StateID, dead_id, fail_id}; +use state_id::{dead_id, fail_id, StateID}; use Match; // NOTE: This trait was essentially copied from regex-automata, with some @@ -142,12 +142,14 @@ pub trait Automaton { ) -> Option { if let Some(pre) = self.prefilter() { self.standard_find_at_imp( - prestate, Some(pre), haystack, at, state_id, + prestate, + Some(pre), + haystack, + at, + state_id, ) } else { - self.standard_find_at_imp( - prestate, None, haystack, at, state_id, - ) + self.standard_find_at_imp(prestate, None, haystack, at, state_id) } } @@ -235,12 +237,14 @@ pub trait Automaton { ) -> Option { if let Some(pre) = self.prefilter() { self.leftmost_find_at_imp( - prestate, Some(pre), haystack, at, state_id, + prestate, + Some(pre), + haystack, + at, + state_id, ) } else { - self.leftmost_find_at_imp( - prestate, None, haystack, at, state_id, - ) + self.leftmost_find_at_imp(prestate, None, haystack, at, state_id) } } @@ -337,11 +341,7 @@ pub trait Automaton { if *match_index < match_count { // This is guaranteed to return a match since // match_index < match_count. - let result = self.get_match( - *state_id, - *match_index, - at, - ); + let result = self.get_match(*state_id, *match_index, at); debug_assert!(result.is_some(), "must be a match"); *match_index += 1; return result; diff --git a/src/buffer.rs b/src/buffer.rs index f3e632c..01a8453 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -3,7 +3,7 @@ use std::io; use std::ptr; /// The default buffer capacity that we use for the stream buffer. -const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB +const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10); // 8 KB /// A fairly simple roll buffer for supporting stream searches. /// @@ -52,11 +52,7 @@ impl Buffer { // TODO: It would be good to find a way to test the streaming // implementation with the minimal buffer size. let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY); - Buffer { - buf: vec![0; capacity], - min, - end: 0, - } + Buffer { buf: vec![0; capacity], min, end: 0 } } /// Return the contents of this buffer. @@ -110,7 +106,8 @@ impl Buffer { /// This should only be called when the entire contents of this buffer have /// been searched. pub fn roll(&mut self) { - let roll_start = self.end + let roll_start = self + .end .checked_sub(self.min) .expect("buffer capacity should be bigger than minimum amount"); let roll_len = self.min; diff --git a/src/dfa.rs b/src/dfa.rs index 64fcfa3..9053bbc 100644 --- a/src/dfa.rs +++ b/src/dfa.rs @@ -4,9 +4,9 @@ use ahocorasick::MatchKind; use automaton::Automaton; use classes::ByteClasses; use error::Result; -use nfa::{NFA, PatternID, PatternLength}; +use nfa::{PatternID, PatternLength, NFA}; use prefilter::{Prefilter, PrefilterObj, PrefilterState}; -use state_id::{StateID, dead_id, fail_id, premultiply_overflow_error}; +use state_id::{dead_id, fail_id, premultiply_overflow_error, StateID}; use Match; #[derive(Clone, Debug)] @@ -57,26 +57,34 @@ impl DFA { match_index: &mut usize, ) -> Option { match *self { - DFA::Standard(ref dfa) => { - dfa.overlapping_find_at( - prestate, haystack, at, state_id, match_index, - ) - } - DFA::ByteClass(ref dfa) => { - dfa.overlapping_find_at( - prestate, haystack, at, state_id, match_index, - ) - } - DFA::Premultiplied(ref dfa) => { - dfa.overlapping_find_at( - prestate, haystack, at, state_id, match_index, - ) - } - DFA::PremultipliedByteClass(ref dfa) => { - dfa.overlapping_find_at( - prestate, haystack, at, state_id, match_index, - ) - } + DFA::Standard(ref dfa) => dfa.overlapping_find_at( + prestate, + haystack, + at, + state_id, + match_index, + ), + DFA::ByteClass(ref dfa) => dfa.overlapping_find_at( + prestate, + haystack, + at, + state_id, + match_index, + ), + DFA::Premultiplied(ref dfa) => dfa.overlapping_find_at( + prestate, + haystack, + at, + state_id, + match_index, + ), + DFA::PremultipliedByteClass(ref dfa) => dfa.overlapping_find_at( + prestate, + haystack, + at, + state_id, + match_index, + ), } } @@ -562,10 +570,9 @@ impl Repr { /// Computes the total amount of heap used by this NFA in bytes. fn calculate_size(&mut self) { - let mut size = - (self.trans.len() * size_of::()) - + (self.matches.len() * - size_of::>()); + let mut size = (self.trans.len() * size_of::()) + + (self.matches.len() + * size_of::>()); for state_matches in &self.matches { size += state_matches.len() * size_of::<(PatternID, PatternLength)>(); @@ -584,10 +591,7 @@ pub struct Builder { impl Builder { /// Create a new builder for a DFA. pub fn new() -> Builder { - Builder { - premultiply: true, - byte_classes: true, - } + Builder { premultiply: true, byte_classes: true } } /// Build a DFA from the given NFA. @@ -596,12 +600,11 @@ impl Builder { /// representation size. This can only happen when state ids are /// premultiplied (which is enabled by default). pub fn build(&self, nfa: &NFA) -> Result> { - let byte_classes = - if self.byte_classes { - nfa.byte_classes().clone() - } else { - ByteClasses::singletons() - }; + let byte_classes = if self.byte_classes { + nfa.byte_classes().clone() + } else { + ByteClasses::singletons() + }; let alphabet_len = byte_classes.alphabet_len(); let trans = vec![fail_id(); alphabet_len * nfa.state_len()]; let matches = vec![vec![]; nfa.state_len()]; diff --git a/src/error.rs b/src/error.rs index c9cb233..7dace63 100644 --- a/src/error.rs +++ b/src/error.rs @@ -33,7 +33,7 @@ pub enum ErrorKind { max: usize, /// The maximum ID required by premultiplication. requested_max: usize, - } + }, } impl Error { @@ -70,15 +70,13 @@ impl error::Error for Error { impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self.kind { - ErrorKind::StateIDOverflow { max } => { - write!( - f, - "building the automaton failed because it required \ - building more states that can be identified, where the \ - maximum ID for the chosen representation is {}", - max, - ) - } + ErrorKind::StateIDOverflow { max } => write!( + f, + "building the automaton failed because it required \ + building more states that can be identified, where the \ + maximum ID for the chosen representation is {}", + max, + ), ErrorKind::PremultiplyOverflow { max, requested_max } => { if max == requested_max { write!( diff --git a/src/lib.rs b/src/lib.rs index 60daef3..1da0170 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -196,8 +196,8 @@ extern crate doc_comment; doctest!("../README.md"); pub use ahocorasick::{ - AhoCorasick, AhoCorasickBuilder, MatchKind, - FindIter, FindOverlappingIter, StreamFindIter, + AhoCorasick, AhoCorasickBuilder, FindIter, FindOverlappingIter, MatchKind, + StreamFindIter, }; pub use error::{Error, ErrorKind}; pub use state_id::StateID; @@ -205,11 +205,11 @@ pub use state_id::StateID; mod ahocorasick; mod automaton; mod buffer; +mod classes; mod dfa; mod error; -mod classes; -mod prefilter; mod nfa; +mod prefilter; mod state_id; #[cfg(test)] mod tests; @@ -287,10 +287,6 @@ impl Match { #[inline] fn increment(&self, by: usize) -> Match { - Match { - pattern: self.pattern, - len: self.len, - end: self.end + by, - } + Match { pattern: self.pattern, len: self.len, end: self.end + by } } } diff --git a/src/nfa.rs b/src/nfa.rs index a6d0f34..48714b9 100644 --- a/src/nfa.rs +++ b/src/nfa.rs @@ -1,14 +1,14 @@ -use std::collections::VecDeque; use std::cmp; +use std::collections::VecDeque; use std::fmt; use std::mem::size_of; use ahocorasick::MatchKind; use automaton::Automaton; -use classes::{ByteClasses, ByteClassBuilder}; +use classes::{ByteClassBuilder, ByteClasses}; use error::Result; use prefilter::{self, Prefilter, PrefilterObj}; -use state_id::{StateID, dead_id, fail_id, usize_to_state_id}; +use state_id::{dead_id, fail_id, usize_to_state_id, StateID}; use Match; /// The identifier for a pattern, which is simply the position of the pattern @@ -173,9 +173,8 @@ impl NFA { } fn copy_matches(&mut self, src: S, dst: S) { - let (src, dst) = get_two_mut( - &mut self.states, src.to_usize(), dst.to_usize(), - ); + let (src, dst) = + get_two_mut(&mut self.states, src.to_usize(), dst.to_usize()); dst.matches.extend_from_slice(&src.matches); } @@ -242,9 +241,11 @@ impl Automaton for NFA { None => return None, Some(state) => state, }; - state.matches - .get(match_index) - .map(|&(id, len)| Match { pattern: id, len, end }) + state.matches.get(match_index).map(|&(id, len)| Match { + pattern: id, + len, + end, + }) } fn match_count(&self, id: S) -> usize { @@ -291,7 +292,7 @@ pub struct State { impl State { fn heap_bytes(&self) -> usize { self.trans.heap_bytes() - + (self.matches.len() * size_of::<(PatternID, PatternLength)>()) + + (self.matches.len() * size_of::<(PatternID, PatternLength)>()) } fn add_match(&mut self, i: PatternID, len: PatternLength) { @@ -345,9 +346,7 @@ impl Transitions { Transitions::Sparse(ref sparse) => { sparse.len() * size_of::<(u8, S)>() } - Transitions::Dense(ref dense) => { - dense.len() * size_of::() - } + Transitions::Dense(ref dense) => dense.len() * size_of::(), } } @@ -522,12 +521,10 @@ impl Builder { Builder::default() } - pub fn build( - &self, - patterns: I, - ) -> Result> - where I: IntoIterator, - P: AsRef<[u8]> + pub fn build(&self, patterns: I) -> Result> + where + I: IntoIterator, + P: AsRef<[u8]>, { Compiler::new(self)?.compile(patterns) } @@ -581,12 +578,10 @@ impl<'a, S: StateID> Compiler<'a, S> { }) } - fn compile( - mut self, - patterns: I, - ) -> Result> - where I: IntoIterator, - P: AsRef<[u8]> + fn compile(mut self, patterns: I) -> Result> + where + I: IntoIterator, + P: AsRef<[u8]>, { self.add_state(0)?; // the fail state, which is never entered self.add_state(0)?; // the dead state, only used for leftmost @@ -610,19 +605,15 @@ impl<'a, S: StateID> Compiler<'a, S> { /// automaton. Effectively, it creates the basic structure of the /// automaton, where every pattern given has a path from the start state to /// the end of the pattern. - fn build_trie( - &mut self, - patterns: I, - ) -> Result<()> - where I: IntoIterator, - P: AsRef<[u8]> + fn build_trie(&mut self, patterns: I) -> Result<()> + where + I: IntoIterator, + P: AsRef<[u8]>, { - 'PATTERNS: - for (pati, pat) in patterns.into_iter().enumerate() { + 'PATTERNS: for (pati, pat) in patterns.into_iter().enumerate() { let pat = pat.as_ref(); - self.nfa.max_pattern_len = cmp::max( - self.nfa.max_pattern_len, pat.len(), - ); + self.nfa.max_pattern_len = + cmp::max(self.nfa.max_pattern_len, pat.len()); self.nfa.pattern_count += 1; let mut prev = self.nfa.start_id; @@ -873,11 +864,7 @@ impl<'a, S: StateID> Compiler<'a, S> { /// state. fn start(nfa: &NFA) -> QueuedState { let match_at_depth = - if nfa.start().is_match() { - Some(0) - } else { - None - }; + if nfa.start().is_match() { Some(0) } else { None }; QueuedState { id: nfa.start_id, match_at_depth } } @@ -909,8 +896,7 @@ impl<'a, S: StateID> Compiler<'a, S> { None if nfa.state(next).is_match() => {} None => return None, } - let depth = - nfa.state(next).depth + let depth = nfa.state(next).depth - nfa.state(next).get_longest_match_len().unwrap() + 1; Some(depth) @@ -1173,7 +1159,8 @@ impl fmt::Debug for NFA { }); writeln!(f, "{:04}: {}", id, trans.join(", "))?; - let matches: Vec = s.matches + let matches: Vec = s + .matches .iter() .map(|&(pattern_id, _)| pattern_id.to_string()) .collect(); diff --git a/src/prefilter.rs b/src/prefilter.rs index d3e830d..4d47b94 100644 --- a/src/prefilter.rs +++ b/src/prefilter.rs @@ -6,7 +6,9 @@ use memchr::{memchr, memchr2, memchr3}; /// A prefilter describes the behavior of fast literal scanners for quickly /// skipping past bytes in the haystack that we know cannot possibly /// participate in a match. -pub trait Prefilter: Send + Sync + RefUnwindSafe + UnwindSafe + fmt::Debug { +pub trait Prefilter: + Send + Sync + RefUnwindSafe + UnwindSafe + fmt::Debug +{ /// Returns the next possible match candidate. This may yield false /// positives, so callers must "confirm" a match starting at the position /// returned. This, however, must never produce false negatives. That is, @@ -165,24 +167,16 @@ impl StartBytesBuilder { } match len { 0 => None, - 1 => { - Some(PrefilterObj::new(StartBytesOne { - byte1: bytes[0], - })) - } - 2 => { - Some(PrefilterObj::new(StartBytesTwo { - byte1: bytes[0], - byte2: bytes[1], - })) - } - 3 => { - Some(PrefilterObj::new(StartBytesThree { - byte1: bytes[0], - byte2: bytes[1], - byte3: bytes[2], - })) - } + 1 => Some(PrefilterObj::new(StartBytesOne { byte1: bytes[0] })), + 2 => Some(PrefilterObj::new(StartBytesTwo { + byte1: bytes[0], + byte2: bytes[1], + })), + 3 => Some(PrefilterObj::new(StartBytesThree { + byte1: bytes[0], + byte2: bytes[1], + byte3: bytes[2], + })), _ => unreachable!(), } } @@ -204,8 +198,7 @@ pub struct StartBytesOne { impl Prefilter for StartBytesOne { fn next_candidate(&self, haystack: &[u8], at: usize) -> Option { - memchr(self.byte1, &haystack[at..]) - .map(|i| at + i) + memchr(self.byte1, &haystack[at..]).map(|i| at + i) } fn clone_prefilter(&self) -> Box { @@ -222,8 +215,7 @@ pub struct StartBytesTwo { impl Prefilter for StartBytesTwo { fn next_candidate(&self, haystack: &[u8], at: usize) -> Option { - memchr2(self.byte1, self.byte2, &haystack[at..]) - .map(|i| at + i) + memchr2(self.byte1, self.byte2, &haystack[at..]).map(|i| at + i) } fn clone_prefilter(&self) -> Box { diff --git a/src/state_id.rs b/src/state_id.rs index c7011f1..8ee58c6 100644 --- a/src/state_id.rs +++ b/src/state_id.rs @@ -49,7 +49,7 @@ pub fn dead_id() -> S { mod private { /// Sealed stops crates other than aho-corasick from implementing any /// traits that use it. - pub trait Sealed{} + pub trait Sealed {} impl Sealed for u8 {} impl Sealed for u16 {} impl Sealed for u32 {} @@ -82,7 +82,14 @@ mod private { /// bounds checks are explicitly elided for performance reasons. pub unsafe trait StateID: private::Sealed - + Clone + Copy + Debug + Eq + Hash + PartialEq + PartialOrd + Ord + + Clone + + Copy + + Debug + + Eq + + Hash + + PartialEq + + PartialOrd + + Ord { /// Convert from a `usize` to this implementation's representation. /// @@ -110,57 +117,87 @@ pub unsafe trait StateID: unsafe impl StateID for usize { #[inline] - fn from_usize(n: usize) -> usize { n } + fn from_usize(n: usize) -> usize { + n + } #[inline] - fn to_usize(self) -> usize { self } + fn to_usize(self) -> usize { + self + } #[inline] - fn max_id() -> usize { ::std::usize::MAX } + fn max_id() -> usize { + ::std::usize::MAX + } } unsafe impl StateID for u8 { #[inline] - fn from_usize(n: usize) -> u8 { n as u8 } + fn from_usize(n: usize) -> u8 { + n as u8 + } #[inline] - fn to_usize(self) -> usize { self as usize } + fn to_usize(self) -> usize { + self as usize + } #[inline] - fn max_id() -> usize { ::std::u8::MAX as usize } + fn max_id() -> usize { + ::std::u8::MAX as usize + } } unsafe impl StateID for u16 { #[inline] - fn from_usize(n: usize) -> u16 { n as u16 } + fn from_usize(n: usize) -> u16 { + n as u16 + } #[inline] - fn to_usize(self) -> usize { self as usize } + fn to_usize(self) -> usize { + self as usize + } #[inline] - fn max_id() -> usize { ::std::u16::MAX as usize } + fn max_id() -> usize { + ::std::u16::MAX as usize + } } #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] unsafe impl StateID for u32 { #[inline] - fn from_usize(n: usize) -> u32 { n as u32 } + fn from_usize(n: usize) -> u32 { + n as u32 + } #[inline] - fn to_usize(self) -> usize { self as usize } + fn to_usize(self) -> usize { + self as usize + } #[inline] - fn max_id() -> usize { ::std::u32::MAX as usize } + fn max_id() -> usize { + ::std::u32::MAX as usize + } } #[cfg(target_pointer_width = "64")] unsafe impl StateID for u64 { #[inline] - fn from_usize(n: usize) -> u64 { n as u64 } + fn from_usize(n: usize) -> u64 { + n as u64 + } #[inline] - fn to_usize(self) -> usize { self as usize } + fn to_usize(self) -> usize { + self as usize + } #[inline] - fn max_id() -> usize { ::std::u64::MAX as usize } + fn max_id() -> usize { + ::std::u64::MAX as usize + } } diff --git a/src/tests.rs b/src/tests.rs index 790c001..c4a86b4 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -32,7 +32,7 @@ macro_rules! t { haystack: $haystack, matches: $matches, } - } + }; } /// A collection of test groups. @@ -43,24 +43,20 @@ type TestCollection = &'static [&'static [SearchTest]]; // but each collection should have some tests that no other collection has. /// Tests for Aho-Corasick's standard non-overlapping match semantics. -const AC_STANDARD_NON_OVERLAPPING: TestCollection = &[ - BASICS, NON_OVERLAPPING, STANDARD, REGRESSION, -]; +const AC_STANDARD_NON_OVERLAPPING: TestCollection = + &[BASICS, NON_OVERLAPPING, STANDARD, REGRESSION]; /// Tests for Aho-Corasick's standard overlapping match semantics. -const AC_STANDARD_OVERLAPPING: TestCollection = &[ - BASICS, OVERLAPPING, REGRESSION, -]; +const AC_STANDARD_OVERLAPPING: TestCollection = + &[BASICS, OVERLAPPING, REGRESSION]; /// Tests for Aho-Corasick's leftmost-first match semantics. -const AC_LEFTMOST_FIRST: TestCollection = &[ - BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_FIRST, REGRESSION, -]; +const AC_LEFTMOST_FIRST: TestCollection = + &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_FIRST, REGRESSION]; /// Tests for Aho-Corasick's leftmost-longest match semantics. -const AC_LEFTMOST_LONGEST: TestCollection = &[ - BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_LONGEST, REGRESSION, -]; +const AC_LEFTMOST_LONGEST: TestCollection = + &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_LONGEST, REGRESSION]; // Now define the individual tests that make up the collections above. @@ -77,70 +73,51 @@ const BASICS: &'static [SearchTest] = &[ t!(basic050, &["a"], "bba", &[(0, 2, 3)]), t!(basic060, &["a"], "bbb", &[]), t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]), - t!(basic100, &["aa"], "", &[]), t!(basic110, &["aa"], "aa", &[(0, 0, 2)]), t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]), t!(basic130, &["aa"], "abbab", &[]), t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]), - t!(basic200, &["abc"], "abc", &[(0, 0, 3)]), t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]), t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]), - t!(basic300, &["a", "b"], "", &[]), t!(basic310, &["a", "b"], "z", &[]), t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]), t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]), - t!(basic340, &["a", "b"], "abba", &[ - (0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4), - ]), - t!(basic350, &["b", "a"], "abba", &[ - (1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4), - ]), - t!(nover360, &["abc", "bc"], "xbc", &[ - (1, 1, 3), - ]), - + t!( + basic340, + &["a", "b"], + "abba", + &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),] + ), + t!( + basic350, + &["b", "a"], + "abba", + &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),] + ), + t!(nover360, &["abc", "bc"], "xbc", &[(1, 1, 3),]), t!(basic400, &["foo", "bar"], "", &[]), - t!(basic410, &["foo", "bar"], "foobar", &[ - (0, 0, 3), (1, 3, 6), - ]), - t!(basic420, &["foo", "bar"], "barfoo", &[ - (1, 0, 3), (0, 3, 6), - ]), - t!(basic430, &["foo", "bar"], "foofoo", &[ - (0, 0, 3), (0, 3, 6), - ]), - t!(basic440, &["foo", "bar"], "barbar", &[ - (1, 0, 3), (1, 3, 6), - ]), - t!(basic450, &["foo", "bar"], "bafofoo", &[ - (0, 4, 7), - ]), - t!(basic460, &["bar", "foo"], "bafofoo", &[ - (1, 4, 7), - ]), - t!(basic470, &["foo", "bar"], "fobabar", &[ - (1, 4, 7), - ]), - t!(basic480, &["bar", "foo"], "fobabar", &[ - (0, 4, 7), - ]), - + t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]), + t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]), + t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]), + t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]), + t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]), + t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]), + t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]), + t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]), t!(basic600, &[""], "", &[(0, 0, 0)]), t!(basic610, &[""], "a", &[(0, 0, 0), (0, 1, 1)]), t!(basic620, &[""], "abc", &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]), - - t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[ - (0, 0, 7), - ]), - t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[ - (1, 1, 10), - ]), - t!(basic720, &["yabcdef", "bcdeyabc", "abcdezghi"], "yabcdezghi", &[ - (2, 1, 10), - ]), + t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]), + t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]), + t!( + basic720, + &["yabcdef", "bcdeyabc", "abcdezghi"], + "yabcdezghi", + &[(2, 1, 10),] + ), ]; /// Tests for non-overlapping standard match semantics. @@ -155,25 +132,17 @@ const STANDARD: &'static [SearchTest] = &[ t!(standard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]), t!(standard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]), t!(standard040, &["a", ""], "a", &[(1, 0, 0), (1, 1, 1)]), - - t!(standard400, &["abcd", "bcd", "cd", "b"], "abcd", &[ - (3, 1, 2), (2, 2, 4), - ]), - t!(standard410, &["", "a"], "a", &[ - (0, 0, 0), (0, 1, 1), - ]), - t!(standard420, &["", "a"], "aa", &[ - (0, 0, 0), (0, 1, 1), (0, 2, 2), - ]), - t!(standard430, &["", "a", ""], "a", &[ - (0, 0, 0), (0, 1, 1), - ]), - t!(standard440, &["a", "", ""], "a", &[ - (1, 0, 0), (1, 1, 1), - ]), - t!(standard450, &["", "", "a"], "a", &[ - (0, 0, 0), (0, 1, 1), - ]), + t!( + standard400, + &["abcd", "bcd", "cd", "b"], + "abcd", + &[(3, 1, 2), (2, 2, 4),] + ), + t!(standard410, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1),]), + t!(standard420, &["", "a"], "aa", &[(0, 0, 0), (0, 1, 1), (0, 2, 2),]), + t!(standard430, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]), + t!(standard440, &["a", "", ""], "a", &[(1, 0, 0), (1, 1, 1),]), + t!(standard450, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]), ]; /// Tests for non-overlapping leftmost match semantics. These should pass for @@ -187,31 +156,48 @@ const LEFTMOST: &'static [SearchTest] = &[ t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]), t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]), t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]), - t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]), t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]), t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]), t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]), t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]), t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]), - t!(leftmost360, &["abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[ - (2, 0, 8), - ]), - t!(leftmost370, &["abcdefghi", "cde", "hz", "abcdefgh"], "abcdefghz", &[ - (3, 0, 8), - ]), - t!(leftmost380, &["abcdefghi", "hz", "abcdefgh", "a"], "abcdefghz", &[ - (2, 0, 8), - ]), - t!(leftmost390, &["b", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[ - (3, 0, 8), - ]), - t!(leftmost400, &["h", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[ - (3, 0, 8), - ]), - t!(leftmost410, &["z", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[ - (3, 0, 8), (0, 8, 9), - ]), + t!( + leftmost360, + &["abcdefghi", "hz", "abcdefgh"], + "abcdefghz", + &[(2, 0, 8),] + ), + t!( + leftmost370, + &["abcdefghi", "cde", "hz", "abcdefgh"], + "abcdefghz", + &[(3, 0, 8),] + ), + t!( + leftmost380, + &["abcdefghi", "hz", "abcdefgh", "a"], + "abcdefghz", + &[(2, 0, 8),] + ), + t!( + leftmost390, + &["b", "abcdefghi", "hz", "abcdefgh"], + "abcdefghz", + &[(3, 0, 8),] + ), + t!( + leftmost400, + &["h", "abcdefghi", "hz", "abcdefgh"], + "abcdefghz", + &[(3, 0, 8),] + ), + t!( + leftmost410, + &["z", "abcdefghi", "hz", "abcdefgh"], + "abcdefghz", + &[(3, 0, 8), (0, 8, 9),] + ), ]; /// Tests for non-overlapping leftmost-first match semantics. These tests @@ -220,29 +206,27 @@ const LEFTMOST: &'static [SearchTest] = &[ const LEFTMOST_FIRST: &'static [SearchTest] = &[ t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]), t!(leftfirst010, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]), - t!(leftfirst011, &["", "a", ""], "a", &[ - (0, 0, 0), (0, 1, 1), - ]), - t!(leftfirst012, &["a", "", ""], "a", &[ - (0, 0, 1), (1, 1, 1), - ]), - t!(leftfirst013, &["", "", "a"], "a", &[ - (0, 0, 0), (0, 1, 1), - ]), + t!(leftfirst011, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]), + t!(leftfirst012, &["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1),]), + t!(leftfirst013, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]), t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]), t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]), t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]), - t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]), t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]), - t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]), - t!(leftfirst310, &["abcd", "b", "bce", "ce"], "abce", &[ - (1, 1, 2), (3, 2, 4), - ]), - t!(leftfirst320, &["a", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[ - (0, 0, 1), (2, 7, 9), - ]), + t!( + leftfirst310, + &["abcd", "b", "bce", "ce"], + "abce", + &[(1, 1, 2), (3, 2, 4),] + ), + t!( + leftfirst320, + &["a", "abcdefghi", "hz", "abcdefgh"], + "abcdefghz", + &[(0, 0, 1), (2, 7, 9),] + ), t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]), ]; @@ -251,40 +235,27 @@ const LEFTMOST_FIRST: &'static [SearchTest] = &[ /// generally fail under leftmost-first semantics. const LEFTMOST_LONGEST: &'static [SearchTest] = &[ t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]), - t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[ - (0, 0, 4), - ]), - t!(leftlong020, &["", "a"], "a", &[ - (1, 0, 1), (0, 1, 1), - ]), - t!(leftlong021, &["", "a", ""], "a", &[ - (1, 0, 1), (0, 1, 1), - ]), - t!(leftlong022, &["a", "", ""], "a", &[ - (0, 0, 1), (1, 1, 1), - ]), - t!(leftlong023, &["", "", "a"], "a", &[ - (2, 0, 1), (0, 1, 1), - ]), - t!(leftlong030, &["", "a"], "aa", &[ - (1, 0, 1), (1, 1, 2), (0, 2, 2), - ]), + t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]), + t!(leftlong020, &["", "a"], "a", &[(1, 0, 1), (0, 1, 1),]), + t!(leftlong021, &["", "a", ""], "a", &[(1, 0, 1), (0, 1, 1),]), + t!(leftlong022, &["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1),]), + t!(leftlong023, &["", "", "a"], "a", &[(2, 0, 1), (0, 1, 1),]), + t!(leftlong030, &["", "a"], "aa", &[(1, 0, 1), (1, 1, 2), (0, 2, 2),]), t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]), t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]), t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]), t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]), - t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]), t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]), - t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]), - t!(leftlong310, &["a", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[ - (3, 0, 8), - ]), + t!( + leftlong310, + &["a", "abcdefghi", "hz", "abcdefgh"], + "abcdefghz", + &[(3, 0, 8),] + ), t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]), - t!(leftlong330, &["abcd", "b", "ce"], "abce", &[ - (1, 1, 2), (2, 2, 4), - ]), + t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]), t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]), ]; @@ -293,30 +264,18 @@ const LEFTMOST_LONGEST: &'static [SearchTest] = &[ /// Generally these tests shouldn't pass when using overlapping semantics. /// These should pass for both standard and leftmost match semantics. const NON_OVERLAPPING: &'static [SearchTest] = &[ - t!(nover010, &["abcd", "bcd", "cd"], "abcd", &[ - (0, 0, 4), - ]), - t!(nover020, &["bcd", "cd", "abcd"], "abcd", &[ - (2, 0, 4), - ]), - t!(nover030, &["abc", "bc"], "zazabcz", &[ - (0, 3, 6), - ]), - - t!(nover100, &["ab", "ba"], "abababa", &[ - (0, 0, 2), (0, 2, 4), (0, 4, 6), - ]), - - t!(nover200, &["foo", "foo"], "foobarfoo", &[ - (0, 0, 3), (0, 6, 9), - ]), - - t!(nover300, &["", ""], "", &[ - (0, 0, 0), - ]), - t!(nover310, &["", ""], "a", &[ - (0, 0, 0), (0, 1, 1), - ]), + t!(nover010, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]), + t!(nover020, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]), + t!(nover030, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]), + t!( + nover100, + &["ab", "ba"], + "abababa", + &[(0, 0, 2), (0, 2, 4), (0, 4, 6),] + ), + t!(nover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]), + t!(nover300, &["", ""], "", &[(0, 0, 0),]), + t!(nover310, &["", ""], "a", &[(0, 0, 0), (0, 1, 1),]), ]; /// Tests for overlapping match semantics. @@ -324,51 +283,75 @@ const NON_OVERLAPPING: &'static [SearchTest] = &[ /// This only supports standard match semantics, since leftmost-{first,longest} /// do not support overlapping matches. const OVERLAPPING: &'static [SearchTest] = &[ - t!(over000, &["abcd", "bcd", "cd", "b"], "abcd", &[ - (3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4), - ]), - t!(over010, &["bcd", "cd", "b", "abcd"], "abcd", &[ - (2, 1, 2), (3, 0, 4), (0, 1, 4), (1, 2, 4), - ]), - t!(over020, &["abcd", "bcd", "cd"], "abcd", &[ - (0, 0, 4), (1, 1, 4), (2, 2, 4), - ]), - t!(over030, &["bcd", "abcd", "cd"], "abcd", &[ - (1, 0, 4), (0, 1, 4), (2, 2, 4), - ]), - t!(over040, &["bcd", "cd", "abcd"], "abcd", &[ - (2, 0, 4), (0, 1, 4), (1, 2, 4), - ]), - t!(over050, &["abc", "bc"], "zazabcz", &[ - (0, 3, 6), (1, 4, 6), - ]), - - t!(over100, &["ab", "ba"], "abababa", &[ - (0, 0, 2), (1, 1, 3), (0, 2, 4), (1, 3, 5), (0, 4, 6), (1, 5, 7), - ]), - - t!(over200, &["foo", "foo"], "foobarfoo", &[ - (0, 0, 3), (1, 0, 3), (0, 6, 9), (1, 6, 9), - ]), - - t!(over300, &["", ""], "", &[ - (0, 0, 0), (1, 0, 0), - ]), - t!(over310, &["", ""], "a", &[ - (0, 0, 0), (1, 0, 0), (0, 1, 1), (1, 1, 1), - ]), - t!(over320, &["", "a"], "a", &[ - (0, 0, 0), (1, 0, 1), (0, 1, 1), - ]), - t!(over330, &["", "a", ""], "a", &[ - (0, 0, 0), (2, 0, 0), (1, 0, 1), (0, 1, 1), (2, 1, 1), - ]), - t!(over340, &["a", "", ""], "a", &[ - (1, 0, 0), (2, 0, 0), (0, 0, 1), (1, 1, 1), (2, 1, 1), - ]), - t!(over350, &["", "", "a"], "a", &[ - (0, 0, 0), (1, 0, 0), (2, 0, 1), (0, 1, 1), (1, 1, 1), - ]), + t!( + over000, + &["abcd", "bcd", "cd", "b"], + "abcd", + &[(3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4),] + ), + t!( + over010, + &["bcd", "cd", "b", "abcd"], + "abcd", + &[(2, 1, 2), (3, 0, 4), (0, 1, 4), (1, 2, 4),] + ), + t!( + over020, + &["abcd", "bcd", "cd"], + "abcd", + &[(0, 0, 4), (1, 1, 4), (2, 2, 4),] + ), + t!( + over030, + &["bcd", "abcd", "cd"], + "abcd", + &[(1, 0, 4), (0, 1, 4), (2, 2, 4),] + ), + t!( + over040, + &["bcd", "cd", "abcd"], + "abcd", + &[(2, 0, 4), (0, 1, 4), (1, 2, 4),] + ), + t!(over050, &["abc", "bc"], "zazabcz", &[(0, 3, 6), (1, 4, 6),]), + t!( + over100, + &["ab", "ba"], + "abababa", + &[(0, 0, 2), (1, 1, 3), (0, 2, 4), (1, 3, 5), (0, 4, 6), (1, 5, 7),] + ), + t!( + over200, + &["foo", "foo"], + "foobarfoo", + &[(0, 0, 3), (1, 0, 3), (0, 6, 9), (1, 6, 9),] + ), + t!(over300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]), + t!( + over310, + &["", ""], + "a", + &[(0, 0, 0), (1, 0, 0), (0, 1, 1), (1, 1, 1),] + ), + t!(over320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1), (0, 1, 1),]), + t!( + over330, + &["", "a", ""], + "a", + &[(0, 0, 0), (2, 0, 0), (1, 0, 1), (0, 1, 1), (2, 1, 1),] + ), + t!( + over340, + &["a", "", ""], + "a", + &[(1, 0, 0), (2, 0, 0), (0, 0, 1), (1, 1, 1), (2, 1, 1),] + ), + t!( + over350, + &["", "", "a"], + "a", + &[(0, 0, 0), (1, 0, 0), (2, 0, 1), (0, 1, 1), (1, 1, 1),] + ), ]; /// Regression tests that are applied to all Aho-Corasick combinations. @@ -376,24 +359,32 @@ const OVERLAPPING: &'static [SearchTest] = &[ /// If regression tests are needed for specific match semantics, then add them /// to the appropriate group above. const REGRESSION: &'static [SearchTest] = &[ - t!(regression010, &["inf", "ind"], "infind", &[ - (0, 0, 3), (1, 3, 6), - ]), - t!(regression020, &["ind", "inf"], "infind", &[ - (1, 0, 3), (0, 3, 6), - ]), - t!(regression030, &["libcore/", "libstd/"], "libcore/char/methods.rs", &[ - (0, 0, 8), - ]), - t!(regression040, &["libstd/", "libcore/"], "libcore/char/methods.rs", &[ - (1, 0, 8), - ]), - t!(regression050, &["\x00\x00\x01", "\x00\x00\x00"], "\x00\x00\x00", &[ - (1, 0, 3), - ]), - t!(regression060, &["\x00\x00\x00", "\x00\x00\x01"], "\x00\x00\x00", &[ - (0, 0, 3), - ]), + t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]), + t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]), + t!( + regression030, + &["libcore/", "libstd/"], + "libcore/char/methods.rs", + &[(0, 0, 8),] + ), + t!( + regression040, + &["libstd/", "libcore/"], + "libcore/char/methods.rs", + &[(1, 0, 8),] + ), + t!( + regression050, + &["\x00\x00\x01", "\x00\x00\x00"], + "\x00\x00\x00", + &[(1, 0, 3),] + ), + t!( + regression060, + &["\x00\x00\x00", "\x00\x00\x01"], + "\x00\x00\x00", + &[(0, 0, 3),] + ), ]; // Now define a test for each combination of things above that we want to run. @@ -424,10 +415,8 @@ macro_rules! testconfig { #[test] fn $name() { run_search_tests($collection, |test| { - let buf = io::BufReader::with_capacity( - 1, - test.haystack.as_bytes(), - ); + let buf = + io::BufReader::with_capacity(1, test.haystack.as_bytes()); let mut builder = AhoCorasickBuilder::new(); $with(&mut builder); builder @@ -461,46 +450,86 @@ macro_rules! testcombo { use super::*; testconfig!(nfa_default, $collection, $kind, |_| ()); - testconfig!(nfa_no_prefilter, $collection, $kind, + testconfig!( + nfa_no_prefilter, + $collection, + $kind, |b: &mut AhoCorasickBuilder| { b.prefilter(false); - }); - testconfig!(nfa_all_sparse, $collection, $kind, + } + ); + testconfig!( + nfa_all_sparse, + $collection, + $kind, |b: &mut AhoCorasickBuilder| { b.dense_depth(0); - }); - testconfig!(nfa_all_dense, $collection, $kind, + } + ); + testconfig!( + nfa_all_dense, + $collection, + $kind, |b: &mut AhoCorasickBuilder| { b.dense_depth(usize::MAX); - }); - testconfig!(dfa_default, $collection, $kind, + } + ); + testconfig!( + dfa_default, + $collection, + $kind, |b: &mut AhoCorasickBuilder| { b.dfa(true); - }); - testconfig!(dfa_no_prefilter, $collection, $kind, + } + ); + testconfig!( + dfa_no_prefilter, + $collection, + $kind, |b: &mut AhoCorasickBuilder| { b.dfa(true).prefilter(false); - }); - testconfig!(dfa_all_sparse, $collection, $kind, + } + ); + testconfig!( + dfa_all_sparse, + $collection, + $kind, |b: &mut AhoCorasickBuilder| { b.dfa(true).dense_depth(0); - }); - testconfig!(dfa_all_dense, $collection, $kind, + } + ); + testconfig!( + dfa_all_dense, + $collection, + $kind, |b: &mut AhoCorasickBuilder| { b.dfa(true).dense_depth(usize::MAX); - }); - testconfig!(dfa_no_byte_class, $collection, $kind, + } + ); + testconfig!( + dfa_no_byte_class, + $collection, + $kind, |b: &mut AhoCorasickBuilder| { b.dfa(true).byte_classes(false); - }); - testconfig!(dfa_no_premultiply, $collection, $kind, + } + ); + testconfig!( + dfa_no_premultiply, + $collection, + $kind, |b: &mut AhoCorasickBuilder| { b.dfa(true).premultiply(false); - }); - testconfig!(dfa_no_byte_class_no_premultiply, $collection, $kind, + } + ); + testconfig!( + dfa_no_byte_class_no_premultiply, + $collection, + $kind, |b: &mut AhoCorasickBuilder| { b.dfa(true).byte_classes(false).premultiply(false); - }); + } + ); } }; } @@ -509,7 +538,9 @@ macro_rules! testcombo { testcombo!(search_leftmost_longest, AC_LEFTMOST_LONGEST, LeftmostLongest); testcombo!(search_leftmost_first, AC_LEFTMOST_FIRST, LeftmostFirst); testcombo!( - search_standard_nonoverlapping, AC_STANDARD_NON_OVERLAPPING, Standard + search_standard_nonoverlapping, + AC_STANDARD_NON_OVERLAPPING, + Standard ); // Write out the overlapping combo by hand since there is only one of them. @@ -525,49 +556,63 @@ testconfig!( search_standard_overlapping_nfa_all_sparse, AC_STANDARD_OVERLAPPING, Standard, - |b: &mut AhoCorasickBuilder| { b.dense_depth(0); } + |b: &mut AhoCorasickBuilder| { + b.dense_depth(0); + } ); testconfig!( overlapping, search_standard_overlapping_nfa_all_dense, AC_STANDARD_OVERLAPPING, Standard, - |b: &mut AhoCorasickBuilder| { b.dense_depth(usize::MAX); } + |b: &mut AhoCorasickBuilder| { + b.dense_depth(usize::MAX); + } ); testconfig!( overlapping, search_standard_overlapping_dfa_default, AC_STANDARD_OVERLAPPING, Standard, - |b: &mut AhoCorasickBuilder| { b.dfa(true); } + |b: &mut AhoCorasickBuilder| { + b.dfa(true); + } ); testconfig!( overlapping, search_standard_overlapping_dfa_all_sparse, AC_STANDARD_OVERLAPPING, Standard, - |b: &mut AhoCorasickBuilder| { b.dfa(true).dense_depth(0); } + |b: &mut AhoCorasickBuilder| { + b.dfa(true).dense_depth(0); + } ); testconfig!( overlapping, search_standard_overlapping_dfa_all_dense, AC_STANDARD_OVERLAPPING, Standard, - |b: &mut AhoCorasickBuilder| { b.dfa(true).dense_depth(usize::MAX); } + |b: &mut AhoCorasickBuilder| { + b.dfa(true).dense_depth(usize::MAX); + } ); testconfig!( overlapping, search_standard_overlapping_dfa_no_byte_class, AC_STANDARD_OVERLAPPING, Standard, - |b: &mut AhoCorasickBuilder| { b.dfa(true).byte_classes(false); } + |b: &mut AhoCorasickBuilder| { + b.dfa(true).byte_classes(false); + } ); testconfig!( overlapping, search_standard_overlapping_dfa_no_premultiply, AC_STANDARD_OVERLAPPING, Standard, - |b: &mut AhoCorasickBuilder| { b.dfa(true).premultiply(false); } + |b: &mut AhoCorasickBuilder| { + b.dfa(true).premultiply(false); + } ); testconfig!( overlapping, @@ -594,7 +639,9 @@ testconfig!( search_standard_stream_dfa_default, AC_STANDARD_NON_OVERLAPPING, Standard, - |b: &mut AhoCorasickBuilder| { b.dfa(true); } + |b: &mut AhoCorasickBuilder| { + b.dfa(true); + } ); #[test] @@ -669,8 +716,8 @@ fn state_id_too_small() { } } } - let result = AhoCorasickBuilder::new() - .build_with_size::(&patterns); + let result = + AhoCorasickBuilder::new().build_with_size::(&patterns); assert!(result.is_err()); } @@ -680,7 +727,8 @@ fn run_search_tests Vec>( ) { let get_match_triples = |matches: Vec| -> Vec<(usize, usize, usize)> { - matches.into_iter() + matches + .into_iter() .map(|m| (m.pattern(), m.start(), m.end())) .collect() };