mirror of
https://gitee.com/openharmony/third_party_rust_aho-corasick
synced 2024-11-26 17:12:09 +00:00
style: switch to rustfmt
And enable a CI check for it.
This commit is contained in:
parent
fa956e6062
commit
bb89108f8d
@ -58,7 +58,7 @@ struct Args {
|
||||
|
||||
impl Args {
|
||||
fn parse() -> Result<Args> {
|
||||
use clap::{App, Arg, crate_authors, crate_version};
|
||||
use clap::{crate_authors, crate_version, App, Arg};
|
||||
|
||||
let parsed = App::new("Search using aho-corasick")
|
||||
.author(crate_authors!())
|
||||
@ -66,18 +66,26 @@ impl Args {
|
||||
.max_term_width(100)
|
||||
.arg(Arg::with_name("dictionary").required(true))
|
||||
.arg(Arg::with_name("haystack").required(true))
|
||||
.arg(Arg::with_name("kind")
|
||||
.long("kind")
|
||||
.possible_values(&[
|
||||
"standard", "leftmost-first", "leftmost-longest",
|
||||
])
|
||||
.default_value("standard"))
|
||||
.arg(Arg::with_name("ascii-case-insensitive")
|
||||
.long("ascii-case-insensitive")
|
||||
.short("i"))
|
||||
.arg(Arg::with_name("dense-depth")
|
||||
.long("dense-depth")
|
||||
.default_value("2"))
|
||||
.arg(
|
||||
Arg::with_name("kind")
|
||||
.long("kind")
|
||||
.possible_values(&[
|
||||
"standard",
|
||||
"leftmost-first",
|
||||
"leftmost-longest",
|
||||
])
|
||||
.default_value("standard"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("ascii-case-insensitive")
|
||||
.long("ascii-case-insensitive")
|
||||
.short("i"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("dense-depth")
|
||||
.long("dense-depth")
|
||||
.default_value("2"),
|
||||
)
|
||||
.arg(Arg::with_name("dfa").long("dfa").short("d"))
|
||||
.arg(Arg::with_name("prefilter").long("prefilter").short("f"))
|
||||
.arg(Arg::with_name("classes").long("classes").short("c"))
|
||||
@ -85,12 +93,9 @@ impl Args {
|
||||
.arg(Arg::with_name("no-search").long("no-search"))
|
||||
.get_matches();
|
||||
|
||||
let dictionary = PathBuf::from(
|
||||
parsed.value_of_os("dictionary").unwrap()
|
||||
);
|
||||
let haystack = PathBuf::from(
|
||||
parsed.value_of_os("haystack").unwrap()
|
||||
);
|
||||
let dictionary =
|
||||
PathBuf::from(parsed.value_of_os("dictionary").unwrap());
|
||||
let haystack = PathBuf::from(parsed.value_of_os("haystack").unwrap());
|
||||
let match_kind = match parsed.value_of("kind").unwrap() {
|
||||
"standard" => MatchKind::Standard,
|
||||
"leftmost-first" => MatchKind::LeftmostFirst,
|
||||
@ -100,7 +105,10 @@ impl Args {
|
||||
let dense_depth = parsed.value_of("dense-depth").unwrap().parse()?;
|
||||
|
||||
Ok(Args {
|
||||
dictionary, haystack, match_kind, dense_depth,
|
||||
dictionary,
|
||||
haystack,
|
||||
match_kind,
|
||||
dense_depth,
|
||||
ascii_casei: parsed.is_present("ascii-case-insensitive"),
|
||||
dfa: parsed.is_present("dfa"),
|
||||
prefilter: parsed.is_present("prefilter"),
|
||||
|
@ -30,10 +30,8 @@ fn define_aho_corasick<B: AsRef<[u8]>>(
|
||||
count: usize,
|
||||
patterns: Vec<B>,
|
||||
) {
|
||||
let patterns: Vec<Vec<u8>> = patterns
|
||||
.into_iter()
|
||||
.map(|b| b.as_ref().to_vec())
|
||||
.collect();
|
||||
let patterns: Vec<Vec<u8>> =
|
||||
patterns.into_iter().map(|b| b.as_ref().to_vec()).collect();
|
||||
|
||||
let haystack = corpus.to_vec();
|
||||
let name = format!("nfa/{}", bench_name);
|
||||
@ -44,9 +42,7 @@ fn define_aho_corasick<B: AsRef<[u8]>>(
|
||||
|
||||
let haystack = corpus.to_vec();
|
||||
let name = format!("dfa/{}", bench_name);
|
||||
let aut = AhoCorasickBuilder::new()
|
||||
.dfa(true)
|
||||
.build(patterns.clone());
|
||||
let aut = AhoCorasickBuilder::new().dfa(true).build(patterns.clone());
|
||||
define(c, group_name, &name, corpus, move |b| {
|
||||
b.iter(|| assert_eq!(count, aut.find_iter(&haystack).count()));
|
||||
});
|
||||
@ -63,13 +59,12 @@ fn define_aho_corasick_dfa<B, F>(
|
||||
count: usize,
|
||||
patterns: Vec<B>,
|
||||
find_count: F,
|
||||
) where B: AsRef<[u8]>,
|
||||
F: 'static + Clone + Fn(&AhoCorasick, &[u8]) -> usize
|
||||
) where
|
||||
B: AsRef<[u8]>,
|
||||
F: 'static + Clone + Fn(&AhoCorasick, &[u8]) -> usize,
|
||||
{
|
||||
let patterns: Vec<Vec<u8>> = patterns
|
||||
.into_iter()
|
||||
.map(|b| b.as_ref().to_vec())
|
||||
.collect();
|
||||
let patterns: Vec<Vec<u8>> =
|
||||
patterns.into_iter().map(|b| b.as_ref().to_vec()).collect();
|
||||
|
||||
let counter = find_count.clone();
|
||||
let haystack = corpus.to_vec();
|
||||
@ -134,7 +129,6 @@ fn define(
|
||||
corpus: &[u8],
|
||||
bench: impl FnMut(&mut Bencher) + 'static,
|
||||
) {
|
||||
|
||||
let tput = Throughput::Bytes(corpus.len() as u32);
|
||||
let benchmark = Benchmark::new(bench_name, bench)
|
||||
.throughput(tput)
|
||||
@ -152,7 +146,6 @@ fn define_long(
|
||||
corpus: &[u8],
|
||||
bench: impl FnMut(&mut Bencher) + 'static,
|
||||
) {
|
||||
|
||||
let tput = Throughput::Bytes(corpus.len() as u32);
|
||||
let benchmark = Benchmark::new(bench_name, bench)
|
||||
.throughput(tput)
|
||||
|
@ -1,7 +1,7 @@
|
||||
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
|
||||
use criterion::{Criterion, black_box};
|
||||
use criterion::{black_box, Criterion};
|
||||
|
||||
use input::{words_5000, words_15000};
|
||||
use input::{words_15000, words_5000};
|
||||
use {define, define_long};
|
||||
|
||||
/// Benchmarks that measure the performance of constructing an Aho-Corasick
|
||||
@ -10,15 +10,21 @@ pub fn all(c: &mut Criterion) {
|
||||
define_build::<String>(c, false, "empty", vec![]);
|
||||
define_build(c, false, "onebyte", vec!["a"]);
|
||||
define_build(c, false, "twobytes", vec!["a", "b"]);
|
||||
define_build(c, false, "many-short", vec![
|
||||
"ADL", "ADl", "AdL", "Adl", "BAK", "BAk", "BAK", "BaK", "Bak", "BaK",
|
||||
"HOL", "HOl", "HoL", "Hol", "IRE", "IRe", "IrE", "Ire", "JOH", "JOh",
|
||||
"JoH", "Joh", "SHE", "SHe", "ShE", "She", "WAT", "WAt", "WaT", "Wat",
|
||||
"aDL", "aDl", "adL", "adl", "bAK", "bAk", "bAK", "baK", "bak", "baK",
|
||||
"hOL", "hOl", "hoL", "hol", "iRE", "iRe", "irE", "ire", "jOH", "jOh",
|
||||
"joH", "joh", "sHE", "sHe", "shE", "she", "wAT", "wAt", "waT", "wat",
|
||||
"ſHE", "ſHe", "ſhE", "ſhe",
|
||||
]);
|
||||
define_build(
|
||||
c,
|
||||
false,
|
||||
"many-short",
|
||||
vec![
|
||||
"ADL", "ADl", "AdL", "Adl", "BAK", "BAk", "BAK", "BaK", "Bak",
|
||||
"BaK", "HOL", "HOl", "HoL", "Hol", "IRE", "IRe", "IrE", "Ire",
|
||||
"JOH", "JOh", "JoH", "Joh", "SHE", "SHe", "ShE", "She", "WAT",
|
||||
"WAt", "WaT", "Wat", "aDL", "aDl", "adL", "adl", "bAK", "bAk",
|
||||
"bAK", "baK", "bak", "baK", "hOL", "hOl", "hoL", "hol", "iRE",
|
||||
"iRe", "irE", "ire", "jOH", "jOh", "joH", "joh", "sHE", "sHe",
|
||||
"shE", "she", "wAT", "wAt", "waT", "wat", "ſHE", "ſHe", "ſhE",
|
||||
"ſhe",
|
||||
],
|
||||
);
|
||||
define_build(c, true, "5000words", words_5000());
|
||||
define_build(c, true, "15000words", words_15000());
|
||||
}
|
||||
@ -29,10 +35,8 @@ fn define_build<B: AsRef<[u8]>>(
|
||||
bench_name: &str,
|
||||
patterns: Vec<B>,
|
||||
) {
|
||||
let patterns: Vec<Vec<u8>> = patterns
|
||||
.into_iter()
|
||||
.map(|b| b.as_ref().to_vec())
|
||||
.collect();
|
||||
let patterns: Vec<Vec<u8>> =
|
||||
patterns.into_iter().map(|b| b.as_ref().to_vec()).collect();
|
||||
|
||||
let pats = patterns.clone();
|
||||
let name = format!("nfa/{}", bench_name);
|
||||
|
@ -26,30 +26,52 @@ fn memchr_optimizations(c: &mut Criterion) {
|
||||
define_random(c, "twobytes/nomatch", 0, vec!["\x00", "\x01"]);
|
||||
define_random(c, "threebytes/match", 352, vec!["a", "\x00", "\x01"]);
|
||||
define_random(c, "threebytes/nomatch", 0, vec!["\x00", "\x01", "\x02"]);
|
||||
define_random(c, "fourbytes/match", 352, vec![
|
||||
"a", "\x00", "\x01", "\x02",
|
||||
]);
|
||||
define_random(c, "fourbytes/nomatch", 0, vec![
|
||||
"\x00", "\x01", "\x02", "\x03",
|
||||
]);
|
||||
define_random(c, "fivebytes/match", 352, vec![
|
||||
"a", "\x00", "\x01", "\x02", "\x03",
|
||||
]);
|
||||
define_random(c, "fivebytes/nomatch", 0, vec![
|
||||
"\x00", "\x01", "\x02", "\x03", "\x04",
|
||||
]);
|
||||
define_random(
|
||||
c,
|
||||
"fourbytes/match",
|
||||
352,
|
||||
vec!["a", "\x00", "\x01", "\x02"],
|
||||
);
|
||||
define_random(
|
||||
c,
|
||||
"fourbytes/nomatch",
|
||||
0,
|
||||
vec!["\x00", "\x01", "\x02", "\x03"],
|
||||
);
|
||||
define_random(
|
||||
c,
|
||||
"fivebytes/match",
|
||||
352,
|
||||
vec!["a", "\x00", "\x01", "\x02", "\x03"],
|
||||
);
|
||||
define_random(
|
||||
c,
|
||||
"fivebytes/nomatch",
|
||||
0,
|
||||
vec!["\x00", "\x01", "\x02", "\x03", "\x04"],
|
||||
);
|
||||
}
|
||||
|
||||
/// Some miscellaneous benchmarks on random data.
|
||||
fn misc(c: &mut Criterion) {
|
||||
define_random(c, "ten-one-prefix", 0, vec![
|
||||
"zacdef", "zbcdef", "zccdef", "zdcdef", "zecdef",
|
||||
"zfcdef", "zgcdef", "zhcdef", "zicdef", "zjcdef",
|
||||
]);
|
||||
define_random(c, "ten-diff-prefix", 0, vec![
|
||||
"abcdef", "bcdefg", "cdefgh", "defghi", "efghij",
|
||||
"fghijk", "ghijkl", "hijklm", "ijklmn", "jklmno",
|
||||
]);
|
||||
define_random(
|
||||
c,
|
||||
"ten-one-prefix",
|
||||
0,
|
||||
vec![
|
||||
"zacdef", "zbcdef", "zccdef", "zdcdef", "zecdef", "zfcdef",
|
||||
"zgcdef", "zhcdef", "zicdef", "zjcdef",
|
||||
],
|
||||
);
|
||||
define_random(
|
||||
c,
|
||||
"ten-diff-prefix",
|
||||
0,
|
||||
vec![
|
||||
"abcdef", "bcdefg", "cdefgh", "defghi", "efghij", "fghijk",
|
||||
"ghijkl", "hijklm", "ijklmn", "jklmno",
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
/// Various benchmarks using a large pattern set.
|
||||
@ -60,23 +82,47 @@ fn many_patterns(c: &mut Criterion) {
|
||||
|
||||
let group = "random10x/standard";
|
||||
define_aho_corasick_dfa(
|
||||
c, group, name, RANDOM10X, Standard, 0, words_5000(),
|
||||
c,
|
||||
group,
|
||||
name,
|
||||
RANDOM10X,
|
||||
Standard,
|
||||
0,
|
||||
words_5000(),
|
||||
|ac, haystack| ac.find_iter(haystack).count(),
|
||||
);
|
||||
let group = "random10x/leftmost-first";
|
||||
define_aho_corasick_dfa(
|
||||
c, group, name, RANDOM10X, LeftmostFirst, 0, words_5000(),
|
||||
c,
|
||||
group,
|
||||
name,
|
||||
RANDOM10X,
|
||||
LeftmostFirst,
|
||||
0,
|
||||
words_5000(),
|
||||
|ac, haystack| ac.find_iter(haystack).count(),
|
||||
);
|
||||
let group = "random10x/leftmost-longest";
|
||||
define_aho_corasick_dfa(
|
||||
c, group, name, RANDOM10X, LeftmostLongest, 0, words_5000(),
|
||||
c,
|
||||
group,
|
||||
name,
|
||||
RANDOM10X,
|
||||
LeftmostLongest,
|
||||
0,
|
||||
words_5000(),
|
||||
|ac, haystack| ac.find_iter(haystack).count(),
|
||||
);
|
||||
|
||||
let group = "random10x/overlapping";
|
||||
define_aho_corasick_dfa(
|
||||
c, group, name, RANDOM10X, Standard, 0, words_5000(),
|
||||
c,
|
||||
group,
|
||||
name,
|
||||
RANDOM10X,
|
||||
Standard,
|
||||
0,
|
||||
words_5000(),
|
||||
|ac, haystack| ac.find_overlapping_iter(haystack).count(),
|
||||
);
|
||||
|
||||
@ -84,23 +130,47 @@ fn many_patterns(c: &mut Criterion) {
|
||||
|
||||
let group = "random10x/standard";
|
||||
define_aho_corasick_dfa(
|
||||
c, group, name, RANDOM10X, Standard, 0, words_100(),
|
||||
c,
|
||||
group,
|
||||
name,
|
||||
RANDOM10X,
|
||||
Standard,
|
||||
0,
|
||||
words_100(),
|
||||
|ac, haystack| ac.find_iter(haystack).count(),
|
||||
);
|
||||
let group = "random10x/leftmost-first";
|
||||
define_aho_corasick_dfa(
|
||||
c, group, name, RANDOM10X, LeftmostFirst, 0, words_100(),
|
||||
c,
|
||||
group,
|
||||
name,
|
||||
RANDOM10X,
|
||||
LeftmostFirst,
|
||||
0,
|
||||
words_100(),
|
||||
|ac, haystack| ac.find_iter(haystack).count(),
|
||||
);
|
||||
let group = "random10x/leftmost-longest";
|
||||
define_aho_corasick_dfa(
|
||||
c, group, name, RANDOM10X, LeftmostLongest, 0, words_100(),
|
||||
c,
|
||||
group,
|
||||
name,
|
||||
RANDOM10X,
|
||||
LeftmostLongest,
|
||||
0,
|
||||
words_100(),
|
||||
|ac, haystack| ac.find_iter(haystack).count(),
|
||||
);
|
||||
|
||||
let group = "random10x/overlapping";
|
||||
define_aho_corasick_dfa(
|
||||
c, group, name, RANDOM10X, Standard, 0, words_100(),
|
||||
c,
|
||||
group,
|
||||
name,
|
||||
RANDOM10X,
|
||||
Standard,
|
||||
0,
|
||||
words_100(),
|
||||
|ac, haystack| ac.find_overlapping_iter(haystack).count(),
|
||||
);
|
||||
}
|
||||
@ -111,7 +181,5 @@ fn define_random<B: AsRef<[u8]>>(
|
||||
count: usize,
|
||||
patterns: Vec<B>,
|
||||
) {
|
||||
define_aho_corasick(
|
||||
c, "random", bench_name, RANDOM, count, patterns,
|
||||
);
|
||||
define_aho_corasick(c, "random", bench_name, RANDOM, count, patterns);
|
||||
}
|
||||
|
@ -34,6 +34,11 @@ fn define_same<B: AsRef<[u8]>>(
|
||||
) {
|
||||
let corpus = "z".repeat(10_000);
|
||||
define_aho_corasick(
|
||||
c, "same", bench_name, corpus.as_bytes(), count, patterns,
|
||||
c,
|
||||
"same",
|
||||
bench_name,
|
||||
corpus.as_bytes(),
|
||||
count,
|
||||
patterns,
|
||||
);
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
use criterion::Criterion;
|
||||
|
||||
use input::*;
|
||||
use define_aho_corasick;
|
||||
use input::*;
|
||||
|
||||
/// These benchmarks test various words on natural language text.
|
||||
///
|
||||
@ -11,30 +11,53 @@ use define_aho_corasick;
|
||||
pub fn all(c: &mut Criterion) {
|
||||
define_sherlock(c, "name/alt1", 158, vec!["Sherlock", "Street"]);
|
||||
define_sherlock(c, "name/alt2", 558, vec!["Sherlock", "Holmes"]);
|
||||
define_sherlock(c, "name/alt3", 740, vec![
|
||||
"Sherlock", "Holmes", "Watson", "Irene", "Adler", "John", "Baker",
|
||||
]);
|
||||
define_sherlock(
|
||||
c,
|
||||
"name/alt3",
|
||||
740,
|
||||
vec![
|
||||
"Sherlock", "Holmes", "Watson", "Irene", "Adler", "John", "Baker",
|
||||
],
|
||||
);
|
||||
define_sherlock(c, "name/alt4", 582, vec!["Sher", "Hol"]);
|
||||
define_sherlock(c, "name/alt5", 639, vec!["Sherlock", "Holmes", "Watson"]);
|
||||
|
||||
define_sherlock(c, "name/nocase1", 1764, vec![
|
||||
"ADL", "ADl", "AdL", "Adl", "BAK", "BAk", "BAK", "BaK", "Bak", "BaK",
|
||||
"HOL", "HOl", "HoL", "Hol", "IRE", "IRe", "IrE", "Ire", "JOH", "JOh",
|
||||
"JoH", "Joh", "SHE", "SHe", "ShE", "She", "WAT", "WAt", "WaT", "Wat",
|
||||
"aDL", "aDl", "adL", "adl", "bAK", "bAk", "bAK", "baK", "bak", "baK",
|
||||
"hOL", "hOl", "hoL", "hol", "iRE", "iRe", "irE", "ire", "jOH", "jOh",
|
||||
"joH", "joh", "sHE", "sHe", "shE", "she", "wAT", "wAt", "waT", "wat",
|
||||
"ſHE", "ſHe", "ſhE", "ſhe",
|
||||
]);
|
||||
define_sherlock(c, "name/nocase2", 1307, vec![
|
||||
"HOL", "HOl", "HoL", "Hol", "SHE", "SHe", "ShE", "She", "hOL", "hOl",
|
||||
"hoL", "hol", "sHE", "sHe", "shE", "she", "ſHE", "ſHe", "ſhE", "ſhe",
|
||||
]);
|
||||
define_sherlock(c, "name/nocase3", 1442, vec![
|
||||
"HOL", "HOl", "HoL", "Hol", "SHE", "SHe", "ShE", "She", "WAT", "WAt",
|
||||
"WaT", "Wat", "hOL", "hOl", "hoL", "hol", "sHE", "sHe", "shE", "she",
|
||||
"wAT", "wAt", "waT", "wat", "ſHE", "ſHe", "ſhE", "ſhe",
|
||||
]);
|
||||
define_sherlock(
|
||||
c,
|
||||
"name/nocase1",
|
||||
1764,
|
||||
vec![
|
||||
"ADL", "ADl", "AdL", "Adl", "BAK", "BAk", "BAK", "BaK", "Bak",
|
||||
"BaK", "HOL", "HOl", "HoL", "Hol", "IRE", "IRe", "IrE", "Ire",
|
||||
"JOH", "JOh", "JoH", "Joh", "SHE", "SHe", "ShE", "She", "WAT",
|
||||
"WAt", "WaT", "Wat", "aDL", "aDl", "adL", "adl", "bAK", "bAk",
|
||||
"bAK", "baK", "bak", "baK", "hOL", "hOl", "hoL", "hol", "iRE",
|
||||
"iRe", "irE", "ire", "jOH", "jOh", "joH", "joh", "sHE", "sHe",
|
||||
"shE", "she", "wAT", "wAt", "waT", "wat", "ſHE", "ſHe", "ſhE",
|
||||
"ſhe",
|
||||
],
|
||||
);
|
||||
define_sherlock(
|
||||
c,
|
||||
"name/nocase2",
|
||||
1307,
|
||||
vec![
|
||||
"HOL", "HOl", "HoL", "Hol", "SHE", "SHe", "ShE", "She", "hOL",
|
||||
"hOl", "hoL", "hol", "sHE", "sHe", "shE", "she", "ſHE", "ſHe",
|
||||
"ſhE", "ſhe",
|
||||
],
|
||||
);
|
||||
define_sherlock(
|
||||
c,
|
||||
"name/nocase3",
|
||||
1442,
|
||||
vec![
|
||||
"HOL", "HOl", "HoL", "Hol", "SHE", "SHe", "ShE", "She", "WAT",
|
||||
"WAt", "WaT", "Wat", "hOL", "hOl", "hoL", "hol", "sHE", "sHe",
|
||||
"shE", "she", "wAT", "wAt", "waT", "wat", "ſHE", "ſHe", "ſhE",
|
||||
"ſhe",
|
||||
],
|
||||
);
|
||||
|
||||
define_sherlock(c, "5000words", 567, words_5000());
|
||||
}
|
||||
|
@ -5,6 +5,10 @@ set -ex
|
||||
cargo build --verbose
|
||||
cargo doc --verbose
|
||||
cargo test --verbose
|
||||
if [ "$TRAVIS_RUST_VERSION" = "stable" ]; then
|
||||
rustup component add rustfmt
|
||||
cargo fmt -- --check
|
||||
fi
|
||||
if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
|
||||
cargo bench --verbose --manifest-path bench/Cargo.toml -- --test
|
||||
fi
|
||||
|
2
rustfmt.toml
Normal file
2
rustfmt.toml
Normal file
@ -0,0 +1,2 @@
|
||||
max_width = 79
|
||||
use_small_heuristics = "max"
|
@ -115,11 +115,10 @@ impl AhoCorasick {
|
||||
/// ]);
|
||||
/// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern()));
|
||||
/// ```
|
||||
pub fn new<I, P>(
|
||||
patterns: I,
|
||||
) -> AhoCorasick
|
||||
where I: IntoIterator<Item=P>,
|
||||
P: AsRef<[u8]>
|
||||
pub fn new<I, P>(patterns: I) -> AhoCorasick
|
||||
where
|
||||
I: IntoIterator<Item = P>,
|
||||
P: AsRef<[u8]>,
|
||||
{
|
||||
AhoCorasickBuilder::new().build(patterns)
|
||||
}
|
||||
@ -151,10 +150,9 @@ impl AhoCorasick {
|
||||
/// ]);
|
||||
/// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern()));
|
||||
/// ```
|
||||
pub fn new_auto_configured<B>(
|
||||
patterns: &[B],
|
||||
) -> AhoCorasick
|
||||
where B: AsRef<[u8]>
|
||||
pub fn new_auto_configured<B>(patterns: &[B]) -> AhoCorasick
|
||||
where
|
||||
B: AsRef<[u8]>,
|
||||
{
|
||||
AhoCorasickBuilder::new().auto_configure(patterns).build(patterns)
|
||||
}
|
||||
@ -213,7 +211,10 @@ impl<S: StateID> AhoCorasick<S> {
|
||||
let mut prestate = PrefilterState::new(self.max_pattern_len());
|
||||
let mut start = self.imp.start_state();
|
||||
self.imp.earliest_find_at(
|
||||
&mut prestate, haystack.as_ref(), 0, &mut start,
|
||||
&mut prestate,
|
||||
haystack.as_ref(),
|
||||
0,
|
||||
&mut start,
|
||||
)
|
||||
}
|
||||
|
||||
@ -425,15 +426,13 @@ impl<S: StateID> AhoCorasick<S> {
|
||||
/// let result = ac.replace_all(haystack, &["x", "y", "z"]);
|
||||
/// assert_eq!("x the z to the xage", result);
|
||||
/// ```
|
||||
pub fn replace_all<B>(
|
||||
&self,
|
||||
haystack: &str,
|
||||
replace_with: &[B],
|
||||
) -> String
|
||||
where B: AsRef<str>
|
||||
pub fn replace_all<B>(&self, haystack: &str, replace_with: &[B]) -> String
|
||||
where
|
||||
B: AsRef<str>,
|
||||
{
|
||||
assert_eq!(
|
||||
replace_with.len(), self.pattern_count(),
|
||||
replace_with.len(),
|
||||
self.pattern_count(),
|
||||
"replace_all requires a replacement for every pattern \
|
||||
in the automaton"
|
||||
);
|
||||
@ -479,10 +478,12 @@ impl<S: StateID> AhoCorasick<S> {
|
||||
haystack: &[u8],
|
||||
replace_with: &[B],
|
||||
) -> Vec<u8>
|
||||
where B: AsRef<[u8]>
|
||||
where
|
||||
B: AsRef<[u8]>,
|
||||
{
|
||||
assert_eq!(
|
||||
replace_with.len(), self.pattern_count(),
|
||||
replace_with.len(),
|
||||
self.pattern_count(),
|
||||
"replace_all_bytes requires a replacement for every pattern \
|
||||
in the automaton"
|
||||
);
|
||||
@ -528,7 +529,8 @@ impl<S: StateID> AhoCorasick<S> {
|
||||
haystack: &str,
|
||||
dst: &mut String,
|
||||
mut replace_with: F,
|
||||
) where F: FnMut(&Match, &str, &mut String) -> bool
|
||||
) where
|
||||
F: FnMut(&Match, &str, &mut String) -> bool,
|
||||
{
|
||||
let mut last_match = 0;
|
||||
for mat in self.find_iter(haystack) {
|
||||
@ -573,7 +575,8 @@ impl<S: StateID> AhoCorasick<S> {
|
||||
haystack: &[u8],
|
||||
dst: &mut Vec<u8>,
|
||||
mut replace_with: F,
|
||||
) where F: FnMut(&Match, &[u8], &mut Vec<u8>) -> bool
|
||||
) where
|
||||
F: FnMut(&Match, &[u8], &mut Vec<u8>) -> bool,
|
||||
{
|
||||
let mut last_match = 0;
|
||||
for mat in self.find_iter(haystack) {
|
||||
@ -710,12 +713,14 @@ impl<S: StateID> AhoCorasick<S> {
|
||||
wtr: W,
|
||||
replace_with: &[B],
|
||||
) -> io::Result<()>
|
||||
where R: io::Read,
|
||||
W: io::Write,
|
||||
B: AsRef<[u8]>
|
||||
where
|
||||
R: io::Read,
|
||||
W: io::Write,
|
||||
B: AsRef<[u8]>,
|
||||
{
|
||||
assert_eq!(
|
||||
replace_with.len(), self.pattern_count(),
|
||||
replace_with.len(),
|
||||
self.pattern_count(),
|
||||
"stream_replace_all requires a replacement for every pattern \
|
||||
in the automaton"
|
||||
);
|
||||
@ -795,9 +800,10 @@ impl<S: StateID> AhoCorasick<S> {
|
||||
mut wtr: W,
|
||||
mut replace_with: F,
|
||||
) -> io::Result<()>
|
||||
where R: io::Read,
|
||||
W: io::Write,
|
||||
F: FnMut(&Match, &[u8], &mut W) -> io::Result<()>
|
||||
where
|
||||
R: io::Read,
|
||||
W: io::Write,
|
||||
F: FnMut(&Match, &[u8], &mut W) -> io::Result<()>,
|
||||
{
|
||||
let mut it = StreamChunkIter::new(self, rdr);
|
||||
while let Some(result) = it.next() {
|
||||
@ -1041,16 +1047,20 @@ impl<S: StateID> Imp<S> {
|
||||
match_index: &mut usize,
|
||||
) -> Option<Match> {
|
||||
match *self {
|
||||
Imp::NFA(ref nfa) => {
|
||||
nfa.overlapping_find_at(
|
||||
prestate, haystack, at, state_id, match_index,
|
||||
)
|
||||
}
|
||||
Imp::DFA(ref dfa) => {
|
||||
dfa.overlapping_find_at(
|
||||
prestate, haystack, at, state_id, match_index,
|
||||
)
|
||||
}
|
||||
Imp::NFA(ref nfa) => nfa.overlapping_find_at(
|
||||
prestate,
|
||||
haystack,
|
||||
at,
|
||||
state_id,
|
||||
match_index,
|
||||
),
|
||||
Imp::DFA(ref dfa) => dfa.overlapping_find_at(
|
||||
prestate,
|
||||
haystack,
|
||||
at,
|
||||
state_id,
|
||||
match_index,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
@ -1081,12 +1091,8 @@ impl<S: StateID> Imp<S> {
|
||||
state_id: &mut S,
|
||||
) -> Option<Match> {
|
||||
match *self {
|
||||
Imp::NFA(ref nfa) => {
|
||||
nfa.find_at(prestate, haystack, at, state_id)
|
||||
}
|
||||
Imp::DFA(ref dfa) => {
|
||||
dfa.find_at(prestate, haystack, at, state_id)
|
||||
}
|
||||
Imp::NFA(ref nfa) => nfa.find_at(prestate, haystack, at, state_id),
|
||||
Imp::DFA(ref dfa) => dfa.find_at(prestate, haystack, at, state_id),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1133,7 +1139,10 @@ impl<'a, 'b, S: StateID> Iterator for FindIter<'a, 'b, S> {
|
||||
}
|
||||
let mut start = self.start;
|
||||
let result = self.fsm.find_at(
|
||||
&mut self.prestate, self.haystack, self.pos, &mut start,
|
||||
&mut self.prestate,
|
||||
self.haystack,
|
||||
self.pos,
|
||||
&mut start,
|
||||
);
|
||||
let mat = match result {
|
||||
None => return None,
|
||||
@ -1243,9 +1252,7 @@ pub struct StreamFindIter<'a, R, S: 'a + StateID> {
|
||||
|
||||
impl<'a, R: io::Read, S: StateID> StreamFindIter<'a, R, S> {
|
||||
fn new(ac: &'a AhoCorasick<S>, rdr: R) -> StreamFindIter<'a, R, S> {
|
||||
StreamFindIter {
|
||||
it: StreamChunkIter::new(ac, rdr),
|
||||
}
|
||||
StreamFindIter { it: StreamChunkIter::new(ac, rdr) }
|
||||
}
|
||||
}
|
||||
|
||||
@ -1492,12 +1499,10 @@ impl AhoCorasickBuilder {
|
||||
/// .build(patterns);
|
||||
/// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern()));
|
||||
/// ```
|
||||
pub fn build<I, P>(
|
||||
&self,
|
||||
patterns: I,
|
||||
) -> AhoCorasick
|
||||
where I: IntoIterator<Item=P>,
|
||||
P: AsRef<[u8]>
|
||||
pub fn build<I, P>(&self, patterns: I) -> AhoCorasick
|
||||
where
|
||||
I: IntoIterator<Item = P>,
|
||||
P: AsRef<[u8]>,
|
||||
{
|
||||
// The builder only returns an error if the chosen state ID
|
||||
// representation is too small to fit all of the given patterns. In
|
||||
@ -1560,19 +1565,19 @@ impl AhoCorasickBuilder {
|
||||
&self,
|
||||
patterns: I,
|
||||
) -> Result<AhoCorasick<S>>
|
||||
where S: StateID,
|
||||
I: IntoIterator<Item=P>,
|
||||
P: AsRef<[u8]>
|
||||
where
|
||||
S: StateID,
|
||||
I: IntoIterator<Item = P>,
|
||||
P: AsRef<[u8]>,
|
||||
{
|
||||
let nfa = self.nfa_builder.build(patterns)?;
|
||||
let match_kind = nfa.match_kind().clone();
|
||||
let imp =
|
||||
if self.dfa {
|
||||
let dfa = self.dfa_builder.build(&nfa)?;
|
||||
Imp::DFA(dfa)
|
||||
} else {
|
||||
Imp::NFA(nfa)
|
||||
};
|
||||
let imp = if self.dfa {
|
||||
let dfa = self.dfa_builder.build(&nfa)?;
|
||||
Imp::DFA(dfa)
|
||||
} else {
|
||||
Imp::NFA(nfa)
|
||||
};
|
||||
Ok(AhoCorasick { imp, match_kind })
|
||||
}
|
||||
|
||||
@ -2002,7 +2007,7 @@ impl MatchKind {
|
||||
|
||||
pub(crate) fn is_leftmost(&self) -> bool {
|
||||
*self == MatchKind::LeftmostFirst
|
||||
|| *self == MatchKind::LeftmostLongest
|
||||
|| *self == MatchKind::LeftmostLongest
|
||||
}
|
||||
|
||||
pub(crate) fn is_leftmost_first(&self) -> bool {
|
||||
|
@ -1,6 +1,6 @@
|
||||
use ahocorasick::MatchKind;
|
||||
use prefilter::{Prefilter, PrefilterState};
|
||||
use state_id::{StateID, dead_id, fail_id};
|
||||
use state_id::{dead_id, fail_id, StateID};
|
||||
use Match;
|
||||
|
||||
// NOTE: This trait was essentially copied from regex-automata, with some
|
||||
@ -142,12 +142,14 @@ pub trait Automaton {
|
||||
) -> Option<Match> {
|
||||
if let Some(pre) = self.prefilter() {
|
||||
self.standard_find_at_imp(
|
||||
prestate, Some(pre), haystack, at, state_id,
|
||||
prestate,
|
||||
Some(pre),
|
||||
haystack,
|
||||
at,
|
||||
state_id,
|
||||
)
|
||||
} else {
|
||||
self.standard_find_at_imp(
|
||||
prestate, None, haystack, at, state_id,
|
||||
)
|
||||
self.standard_find_at_imp(prestate, None, haystack, at, state_id)
|
||||
}
|
||||
}
|
||||
|
||||
@ -235,12 +237,14 @@ pub trait Automaton {
|
||||
) -> Option<Match> {
|
||||
if let Some(pre) = self.prefilter() {
|
||||
self.leftmost_find_at_imp(
|
||||
prestate, Some(pre), haystack, at, state_id,
|
||||
prestate,
|
||||
Some(pre),
|
||||
haystack,
|
||||
at,
|
||||
state_id,
|
||||
)
|
||||
} else {
|
||||
self.leftmost_find_at_imp(
|
||||
prestate, None, haystack, at, state_id,
|
||||
)
|
||||
self.leftmost_find_at_imp(prestate, None, haystack, at, state_id)
|
||||
}
|
||||
}
|
||||
|
||||
@ -337,11 +341,7 @@ pub trait Automaton {
|
||||
if *match_index < match_count {
|
||||
// This is guaranteed to return a match since
|
||||
// match_index < match_count.
|
||||
let result = self.get_match(
|
||||
*state_id,
|
||||
*match_index,
|
||||
at,
|
||||
);
|
||||
let result = self.get_match(*state_id, *match_index, at);
|
||||
debug_assert!(result.is_some(), "must be a match");
|
||||
*match_index += 1;
|
||||
return result;
|
||||
|
@ -3,7 +3,7 @@ use std::io;
|
||||
use std::ptr;
|
||||
|
||||
/// The default buffer capacity that we use for the stream buffer.
|
||||
const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB
|
||||
const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10); // 8 KB
|
||||
|
||||
/// A fairly simple roll buffer for supporting stream searches.
|
||||
///
|
||||
@ -52,11 +52,7 @@ impl Buffer {
|
||||
// TODO: It would be good to find a way to test the streaming
|
||||
// implementation with the minimal buffer size.
|
||||
let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
|
||||
Buffer {
|
||||
buf: vec![0; capacity],
|
||||
min,
|
||||
end: 0,
|
||||
}
|
||||
Buffer { buf: vec![0; capacity], min, end: 0 }
|
||||
}
|
||||
|
||||
/// Return the contents of this buffer.
|
||||
@ -110,7 +106,8 @@ impl Buffer {
|
||||
/// This should only be called when the entire contents of this buffer have
|
||||
/// been searched.
|
||||
pub fn roll(&mut self) {
|
||||
let roll_start = self.end
|
||||
let roll_start = self
|
||||
.end
|
||||
.checked_sub(self.min)
|
||||
.expect("buffer capacity should be bigger than minimum amount");
|
||||
let roll_len = self.min;
|
||||
|
75
src/dfa.rs
75
src/dfa.rs
@ -4,9 +4,9 @@ use ahocorasick::MatchKind;
|
||||
use automaton::Automaton;
|
||||
use classes::ByteClasses;
|
||||
use error::Result;
|
||||
use nfa::{NFA, PatternID, PatternLength};
|
||||
use nfa::{PatternID, PatternLength, NFA};
|
||||
use prefilter::{Prefilter, PrefilterObj, PrefilterState};
|
||||
use state_id::{StateID, dead_id, fail_id, premultiply_overflow_error};
|
||||
use state_id::{dead_id, fail_id, premultiply_overflow_error, StateID};
|
||||
use Match;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
@ -57,26 +57,34 @@ impl<S: StateID> DFA<S> {
|
||||
match_index: &mut usize,
|
||||
) -> Option<Match> {
|
||||
match *self {
|
||||
DFA::Standard(ref dfa) => {
|
||||
dfa.overlapping_find_at(
|
||||
prestate, haystack, at, state_id, match_index,
|
||||
)
|
||||
}
|
||||
DFA::ByteClass(ref dfa) => {
|
||||
dfa.overlapping_find_at(
|
||||
prestate, haystack, at, state_id, match_index,
|
||||
)
|
||||
}
|
||||
DFA::Premultiplied(ref dfa) => {
|
||||
dfa.overlapping_find_at(
|
||||
prestate, haystack, at, state_id, match_index,
|
||||
)
|
||||
}
|
||||
DFA::PremultipliedByteClass(ref dfa) => {
|
||||
dfa.overlapping_find_at(
|
||||
prestate, haystack, at, state_id, match_index,
|
||||
)
|
||||
}
|
||||
DFA::Standard(ref dfa) => dfa.overlapping_find_at(
|
||||
prestate,
|
||||
haystack,
|
||||
at,
|
||||
state_id,
|
||||
match_index,
|
||||
),
|
||||
DFA::ByteClass(ref dfa) => dfa.overlapping_find_at(
|
||||
prestate,
|
||||
haystack,
|
||||
at,
|
||||
state_id,
|
||||
match_index,
|
||||
),
|
||||
DFA::Premultiplied(ref dfa) => dfa.overlapping_find_at(
|
||||
prestate,
|
||||
haystack,
|
||||
at,
|
||||
state_id,
|
||||
match_index,
|
||||
),
|
||||
DFA::PremultipliedByteClass(ref dfa) => dfa.overlapping_find_at(
|
||||
prestate,
|
||||
haystack,
|
||||
at,
|
||||
state_id,
|
||||
match_index,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
@ -562,10 +570,9 @@ impl<S: StateID> Repr<S> {
|
||||
|
||||
/// Computes the total amount of heap used by this NFA in bytes.
|
||||
fn calculate_size(&mut self) {
|
||||
let mut size =
|
||||
(self.trans.len() * size_of::<S>())
|
||||
+ (self.matches.len() *
|
||||
size_of::<Vec<(PatternID, PatternLength)>>());
|
||||
let mut size = (self.trans.len() * size_of::<S>())
|
||||
+ (self.matches.len()
|
||||
* size_of::<Vec<(PatternID, PatternLength)>>());
|
||||
for state_matches in &self.matches {
|
||||
size +=
|
||||
state_matches.len() * size_of::<(PatternID, PatternLength)>();
|
||||
@ -584,10 +591,7 @@ pub struct Builder {
|
||||
impl Builder {
|
||||
/// Create a new builder for a DFA.
|
||||
pub fn new() -> Builder {
|
||||
Builder {
|
||||
premultiply: true,
|
||||
byte_classes: true,
|
||||
}
|
||||
Builder { premultiply: true, byte_classes: true }
|
||||
}
|
||||
|
||||
/// Build a DFA from the given NFA.
|
||||
@ -596,12 +600,11 @@ impl Builder {
|
||||
/// representation size. This can only happen when state ids are
|
||||
/// premultiplied (which is enabled by default).
|
||||
pub fn build<S: StateID>(&self, nfa: &NFA<S>) -> Result<DFA<S>> {
|
||||
let byte_classes =
|
||||
if self.byte_classes {
|
||||
nfa.byte_classes().clone()
|
||||
} else {
|
||||
ByteClasses::singletons()
|
||||
};
|
||||
let byte_classes = if self.byte_classes {
|
||||
nfa.byte_classes().clone()
|
||||
} else {
|
||||
ByteClasses::singletons()
|
||||
};
|
||||
let alphabet_len = byte_classes.alphabet_len();
|
||||
let trans = vec![fail_id(); alphabet_len * nfa.state_len()];
|
||||
let matches = vec![vec![]; nfa.state_len()];
|
||||
|
18
src/error.rs
18
src/error.rs
@ -33,7 +33,7 @@ pub enum ErrorKind {
|
||||
max: usize,
|
||||
/// The maximum ID required by premultiplication.
|
||||
requested_max: usize,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
impl Error {
|
||||
@ -70,15 +70,13 @@ impl error::Error for Error {
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.kind {
|
||||
ErrorKind::StateIDOverflow { max } => {
|
||||
write!(
|
||||
f,
|
||||
"building the automaton failed because it required \
|
||||
building more states that can be identified, where the \
|
||||
maximum ID for the chosen representation is {}",
|
||||
max,
|
||||
)
|
||||
}
|
||||
ErrorKind::StateIDOverflow { max } => write!(
|
||||
f,
|
||||
"building the automaton failed because it required \
|
||||
building more states that can be identified, where the \
|
||||
maximum ID for the chosen representation is {}",
|
||||
max,
|
||||
),
|
||||
ErrorKind::PremultiplyOverflow { max, requested_max } => {
|
||||
if max == requested_max {
|
||||
write!(
|
||||
|
14
src/lib.rs
14
src/lib.rs
@ -196,8 +196,8 @@ extern crate doc_comment;
|
||||
doctest!("../README.md");
|
||||
|
||||
pub use ahocorasick::{
|
||||
AhoCorasick, AhoCorasickBuilder, MatchKind,
|
||||
FindIter, FindOverlappingIter, StreamFindIter,
|
||||
AhoCorasick, AhoCorasickBuilder, FindIter, FindOverlappingIter, MatchKind,
|
||||
StreamFindIter,
|
||||
};
|
||||
pub use error::{Error, ErrorKind};
|
||||
pub use state_id::StateID;
|
||||
@ -205,11 +205,11 @@ pub use state_id::StateID;
|
||||
mod ahocorasick;
|
||||
mod automaton;
|
||||
mod buffer;
|
||||
mod classes;
|
||||
mod dfa;
|
||||
mod error;
|
||||
mod classes;
|
||||
mod prefilter;
|
||||
mod nfa;
|
||||
mod prefilter;
|
||||
mod state_id;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
@ -287,10 +287,6 @@ impl Match {
|
||||
|
||||
#[inline]
|
||||
fn increment(&self, by: usize) -> Match {
|
||||
Match {
|
||||
pattern: self.pattern,
|
||||
len: self.len,
|
||||
end: self.end + by,
|
||||
}
|
||||
Match { pattern: self.pattern, len: self.len, end: self.end + by }
|
||||
}
|
||||
}
|
||||
|
75
src/nfa.rs
75
src/nfa.rs
@ -1,14 +1,14 @@
|
||||
use std::collections::VecDeque;
|
||||
use std::cmp;
|
||||
use std::collections::VecDeque;
|
||||
use std::fmt;
|
||||
use std::mem::size_of;
|
||||
|
||||
use ahocorasick::MatchKind;
|
||||
use automaton::Automaton;
|
||||
use classes::{ByteClasses, ByteClassBuilder};
|
||||
use classes::{ByteClassBuilder, ByteClasses};
|
||||
use error::Result;
|
||||
use prefilter::{self, Prefilter, PrefilterObj};
|
||||
use state_id::{StateID, dead_id, fail_id, usize_to_state_id};
|
||||
use state_id::{dead_id, fail_id, usize_to_state_id, StateID};
|
||||
use Match;
|
||||
|
||||
/// The identifier for a pattern, which is simply the position of the pattern
|
||||
@ -173,9 +173,8 @@ impl<S: StateID> NFA<S> {
|
||||
}
|
||||
|
||||
fn copy_matches(&mut self, src: S, dst: S) {
|
||||
let (src, dst) = get_two_mut(
|
||||
&mut self.states, src.to_usize(), dst.to_usize(),
|
||||
);
|
||||
let (src, dst) =
|
||||
get_two_mut(&mut self.states, src.to_usize(), dst.to_usize());
|
||||
dst.matches.extend_from_slice(&src.matches);
|
||||
}
|
||||
|
||||
@ -242,9 +241,11 @@ impl<S: StateID> Automaton for NFA<S> {
|
||||
None => return None,
|
||||
Some(state) => state,
|
||||
};
|
||||
state.matches
|
||||
.get(match_index)
|
||||
.map(|&(id, len)| Match { pattern: id, len, end })
|
||||
state.matches.get(match_index).map(|&(id, len)| Match {
|
||||
pattern: id,
|
||||
len,
|
||||
end,
|
||||
})
|
||||
}
|
||||
|
||||
fn match_count(&self, id: S) -> usize {
|
||||
@ -291,7 +292,7 @@ pub struct State<S> {
|
||||
impl<S: StateID> State<S> {
|
||||
fn heap_bytes(&self) -> usize {
|
||||
self.trans.heap_bytes()
|
||||
+ (self.matches.len() * size_of::<(PatternID, PatternLength)>())
|
||||
+ (self.matches.len() * size_of::<(PatternID, PatternLength)>())
|
||||
}
|
||||
|
||||
fn add_match(&mut self, i: PatternID, len: PatternLength) {
|
||||
@ -345,9 +346,7 @@ impl<S: StateID> Transitions<S> {
|
||||
Transitions::Sparse(ref sparse) => {
|
||||
sparse.len() * size_of::<(u8, S)>()
|
||||
}
|
||||
Transitions::Dense(ref dense) => {
|
||||
dense.len() * size_of::<S>()
|
||||
}
|
||||
Transitions::Dense(ref dense) => dense.len() * size_of::<S>(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -522,12 +521,10 @@ impl Builder {
|
||||
Builder::default()
|
||||
}
|
||||
|
||||
pub fn build<I, P, S: StateID>(
|
||||
&self,
|
||||
patterns: I,
|
||||
) -> Result<NFA<S>>
|
||||
where I: IntoIterator<Item=P>,
|
||||
P: AsRef<[u8]>
|
||||
pub fn build<I, P, S: StateID>(&self, patterns: I) -> Result<NFA<S>>
|
||||
where
|
||||
I: IntoIterator<Item = P>,
|
||||
P: AsRef<[u8]>,
|
||||
{
|
||||
Compiler::new(self)?.compile(patterns)
|
||||
}
|
||||
@ -581,12 +578,10 @@ impl<'a, S: StateID> Compiler<'a, S> {
|
||||
})
|
||||
}
|
||||
|
||||
fn compile<I, P>(
|
||||
mut self,
|
||||
patterns: I,
|
||||
) -> Result<NFA<S>>
|
||||
where I: IntoIterator<Item=P>,
|
||||
P: AsRef<[u8]>
|
||||
fn compile<I, P>(mut self, patterns: I) -> Result<NFA<S>>
|
||||
where
|
||||
I: IntoIterator<Item = P>,
|
||||
P: AsRef<[u8]>,
|
||||
{
|
||||
self.add_state(0)?; // the fail state, which is never entered
|
||||
self.add_state(0)?; // the dead state, only used for leftmost
|
||||
@ -610,19 +605,15 @@ impl<'a, S: StateID> Compiler<'a, S> {
|
||||
/// automaton. Effectively, it creates the basic structure of the
|
||||
/// automaton, where every pattern given has a path from the start state to
|
||||
/// the end of the pattern.
|
||||
fn build_trie<I, P>(
|
||||
&mut self,
|
||||
patterns: I,
|
||||
) -> Result<()>
|
||||
where I: IntoIterator<Item=P>,
|
||||
P: AsRef<[u8]>
|
||||
fn build_trie<I, P>(&mut self, patterns: I) -> Result<()>
|
||||
where
|
||||
I: IntoIterator<Item = P>,
|
||||
P: AsRef<[u8]>,
|
||||
{
|
||||
'PATTERNS:
|
||||
for (pati, pat) in patterns.into_iter().enumerate() {
|
||||
'PATTERNS: for (pati, pat) in patterns.into_iter().enumerate() {
|
||||
let pat = pat.as_ref();
|
||||
self.nfa.max_pattern_len = cmp::max(
|
||||
self.nfa.max_pattern_len, pat.len(),
|
||||
);
|
||||
self.nfa.max_pattern_len =
|
||||
cmp::max(self.nfa.max_pattern_len, pat.len());
|
||||
self.nfa.pattern_count += 1;
|
||||
|
||||
let mut prev = self.nfa.start_id;
|
||||
@ -873,11 +864,7 @@ impl<'a, S: StateID> Compiler<'a, S> {
|
||||
/// state.
|
||||
fn start(nfa: &NFA<S>) -> QueuedState<S> {
|
||||
let match_at_depth =
|
||||
if nfa.start().is_match() {
|
||||
Some(0)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
if nfa.start().is_match() { Some(0) } else { None };
|
||||
QueuedState { id: nfa.start_id, match_at_depth }
|
||||
}
|
||||
|
||||
@ -909,8 +896,7 @@ impl<'a, S: StateID> Compiler<'a, S> {
|
||||
None if nfa.state(next).is_match() => {}
|
||||
None => return None,
|
||||
}
|
||||
let depth =
|
||||
nfa.state(next).depth
|
||||
let depth = nfa.state(next).depth
|
||||
- nfa.state(next).get_longest_match_len().unwrap()
|
||||
+ 1;
|
||||
Some(depth)
|
||||
@ -1173,7 +1159,8 @@ impl<S: StateID> fmt::Debug for NFA<S> {
|
||||
});
|
||||
writeln!(f, "{:04}: {}", id, trans.join(", "))?;
|
||||
|
||||
let matches: Vec<String> = s.matches
|
||||
let matches: Vec<String> = s
|
||||
.matches
|
||||
.iter()
|
||||
.map(|&(pattern_id, _)| pattern_id.to_string())
|
||||
.collect();
|
||||
|
@ -6,7 +6,9 @@ use memchr::{memchr, memchr2, memchr3};
|
||||
/// A prefilter describes the behavior of fast literal scanners for quickly
|
||||
/// skipping past bytes in the haystack that we know cannot possibly
|
||||
/// participate in a match.
|
||||
pub trait Prefilter: Send + Sync + RefUnwindSafe + UnwindSafe + fmt::Debug {
|
||||
pub trait Prefilter:
|
||||
Send + Sync + RefUnwindSafe + UnwindSafe + fmt::Debug
|
||||
{
|
||||
/// Returns the next possible match candidate. This may yield false
|
||||
/// positives, so callers must "confirm" a match starting at the position
|
||||
/// returned. This, however, must never produce false negatives. That is,
|
||||
@ -165,24 +167,16 @@ impl StartBytesBuilder {
|
||||
}
|
||||
match len {
|
||||
0 => None,
|
||||
1 => {
|
||||
Some(PrefilterObj::new(StartBytesOne {
|
||||
byte1: bytes[0],
|
||||
}))
|
||||
}
|
||||
2 => {
|
||||
Some(PrefilterObj::new(StartBytesTwo {
|
||||
byte1: bytes[0],
|
||||
byte2: bytes[1],
|
||||
}))
|
||||
}
|
||||
3 => {
|
||||
Some(PrefilterObj::new(StartBytesThree {
|
||||
byte1: bytes[0],
|
||||
byte2: bytes[1],
|
||||
byte3: bytes[2],
|
||||
}))
|
||||
}
|
||||
1 => Some(PrefilterObj::new(StartBytesOne { byte1: bytes[0] })),
|
||||
2 => Some(PrefilterObj::new(StartBytesTwo {
|
||||
byte1: bytes[0],
|
||||
byte2: bytes[1],
|
||||
})),
|
||||
3 => Some(PrefilterObj::new(StartBytesThree {
|
||||
byte1: bytes[0],
|
||||
byte2: bytes[1],
|
||||
byte3: bytes[2],
|
||||
})),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
@ -204,8 +198,7 @@ pub struct StartBytesOne {
|
||||
|
||||
impl Prefilter for StartBytesOne {
|
||||
fn next_candidate(&self, haystack: &[u8], at: usize) -> Option<usize> {
|
||||
memchr(self.byte1, &haystack[at..])
|
||||
.map(|i| at + i)
|
||||
memchr(self.byte1, &haystack[at..]).map(|i| at + i)
|
||||
}
|
||||
|
||||
fn clone_prefilter(&self) -> Box<Prefilter> {
|
||||
@ -222,8 +215,7 @@ pub struct StartBytesTwo {
|
||||
|
||||
impl Prefilter for StartBytesTwo {
|
||||
fn next_candidate(&self, haystack: &[u8], at: usize) -> Option<usize> {
|
||||
memchr2(self.byte1, self.byte2, &haystack[at..])
|
||||
.map(|i| at + i)
|
||||
memchr2(self.byte1, self.byte2, &haystack[at..]).map(|i| at + i)
|
||||
}
|
||||
|
||||
fn clone_prefilter(&self) -> Box<Prefilter> {
|
||||
|
@ -49,7 +49,7 @@ pub fn dead_id<S: StateID>() -> S {
|
||||
mod private {
|
||||
/// Sealed stops crates other than aho-corasick from implementing any
|
||||
/// traits that use it.
|
||||
pub trait Sealed{}
|
||||
pub trait Sealed {}
|
||||
impl Sealed for u8 {}
|
||||
impl Sealed for u16 {}
|
||||
impl Sealed for u32 {}
|
||||
@ -82,7 +82,14 @@ mod private {
|
||||
/// bounds checks are explicitly elided for performance reasons.
|
||||
pub unsafe trait StateID:
|
||||
private::Sealed
|
||||
+ Clone + Copy + Debug + Eq + Hash + PartialEq + PartialOrd + Ord
|
||||
+ Clone
|
||||
+ Copy
|
||||
+ Debug
|
||||
+ Eq
|
||||
+ Hash
|
||||
+ PartialEq
|
||||
+ PartialOrd
|
||||
+ Ord
|
||||
{
|
||||
/// Convert from a `usize` to this implementation's representation.
|
||||
///
|
||||
@ -110,57 +117,87 @@ pub unsafe trait StateID:
|
||||
|
||||
unsafe impl StateID for usize {
|
||||
#[inline]
|
||||
fn from_usize(n: usize) -> usize { n }
|
||||
fn from_usize(n: usize) -> usize {
|
||||
n
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_usize(self) -> usize { self }
|
||||
fn to_usize(self) -> usize {
|
||||
self
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn max_id() -> usize { ::std::usize::MAX }
|
||||
fn max_id() -> usize {
|
||||
::std::usize::MAX
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl StateID for u8 {
|
||||
#[inline]
|
||||
fn from_usize(n: usize) -> u8 { n as u8 }
|
||||
fn from_usize(n: usize) -> u8 {
|
||||
n as u8
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_usize(self) -> usize { self as usize }
|
||||
fn to_usize(self) -> usize {
|
||||
self as usize
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn max_id() -> usize { ::std::u8::MAX as usize }
|
||||
fn max_id() -> usize {
|
||||
::std::u8::MAX as usize
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl StateID for u16 {
|
||||
#[inline]
|
||||
fn from_usize(n: usize) -> u16 { n as u16 }
|
||||
fn from_usize(n: usize) -> u16 {
|
||||
n as u16
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_usize(self) -> usize { self as usize }
|
||||
fn to_usize(self) -> usize {
|
||||
self as usize
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn max_id() -> usize { ::std::u16::MAX as usize }
|
||||
fn max_id() -> usize {
|
||||
::std::u16::MAX as usize
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
|
||||
unsafe impl StateID for u32 {
|
||||
#[inline]
|
||||
fn from_usize(n: usize) -> u32 { n as u32 }
|
||||
fn from_usize(n: usize) -> u32 {
|
||||
n as u32
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_usize(self) -> usize { self as usize }
|
||||
fn to_usize(self) -> usize {
|
||||
self as usize
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn max_id() -> usize { ::std::u32::MAX as usize }
|
||||
fn max_id() -> usize {
|
||||
::std::u32::MAX as usize
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
unsafe impl StateID for u64 {
|
||||
#[inline]
|
||||
fn from_usize(n: usize) -> u64 { n as u64 }
|
||||
fn from_usize(n: usize) -> u64 {
|
||||
n as u64
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_usize(self) -> usize { self as usize }
|
||||
fn to_usize(self) -> usize {
|
||||
self as usize
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn max_id() -> usize { ::std::u64::MAX as usize }
|
||||
fn max_id() -> usize {
|
||||
::std::u64::MAX as usize
|
||||
}
|
||||
}
|
||||
|
578
src/tests.rs
578
src/tests.rs
@ -32,7 +32,7 @@ macro_rules! t {
|
||||
haystack: $haystack,
|
||||
matches: $matches,
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// A collection of test groups.
|
||||
@ -43,24 +43,20 @@ type TestCollection = &'static [&'static [SearchTest]];
|
||||
// but each collection should have some tests that no other collection has.
|
||||
|
||||
/// Tests for Aho-Corasick's standard non-overlapping match semantics.
|
||||
const AC_STANDARD_NON_OVERLAPPING: TestCollection = &[
|
||||
BASICS, NON_OVERLAPPING, STANDARD, REGRESSION,
|
||||
];
|
||||
const AC_STANDARD_NON_OVERLAPPING: TestCollection =
|
||||
&[BASICS, NON_OVERLAPPING, STANDARD, REGRESSION];
|
||||
|
||||
/// Tests for Aho-Corasick's standard overlapping match semantics.
|
||||
const AC_STANDARD_OVERLAPPING: TestCollection = &[
|
||||
BASICS, OVERLAPPING, REGRESSION,
|
||||
];
|
||||
const AC_STANDARD_OVERLAPPING: TestCollection =
|
||||
&[BASICS, OVERLAPPING, REGRESSION];
|
||||
|
||||
/// Tests for Aho-Corasick's leftmost-first match semantics.
|
||||
const AC_LEFTMOST_FIRST: TestCollection = &[
|
||||
BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_FIRST, REGRESSION,
|
||||
];
|
||||
const AC_LEFTMOST_FIRST: TestCollection =
|
||||
&[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_FIRST, REGRESSION];
|
||||
|
||||
/// Tests for Aho-Corasick's leftmost-longest match semantics.
|
||||
const AC_LEFTMOST_LONGEST: TestCollection = &[
|
||||
BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_LONGEST, REGRESSION,
|
||||
];
|
||||
const AC_LEFTMOST_LONGEST: TestCollection =
|
||||
&[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_LONGEST, REGRESSION];
|
||||
|
||||
// Now define the individual tests that make up the collections above.
|
||||
|
||||
@ -77,70 +73,51 @@ const BASICS: &'static [SearchTest] = &[
|
||||
t!(basic050, &["a"], "bba", &[(0, 2, 3)]),
|
||||
t!(basic060, &["a"], "bbb", &[]),
|
||||
t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
|
||||
|
||||
t!(basic100, &["aa"], "", &[]),
|
||||
t!(basic110, &["aa"], "aa", &[(0, 0, 2)]),
|
||||
t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
|
||||
t!(basic130, &["aa"], "abbab", &[]),
|
||||
t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]),
|
||||
|
||||
t!(basic200, &["abc"], "abc", &[(0, 0, 3)]),
|
||||
t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
|
||||
t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
|
||||
|
||||
t!(basic300, &["a", "b"], "", &[]),
|
||||
t!(basic310, &["a", "b"], "z", &[]),
|
||||
t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]),
|
||||
t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]),
|
||||
t!(basic340, &["a", "b"], "abba", &[
|
||||
(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),
|
||||
]),
|
||||
t!(basic350, &["b", "a"], "abba", &[
|
||||
(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),
|
||||
]),
|
||||
t!(nover360, &["abc", "bc"], "xbc", &[
|
||||
(1, 1, 3),
|
||||
]),
|
||||
|
||||
t!(
|
||||
basic340,
|
||||
&["a", "b"],
|
||||
"abba",
|
||||
&[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
|
||||
),
|
||||
t!(
|
||||
basic350,
|
||||
&["b", "a"],
|
||||
"abba",
|
||||
&[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
|
||||
),
|
||||
t!(nover360, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
|
||||
t!(basic400, &["foo", "bar"], "", &[]),
|
||||
t!(basic410, &["foo", "bar"], "foobar", &[
|
||||
(0, 0, 3), (1, 3, 6),
|
||||
]),
|
||||
t!(basic420, &["foo", "bar"], "barfoo", &[
|
||||
(1, 0, 3), (0, 3, 6),
|
||||
]),
|
||||
t!(basic430, &["foo", "bar"], "foofoo", &[
|
||||
(0, 0, 3), (0, 3, 6),
|
||||
]),
|
||||
t!(basic440, &["foo", "bar"], "barbar", &[
|
||||
(1, 0, 3), (1, 3, 6),
|
||||
]),
|
||||
t!(basic450, &["foo", "bar"], "bafofoo", &[
|
||||
(0, 4, 7),
|
||||
]),
|
||||
t!(basic460, &["bar", "foo"], "bafofoo", &[
|
||||
(1, 4, 7),
|
||||
]),
|
||||
t!(basic470, &["foo", "bar"], "fobabar", &[
|
||||
(1, 4, 7),
|
||||
]),
|
||||
t!(basic480, &["bar", "foo"], "fobabar", &[
|
||||
(0, 4, 7),
|
||||
]),
|
||||
|
||||
t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
|
||||
t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
|
||||
t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
|
||||
t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
|
||||
t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
|
||||
t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
|
||||
t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
|
||||
t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
|
||||
t!(basic600, &[""], "", &[(0, 0, 0)]),
|
||||
t!(basic610, &[""], "a", &[(0, 0, 0), (0, 1, 1)]),
|
||||
t!(basic620, &[""], "abc", &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]),
|
||||
|
||||
t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[
|
||||
(0, 0, 7),
|
||||
]),
|
||||
t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[
|
||||
(1, 1, 10),
|
||||
]),
|
||||
t!(basic720, &["yabcdef", "bcdeyabc", "abcdezghi"], "yabcdezghi", &[
|
||||
(2, 1, 10),
|
||||
]),
|
||||
t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
|
||||
t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
|
||||
t!(
|
||||
basic720,
|
||||
&["yabcdef", "bcdeyabc", "abcdezghi"],
|
||||
"yabcdezghi",
|
||||
&[(2, 1, 10),]
|
||||
),
|
||||
];
|
||||
|
||||
/// Tests for non-overlapping standard match semantics.
|
||||
@ -155,25 +132,17 @@ const STANDARD: &'static [SearchTest] = &[
|
||||
t!(standard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]),
|
||||
t!(standard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]),
|
||||
t!(standard040, &["a", ""], "a", &[(1, 0, 0), (1, 1, 1)]),
|
||||
|
||||
t!(standard400, &["abcd", "bcd", "cd", "b"], "abcd", &[
|
||||
(3, 1, 2), (2, 2, 4),
|
||||
]),
|
||||
t!(standard410, &["", "a"], "a", &[
|
||||
(0, 0, 0), (0, 1, 1),
|
||||
]),
|
||||
t!(standard420, &["", "a"], "aa", &[
|
||||
(0, 0, 0), (0, 1, 1), (0, 2, 2),
|
||||
]),
|
||||
t!(standard430, &["", "a", ""], "a", &[
|
||||
(0, 0, 0), (0, 1, 1),
|
||||
]),
|
||||
t!(standard440, &["a", "", ""], "a", &[
|
||||
(1, 0, 0), (1, 1, 1),
|
||||
]),
|
||||
t!(standard450, &["", "", "a"], "a", &[
|
||||
(0, 0, 0), (0, 1, 1),
|
||||
]),
|
||||
t!(
|
||||
standard400,
|
||||
&["abcd", "bcd", "cd", "b"],
|
||||
"abcd",
|
||||
&[(3, 1, 2), (2, 2, 4),]
|
||||
),
|
||||
t!(standard410, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
|
||||
t!(standard420, &["", "a"], "aa", &[(0, 0, 0), (0, 1, 1), (0, 2, 2),]),
|
||||
t!(standard430, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
|
||||
t!(standard440, &["a", "", ""], "a", &[(1, 0, 0), (1, 1, 1),]),
|
||||
t!(standard450, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
|
||||
];
|
||||
|
||||
/// Tests for non-overlapping leftmost match semantics. These should pass for
|
||||
@ -187,31 +156,48 @@ const LEFTMOST: &'static [SearchTest] = &[
|
||||
t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
|
||||
t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
|
||||
t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
|
||||
|
||||
t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
|
||||
t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
|
||||
t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
|
||||
t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
|
||||
t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
|
||||
t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
|
||||
t!(leftmost360, &["abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
|
||||
(2, 0, 8),
|
||||
]),
|
||||
t!(leftmost370, &["abcdefghi", "cde", "hz", "abcdefgh"], "abcdefghz", &[
|
||||
(3, 0, 8),
|
||||
]),
|
||||
t!(leftmost380, &["abcdefghi", "hz", "abcdefgh", "a"], "abcdefghz", &[
|
||||
(2, 0, 8),
|
||||
]),
|
||||
t!(leftmost390, &["b", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
|
||||
(3, 0, 8),
|
||||
]),
|
||||
t!(leftmost400, &["h", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
|
||||
(3, 0, 8),
|
||||
]),
|
||||
t!(leftmost410, &["z", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
|
||||
(3, 0, 8), (0, 8, 9),
|
||||
]),
|
||||
t!(
|
||||
leftmost360,
|
||||
&["abcdefghi", "hz", "abcdefgh"],
|
||||
"abcdefghz",
|
||||
&[(2, 0, 8),]
|
||||
),
|
||||
t!(
|
||||
leftmost370,
|
||||
&["abcdefghi", "cde", "hz", "abcdefgh"],
|
||||
"abcdefghz",
|
||||
&[(3, 0, 8),]
|
||||
),
|
||||
t!(
|
||||
leftmost380,
|
||||
&["abcdefghi", "hz", "abcdefgh", "a"],
|
||||
"abcdefghz",
|
||||
&[(2, 0, 8),]
|
||||
),
|
||||
t!(
|
||||
leftmost390,
|
||||
&["b", "abcdefghi", "hz", "abcdefgh"],
|
||||
"abcdefghz",
|
||||
&[(3, 0, 8),]
|
||||
),
|
||||
t!(
|
||||
leftmost400,
|
||||
&["h", "abcdefghi", "hz", "abcdefgh"],
|
||||
"abcdefghz",
|
||||
&[(3, 0, 8),]
|
||||
),
|
||||
t!(
|
||||
leftmost410,
|
||||
&["z", "abcdefghi", "hz", "abcdefgh"],
|
||||
"abcdefghz",
|
||||
&[(3, 0, 8), (0, 8, 9),]
|
||||
),
|
||||
];
|
||||
|
||||
/// Tests for non-overlapping leftmost-first match semantics. These tests
|
||||
@ -220,29 +206,27 @@ const LEFTMOST: &'static [SearchTest] = &[
|
||||
const LEFTMOST_FIRST: &'static [SearchTest] = &[
|
||||
t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
|
||||
t!(leftfirst010, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
|
||||
t!(leftfirst011, &["", "a", ""], "a", &[
|
||||
(0, 0, 0), (0, 1, 1),
|
||||
]),
|
||||
t!(leftfirst012, &["a", "", ""], "a", &[
|
||||
(0, 0, 1), (1, 1, 1),
|
||||
]),
|
||||
t!(leftfirst013, &["", "", "a"], "a", &[
|
||||
(0, 0, 0), (0, 1, 1),
|
||||
]),
|
||||
t!(leftfirst011, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
|
||||
t!(leftfirst012, &["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1),]),
|
||||
t!(leftfirst013, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
|
||||
t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
|
||||
t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
|
||||
t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
|
||||
|
||||
t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
|
||||
t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
|
||||
|
||||
t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
|
||||
t!(leftfirst310, &["abcd", "b", "bce", "ce"], "abce", &[
|
||||
(1, 1, 2), (3, 2, 4),
|
||||
]),
|
||||
t!(leftfirst320, &["a", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
|
||||
(0, 0, 1), (2, 7, 9),
|
||||
]),
|
||||
t!(
|
||||
leftfirst310,
|
||||
&["abcd", "b", "bce", "ce"],
|
||||
"abce",
|
||||
&[(1, 1, 2), (3, 2, 4),]
|
||||
),
|
||||
t!(
|
||||
leftfirst320,
|
||||
&["a", "abcdefghi", "hz", "abcdefgh"],
|
||||
"abcdefghz",
|
||||
&[(0, 0, 1), (2, 7, 9),]
|
||||
),
|
||||
t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
|
||||
];
|
||||
|
||||
@ -251,40 +235,27 @@ const LEFTMOST_FIRST: &'static [SearchTest] = &[
|
||||
/// generally fail under leftmost-first semantics.
|
||||
const LEFTMOST_LONGEST: &'static [SearchTest] = &[
|
||||
t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
|
||||
t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[
|
||||
(0, 0, 4),
|
||||
]),
|
||||
t!(leftlong020, &["", "a"], "a", &[
|
||||
(1, 0, 1), (0, 1, 1),
|
||||
]),
|
||||
t!(leftlong021, &["", "a", ""], "a", &[
|
||||
(1, 0, 1), (0, 1, 1),
|
||||
]),
|
||||
t!(leftlong022, &["a", "", ""], "a", &[
|
||||
(0, 0, 1), (1, 1, 1),
|
||||
]),
|
||||
t!(leftlong023, &["", "", "a"], "a", &[
|
||||
(2, 0, 1), (0, 1, 1),
|
||||
]),
|
||||
t!(leftlong030, &["", "a"], "aa", &[
|
||||
(1, 0, 1), (1, 1, 2), (0, 2, 2),
|
||||
]),
|
||||
t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
|
||||
t!(leftlong020, &["", "a"], "a", &[(1, 0, 1), (0, 1, 1),]),
|
||||
t!(leftlong021, &["", "a", ""], "a", &[(1, 0, 1), (0, 1, 1),]),
|
||||
t!(leftlong022, &["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1),]),
|
||||
t!(leftlong023, &["", "", "a"], "a", &[(2, 0, 1), (0, 1, 1),]),
|
||||
t!(leftlong030, &["", "a"], "aa", &[(1, 0, 1), (1, 1, 2), (0, 2, 2),]),
|
||||
t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
|
||||
t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
|
||||
t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
|
||||
t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
|
||||
|
||||
t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
|
||||
t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
|
||||
|
||||
t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
|
||||
t!(leftlong310, &["a", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
|
||||
(3, 0, 8),
|
||||
]),
|
||||
t!(
|
||||
leftlong310,
|
||||
&["a", "abcdefghi", "hz", "abcdefgh"],
|
||||
"abcdefghz",
|
||||
&[(3, 0, 8),]
|
||||
),
|
||||
t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
|
||||
t!(leftlong330, &["abcd", "b", "ce"], "abce", &[
|
||||
(1, 1, 2), (2, 2, 4),
|
||||
]),
|
||||
t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
|
||||
t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
|
||||
];
|
||||
|
||||
@ -293,30 +264,18 @@ const LEFTMOST_LONGEST: &'static [SearchTest] = &[
|
||||
/// Generally these tests shouldn't pass when using overlapping semantics.
|
||||
/// These should pass for both standard and leftmost match semantics.
|
||||
const NON_OVERLAPPING: &'static [SearchTest] = &[
|
||||
t!(nover010, &["abcd", "bcd", "cd"], "abcd", &[
|
||||
(0, 0, 4),
|
||||
]),
|
||||
t!(nover020, &["bcd", "cd", "abcd"], "abcd", &[
|
||||
(2, 0, 4),
|
||||
]),
|
||||
t!(nover030, &["abc", "bc"], "zazabcz", &[
|
||||
(0, 3, 6),
|
||||
]),
|
||||
|
||||
t!(nover100, &["ab", "ba"], "abababa", &[
|
||||
(0, 0, 2), (0, 2, 4), (0, 4, 6),
|
||||
]),
|
||||
|
||||
t!(nover200, &["foo", "foo"], "foobarfoo", &[
|
||||
(0, 0, 3), (0, 6, 9),
|
||||
]),
|
||||
|
||||
t!(nover300, &["", ""], "", &[
|
||||
(0, 0, 0),
|
||||
]),
|
||||
t!(nover310, &["", ""], "a", &[
|
||||
(0, 0, 0), (0, 1, 1),
|
||||
]),
|
||||
t!(nover010, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
|
||||
t!(nover020, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
|
||||
t!(nover030, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
|
||||
t!(
|
||||
nover100,
|
||||
&["ab", "ba"],
|
||||
"abababa",
|
||||
&[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
|
||||
),
|
||||
t!(nover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
|
||||
t!(nover300, &["", ""], "", &[(0, 0, 0),]),
|
||||
t!(nover310, &["", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
|
||||
];
|
||||
|
||||
/// Tests for overlapping match semantics.
|
||||
@ -324,51 +283,75 @@ const NON_OVERLAPPING: &'static [SearchTest] = &[
|
||||
/// This only supports standard match semantics, since leftmost-{first,longest}
|
||||
/// do not support overlapping matches.
|
||||
const OVERLAPPING: &'static [SearchTest] = &[
|
||||
t!(over000, &["abcd", "bcd", "cd", "b"], "abcd", &[
|
||||
(3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4),
|
||||
]),
|
||||
t!(over010, &["bcd", "cd", "b", "abcd"], "abcd", &[
|
||||
(2, 1, 2), (3, 0, 4), (0, 1, 4), (1, 2, 4),
|
||||
]),
|
||||
t!(over020, &["abcd", "bcd", "cd"], "abcd", &[
|
||||
(0, 0, 4), (1, 1, 4), (2, 2, 4),
|
||||
]),
|
||||
t!(over030, &["bcd", "abcd", "cd"], "abcd", &[
|
||||
(1, 0, 4), (0, 1, 4), (2, 2, 4),
|
||||
]),
|
||||
t!(over040, &["bcd", "cd", "abcd"], "abcd", &[
|
||||
(2, 0, 4), (0, 1, 4), (1, 2, 4),
|
||||
]),
|
||||
t!(over050, &["abc", "bc"], "zazabcz", &[
|
||||
(0, 3, 6), (1, 4, 6),
|
||||
]),
|
||||
|
||||
t!(over100, &["ab", "ba"], "abababa", &[
|
||||
(0, 0, 2), (1, 1, 3), (0, 2, 4), (1, 3, 5), (0, 4, 6), (1, 5, 7),
|
||||
]),
|
||||
|
||||
t!(over200, &["foo", "foo"], "foobarfoo", &[
|
||||
(0, 0, 3), (1, 0, 3), (0, 6, 9), (1, 6, 9),
|
||||
]),
|
||||
|
||||
t!(over300, &["", ""], "", &[
|
||||
(0, 0, 0), (1, 0, 0),
|
||||
]),
|
||||
t!(over310, &["", ""], "a", &[
|
||||
(0, 0, 0), (1, 0, 0), (0, 1, 1), (1, 1, 1),
|
||||
]),
|
||||
t!(over320, &["", "a"], "a", &[
|
||||
(0, 0, 0), (1, 0, 1), (0, 1, 1),
|
||||
]),
|
||||
t!(over330, &["", "a", ""], "a", &[
|
||||
(0, 0, 0), (2, 0, 0), (1, 0, 1), (0, 1, 1), (2, 1, 1),
|
||||
]),
|
||||
t!(over340, &["a", "", ""], "a", &[
|
||||
(1, 0, 0), (2, 0, 0), (0, 0, 1), (1, 1, 1), (2, 1, 1),
|
||||
]),
|
||||
t!(over350, &["", "", "a"], "a", &[
|
||||
(0, 0, 0), (1, 0, 0), (2, 0, 1), (0, 1, 1), (1, 1, 1),
|
||||
]),
|
||||
t!(
|
||||
over000,
|
||||
&["abcd", "bcd", "cd", "b"],
|
||||
"abcd",
|
||||
&[(3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4),]
|
||||
),
|
||||
t!(
|
||||
over010,
|
||||
&["bcd", "cd", "b", "abcd"],
|
||||
"abcd",
|
||||
&[(2, 1, 2), (3, 0, 4), (0, 1, 4), (1, 2, 4),]
|
||||
),
|
||||
t!(
|
||||
over020,
|
||||
&["abcd", "bcd", "cd"],
|
||||
"abcd",
|
||||
&[(0, 0, 4), (1, 1, 4), (2, 2, 4),]
|
||||
),
|
||||
t!(
|
||||
over030,
|
||||
&["bcd", "abcd", "cd"],
|
||||
"abcd",
|
||||
&[(1, 0, 4), (0, 1, 4), (2, 2, 4),]
|
||||
),
|
||||
t!(
|
||||
over040,
|
||||
&["bcd", "cd", "abcd"],
|
||||
"abcd",
|
||||
&[(2, 0, 4), (0, 1, 4), (1, 2, 4),]
|
||||
),
|
||||
t!(over050, &["abc", "bc"], "zazabcz", &[(0, 3, 6), (1, 4, 6),]),
|
||||
t!(
|
||||
over100,
|
||||
&["ab", "ba"],
|
||||
"abababa",
|
||||
&[(0, 0, 2), (1, 1, 3), (0, 2, 4), (1, 3, 5), (0, 4, 6), (1, 5, 7),]
|
||||
),
|
||||
t!(
|
||||
over200,
|
||||
&["foo", "foo"],
|
||||
"foobarfoo",
|
||||
&[(0, 0, 3), (1, 0, 3), (0, 6, 9), (1, 6, 9),]
|
||||
),
|
||||
t!(over300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]),
|
||||
t!(
|
||||
over310,
|
||||
&["", ""],
|
||||
"a",
|
||||
&[(0, 0, 0), (1, 0, 0), (0, 1, 1), (1, 1, 1),]
|
||||
),
|
||||
t!(over320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1), (0, 1, 1),]),
|
||||
t!(
|
||||
over330,
|
||||
&["", "a", ""],
|
||||
"a",
|
||||
&[(0, 0, 0), (2, 0, 0), (1, 0, 1), (0, 1, 1), (2, 1, 1),]
|
||||
),
|
||||
t!(
|
||||
over340,
|
||||
&["a", "", ""],
|
||||
"a",
|
||||
&[(1, 0, 0), (2, 0, 0), (0, 0, 1), (1, 1, 1), (2, 1, 1),]
|
||||
),
|
||||
t!(
|
||||
over350,
|
||||
&["", "", "a"],
|
||||
"a",
|
||||
&[(0, 0, 0), (1, 0, 0), (2, 0, 1), (0, 1, 1), (1, 1, 1),]
|
||||
),
|
||||
];
|
||||
|
||||
/// Regression tests that are applied to all Aho-Corasick combinations.
|
||||
@ -376,24 +359,32 @@ const OVERLAPPING: &'static [SearchTest] = &[
|
||||
/// If regression tests are needed for specific match semantics, then add them
|
||||
/// to the appropriate group above.
|
||||
const REGRESSION: &'static [SearchTest] = &[
|
||||
t!(regression010, &["inf", "ind"], "infind", &[
|
||||
(0, 0, 3), (1, 3, 6),
|
||||
]),
|
||||
t!(regression020, &["ind", "inf"], "infind", &[
|
||||
(1, 0, 3), (0, 3, 6),
|
||||
]),
|
||||
t!(regression030, &["libcore/", "libstd/"], "libcore/char/methods.rs", &[
|
||||
(0, 0, 8),
|
||||
]),
|
||||
t!(regression040, &["libstd/", "libcore/"], "libcore/char/methods.rs", &[
|
||||
(1, 0, 8),
|
||||
]),
|
||||
t!(regression050, &["\x00\x00\x01", "\x00\x00\x00"], "\x00\x00\x00", &[
|
||||
(1, 0, 3),
|
||||
]),
|
||||
t!(regression060, &["\x00\x00\x00", "\x00\x00\x01"], "\x00\x00\x00", &[
|
||||
(0, 0, 3),
|
||||
]),
|
||||
t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
|
||||
t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
|
||||
t!(
|
||||
regression030,
|
||||
&["libcore/", "libstd/"],
|
||||
"libcore/char/methods.rs",
|
||||
&[(0, 0, 8),]
|
||||
),
|
||||
t!(
|
||||
regression040,
|
||||
&["libstd/", "libcore/"],
|
||||
"libcore/char/methods.rs",
|
||||
&[(1, 0, 8),]
|
||||
),
|
||||
t!(
|
||||
regression050,
|
||||
&["\x00\x00\x01", "\x00\x00\x00"],
|
||||
"\x00\x00\x00",
|
||||
&[(1, 0, 3),]
|
||||
),
|
||||
t!(
|
||||
regression060,
|
||||
&["\x00\x00\x00", "\x00\x00\x01"],
|
||||
"\x00\x00\x00",
|
||||
&[(0, 0, 3),]
|
||||
),
|
||||
];
|
||||
|
||||
// Now define a test for each combination of things above that we want to run.
|
||||
@ -424,10 +415,8 @@ macro_rules! testconfig {
|
||||
#[test]
|
||||
fn $name() {
|
||||
run_search_tests($collection, |test| {
|
||||
let buf = io::BufReader::with_capacity(
|
||||
1,
|
||||
test.haystack.as_bytes(),
|
||||
);
|
||||
let buf =
|
||||
io::BufReader::with_capacity(1, test.haystack.as_bytes());
|
||||
let mut builder = AhoCorasickBuilder::new();
|
||||
$with(&mut builder);
|
||||
builder
|
||||
@ -461,46 +450,86 @@ macro_rules! testcombo {
|
||||
use super::*;
|
||||
|
||||
testconfig!(nfa_default, $collection, $kind, |_| ());
|
||||
testconfig!(nfa_no_prefilter, $collection, $kind,
|
||||
testconfig!(
|
||||
nfa_no_prefilter,
|
||||
$collection,
|
||||
$kind,
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.prefilter(false);
|
||||
});
|
||||
testconfig!(nfa_all_sparse, $collection, $kind,
|
||||
}
|
||||
);
|
||||
testconfig!(
|
||||
nfa_all_sparse,
|
||||
$collection,
|
||||
$kind,
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dense_depth(0);
|
||||
});
|
||||
testconfig!(nfa_all_dense, $collection, $kind,
|
||||
}
|
||||
);
|
||||
testconfig!(
|
||||
nfa_all_dense,
|
||||
$collection,
|
||||
$kind,
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dense_depth(usize::MAX);
|
||||
});
|
||||
testconfig!(dfa_default, $collection, $kind,
|
||||
}
|
||||
);
|
||||
testconfig!(
|
||||
dfa_default,
|
||||
$collection,
|
||||
$kind,
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dfa(true);
|
||||
});
|
||||
testconfig!(dfa_no_prefilter, $collection, $kind,
|
||||
}
|
||||
);
|
||||
testconfig!(
|
||||
dfa_no_prefilter,
|
||||
$collection,
|
||||
$kind,
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dfa(true).prefilter(false);
|
||||
});
|
||||
testconfig!(dfa_all_sparse, $collection, $kind,
|
||||
}
|
||||
);
|
||||
testconfig!(
|
||||
dfa_all_sparse,
|
||||
$collection,
|
||||
$kind,
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dfa(true).dense_depth(0);
|
||||
});
|
||||
testconfig!(dfa_all_dense, $collection, $kind,
|
||||
}
|
||||
);
|
||||
testconfig!(
|
||||
dfa_all_dense,
|
||||
$collection,
|
||||
$kind,
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dfa(true).dense_depth(usize::MAX);
|
||||
});
|
||||
testconfig!(dfa_no_byte_class, $collection, $kind,
|
||||
}
|
||||
);
|
||||
testconfig!(
|
||||
dfa_no_byte_class,
|
||||
$collection,
|
||||
$kind,
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dfa(true).byte_classes(false);
|
||||
});
|
||||
testconfig!(dfa_no_premultiply, $collection, $kind,
|
||||
}
|
||||
);
|
||||
testconfig!(
|
||||
dfa_no_premultiply,
|
||||
$collection,
|
||||
$kind,
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dfa(true).premultiply(false);
|
||||
});
|
||||
testconfig!(dfa_no_byte_class_no_premultiply, $collection, $kind,
|
||||
}
|
||||
);
|
||||
testconfig!(
|
||||
dfa_no_byte_class_no_premultiply,
|
||||
$collection,
|
||||
$kind,
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dfa(true).byte_classes(false).premultiply(false);
|
||||
});
|
||||
}
|
||||
);
|
||||
}
|
||||
};
|
||||
}
|
||||
@ -509,7 +538,9 @@ macro_rules! testcombo {
|
||||
testcombo!(search_leftmost_longest, AC_LEFTMOST_LONGEST, LeftmostLongest);
|
||||
testcombo!(search_leftmost_first, AC_LEFTMOST_FIRST, LeftmostFirst);
|
||||
testcombo!(
|
||||
search_standard_nonoverlapping, AC_STANDARD_NON_OVERLAPPING, Standard
|
||||
search_standard_nonoverlapping,
|
||||
AC_STANDARD_NON_OVERLAPPING,
|
||||
Standard
|
||||
);
|
||||
|
||||
// Write out the overlapping combo by hand since there is only one of them.
|
||||
@ -525,49 +556,63 @@ testconfig!(
|
||||
search_standard_overlapping_nfa_all_sparse,
|
||||
AC_STANDARD_OVERLAPPING,
|
||||
Standard,
|
||||
|b: &mut AhoCorasickBuilder| { b.dense_depth(0); }
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dense_depth(0);
|
||||
}
|
||||
);
|
||||
testconfig!(
|
||||
overlapping,
|
||||
search_standard_overlapping_nfa_all_dense,
|
||||
AC_STANDARD_OVERLAPPING,
|
||||
Standard,
|
||||
|b: &mut AhoCorasickBuilder| { b.dense_depth(usize::MAX); }
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dense_depth(usize::MAX);
|
||||
}
|
||||
);
|
||||
testconfig!(
|
||||
overlapping,
|
||||
search_standard_overlapping_dfa_default,
|
||||
AC_STANDARD_OVERLAPPING,
|
||||
Standard,
|
||||
|b: &mut AhoCorasickBuilder| { b.dfa(true); }
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dfa(true);
|
||||
}
|
||||
);
|
||||
testconfig!(
|
||||
overlapping,
|
||||
search_standard_overlapping_dfa_all_sparse,
|
||||
AC_STANDARD_OVERLAPPING,
|
||||
Standard,
|
||||
|b: &mut AhoCorasickBuilder| { b.dfa(true).dense_depth(0); }
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dfa(true).dense_depth(0);
|
||||
}
|
||||
);
|
||||
testconfig!(
|
||||
overlapping,
|
||||
search_standard_overlapping_dfa_all_dense,
|
||||
AC_STANDARD_OVERLAPPING,
|
||||
Standard,
|
||||
|b: &mut AhoCorasickBuilder| { b.dfa(true).dense_depth(usize::MAX); }
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dfa(true).dense_depth(usize::MAX);
|
||||
}
|
||||
);
|
||||
testconfig!(
|
||||
overlapping,
|
||||
search_standard_overlapping_dfa_no_byte_class,
|
||||
AC_STANDARD_OVERLAPPING,
|
||||
Standard,
|
||||
|b: &mut AhoCorasickBuilder| { b.dfa(true).byte_classes(false); }
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dfa(true).byte_classes(false);
|
||||
}
|
||||
);
|
||||
testconfig!(
|
||||
overlapping,
|
||||
search_standard_overlapping_dfa_no_premultiply,
|
||||
AC_STANDARD_OVERLAPPING,
|
||||
Standard,
|
||||
|b: &mut AhoCorasickBuilder| { b.dfa(true).premultiply(false); }
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dfa(true).premultiply(false);
|
||||
}
|
||||
);
|
||||
testconfig!(
|
||||
overlapping,
|
||||
@ -594,7 +639,9 @@ testconfig!(
|
||||
search_standard_stream_dfa_default,
|
||||
AC_STANDARD_NON_OVERLAPPING,
|
||||
Standard,
|
||||
|b: &mut AhoCorasickBuilder| { b.dfa(true); }
|
||||
|b: &mut AhoCorasickBuilder| {
|
||||
b.dfa(true);
|
||||
}
|
||||
);
|
||||
|
||||
#[test]
|
||||
@ -669,8 +716,8 @@ fn state_id_too_small() {
|
||||
}
|
||||
}
|
||||
}
|
||||
let result = AhoCorasickBuilder::new()
|
||||
.build_with_size::<u8, _, _>(&patterns);
|
||||
let result =
|
||||
AhoCorasickBuilder::new().build_with_size::<u8, _, _>(&patterns);
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
@ -680,7 +727,8 @@ fn run_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>(
|
||||
) {
|
||||
let get_match_triples =
|
||||
|matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
|
||||
matches.into_iter()
|
||||
matches
|
||||
.into_iter()
|
||||
.map(|m| (m.pattern(), m.start(), m.end()))
|
||||
.collect()
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user