style: switch to rustfmt

And enable a CI check for it.
Andrew Gallant 2019-07-04 15:02:20 -04:00
parent fa956e6062
commit bb89108f8d
18 changed files with 764 additions and 594 deletions


@ -58,7 +58,7 @@ struct Args {
impl Args {
fn parse() -> Result<Args> {
use clap::{App, Arg, crate_authors, crate_version};
use clap::{crate_authors, crate_version, App, Arg};
let parsed = App::new("Search using aho-corasick")
.author(crate_authors!())
@ -66,18 +66,26 @@ impl Args {
.max_term_width(100)
.arg(Arg::with_name("dictionary").required(true))
.arg(Arg::with_name("haystack").required(true))
.arg(Arg::with_name("kind")
.long("kind")
.possible_values(&[
"standard", "leftmost-first", "leftmost-longest",
])
.default_value("standard"))
.arg(Arg::with_name("ascii-case-insensitive")
.long("ascii-case-insensitive")
.short("i"))
.arg(Arg::with_name("dense-depth")
.long("dense-depth")
.default_value("2"))
.arg(
Arg::with_name("kind")
.long("kind")
.possible_values(&[
"standard",
"leftmost-first",
"leftmost-longest",
])
.default_value("standard"),
)
.arg(
Arg::with_name("ascii-case-insensitive")
.long("ascii-case-insensitive")
.short("i"),
)
.arg(
Arg::with_name("dense-depth")
.long("dense-depth")
.default_value("2"),
)
.arg(Arg::with_name("dfa").long("dfa").short("d"))
.arg(Arg::with_name("prefilter").long("prefilter").short("f"))
.arg(Arg::with_name("classes").long("classes").short("c"))
@ -85,12 +93,9 @@ impl Args {
.arg(Arg::with_name("no-search").long("no-search"))
.get_matches();
let dictionary = PathBuf::from(
parsed.value_of_os("dictionary").unwrap()
);
let haystack = PathBuf::from(
parsed.value_of_os("haystack").unwrap()
);
let dictionary =
PathBuf::from(parsed.value_of_os("dictionary").unwrap());
let haystack = PathBuf::from(parsed.value_of_os("haystack").unwrap());
let match_kind = match parsed.value_of("kind").unwrap() {
"standard" => MatchKind::Standard,
"leftmost-first" => MatchKind::LeftmostFirst,
@ -100,7 +105,10 @@ impl Args {
let dense_depth = parsed.value_of("dense-depth").unwrap().parse()?;
Ok(Args {
dictionary, haystack, match_kind, dense_depth,
dictionary,
haystack,
match_kind,
dense_depth,
ascii_casei: parsed.is_present("ascii-case-insensitive"),
dfa: parsed.is_present("dfa"),
prefilter: parsed.is_present("prefilter"),


@ -30,10 +30,8 @@ fn define_aho_corasick<B: AsRef<[u8]>>(
count: usize,
patterns: Vec<B>,
) {
let patterns: Vec<Vec<u8>> = patterns
.into_iter()
.map(|b| b.as_ref().to_vec())
.collect();
let patterns: Vec<Vec<u8>> =
patterns.into_iter().map(|b| b.as_ref().to_vec()).collect();
let haystack = corpus.to_vec();
let name = format!("nfa/{}", bench_name);
@ -44,9 +42,7 @@ fn define_aho_corasick<B: AsRef<[u8]>>(
let haystack = corpus.to_vec();
let name = format!("dfa/{}", bench_name);
let aut = AhoCorasickBuilder::new()
.dfa(true)
.build(patterns.clone());
let aut = AhoCorasickBuilder::new().dfa(true).build(patterns.clone());
define(c, group_name, &name, corpus, move |b| {
b.iter(|| assert_eq!(count, aut.find_iter(&haystack).count()));
});
@ -63,13 +59,12 @@ fn define_aho_corasick_dfa<B, F>(
count: usize,
patterns: Vec<B>,
find_count: F,
) where B: AsRef<[u8]>,
F: 'static + Clone + Fn(&AhoCorasick, &[u8]) -> usize
) where
B: AsRef<[u8]>,
F: 'static + Clone + Fn(&AhoCorasick, &[u8]) -> usize,
{
let patterns: Vec<Vec<u8>> = patterns
.into_iter()
.map(|b| b.as_ref().to_vec())
.collect();
let patterns: Vec<Vec<u8>> =
patterns.into_iter().map(|b| b.as_ref().to_vec()).collect();
let counter = find_count.clone();
let haystack = corpus.to_vec();
@ -134,7 +129,6 @@ fn define(
corpus: &[u8],
bench: impl FnMut(&mut Bencher) + 'static,
) {
let tput = Throughput::Bytes(corpus.len() as u32);
let benchmark = Benchmark::new(bench_name, bench)
.throughput(tput)
@ -152,7 +146,6 @@ fn define_long(
corpus: &[u8],
bench: impl FnMut(&mut Bencher) + 'static,
) {
let tput = Throughput::Bytes(corpus.len() as u32);
let benchmark = Benchmark::new(bench_name, bench)
.throughput(tput)


@ -1,7 +1,7 @@
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
use criterion::{Criterion, black_box};
use criterion::{black_box, Criterion};
use input::{words_5000, words_15000};
use input::{words_15000, words_5000};
use {define, define_long};
/// Benchmarks that measure the performance of constructing an Aho-Corasick
@ -10,15 +10,21 @@ pub fn all(c: &mut Criterion) {
define_build::<String>(c, false, "empty", vec![]);
define_build(c, false, "onebyte", vec!["a"]);
define_build(c, false, "twobytes", vec!["a", "b"]);
define_build(c, false, "many-short", vec![
"ADL", "ADl", "AdL", "Adl", "BAK", "BAk", "BA", "BaK", "Bak", "Ba",
"HOL", "HOl", "HoL", "Hol", "IRE", "IRe", "IrE", "Ire", "JOH", "JOh",
"JoH", "Joh", "SHE", "SHe", "ShE", "She", "WAT", "WAt", "WaT", "Wat",
"aDL", "aDl", "adL", "adl", "bAK", "bAk", "bA", "baK", "bak", "ba",
"hOL", "hOl", "hoL", "hol", "iRE", "iRe", "irE", "ire", "jOH", "jOh",
"joH", "joh", "sHE", "sHe", "shE", "she", "wAT", "wAt", "waT", "wat",
"ſHE", "ſHe", "ſhE", "ſhe",
]);
define_build(
c,
false,
"many-short",
vec![
"ADL", "ADl", "AdL", "Adl", "BAK", "BAk", "BA", "BaK", "Bak",
"Ba", "HOL", "HOl", "HoL", "Hol", "IRE", "IRe", "IrE", "Ire",
"JOH", "JOh", "JoH", "Joh", "SHE", "SHe", "ShE", "She", "WAT",
"WAt", "WaT", "Wat", "aDL", "aDl", "adL", "adl", "bAK", "bAk",
"bA", "baK", "bak", "ba", "hOL", "hOl", "hoL", "hol", "iRE",
"iRe", "irE", "ire", "jOH", "jOh", "joH", "joh", "sHE", "sHe",
"shE", "she", "wAT", "wAt", "waT", "wat", "ſHE", "ſHe", "ſhE",
"ſhe",
],
);
define_build(c, true, "5000words", words_5000());
define_build(c, true, "15000words", words_15000());
}
@ -29,10 +35,8 @@ fn define_build<B: AsRef<[u8]>>(
bench_name: &str,
patterns: Vec<B>,
) {
let patterns: Vec<Vec<u8>> = patterns
.into_iter()
.map(|b| b.as_ref().to_vec())
.collect();
let patterns: Vec<Vec<u8>> =
patterns.into_iter().map(|b| b.as_ref().to_vec()).collect();
let pats = patterns.clone();
let name = format!("nfa/{}", bench_name);


@ -26,30 +26,52 @@ fn memchr_optimizations(c: &mut Criterion) {
define_random(c, "twobytes/nomatch", 0, vec!["\x00", "\x01"]);
define_random(c, "threebytes/match", 352, vec!["a", "\x00", "\x01"]);
define_random(c, "threebytes/nomatch", 0, vec!["\x00", "\x01", "\x02"]);
define_random(c, "fourbytes/match", 352, vec![
"a", "\x00", "\x01", "\x02",
]);
define_random(c, "fourbytes/nomatch", 0, vec![
"\x00", "\x01", "\x02", "\x03",
]);
define_random(c, "fivebytes/match", 352, vec![
"a", "\x00", "\x01", "\x02", "\x03",
]);
define_random(c, "fivebytes/nomatch", 0, vec![
"\x00", "\x01", "\x02", "\x03", "\x04",
]);
define_random(
c,
"fourbytes/match",
352,
vec!["a", "\x00", "\x01", "\x02"],
);
define_random(
c,
"fourbytes/nomatch",
0,
vec!["\x00", "\x01", "\x02", "\x03"],
);
define_random(
c,
"fivebytes/match",
352,
vec!["a", "\x00", "\x01", "\x02", "\x03"],
);
define_random(
c,
"fivebytes/nomatch",
0,
vec!["\x00", "\x01", "\x02", "\x03", "\x04"],
);
}
/// Some miscellaneous benchmarks on random data.
fn misc(c: &mut Criterion) {
define_random(c, "ten-one-prefix", 0, vec![
"zacdef", "zbcdef", "zccdef", "zdcdef", "zecdef",
"zfcdef", "zgcdef", "zhcdef", "zicdef", "zjcdef",
]);
define_random(c, "ten-diff-prefix", 0, vec![
"abcdef", "bcdefg", "cdefgh", "defghi", "efghij",
"fghijk", "ghijkl", "hijklm", "ijklmn", "jklmno",
]);
define_random(
c,
"ten-one-prefix",
0,
vec![
"zacdef", "zbcdef", "zccdef", "zdcdef", "zecdef", "zfcdef",
"zgcdef", "zhcdef", "zicdef", "zjcdef",
],
);
define_random(
c,
"ten-diff-prefix",
0,
vec![
"abcdef", "bcdefg", "cdefgh", "defghi", "efghij", "fghijk",
"ghijkl", "hijklm", "ijklmn", "jklmno",
],
);
}
/// Various benchmarks using a large pattern set.
@ -60,23 +82,47 @@ fn many_patterns(c: &mut Criterion) {
let group = "random10x/standard";
define_aho_corasick_dfa(
c, group, name, RANDOM10X, Standard, 0, words_5000(),
c,
group,
name,
RANDOM10X,
Standard,
0,
words_5000(),
|ac, haystack| ac.find_iter(haystack).count(),
);
let group = "random10x/leftmost-first";
define_aho_corasick_dfa(
c, group, name, RANDOM10X, LeftmostFirst, 0, words_5000(),
c,
group,
name,
RANDOM10X,
LeftmostFirst,
0,
words_5000(),
|ac, haystack| ac.find_iter(haystack).count(),
);
let group = "random10x/leftmost-longest";
define_aho_corasick_dfa(
c, group, name, RANDOM10X, LeftmostLongest, 0, words_5000(),
c,
group,
name,
RANDOM10X,
LeftmostLongest,
0,
words_5000(),
|ac, haystack| ac.find_iter(haystack).count(),
);
let group = "random10x/overlapping";
define_aho_corasick_dfa(
c, group, name, RANDOM10X, Standard, 0, words_5000(),
c,
group,
name,
RANDOM10X,
Standard,
0,
words_5000(),
|ac, haystack| ac.find_overlapping_iter(haystack).count(),
);
@ -84,23 +130,47 @@ fn many_patterns(c: &mut Criterion) {
let group = "random10x/standard";
define_aho_corasick_dfa(
c, group, name, RANDOM10X, Standard, 0, words_100(),
c,
group,
name,
RANDOM10X,
Standard,
0,
words_100(),
|ac, haystack| ac.find_iter(haystack).count(),
);
let group = "random10x/leftmost-first";
define_aho_corasick_dfa(
c, group, name, RANDOM10X, LeftmostFirst, 0, words_100(),
c,
group,
name,
RANDOM10X,
LeftmostFirst,
0,
words_100(),
|ac, haystack| ac.find_iter(haystack).count(),
);
let group = "random10x/leftmost-longest";
define_aho_corasick_dfa(
c, group, name, RANDOM10X, LeftmostLongest, 0, words_100(),
c,
group,
name,
RANDOM10X,
LeftmostLongest,
0,
words_100(),
|ac, haystack| ac.find_iter(haystack).count(),
);
let group = "random10x/overlapping";
define_aho_corasick_dfa(
c, group, name, RANDOM10X, Standard, 0, words_100(),
c,
group,
name,
RANDOM10X,
Standard,
0,
words_100(),
|ac, haystack| ac.find_overlapping_iter(haystack).count(),
);
}
@ -111,7 +181,5 @@ fn define_random<B: AsRef<[u8]>>(
count: usize,
patterns: Vec<B>,
) {
define_aho_corasick(
c, "random", bench_name, RANDOM, count, patterns,
);
define_aho_corasick(c, "random", bench_name, RANDOM, count, patterns);
}


@ -34,6 +34,11 @@ fn define_same<B: AsRef<[u8]>>(
) {
let corpus = "z".repeat(10_000);
define_aho_corasick(
c, "same", bench_name, corpus.as_bytes(), count, patterns,
c,
"same",
bench_name,
corpus.as_bytes(),
count,
patterns,
);
}


@ -1,7 +1,7 @@
use criterion::Criterion;
use input::*;
use define_aho_corasick;
use input::*;
/// These benchmarks test various words on natural language text.
///
@ -11,30 +11,53 @@ use define_aho_corasick;
pub fn all(c: &mut Criterion) {
define_sherlock(c, "name/alt1", 158, vec!["Sherlock", "Street"]);
define_sherlock(c, "name/alt2", 558, vec!["Sherlock", "Holmes"]);
define_sherlock(c, "name/alt3", 740, vec![
"Sherlock", "Holmes", "Watson", "Irene", "Adler", "John", "Baker",
]);
define_sherlock(
c,
"name/alt3",
740,
vec![
"Sherlock", "Holmes", "Watson", "Irene", "Adler", "John", "Baker",
],
);
define_sherlock(c, "name/alt4", 582, vec!["Sher", "Hol"]);
define_sherlock(c, "name/alt5", 639, vec!["Sherlock", "Holmes", "Watson"]);
define_sherlock(c, "name/nocase1", 1764, vec![
"ADL", "ADl", "AdL", "Adl", "BAK", "BAk", "BA", "BaK", "Bak", "Ba",
"HOL", "HOl", "HoL", "Hol", "IRE", "IRe", "IrE", "Ire", "JOH", "JOh",
"JoH", "Joh", "SHE", "SHe", "ShE", "She", "WAT", "WAt", "WaT", "Wat",
"aDL", "aDl", "adL", "adl", "bAK", "bAk", "bA", "baK", "bak", "ba",
"hOL", "hOl", "hoL", "hol", "iRE", "iRe", "irE", "ire", "jOH", "jOh",
"joH", "joh", "sHE", "sHe", "shE", "she", "wAT", "wAt", "waT", "wat",
"ſHE", "ſHe", "ſhE", "ſhe",
]);
define_sherlock(c, "name/nocase2", 1307, vec![
"HOL", "HOl", "HoL", "Hol", "SHE", "SHe", "ShE", "She", "hOL", "hOl",
"hoL", "hol", "sHE", "sHe", "shE", "she", "ſHE", "ſHe", "ſhE", "ſhe",
]);
define_sherlock(c, "name/nocase3", 1442, vec![
"HOL", "HOl", "HoL", "Hol", "SHE", "SHe", "ShE", "She", "WAT", "WAt",
"WaT", "Wat", "hOL", "hOl", "hoL", "hol", "sHE", "sHe", "shE", "she",
"wAT", "wAt", "waT", "wat", "ſHE", "ſHe", "ſhE", "ſhe",
]);
define_sherlock(
c,
"name/nocase1",
1764,
vec![
"ADL", "ADl", "AdL", "Adl", "BAK", "BAk", "BA", "BaK", "Bak",
"Ba", "HOL", "HOl", "HoL", "Hol", "IRE", "IRe", "IrE", "Ire",
"JOH", "JOh", "JoH", "Joh", "SHE", "SHe", "ShE", "She", "WAT",
"WAt", "WaT", "Wat", "aDL", "aDl", "adL", "adl", "bAK", "bAk",
"bA", "baK", "bak", "ba", "hOL", "hOl", "hoL", "hol", "iRE",
"iRe", "irE", "ire", "jOH", "jOh", "joH", "joh", "sHE", "sHe",
"shE", "she", "wAT", "wAt", "waT", "wat", "ſHE", "ſHe", "ſhE",
"ſhe",
],
);
define_sherlock(
c,
"name/nocase2",
1307,
vec![
"HOL", "HOl", "HoL", "Hol", "SHE", "SHe", "ShE", "She", "hOL",
"hOl", "hoL", "hol", "sHE", "sHe", "shE", "she", "ſHE", "ſHe",
"ſhE", "ſhe",
],
);
define_sherlock(
c,
"name/nocase3",
1442,
vec![
"HOL", "HOl", "HoL", "Hol", "SHE", "SHe", "ShE", "She", "WAT",
"WAt", "WaT", "Wat", "hOL", "hOl", "hoL", "hol", "sHE", "sHe",
"shE", "she", "wAT", "wAt", "waT", "wat", "ſHE", "ſHe", "ſhE",
"ſhe",
],
);
define_sherlock(c, "5000words", 567, words_5000());
}


@ -5,6 +5,10 @@ set -ex
cargo build --verbose
cargo doc --verbose
cargo test --verbose
if [ "$TRAVIS_RUST_VERSION" = "stable" ]; then
rustup component add rustfmt
cargo fmt -- --check
fi
if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
cargo bench --verbose --manifest-path bench/Cargo.toml -- --test
fi

rustfmt.toml (new file)

@ -0,0 +1,2 @@
max_width = 79
use_small_heuristics = "max"
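
For readers unfamiliar with these options: max_width = 79 caps every line at
79 columns, and use_small_heuristics = "max" tells rustfmt to keep any
expression on a single line whenever it fits within that cap, wrapping one
item per line only when it does not. That combination explains most of this
diff. A minimal sketch of the effect (the Buffer shape is borrowed from the
buffer changes below; this is illustration, not a line from the commit):

struct Buffer {
    buf: Vec<u8>,
    min: usize,
    end: usize,
}

fn new_buffer(capacity: usize, min: usize) -> Buffer {
    // Fits under 79 columns, so rustfmt emits the one-line form seen
    // throughout this diff rather than the old one-field-per-line form.
    Buffer { buf: vec![0; capacity], min, end: 0 }
}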


@ -115,11 +115,10 @@ impl AhoCorasick {
/// ]);
/// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern()));
/// ```
pub fn new<I, P>(
patterns: I,
) -> AhoCorasick
where I: IntoIterator<Item=P>,
P: AsRef<[u8]>
pub fn new<I, P>(patterns: I) -> AhoCorasick
where
I: IntoIterator<Item = P>,
P: AsRef<[u8]>,
{
AhoCorasickBuilder::new().build(patterns)
}
@ -151,10 +150,9 @@ impl AhoCorasick {
/// ]);
/// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern()));
/// ```
pub fn new_auto_configured<B>(
patterns: &[B],
) -> AhoCorasick
where B: AsRef<[u8]>
pub fn new_auto_configured<B>(patterns: &[B]) -> AhoCorasick
where
B: AsRef<[u8]>,
{
AhoCorasickBuilder::new().auto_configure(patterns).build(patterns)
}
@ -213,7 +211,10 @@ impl<S: StateID> AhoCorasick<S> {
let mut prestate = PrefilterState::new(self.max_pattern_len());
let mut start = self.imp.start_state();
self.imp.earliest_find_at(
&mut prestate, haystack.as_ref(), 0, &mut start,
&mut prestate,
haystack.as_ref(),
0,
&mut start,
)
}
@ -425,15 +426,13 @@ impl<S: StateID> AhoCorasick<S> {
/// let result = ac.replace_all(haystack, &["x", "y", "z"]);
/// assert_eq!("x the z to the xage", result);
/// ```
pub fn replace_all<B>(
&self,
haystack: &str,
replace_with: &[B],
) -> String
where B: AsRef<str>
pub fn replace_all<B>(&self, haystack: &str, replace_with: &[B]) -> String
where
B: AsRef<str>,
{
assert_eq!(
replace_with.len(), self.pattern_count(),
replace_with.len(),
self.pattern_count(),
"replace_all requires a replacement for every pattern \
in the automaton"
);
@ -479,10 +478,12 @@ impl<S: StateID> AhoCorasick<S> {
haystack: &[u8],
replace_with: &[B],
) -> Vec<u8>
where B: AsRef<[u8]>
where
B: AsRef<[u8]>,
{
assert_eq!(
replace_with.len(), self.pattern_count(),
replace_with.len(),
self.pattern_count(),
"replace_all_bytes requires a replacement for every pattern \
in the automaton"
);
@ -528,7 +529,8 @@ impl<S: StateID> AhoCorasick<S> {
haystack: &str,
dst: &mut String,
mut replace_with: F,
) where F: FnMut(&Match, &str, &mut String) -> bool
) where
F: FnMut(&Match, &str, &mut String) -> bool,
{
let mut last_match = 0;
for mat in self.find_iter(haystack) {
@ -573,7 +575,8 @@ impl<S: StateID> AhoCorasick<S> {
haystack: &[u8],
dst: &mut Vec<u8>,
mut replace_with: F,
) where F: FnMut(&Match, &[u8], &mut Vec<u8>) -> bool
) where
F: FnMut(&Match, &[u8], &mut Vec<u8>) -> bool,
{
let mut last_match = 0;
for mat in self.find_iter(haystack) {
@ -710,12 +713,14 @@ impl<S: StateID> AhoCorasick<S> {
wtr: W,
replace_with: &[B],
) -> io::Result<()>
where R: io::Read,
W: io::Write,
B: AsRef<[u8]>
where
R: io::Read,
W: io::Write,
B: AsRef<[u8]>,
{
assert_eq!(
replace_with.len(), self.pattern_count(),
replace_with.len(),
self.pattern_count(),
"stream_replace_all requires a replacement for every pattern \
in the automaton"
);
@ -795,9 +800,10 @@ impl<S: StateID> AhoCorasick<S> {
mut wtr: W,
mut replace_with: F,
) -> io::Result<()>
where R: io::Read,
W: io::Write,
F: FnMut(&Match, &[u8], &mut W) -> io::Result<()>
where
R: io::Read,
W: io::Write,
F: FnMut(&Match, &[u8], &mut W) -> io::Result<()>,
{
let mut it = StreamChunkIter::new(self, rdr);
while let Some(result) = it.next() {
@ -1041,16 +1047,20 @@ impl<S: StateID> Imp<S> {
match_index: &mut usize,
) -> Option<Match> {
match *self {
Imp::NFA(ref nfa) => {
nfa.overlapping_find_at(
prestate, haystack, at, state_id, match_index,
)
}
Imp::DFA(ref dfa) => {
dfa.overlapping_find_at(
prestate, haystack, at, state_id, match_index,
)
}
Imp::NFA(ref nfa) => nfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
Imp::DFA(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
}
}
@ -1081,12 +1091,8 @@ impl<S: StateID> Imp<S> {
state_id: &mut S,
) -> Option<Match> {
match *self {
Imp::NFA(ref nfa) => {
nfa.find_at(prestate, haystack, at, state_id)
}
Imp::DFA(ref dfa) => {
dfa.find_at(prestate, haystack, at, state_id)
}
Imp::NFA(ref nfa) => nfa.find_at(prestate, haystack, at, state_id),
Imp::DFA(ref dfa) => dfa.find_at(prestate, haystack, at, state_id),
}
}
}
@ -1133,7 +1139,10 @@ impl<'a, 'b, S: StateID> Iterator for FindIter<'a, 'b, S> {
}
let mut start = self.start;
let result = self.fsm.find_at(
&mut self.prestate, self.haystack, self.pos, &mut start,
&mut self.prestate,
self.haystack,
self.pos,
&mut start,
);
let mat = match result {
None => return None,
@ -1243,9 +1252,7 @@ pub struct StreamFindIter<'a, R, S: 'a + StateID> {
impl<'a, R: io::Read, S: StateID> StreamFindIter<'a, R, S> {
fn new(ac: &'a AhoCorasick<S>, rdr: R) -> StreamFindIter<'a, R, S> {
StreamFindIter {
it: StreamChunkIter::new(ac, rdr),
}
StreamFindIter { it: StreamChunkIter::new(ac, rdr) }
}
}
@ -1492,12 +1499,10 @@ impl AhoCorasickBuilder {
/// .build(patterns);
/// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern()));
/// ```
pub fn build<I, P>(
&self,
patterns: I,
) -> AhoCorasick
where I: IntoIterator<Item=P>,
P: AsRef<[u8]>
pub fn build<I, P>(&self, patterns: I) -> AhoCorasick
where
I: IntoIterator<Item = P>,
P: AsRef<[u8]>,
{
// The builder only returns an error if the chosen state ID
// representation is too small to fit all of the given patterns. In
@ -1560,19 +1565,19 @@ impl AhoCorasickBuilder {
&self,
patterns: I,
) -> Result<AhoCorasick<S>>
where S: StateID,
I: IntoIterator<Item=P>,
P: AsRef<[u8]>
where
S: StateID,
I: IntoIterator<Item = P>,
P: AsRef<[u8]>,
{
let nfa = self.nfa_builder.build(patterns)?;
let match_kind = nfa.match_kind().clone();
let imp =
if self.dfa {
let dfa = self.dfa_builder.build(&nfa)?;
Imp::DFA(dfa)
} else {
Imp::NFA(nfa)
};
let imp = if self.dfa {
let dfa = self.dfa_builder.build(&nfa)?;
Imp::DFA(dfa)
} else {
Imp::NFA(nfa)
};
Ok(AhoCorasick { imp, match_kind })
}
@ -2002,7 +2007,7 @@ impl MatchKind {
pub(crate) fn is_leftmost(&self) -> bool {
*self == MatchKind::LeftmostFirst
|| *self == MatchKind::LeftmostLongest
|| *self == MatchKind::LeftmostLongest
}
pub(crate) fn is_leftmost_first(&self) -> bool {


@ -1,6 +1,6 @@
use ahocorasick::MatchKind;
use prefilter::{Prefilter, PrefilterState};
use state_id::{StateID, dead_id, fail_id};
use state_id::{dead_id, fail_id, StateID};
use Match;
// NOTE: This trait was essentially copied from regex-automata, with some
@ -142,12 +142,14 @@ pub trait Automaton {
) -> Option<Match> {
if let Some(pre) = self.prefilter() {
self.standard_find_at_imp(
prestate, Some(pre), haystack, at, state_id,
prestate,
Some(pre),
haystack,
at,
state_id,
)
} else {
self.standard_find_at_imp(
prestate, None, haystack, at, state_id,
)
self.standard_find_at_imp(prestate, None, haystack, at, state_id)
}
}
@ -235,12 +237,14 @@ pub trait Automaton {
) -> Option<Match> {
if let Some(pre) = self.prefilter() {
self.leftmost_find_at_imp(
prestate, Some(pre), haystack, at, state_id,
prestate,
Some(pre),
haystack,
at,
state_id,
)
} else {
self.leftmost_find_at_imp(
prestate, None, haystack, at, state_id,
)
self.leftmost_find_at_imp(prestate, None, haystack, at, state_id)
}
}
@ -337,11 +341,7 @@ pub trait Automaton {
if *match_index < match_count {
// This is guaranteed to return a match since
// match_index < match_count.
let result = self.get_match(
*state_id,
*match_index,
at,
);
let result = self.get_match(*state_id, *match_index, at);
debug_assert!(result.is_some(), "must be a match");
*match_index += 1;
return result;


@ -3,7 +3,7 @@ use std::io;
use std::ptr;
/// The default buffer capacity that we use for the stream buffer.
const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB
const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10); // 8 KB
/// A fairly simple roll buffer for supporting stream searches.
///
@ -52,11 +52,7 @@ impl Buffer {
// TODO: It would be good to find a way to test the streaming
// implementation with the minimal buffer size.
let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
Buffer {
buf: vec![0; capacity],
min,
end: 0,
}
Buffer { buf: vec![0; capacity], min, end: 0 }
}
/// Return the contents of this buffer.
@ -110,7 +106,8 @@ impl Buffer {
/// This should only be called when the entire contents of this buffer have
/// been searched.
pub fn roll(&mut self) {
let roll_start = self.end
let roll_start = self
.end
.checked_sub(self.min)
.expect("buffer capacity should be bigger than minimum amount");
let roll_len = self.min;
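
The hunk above is cut off before the interesting part, so here is a hedged
sketch of what the roll operation amounts to (the copy and the reset of end
are my reading of the surrounding code, not lines from this diff): the final
min bytes are moved to the front of the buffer so that a match straddling two
reads is still in memory when the next chunk is appended.

fn roll_sketch(buf: &mut [u8], end: &mut usize, min: usize) {
    // Keep the trailing `min` bytes as context for the next read.
    let roll_start = end
        .checked_sub(min)
        .expect("buffer capacity should be bigger than minimum amount");
    // Move buf[roll_start..end] to the front; the regions may overlap,
    // which copy_within handles correctly.
    buf.copy_within(roll_start..*end, 0);
    // The buffer now holds only the carried-over context.
    *end = min;
}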


@ -4,9 +4,9 @@ use ahocorasick::MatchKind;
use automaton::Automaton;
use classes::ByteClasses;
use error::Result;
use nfa::{NFA, PatternID, PatternLength};
use nfa::{PatternID, PatternLength, NFA};
use prefilter::{Prefilter, PrefilterObj, PrefilterState};
use state_id::{StateID, dead_id, fail_id, premultiply_overflow_error};
use state_id::{dead_id, fail_id, premultiply_overflow_error, StateID};
use Match;
#[derive(Clone, Debug)]
@ -57,26 +57,34 @@ impl<S: StateID> DFA<S> {
match_index: &mut usize,
) -> Option<Match> {
match *self {
DFA::Standard(ref dfa) => {
dfa.overlapping_find_at(
prestate, haystack, at, state_id, match_index,
)
}
DFA::ByteClass(ref dfa) => {
dfa.overlapping_find_at(
prestate, haystack, at, state_id, match_index,
)
}
DFA::Premultiplied(ref dfa) => {
dfa.overlapping_find_at(
prestate, haystack, at, state_id, match_index,
)
}
DFA::PremultipliedByteClass(ref dfa) => {
dfa.overlapping_find_at(
prestate, haystack, at, state_id, match_index,
)
}
DFA::Standard(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
DFA::ByteClass(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
DFA::Premultiplied(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
DFA::PremultipliedByteClass(ref dfa) => dfa.overlapping_find_at(
prestate,
haystack,
at,
state_id,
match_index,
),
}
}
@ -562,10 +570,9 @@ impl<S: StateID> Repr<S> {
/// Computes the total amount of heap used by this NFA in bytes.
fn calculate_size(&mut self) {
let mut size =
(self.trans.len() * size_of::<S>())
+ (self.matches.len() *
size_of::<Vec<(PatternID, PatternLength)>>());
let mut size = (self.trans.len() * size_of::<S>())
+ (self.matches.len()
* size_of::<Vec<(PatternID, PatternLength)>>());
for state_matches in &self.matches {
size +=
state_matches.len() * size_of::<(PatternID, PatternLength)>();
@ -584,10 +591,7 @@ pub struct Builder {
impl Builder {
/// Create a new builder for a DFA.
pub fn new() -> Builder {
Builder {
premultiply: true,
byte_classes: true,
}
Builder { premultiply: true, byte_classes: true }
}
/// Build a DFA from the given NFA.
@ -596,12 +600,11 @@ impl Builder {
/// representation size. This can only happen when state ids are
/// premultiplied (which is enabled by default).
pub fn build<S: StateID>(&self, nfa: &NFA<S>) -> Result<DFA<S>> {
let byte_classes =
if self.byte_classes {
nfa.byte_classes().clone()
} else {
ByteClasses::singletons()
};
let byte_classes = if self.byte_classes {
nfa.byte_classes().clone()
} else {
ByteClasses::singletons()
};
let alphabet_len = byte_classes.alphabet_len();
let trans = vec![fail_id(); alphabet_len * nfa.state_len()];
let matches = vec![vec![]; nfa.state_len()];


@ -33,7 +33,7 @@ pub enum ErrorKind {
max: usize,
/// The maximum ID required by premultiplication.
requested_max: usize,
}
},
}
impl Error {
@ -70,15 +70,13 @@ impl error::Error for Error {
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.kind {
ErrorKind::StateIDOverflow { max } => {
write!(
f,
"building the automaton failed because it required \
building more states than can be identified, where the \
maximum ID for the chosen representation is {}",
max,
)
}
ErrorKind::StateIDOverflow { max } => write!(
f,
"building the automaton failed because it required \
building more states than can be identified, where the \
maximum ID for the chosen representation is {}",
max,
),
ErrorKind::PremultiplyOverflow { max, requested_max } => {
if max == requested_max {
write!(


@ -196,8 +196,8 @@ extern crate doc_comment;
doctest!("../README.md");
pub use ahocorasick::{
AhoCorasick, AhoCorasickBuilder, MatchKind,
FindIter, FindOverlappingIter, StreamFindIter,
AhoCorasick, AhoCorasickBuilder, FindIter, FindOverlappingIter, MatchKind,
StreamFindIter,
};
pub use error::{Error, ErrorKind};
pub use state_id::StateID;
@ -205,11 +205,11 @@ pub use state_id::StateID;
mod ahocorasick;
mod automaton;
mod buffer;
mod classes;
mod dfa;
mod error;
mod classes;
mod prefilter;
mod nfa;
mod prefilter;
mod state_id;
#[cfg(test)]
mod tests;
@ -287,10 +287,6 @@ impl Match {
#[inline]
fn increment(&self, by: usize) -> Match {
Match {
pattern: self.pattern,
len: self.len,
end: self.end + by,
}
Match { pattern: self.pattern, len: self.len, end: self.end + by }
}
}
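
A hedged note on increment, since the one-liner above hides its purpose:
stream searches report offsets relative to the internal rolling buffer, and
(on my reading of the stream iterator code, not stated in this diff)
increment shifts a match by the number of bytes already consumed to recover
absolute stream positions. Conceptually:

// Hypothetical illustration, not crate API: converting a buffer-relative
// match into a stream-absolute one.
fn to_absolute(m: Match, consumed: usize) -> Match {
    // Only `end` moves; `len` is unchanged, so `start() == end() - len()`
    // shifts along with it.
    m.increment(consumed)
}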


@ -1,14 +1,14 @@
use std::collections::VecDeque;
use std::cmp;
use std::collections::VecDeque;
use std::fmt;
use std::mem::size_of;
use ahocorasick::MatchKind;
use automaton::Automaton;
use classes::{ByteClasses, ByteClassBuilder};
use classes::{ByteClassBuilder, ByteClasses};
use error::Result;
use prefilter::{self, Prefilter, PrefilterObj};
use state_id::{StateID, dead_id, fail_id, usize_to_state_id};
use state_id::{dead_id, fail_id, usize_to_state_id, StateID};
use Match;
/// The identifier for a pattern, which is simply the position of the pattern
@ -173,9 +173,8 @@ impl<S: StateID> NFA<S> {
}
fn copy_matches(&mut self, src: S, dst: S) {
let (src, dst) = get_two_mut(
&mut self.states, src.to_usize(), dst.to_usize(),
);
let (src, dst) =
get_two_mut(&mut self.states, src.to_usize(), dst.to_usize());
dst.matches.extend_from_slice(&src.matches);
}
@ -242,9 +241,11 @@ impl<S: StateID> Automaton for NFA<S> {
None => return None,
Some(state) => state,
};
state.matches
.get(match_index)
.map(|&(id, len)| Match { pattern: id, len, end })
state.matches.get(match_index).map(|&(id, len)| Match {
pattern: id,
len,
end,
})
}
fn match_count(&self, id: S) -> usize {
@ -291,7 +292,7 @@ pub struct State<S> {
impl<S: StateID> State<S> {
fn heap_bytes(&self) -> usize {
self.trans.heap_bytes()
+ (self.matches.len() * size_of::<(PatternID, PatternLength)>())
+ (self.matches.len() * size_of::<(PatternID, PatternLength)>())
}
fn add_match(&mut self, i: PatternID, len: PatternLength) {
@ -345,9 +346,7 @@ impl<S: StateID> Transitions<S> {
Transitions::Sparse(ref sparse) => {
sparse.len() * size_of::<(u8, S)>()
}
Transitions::Dense(ref dense) => {
dense.len() * size_of::<S>()
}
Transitions::Dense(ref dense) => dense.len() * size_of::<S>(),
}
}
@ -522,12 +521,10 @@ impl Builder {
Builder::default()
}
pub fn build<I, P, S: StateID>(
&self,
patterns: I,
) -> Result<NFA<S>>
where I: IntoIterator<Item=P>,
P: AsRef<[u8]>
pub fn build<I, P, S: StateID>(&self, patterns: I) -> Result<NFA<S>>
where
I: IntoIterator<Item = P>,
P: AsRef<[u8]>,
{
Compiler::new(self)?.compile(patterns)
}
@ -581,12 +578,10 @@ impl<'a, S: StateID> Compiler<'a, S> {
})
}
fn compile<I, P>(
mut self,
patterns: I,
) -> Result<NFA<S>>
where I: IntoIterator<Item=P>,
P: AsRef<[u8]>
fn compile<I, P>(mut self, patterns: I) -> Result<NFA<S>>
where
I: IntoIterator<Item = P>,
P: AsRef<[u8]>,
{
self.add_state(0)?; // the fail state, which is never entered
self.add_state(0)?; // the dead state, only used for leftmost
@ -610,19 +605,15 @@ impl<'a, S: StateID> Compiler<'a, S> {
/// automaton. Effectively, it creates the basic structure of the
/// automaton, where every pattern given has a path from the start state to
/// the end of the pattern.
fn build_trie<I, P>(
&mut self,
patterns: I,
) -> Result<()>
where I: IntoIterator<Item=P>,
P: AsRef<[u8]>
fn build_trie<I, P>(&mut self, patterns: I) -> Result<()>
where
I: IntoIterator<Item = P>,
P: AsRef<[u8]>,
{
'PATTERNS:
for (pati, pat) in patterns.into_iter().enumerate() {
'PATTERNS: for (pati, pat) in patterns.into_iter().enumerate() {
let pat = pat.as_ref();
self.nfa.max_pattern_len = cmp::max(
self.nfa.max_pattern_len, pat.len(),
);
self.nfa.max_pattern_len =
cmp::max(self.nfa.max_pattern_len, pat.len());
self.nfa.pattern_count += 1;
let mut prev = self.nfa.start_id;
@ -873,11 +864,7 @@ impl<'a, S: StateID> Compiler<'a, S> {
/// state.
fn start(nfa: &NFA<S>) -> QueuedState<S> {
let match_at_depth =
if nfa.start().is_match() {
Some(0)
} else {
None
};
if nfa.start().is_match() { Some(0) } else { None };
QueuedState { id: nfa.start_id, match_at_depth }
}
@ -909,8 +896,7 @@ impl<'a, S: StateID> Compiler<'a, S> {
None if nfa.state(next).is_match() => {}
None => return None,
}
let depth =
nfa.state(next).depth
let depth = nfa.state(next).depth
- nfa.state(next).get_longest_match_len().unwrap()
+ 1;
Some(depth)
@ -1173,7 +1159,8 @@ impl<S: StateID> fmt::Debug for NFA<S> {
});
writeln!(f, "{:04}: {}", id, trans.join(", "))?;
let matches: Vec<String> = s.matches
let matches: Vec<String> = s
.matches
.iter()
.map(|&(pattern_id, _)| pattern_id.to_string())
.collect();


@ -6,7 +6,9 @@ use memchr::{memchr, memchr2, memchr3};
/// A prefilter describes the behavior of fast literal scanners for quickly
/// skipping past bytes in the haystack that we know cannot possibly
/// participate in a match.
pub trait Prefilter: Send + Sync + RefUnwindSafe + UnwindSafe + fmt::Debug {
pub trait Prefilter:
Send + Sync + RefUnwindSafe + UnwindSafe + fmt::Debug
{
/// Returns the next possible match candidate. This may yield false
/// positives, so callers must "confirm" a match starting at the position
/// returned. This, however, must never produce false negatives. That is,
@ -165,24 +167,16 @@ impl StartBytesBuilder {
}
match len {
0 => None,
1 => {
Some(PrefilterObj::new(StartBytesOne {
byte1: bytes[0],
}))
}
2 => {
Some(PrefilterObj::new(StartBytesTwo {
byte1: bytes[0],
byte2: bytes[1],
}))
}
3 => {
Some(PrefilterObj::new(StartBytesThree {
byte1: bytes[0],
byte2: bytes[1],
byte3: bytes[2],
}))
}
1 => Some(PrefilterObj::new(StartBytesOne { byte1: bytes[0] })),
2 => Some(PrefilterObj::new(StartBytesTwo {
byte1: bytes[0],
byte2: bytes[1],
})),
3 => Some(PrefilterObj::new(StartBytesThree {
byte1: bytes[0],
byte2: bytes[1],
byte3: bytes[2],
})),
_ => unreachable!(),
}
}
@ -204,8 +198,7 @@ pub struct StartBytesOne {
impl Prefilter for StartBytesOne {
fn next_candidate(&self, haystack: &[u8], at: usize) -> Option<usize> {
memchr(self.byte1, &haystack[at..])
.map(|i| at + i)
memchr(self.byte1, &haystack[at..]).map(|i| at + i)
}
fn clone_prefilter(&self) -> Box<Prefilter> {
@ -222,8 +215,7 @@ pub struct StartBytesTwo {
impl Prefilter for StartBytesTwo {
fn next_candidate(&self, haystack: &[u8], at: usize) -> Option<usize> {
memchr2(self.byte1, self.byte2, &haystack[at..])
.map(|i| at + i)
memchr2(self.byte1, self.byte2, &haystack[at..]).map(|i| at + i)
}
fn clone_prefilter(&self) -> Box<Prefilter> {
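
To tie the pieces in this file together, here is a hedged sketch (the
confirm callback is hypothetical, not a crate API) of how a search loop
consumes a Prefilter: next_candidate may return false positives, so every
candidate position is verified by the automaton, while the no-false-negatives
guarantee means a None from the prefilter ends the search.

fn scan(
    pre: &dyn Prefilter,
    confirm: impl Fn(&[u8], usize) -> Option<usize>,
    haystack: &[u8],
) -> Option<usize> {
    let mut at = 0;
    while let Some(candidate) = pre.next_candidate(haystack, at) {
        if let Some(end) = confirm(haystack, candidate) {
            // A real match begins at `candidate`.
            return Some(end);
        }
        // False positive: resume scanning just past it.
        at = candidate + 1;
    }
    // No candidates left; no false negatives means no match.
    None
}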


@ -49,7 +49,7 @@ pub fn dead_id<S: StateID>() -> S {
mod private {
/// Sealed stops crates other than aho-corasick from implementing any
/// traits that use it.
pub trait Sealed{}
pub trait Sealed {}
impl Sealed for u8 {}
impl Sealed for u16 {}
impl Sealed for u32 {}
@ -82,7 +82,14 @@ mod private {
/// bounds checks are explicitly elided for performance reasons.
pub unsafe trait StateID:
private::Sealed
+ Clone + Copy + Debug + Eq + Hash + PartialEq + PartialOrd + Ord
+ Clone
+ Copy
+ Debug
+ Eq
+ Hash
+ PartialEq
+ PartialOrd
+ Ord
{
/// Convert from a `usize` to this implementation's representation.
///
@ -110,57 +117,87 @@ pub unsafe trait StateID:
unsafe impl StateID for usize {
#[inline]
fn from_usize(n: usize) -> usize { n }
fn from_usize(n: usize) -> usize {
n
}
#[inline]
fn to_usize(self) -> usize { self }
fn to_usize(self) -> usize {
self
}
#[inline]
fn max_id() -> usize { ::std::usize::MAX }
fn max_id() -> usize {
::std::usize::MAX
}
}
unsafe impl StateID for u8 {
#[inline]
fn from_usize(n: usize) -> u8 { n as u8 }
fn from_usize(n: usize) -> u8 {
n as u8
}
#[inline]
fn to_usize(self) -> usize { self as usize }
fn to_usize(self) -> usize {
self as usize
}
#[inline]
fn max_id() -> usize { ::std::u8::MAX as usize }
fn max_id() -> usize {
::std::u8::MAX as usize
}
}
unsafe impl StateID for u16 {
#[inline]
fn from_usize(n: usize) -> u16 { n as u16 }
fn from_usize(n: usize) -> u16 {
n as u16
}
#[inline]
fn to_usize(self) -> usize { self as usize }
fn to_usize(self) -> usize {
self as usize
}
#[inline]
fn max_id() -> usize { ::std::u16::MAX as usize }
fn max_id() -> usize {
::std::u16::MAX as usize
}
}
#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
unsafe impl StateID for u32 {
#[inline]
fn from_usize(n: usize) -> u32 { n as u32 }
fn from_usize(n: usize) -> u32 {
n as u32
}
#[inline]
fn to_usize(self) -> usize { self as usize }
fn to_usize(self) -> usize {
self as usize
}
#[inline]
fn max_id() -> usize { ::std::u32::MAX as usize }
fn max_id() -> usize {
::std::u32::MAX as usize
}
}
#[cfg(target_pointer_width = "64")]
unsafe impl StateID for u64 {
#[inline]
fn from_usize(n: usize) -> u64 { n as u64 }
fn from_usize(n: usize) -> u64 {
n as u64
}
#[inline]
fn to_usize(self) -> usize { self as usize }
fn to_usize(self) -> usize {
self as usize
}
#[inline]
fn max_id() -> usize { ::std::u64::MAX as usize }
fn max_id() -> usize {
::std::u64::MAX as usize
}
}


@ -32,7 +32,7 @@ macro_rules! t {
haystack: $haystack,
matches: $matches,
}
}
};
}
/// A collection of test groups.
@ -43,24 +43,20 @@ type TestCollection = &'static [&'static [SearchTest]];
// but each collection should have some tests that no other collection has.
/// Tests for Aho-Corasick's standard non-overlapping match semantics.
const AC_STANDARD_NON_OVERLAPPING: TestCollection = &[
BASICS, NON_OVERLAPPING, STANDARD, REGRESSION,
];
const AC_STANDARD_NON_OVERLAPPING: TestCollection =
&[BASICS, NON_OVERLAPPING, STANDARD, REGRESSION];
/// Tests for Aho-Corasick's standard overlapping match semantics.
const AC_STANDARD_OVERLAPPING: TestCollection = &[
BASICS, OVERLAPPING, REGRESSION,
];
const AC_STANDARD_OVERLAPPING: TestCollection =
&[BASICS, OVERLAPPING, REGRESSION];
/// Tests for Aho-Corasick's leftmost-first match semantics.
const AC_LEFTMOST_FIRST: TestCollection = &[
BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_FIRST, REGRESSION,
];
const AC_LEFTMOST_FIRST: TestCollection =
&[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_FIRST, REGRESSION];
/// Tests for Aho-Corasick's leftmost-longest match semantics.
const AC_LEFTMOST_LONGEST: TestCollection = &[
BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_LONGEST, REGRESSION,
];
const AC_LEFTMOST_LONGEST: TestCollection =
&[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_LONGEST, REGRESSION];
// Now define the individual tests that make up the collections above.
@ -77,70 +73,51 @@ const BASICS: &'static [SearchTest] = &[
t!(basic050, &["a"], "bba", &[(0, 2, 3)]),
t!(basic060, &["a"], "bbb", &[]),
t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
t!(basic100, &["aa"], "", &[]),
t!(basic110, &["aa"], "aa", &[(0, 0, 2)]),
t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
t!(basic130, &["aa"], "abbab", &[]),
t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]),
t!(basic200, &["abc"], "abc", &[(0, 0, 3)]),
t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
t!(basic300, &["a", "b"], "", &[]),
t!(basic310, &["a", "b"], "z", &[]),
t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]),
t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]),
t!(basic340, &["a", "b"], "abba", &[
(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),
]),
t!(basic350, &["b", "a"], "abba", &[
(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),
]),
t!(nover360, &["abc", "bc"], "xbc", &[
(1, 1, 3),
]),
t!(
basic340,
&["a", "b"],
"abba",
&[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
),
t!(
basic350,
&["b", "a"],
"abba",
&[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
),
t!(nover360, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
t!(basic400, &["foo", "bar"], "", &[]),
t!(basic410, &["foo", "bar"], "foobar", &[
(0, 0, 3), (1, 3, 6),
]),
t!(basic420, &["foo", "bar"], "barfoo", &[
(1, 0, 3), (0, 3, 6),
]),
t!(basic430, &["foo", "bar"], "foofoo", &[
(0, 0, 3), (0, 3, 6),
]),
t!(basic440, &["foo", "bar"], "barbar", &[
(1, 0, 3), (1, 3, 6),
]),
t!(basic450, &["foo", "bar"], "bafofoo", &[
(0, 4, 7),
]),
t!(basic460, &["bar", "foo"], "bafofoo", &[
(1, 4, 7),
]),
t!(basic470, &["foo", "bar"], "fobabar", &[
(1, 4, 7),
]),
t!(basic480, &["bar", "foo"], "fobabar", &[
(0, 4, 7),
]),
t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
t!(basic600, &[""], "", &[(0, 0, 0)]),
t!(basic610, &[""], "a", &[(0, 0, 0), (0, 1, 1)]),
t!(basic620, &[""], "abc", &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]),
t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[
(0, 0, 7),
]),
t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[
(1, 1, 10),
]),
t!(basic720, &["yabcdef", "bcdeyabc", "abcdezghi"], "yabcdezghi", &[
(2, 1, 10),
]),
t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
t!(
basic720,
&["yabcdef", "bcdeyabc", "abcdezghi"],
"yabcdezghi",
&[(2, 1, 10),]
),
];
/// Tests for non-overlapping standard match semantics.
@ -155,25 +132,17 @@ const STANDARD: &'static [SearchTest] = &[
t!(standard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]),
t!(standard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]),
t!(standard040, &["a", ""], "a", &[(1, 0, 0), (1, 1, 1)]),
t!(standard400, &["abcd", "bcd", "cd", "b"], "abcd", &[
(3, 1, 2), (2, 2, 4),
]),
t!(standard410, &["", "a"], "a", &[
(0, 0, 0), (0, 1, 1),
]),
t!(standard420, &["", "a"], "aa", &[
(0, 0, 0), (0, 1, 1), (0, 2, 2),
]),
t!(standard430, &["", "a", ""], "a", &[
(0, 0, 0), (0, 1, 1),
]),
t!(standard440, &["a", "", ""], "a", &[
(1, 0, 0), (1, 1, 1),
]),
t!(standard450, &["", "", "a"], "a", &[
(0, 0, 0), (0, 1, 1),
]),
t!(
standard400,
&["abcd", "bcd", "cd", "b"],
"abcd",
&[(3, 1, 2), (2, 2, 4),]
),
t!(standard410, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
t!(standard420, &["", "a"], "aa", &[(0, 0, 0), (0, 1, 1), (0, 2, 2),]),
t!(standard430, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
t!(standard440, &["a", "", ""], "a", &[(1, 0, 0), (1, 1, 1),]),
t!(standard450, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
];
/// Tests for non-overlapping leftmost match semantics. These should pass for
@ -187,31 +156,48 @@ const LEFTMOST: &'static [SearchTest] = &[
t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
t!(leftmost360, &["abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
(2, 0, 8),
]),
t!(leftmost370, &["abcdefghi", "cde", "hz", "abcdefgh"], "abcdefghz", &[
(3, 0, 8),
]),
t!(leftmost380, &["abcdefghi", "hz", "abcdefgh", "a"], "abcdefghz", &[
(2, 0, 8),
]),
t!(leftmost390, &["b", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
(3, 0, 8),
]),
t!(leftmost400, &["h", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
(3, 0, 8),
]),
t!(leftmost410, &["z", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
(3, 0, 8), (0, 8, 9),
]),
t!(
leftmost360,
&["abcdefghi", "hz", "abcdefgh"],
"abcdefghz",
&[(2, 0, 8),]
),
t!(
leftmost370,
&["abcdefghi", "cde", "hz", "abcdefgh"],
"abcdefghz",
&[(3, 0, 8),]
),
t!(
leftmost380,
&["abcdefghi", "hz", "abcdefgh", "a"],
"abcdefghz",
&[(2, 0, 8),]
),
t!(
leftmost390,
&["b", "abcdefghi", "hz", "abcdefgh"],
"abcdefghz",
&[(3, 0, 8),]
),
t!(
leftmost400,
&["h", "abcdefghi", "hz", "abcdefgh"],
"abcdefghz",
&[(3, 0, 8),]
),
t!(
leftmost410,
&["z", "abcdefghi", "hz", "abcdefgh"],
"abcdefghz",
&[(3, 0, 8), (0, 8, 9),]
),
];
/// Tests for non-overlapping leftmost-first match semantics. These tests
@ -220,29 +206,27 @@ const LEFTMOST: &'static [SearchTest] = &[
const LEFTMOST_FIRST: &'static [SearchTest] = &[
t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
t!(leftfirst010, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
t!(leftfirst011, &["", "a", ""], "a", &[
(0, 0, 0), (0, 1, 1),
]),
t!(leftfirst012, &["a", "", ""], "a", &[
(0, 0, 1), (1, 1, 1),
]),
t!(leftfirst013, &["", "", "a"], "a", &[
(0, 0, 0), (0, 1, 1),
]),
t!(leftfirst011, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
t!(leftfirst012, &["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1),]),
t!(leftfirst013, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
t!(leftfirst310, &["abcd", "b", "bce", "ce"], "abce", &[
(1, 1, 2), (3, 2, 4),
]),
t!(leftfirst320, &["a", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
(0, 0, 1), (2, 7, 9),
]),
t!(
leftfirst310,
&["abcd", "b", "bce", "ce"],
"abce",
&[(1, 1, 2), (3, 2, 4),]
),
t!(
leftfirst320,
&["a", "abcdefghi", "hz", "abcdefgh"],
"abcdefghz",
&[(0, 0, 1), (2, 7, 9),]
),
t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
];
@ -251,40 +235,27 @@ const LEFTMOST_FIRST: &'static [SearchTest] = &[
/// generally fail under leftmost-first semantics.
const LEFTMOST_LONGEST: &'static [SearchTest] = &[
t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[
(0, 0, 4),
]),
t!(leftlong020, &["", "a"], "a", &[
(1, 0, 1), (0, 1, 1),
]),
t!(leftlong021, &["", "a", ""], "a", &[
(1, 0, 1), (0, 1, 1),
]),
t!(leftlong022, &["a", "", ""], "a", &[
(0, 0, 1), (1, 1, 1),
]),
t!(leftlong023, &["", "", "a"], "a", &[
(2, 0, 1), (0, 1, 1),
]),
t!(leftlong030, &["", "a"], "aa", &[
(1, 0, 1), (1, 1, 2), (0, 2, 2),
]),
t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
t!(leftlong020, &["", "a"], "a", &[(1, 0, 1), (0, 1, 1),]),
t!(leftlong021, &["", "a", ""], "a", &[(1, 0, 1), (0, 1, 1),]),
t!(leftlong022, &["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1),]),
t!(leftlong023, &["", "", "a"], "a", &[(2, 0, 1), (0, 1, 1),]),
t!(leftlong030, &["", "a"], "aa", &[(1, 0, 1), (1, 1, 2), (0, 2, 2),]),
t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
t!(leftlong310, &["a", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
(3, 0, 8),
]),
t!(
leftlong310,
&["a", "abcdefghi", "hz", "abcdefgh"],
"abcdefghz",
&[(3, 0, 8),]
),
t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
t!(leftlong330, &["abcd", "b", "ce"], "abce", &[
(1, 1, 2), (2, 2, 4),
]),
t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
];
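
Since the difference between these two tables is easy to miss, here is a
hedged standalone illustration using the crate's public API (it mirrors the
leftfirst000 and leftlong000 cases above):

use aho_corasick::{AhoCorasickBuilder, MatchKind};

fn demo_leftmost() {
    let triple = |m: aho_corasick::Match| (m.pattern(), m.start(), m.end());

    // Leftmost-first: ties go to the pattern listed first, so "ab" wins.
    let first = AhoCorasickBuilder::new()
        .match_kind(MatchKind::LeftmostFirst)
        .build(&["ab", "abcd"]);
    assert_eq!(Some((0, 0, 2)), first.find("abcd").map(triple));

    // Leftmost-longest: the longest match at the leftmost start wins.
    let longest = AhoCorasickBuilder::new()
        .match_kind(MatchKind::LeftmostLongest)
        .build(&["ab", "abcd"]);
    assert_eq!(Some((1, 0, 4)), longest.find("abcd").map(triple));
}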
@ -293,30 +264,18 @@ const LEFTMOST_LONGEST: &'static [SearchTest] = &[
/// Generally these tests shouldn't pass when using overlapping semantics.
/// These should pass for both standard and leftmost match semantics.
const NON_OVERLAPPING: &'static [SearchTest] = &[
t!(nover010, &["abcd", "bcd", "cd"], "abcd", &[
(0, 0, 4),
]),
t!(nover020, &["bcd", "cd", "abcd"], "abcd", &[
(2, 0, 4),
]),
t!(nover030, &["abc", "bc"], "zazabcz", &[
(0, 3, 6),
]),
t!(nover100, &["ab", "ba"], "abababa", &[
(0, 0, 2), (0, 2, 4), (0, 4, 6),
]),
t!(nover200, &["foo", "foo"], "foobarfoo", &[
(0, 0, 3), (0, 6, 9),
]),
t!(nover300, &["", ""], "", &[
(0, 0, 0),
]),
t!(nover310, &["", ""], "a", &[
(0, 0, 0), (0, 1, 1),
]),
t!(nover010, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
t!(nover020, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
t!(nover030, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
t!(
nover100,
&["ab", "ba"],
"abababa",
&[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
),
t!(nover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
t!(nover300, &["", ""], "", &[(0, 0, 0),]),
t!(nover310, &["", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
];
/// Tests for overlapping match semantics.
@ -324,51 +283,75 @@ const NON_OVERLAPPING: &'static [SearchTest] = &[
/// This only supports standard match semantics, since leftmost-{first,longest}
/// do not support overlapping matches.
const OVERLAPPING: &'static [SearchTest] = &[
t!(over000, &["abcd", "bcd", "cd", "b"], "abcd", &[
(3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4),
]),
t!(over010, &["bcd", "cd", "b", "abcd"], "abcd", &[
(2, 1, 2), (3, 0, 4), (0, 1, 4), (1, 2, 4),
]),
t!(over020, &["abcd", "bcd", "cd"], "abcd", &[
(0, 0, 4), (1, 1, 4), (2, 2, 4),
]),
t!(over030, &["bcd", "abcd", "cd"], "abcd", &[
(1, 0, 4), (0, 1, 4), (2, 2, 4),
]),
t!(over040, &["bcd", "cd", "abcd"], "abcd", &[
(2, 0, 4), (0, 1, 4), (1, 2, 4),
]),
t!(over050, &["abc", "bc"], "zazabcz", &[
(0, 3, 6), (1, 4, 6),
]),
t!(over100, &["ab", "ba"], "abababa", &[
(0, 0, 2), (1, 1, 3), (0, 2, 4), (1, 3, 5), (0, 4, 6), (1, 5, 7),
]),
t!(over200, &["foo", "foo"], "foobarfoo", &[
(0, 0, 3), (1, 0, 3), (0, 6, 9), (1, 6, 9),
]),
t!(over300, &["", ""], "", &[
(0, 0, 0), (1, 0, 0),
]),
t!(over310, &["", ""], "a", &[
(0, 0, 0), (1, 0, 0), (0, 1, 1), (1, 1, 1),
]),
t!(over320, &["", "a"], "a", &[
(0, 0, 0), (1, 0, 1), (0, 1, 1),
]),
t!(over330, &["", "a", ""], "a", &[
(0, 0, 0), (2, 0, 0), (1, 0, 1), (0, 1, 1), (2, 1, 1),
]),
t!(over340, &["a", "", ""], "a", &[
(1, 0, 0), (2, 0, 0), (0, 0, 1), (1, 1, 1), (2, 1, 1),
]),
t!(over350, &["", "", "a"], "a", &[
(0, 0, 0), (1, 0, 0), (2, 0, 1), (0, 1, 1), (1, 1, 1),
]),
t!(
over000,
&["abcd", "bcd", "cd", "b"],
"abcd",
&[(3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4),]
),
t!(
over010,
&["bcd", "cd", "b", "abcd"],
"abcd",
&[(2, 1, 2), (3, 0, 4), (0, 1, 4), (1, 2, 4),]
),
t!(
over020,
&["abcd", "bcd", "cd"],
"abcd",
&[(0, 0, 4), (1, 1, 4), (2, 2, 4),]
),
t!(
over030,
&["bcd", "abcd", "cd"],
"abcd",
&[(1, 0, 4), (0, 1, 4), (2, 2, 4),]
),
t!(
over040,
&["bcd", "cd", "abcd"],
"abcd",
&[(2, 0, 4), (0, 1, 4), (1, 2, 4),]
),
t!(over050, &["abc", "bc"], "zazabcz", &[(0, 3, 6), (1, 4, 6),]),
t!(
over100,
&["ab", "ba"],
"abababa",
&[(0, 0, 2), (1, 1, 3), (0, 2, 4), (1, 3, 5), (0, 4, 6), (1, 5, 7),]
),
t!(
over200,
&["foo", "foo"],
"foobarfoo",
&[(0, 0, 3), (1, 0, 3), (0, 6, 9), (1, 6, 9),]
),
t!(over300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]),
t!(
over310,
&["", ""],
"a",
&[(0, 0, 0), (1, 0, 0), (0, 1, 1), (1, 1, 1),]
),
t!(over320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1), (0, 1, 1),]),
t!(
over330,
&["", "a", ""],
"a",
&[(0, 0, 0), (2, 0, 0), (1, 0, 1), (0, 1, 1), (2, 1, 1),]
),
t!(
over340,
&["a", "", ""],
"a",
&[(1, 0, 0), (2, 0, 0), (0, 0, 1), (1, 1, 1), (2, 1, 1),]
),
t!(
over350,
&["", "", "a"],
"a",
&[(0, 0, 0), (1, 0, 0), (2, 0, 1), (0, 1, 1), (1, 1, 1),]
),
];
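
And a matching hedged illustration for this table (assuming AhoCorasick is in
scope, as it is in these tests): overlapping search is only available under
standard semantics, which is the default, and it reports every occurrence.
This mirrors the over000 case above.

fn demo_overlapping() {
    let ac = AhoCorasick::new(&["abcd", "bcd", "cd", "b"]);
    let matches: Vec<(usize, usize, usize)> = ac
        .find_overlapping_iter("abcd")
        .map(|m| (m.pattern(), m.start(), m.end()))
        .collect();
    // Every occurrence is reported, in the order the automaton sees them.
    assert_eq!(vec![(3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4)], matches);
}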
/// Regression tests that are applied to all Aho-Corasick combinations.
@ -376,24 +359,32 @@ const OVERLAPPING: &'static [SearchTest] = &[
/// If regression tests are needed for specific match semantics, then add them
/// to the appropriate group above.
const REGRESSION: &'static [SearchTest] = &[
t!(regression010, &["inf", "ind"], "infind", &[
(0, 0, 3), (1, 3, 6),
]),
t!(regression020, &["ind", "inf"], "infind", &[
(1, 0, 3), (0, 3, 6),
]),
t!(regression030, &["libcore/", "libstd/"], "libcore/char/methods.rs", &[
(0, 0, 8),
]),
t!(regression040, &["libstd/", "libcore/"], "libcore/char/methods.rs", &[
(1, 0, 8),
]),
t!(regression050, &["\x00\x00\x01", "\x00\x00\x00"], "\x00\x00\x00", &[
(1, 0, 3),
]),
t!(regression060, &["\x00\x00\x00", "\x00\x00\x01"], "\x00\x00\x00", &[
(0, 0, 3),
]),
t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
t!(
regression030,
&["libcore/", "libstd/"],
"libcore/char/methods.rs",
&[(0, 0, 8),]
),
t!(
regression040,
&["libstd/", "libcore/"],
"libcore/char/methods.rs",
&[(1, 0, 8),]
),
t!(
regression050,
&["\x00\x00\x01", "\x00\x00\x00"],
"\x00\x00\x00",
&[(1, 0, 3),]
),
t!(
regression060,
&["\x00\x00\x00", "\x00\x00\x01"],
"\x00\x00\x00",
&[(0, 0, 3),]
),
];
// Now define a test for each combination of things above that we want to run.
@ -424,10 +415,8 @@ macro_rules! testconfig {
#[test]
fn $name() {
run_search_tests($collection, |test| {
let buf = io::BufReader::with_capacity(
1,
test.haystack.as_bytes(),
);
let buf =
io::BufReader::with_capacity(1, test.haystack.as_bytes());
let mut builder = AhoCorasickBuilder::new();
$with(&mut builder);
builder
@ -461,46 +450,86 @@ macro_rules! testcombo {
use super::*;
testconfig!(nfa_default, $collection, $kind, |_| ());
testconfig!(nfa_no_prefilter, $collection, $kind,
testconfig!(
nfa_no_prefilter,
$collection,
$kind,
|b: &mut AhoCorasickBuilder| {
b.prefilter(false);
});
testconfig!(nfa_all_sparse, $collection, $kind,
}
);
testconfig!(
nfa_all_sparse,
$collection,
$kind,
|b: &mut AhoCorasickBuilder| {
b.dense_depth(0);
});
testconfig!(nfa_all_dense, $collection, $kind,
}
);
testconfig!(
nfa_all_dense,
$collection,
$kind,
|b: &mut AhoCorasickBuilder| {
b.dense_depth(usize::MAX);
});
testconfig!(dfa_default, $collection, $kind,
}
);
testconfig!(
dfa_default,
$collection,
$kind,
|b: &mut AhoCorasickBuilder| {
b.dfa(true);
});
testconfig!(dfa_no_prefilter, $collection, $kind,
}
);
testconfig!(
dfa_no_prefilter,
$collection,
$kind,
|b: &mut AhoCorasickBuilder| {
b.dfa(true).prefilter(false);
});
testconfig!(dfa_all_sparse, $collection, $kind,
}
);
testconfig!(
dfa_all_sparse,
$collection,
$kind,
|b: &mut AhoCorasickBuilder| {
b.dfa(true).dense_depth(0);
});
testconfig!(dfa_all_dense, $collection, $kind,
}
);
testconfig!(
dfa_all_dense,
$collection,
$kind,
|b: &mut AhoCorasickBuilder| {
b.dfa(true).dense_depth(usize::MAX);
});
testconfig!(dfa_no_byte_class, $collection, $kind,
}
);
testconfig!(
dfa_no_byte_class,
$collection,
$kind,
|b: &mut AhoCorasickBuilder| {
b.dfa(true).byte_classes(false);
});
testconfig!(dfa_no_premultiply, $collection, $kind,
}
);
testconfig!(
dfa_no_premultiply,
$collection,
$kind,
|b: &mut AhoCorasickBuilder| {
b.dfa(true).premultiply(false);
});
testconfig!(dfa_no_byte_class_no_premultiply, $collection, $kind,
}
);
testconfig!(
dfa_no_byte_class_no_premultiply,
$collection,
$kind,
|b: &mut AhoCorasickBuilder| {
b.dfa(true).byte_classes(false).premultiply(false);
});
}
);
}
};
}
@ -509,7 +538,9 @@ macro_rules! testcombo {
testcombo!(search_leftmost_longest, AC_LEFTMOST_LONGEST, LeftmostLongest);
testcombo!(search_leftmost_first, AC_LEFTMOST_FIRST, LeftmostFirst);
testcombo!(
search_standard_nonoverlapping, AC_STANDARD_NON_OVERLAPPING, Standard
search_standard_nonoverlapping,
AC_STANDARD_NON_OVERLAPPING,
Standard
);
// Write out the overlapping combo by hand since there is only one of them.
@ -525,49 +556,63 @@ testconfig!(
search_standard_overlapping_nfa_all_sparse,
AC_STANDARD_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| { b.dense_depth(0); }
|b: &mut AhoCorasickBuilder| {
b.dense_depth(0);
}
);
testconfig!(
overlapping,
search_standard_overlapping_nfa_all_dense,
AC_STANDARD_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| { b.dense_depth(usize::MAX); }
|b: &mut AhoCorasickBuilder| {
b.dense_depth(usize::MAX);
}
);
testconfig!(
overlapping,
search_standard_overlapping_dfa_default,
AC_STANDARD_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| { b.dfa(true); }
|b: &mut AhoCorasickBuilder| {
b.dfa(true);
}
);
testconfig!(
overlapping,
search_standard_overlapping_dfa_all_sparse,
AC_STANDARD_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| { b.dfa(true).dense_depth(0); }
|b: &mut AhoCorasickBuilder| {
b.dfa(true).dense_depth(0);
}
);
testconfig!(
overlapping,
search_standard_overlapping_dfa_all_dense,
AC_STANDARD_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| { b.dfa(true).dense_depth(usize::MAX); }
|b: &mut AhoCorasickBuilder| {
b.dfa(true).dense_depth(usize::MAX);
}
);
testconfig!(
overlapping,
search_standard_overlapping_dfa_no_byte_class,
AC_STANDARD_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| { b.dfa(true).byte_classes(false); }
|b: &mut AhoCorasickBuilder| {
b.dfa(true).byte_classes(false);
}
);
testconfig!(
overlapping,
search_standard_overlapping_dfa_no_premultiply,
AC_STANDARD_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| { b.dfa(true).premultiply(false); }
|b: &mut AhoCorasickBuilder| {
b.dfa(true).premultiply(false);
}
);
testconfig!(
overlapping,
@ -594,7 +639,9 @@ testconfig!(
search_standard_stream_dfa_default,
AC_STANDARD_NON_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| { b.dfa(true); }
|b: &mut AhoCorasickBuilder| {
b.dfa(true);
}
);
#[test]
@ -669,8 +716,8 @@ fn state_id_too_small() {
}
}
}
let result = AhoCorasickBuilder::new()
.build_with_size::<u8, _, _>(&patterns);
let result =
AhoCorasickBuilder::new().build_with_size::<u8, _, _>(&patterns);
assert!(result.is_err());
}
@ -680,7 +727,8 @@ fn run_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>(
) {
let get_match_triples =
|matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
matches.into_iter()
matches
.into_iter()
.map(|m| (m.pattern(), m.start(), m.end()))
.collect()
};