Andrew Gallant f858ff321d deps: update quickcheck and rand
The quickcheck update seems to have sussed out a bug in our DFA logic
regarding the encoding of NFA state IDs. But the bug seems unlikely to
occur in real code, so we massage the test data for now until the lazy
DFA gets moved into regex-automata.
2021-03-11 21:10:40 -05:00

460 lines
11 KiB
Rust

// Simplest case: a one-character ASCII literal searched in an identical
// one-character haystack; the expected span is (0, 1).
// NOTE(review): `mat!` is defined outside this chunk -- presumably it generates
// a test asserting the (start, end) span of the first match; confirm there.
mat!(ascii_literal, r"a", "a", Some((0, 1)));
// Some crazy expressions from regular-expressions.info.
//
// Each `mat!` entry lists, in order: the test name, the pattern, the haystack,
// and the expected result -- `Some((start, end))`, or `None` when no match is
// expected.
// NOTE(review): the offsets look like byte offsets into the haystack; `mat!`
// is defined elsewhere, so confirm against its definition.

// An integer in the range 0-255 delimited by word boundaries. `(?-u)` turns
// Unicode mode off for the pattern.
mat!(
match_ranges,
r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
"num: 255",
Some((5, 8))
);
// Same pattern, but 256 is out of range, so no match is expected.
mat!(
match_ranges_not,
r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
"num: 256",
None
);
// Floating point numbers: optional sign, optional integer digits, optional
// dot, then at least one digit.
mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3)));
// Only the leading "0.1" of "0.1.2" is expected.
mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3)));
// The leading "a" is skipped; the expected span covers "1.2".
mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4)));
// Anchored at both ends, so the trailing "a" means no match is expected.
mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None);
// Email address, case-insensitive (`i`) with Unicode mode off (`-u`). The
// expected span covers "jam.slam@gmail.com".
mat!(
match_email,
r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
"mine is jam.slam@gmail.com ",
Some((8, 26))
);
// Same pattern; the haystack has no ".<tld>" part after the host, so no match
// is expected.
mat!(
match_email_not,
r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
"mine is jam.slam@gmail ",
None
);
// A longer email pattern written without any inline flags; same haystack and
// same expected span as `match_email`.
mat!(
match_email_big,
r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
"mine is jam.slam@gmail.com ",
Some((8, 26))
);
// Fully anchored date: year 19xx/20xx, month 01-12, day 01-31, with one of
// '-', ' ', '/' or '.' between the fields.
mat!(
match_date1,
r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
"1900-01-01",
Some((0, 10))
);
// Month "00" is not accepted by the month alternation.
mat!(
match_date2,
r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
"1900-00-01",
None
);
// Month "13" is not accepted by the month alternation.
mat!(
match_date3,
r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
"1900-13-01",
None
);
// Do some crazy dancing with the start/end assertions.
//
// Each `matiter!` entry lists: the test name, the pattern, the haystack, and
// then every expected match span in order.
// NOTE(review): `matiter!` is defined outside this chunk -- presumably it
// compares all successive matches against the listed spans; confirm there.

// On an empty haystack, any mix of `^` and `$` is expected to yield the single
// empty match (0, 0).
matiter!(match_start_end_empty, r"^$", "", (0, 0));
matiter!(match_start_end_empty_many_1, r"^$^$^$", "", (0, 0));
matiter!(match_start_end_empty_many_2, r"^^^$$$", "", (0, 0));
matiter!(match_start_end_empty_rev, r"$^", "", (0, 0));
// The group is under `*`, so it may repeat zero times: an empty match is
// expected at every offset 0 through 5 of the 5-byte haystack "a\nb\nc".
matiter!(
match_start_end_empty_rep,
r"(?:^$)*",
"a\nb\nc",
(0, 0),
(1, 1),
(2, 2),
(3, 3),
(4, 4),
(5, 5)
);
// Same expectation with the assertions in reverse order inside the group.
matiter!(
match_start_end_empty_rep_rev,
r"(?:$^)*",
"a\nb\nc",
(0, 0),
(1, 1),
(2, 2),
(3, 3),
(4, 4),
(5, 5)
);
// Test negated character classes.
// Each haystack starts with characters that the negated class excludes; the
// expected span is the first character that is NOT excluded. Several cases
// nest a POSIX class (`[:space:]`, `[:alpha:]`) inside the negated bracket
// expression, in both orders relative to a literal.
mat!(negclass_letters, r"[^ac]", "acx", Some((2, 3)));
mat!(negclass_letter_comma, r"[^a,]", "a,x", Some((2, 3)));
mat!(negclass_letter_space, r"[^a[:space:]]", "a x", Some((2, 3)));
mat!(negclass_comma, r"[^,]", ",,x", Some((2, 3)));
mat!(negclass_space, r"[^[:space:]]", " a", Some((1, 2)));
mat!(negclass_space_comma, r"[^,[:space:]]", ", a", Some((2, 3)));
mat!(negclass_comma_space, r"[^[:space:],]", " ,a", Some((2, 3)));
// 'A' is alphabetic and therefore excluded; the digit '1' is the expected match.
mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2)));
// Test that repeated empty expressions don't loop forever.
// Every pattern wraps a sub-expression that can match the empty string
// (`.*` or `.?`) in an outer repetition, followed by a literal '='. All cases
// use the haystack "a=b" and expect the span (0, 2), i.e. "a=".
// Lazy (non-greedy) outer repetitions:
mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2)));
mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2)));
mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2)));
mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2)));
mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2)));
mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2)));
// Greedy counterparts of the cases above:
mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2)));
mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2)));
mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2)));
mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2)));
mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2)));
mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2)));
// Test that we handle various flavors of empty expressions.
// With the haystack "abc", a pattern that can only match the empty string is
// expected to produce one empty match at each offset 0, 1, 2 and 3.
matiter!(match_empty1, r"", "", (0, 0));
matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
// Alternations with an empty branch. Per the expected spans below, branch
// order matters: when the empty branch comes first it is taken at every
// offset, even where the other branch (`b`) could match; when `b` comes first
// the span (1, 2) is reported instead, and no (2, 2) follows it.
matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3));
matiter!(match_empty12, r"|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty13, r"b|", "abc", (0, 0), (1, 2), (3, 3));
matiter!(match_empty14, r"|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty15, r"z|", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty16, r"|", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty17, r"||", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty18, r"||z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
// The same ideas using an empty non-capturing group `(?:)`.
matiter!(match_empty19, r"(?:)|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty20, r"b|(?:)", "abc", (0, 0), (1, 2), (3, 3));
matiter!(match_empty21, r"(?:|)", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty22, r"(?:|)|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
// An empty group after a literal: neither branch can match empty, so only the
// spans for "a" and "b" are expected.
matiter!(match_empty23, r"a(?:)|b", "abc", (0, 1), (1, 2));
// Exercise the DFA on a pathological pattern/haystack pair.
// (The input is meant to make the DFA flush its cache too frequently, which
// should make it give up and fall back to the NFA algorithm.)
#[test]
fn dfa_handles_pathological_case() {
    // Builds a string of `count` characters, each randomly picked as '1' or
    // '0'. The generator is seeded from entropy on every call, so the output
    // differs between runs.
    fn ones_and_zeroes(count: usize) -> String {
        use rand::rngs::SmallRng;
        use rand::{Rng, SeedableRng};

        let mut rng = SmallRng::from_entropy();
        (0..count)
            .map(|_| if rng.gen::<bool>() { '1' } else { '0' })
            .collect()
    }

    let re = regex!(r"[01]*1[01]{20}$");

    // Haystack layout: 100_000 random bits, one literal '1', then exactly 20
    // more random bits -- so the pattern matches regardless of the random data.
    let mut haystack = ones_and_zeroes(100_000);
    haystack.push('1');
    haystack += &ones_and_zeroes(20);

    assert!(re.is_match(text!(&*haystack)));
}
// Compile one very large, deeply nested alternation of digit patterns and
// unwrap the result, so the test fails if `RegexBuilder::build` returns an
// error. No search is performed; only compilation is exercised.
// NOTE(review): the name suggests this guards against the parser's nesting
// limit rejecting the pattern, but no explicit nest-limit setting is visible
// here -- confirm the intent.
#[test]
fn nest_limit_makes_it_parse() {
use regex::RegexBuilder;
// The pattern is a multi-line raw string that starts with `(?-u)` (Unicode
// mode off). Every byte between the delimiters, line breaks included, is
// handed to the builder as-is, so the literal must not be reformatted.
RegexBuilder::new(
r#"(?-u)
2(?:
[45]\d{3}|
7(?:
1[0-267]|
2[0-289]|
3[0-29]|
4[01]|
5[1-3]|
6[013]|
7[0178]|
91
)|
8(?:
0[125]|
[139][1-6]|
2[0157-9]|
41|
6[1-35]|
7[1-5]|
8[1-8]|
90
)|
9(?:
0[0-2]|
1[0-4]|
2[568]|
3[3-6]|
5[5-7]|
6[0167]|
7[15]|
8[0146-9]
)
)\d{4}|
3(?:
12?[5-7]\d{2}|
0(?:
2(?:
[025-79]\d|
[348]\d{1,2}
)|
3(?:
[2-4]\d|
[56]\d?
)
)|
2(?:
1\d{2}|
2(?:
[12]\d|
[35]\d{1,2}|
4\d?
)
)|
3(?:
1\d{2}|
2(?:
[2356]\d|
4\d{1,2}
)
)|
4(?:
1\d{2}|
2(?:
2\d{1,2}|
[47]|
5\d{2}
)
)|
5(?:
1\d{2}|
29
)|
[67]1\d{2}|
8(?:
1\d{2}|
2(?:
2\d{2}|
3|
4\d
)
)
)\d{3}|
4(?:
0(?:
2(?:
[09]\d|
7
)|
33\d{2}
)|
1\d{3}|
2(?:
1\d{2}|
2(?:
[25]\d?|
[348]\d|
[67]\d{1,2}
)
)|
3(?:
1\d{2}(?:
\d{2}
)?|
2(?:
[045]\d|
[236-9]\d{1,2}
)|
32\d{2}
)|
4(?:
[18]\d{2}|
2(?:
[2-46]\d{2}|
3
)|
5[25]\d{2}
)|
5(?:
1\d{2}|
2(?:
3\d|
5
)
)|
6(?:
[18]\d{2}|
2(?:
3(?:
\d{2}
)?|
[46]\d{1,2}|
5\d{2}|
7\d
)|
5(?:
3\d?|
4\d|
[57]\d{1,2}|
6\d{2}|
8
)
)|
71\d{2}|
8(?:
[18]\d{2}|
23\d{2}|
54\d{2}
)|
9(?:
[18]\d{2}|
2[2-5]\d{2}|
53\d{1,2}
)
)\d{3}|
5(?:
02[03489]\d{2}|
1\d{2}|
2(?:
1\d{2}|
2(?:
2(?:
\d{2}
)?|
[457]\d{2}
)
)|
3(?:
1\d{2}|
2(?:
[37](?:
\d{2}
)?|
[569]\d{2}
)
)|
4(?:
1\d{2}|
2[46]\d{2}
)|
5(?:
1\d{2}|
26\d{1,2}
)|
6(?:
[18]\d{2}|
2|
53\d{2}
)|
7(?:
1|
24
)\d{2}|
8(?:
1|
26
)\d{2}|
91\d{2}
)\d{3}|
6(?:
0(?:
1\d{2}|
2(?:
3\d{2}|
4\d{1,2}
)
)|
2(?:
2[2-5]\d{2}|
5(?:
[3-5]\d{2}|
7
)|
8\d{2}
)|
3(?:
1|
2[3478]
)\d{2}|
4(?:
1|
2[34]
)\d{2}|
5(?:
1|
2[47]
)\d{2}|
6(?:
[18]\d{2}|
6(?:
2(?:
2\d|
[34]\d{2}
)|
5(?:
[24]\d{2}|
3\d|
5\d{1,2}
)
)
)|
72[2-5]\d{2}|
8(?:
1\d{2}|
2[2-5]\d{2}
)|
9(?:
1\d{2}|
2[2-6]\d{2}
)
)\d{3}|
7(?:
(?:
02|
[3-589]1|
6[12]|
72[24]
)\d{2}|
21\d{3}|
32
)\d{3}|
8(?:
(?:
4[12]|
[5-7]2|
1\d?
)|
(?:
0|
3[12]|
[5-7]1|
217
)\d
)\d{4}|
9(?:
[35]1|
(?:
[024]2|
81
)\d|
(?:
1|
[24]1
)\d{2}
)\d{3}
"#,
)
.build()
.unwrap();
}