mirror of
https://gitee.com/openharmony/third_party_rust_regex
synced 2025-04-17 10:00:49 +00:00

The DFA handles word boundaries by tagging each state with an `is_word` flag that lets us determine whether the next byte in the haystack should cause a word boundary instruction to match. We were mishandling how this tagging happened for start states. In particular, the tag was not used as an index into the start state cache, and therefore could wind up choosing an incorrect but previously computed start state with the wrong flags set. This leads to incorrect matches. We fix this by using the right flags to generate an index.
128 lines
4.1 KiB
Rust
128 lines
4.1 KiB
Rust
// Convenience macros.
|
|
|
|
// Runs `$re.find_iter` over `text!($text)` and gathers every item the
// iterator yields into a `Vec`.
//
// The whole expansion is a single tail expression, so it can be used
// directly wherever a value is expected.
macro_rules! findall {
    ($re:expr, $text:expr) => {{
        ::std::iter::Iterator::collect::<Vec<_>>($re.find_iter(text!($text)))
    }};
}
|
|
|
|
// Macros for automatically producing tests.
|
|
|
|
// Generates a `#[test]` function called `$name` that checks the capture
// positions of `$re` on `$text`.
//
// `$loc` is the expected list of `Option` capture positions. When the regex
// does not match at all, the observed result is the single-element list
// `[None]`. If the regex reports more groups than `$loc` lists, the surplus
// groups are ignored.
macro_rules! mat(
    ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => (
        #[test]
        fn $name() {
            let haystack = text!($text);
            let want: Vec<Option<_>> = vec![$($loc)+];
            let re = regex!($re);
            let caps: Vec<Option<_>> = if let Some(c) = re.captures(haystack) {
                // Whenever `captures` succeeds, the cheaper match
                // predicates must report a match as well.
                assert!(re.is_match(haystack));
                assert!(re.shortest_match(haystack).is_some());
                c.iter_pos().collect()
            } else {
                vec![None]
            };
            // The test set sometimes leaves out capture groups, so truncate
            // the actual capture groups to match the test set.
            let shown = if caps.len() > want.len() {
                &caps[0..want.len()]
            } else {
                &caps[..]
            };
            assert!(
                want == shown,
                "For RE '{}' against '{:?}', \
                 expected '{:?}' but got '{:?}'",
                $re, haystack, want, shown
            );
        }
    );
);
|
|
|
|
// Generates a `#[test]` function called `$name` that checks every match
// `$re` produces on `$text` when iterated.
//
// Two forms are accepted:
//
// * `matiter!(name, re, text)` expects no matches at all.
// * `matiter!(name, re, text, loc, ...)` expects exactly the listed matches.
//
// In both forms the result of `find_iter` is additionally compared with
// group 0 of every item from `captures_iter`, so the two iterators must
// agree with each other.
macro_rules! matiter(
    ($name:ident, $re:expr, $text:expr) => (
        #[test]
        fn $name() {
            let haystack = text!($text);
            // No expected locations were given, so the element type has to
            // be spelled out explicitly.
            let want: Vec<(usize, usize)> = vec![];
            let re = regex!($re);
            let found: Vec<_> = re.find_iter(haystack).collect();
            assert!(
                want == found,
                "For RE '{}' against '{:?}', \
                 expected '{:?}' but got '{:?}'",
                $re, haystack, want, found
            );
            let from_captures: Vec<_> = re
                .captures_iter(haystack)
                .map(|c| c.pos(0).unwrap())
                .collect();
            assert!(
                from_captures == found,
                "For RE '{}' against '{:?}', \
                 got '{:?}' using find_iter but got '{:?}' \
                 using captures_iter",
                $re, haystack, found, from_captures
            );
        }
    );
    ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => (
        #[test]
        fn $name() {
            let haystack = text!($text);
            let want: Vec<_> = vec![$($loc)+];
            let re = regex!($re);
            let found: Vec<_> = re.find_iter(haystack).collect();
            assert!(
                want == found,
                "For RE '{}' against '{:?}', \
                 expected '{:?}' but got '{:?}'",
                $re, haystack, want, found
            );
            let from_captures: Vec<_> = re
                .captures_iter(haystack)
                .map(|c| c.pos(0).unwrap())
                .collect();
            assert!(
                from_captures == found,
                "For RE '{}' against '{:?}', \
                 got '{:?}' using find_iter but got '{:?}' \
                 using captures_iter",
                $re, haystack, found, from_captures
            );
        }
    );
);
|
|
|
|
// Generates a `#[test]` function called `$name` asserting that the regex
// set built from `$res` matches `$text`, and that the items produced by
// iterating its `matches` result equal the listed `$match_index` values,
// in order.
macro_rules! matset {
    ($name:ident, $res:expr, $text:expr, $($match_index:expr),*) => {
        #[test]
        fn $name() {
            let haystack = text!($text);
            let regexes = regex_set!($res);
            // The boolean predicate and the detailed match report must
            // both say that something matched.
            assert!(regexes.is_match(haystack));
            let hits = regexes.matches(haystack);
            assert!(hits.matched_any());
            let want = vec![$($match_index),*];
            let found: Vec<_> = hits.into_iter().collect();
            assert_eq!(want, found);
        }
    };
}
|
|
|
|
// Generates a `#[test]` function called `$name` asserting that the regex
// set built from `$res` does not match `$text`: `is_match` is false,
// `matched_any` is false, and iterating the match report yields no items.
macro_rules! nomatset {
    ($name:ident, $res:expr, $text:expr) => {
        #[test]
        fn $name() {
            let haystack = text!($text);
            let regexes = regex_set!($res);
            assert!(!regexes.is_match(haystack));
            let hits = regexes.matches(haystack);
            assert!(!hits.matched_any());
            assert_eq!(0, hits.into_iter().count());
        }
    };
}
|
|
|
|
// Generates a `#[test]` function called `$name` that splits `$text` with
// the regex `$re` and asserts that the collected pieces equal `$expected`.
//
// `$expected` is compared against a slice of the pieces (`&*Vec`), so it
// should be something comparable with a slice reference.
macro_rules! split {
    ($name:ident, $re:expr, $text:expr, $expected:expr) => {
        #[test]
        fn $name() {
            let pattern = regex!($re);
            let pieces: Vec<_> = pattern.split(t!($text)).collect();
            assert_eq!($expected, &*pieces);
        }
    };
}
|