mirror of
https://gitee.com/openharmony/third_party_rust_regex
synced 2025-04-14 08:30:18 +00:00

Fixes #31 and #33. There are a number of related changes in this commit: 1. A script that generates the 'match' tests has been reintroduced. 2. The regex-dna shootout benchmark has been updated. 3. Running `cargo test` on the `regex` crate does not require `regex_macros`. 4. The documentation has been updated to use `Regex::new(...).unwrap()` instead of `regex!`. The emphasis on using `regex!` has been reduced, and a note about its unavailability in Rust 1.0 beta/stable has been added. 5. Updated Travis to test both `regex` and `regex_macros`.
275 lines
10 KiB
Rust
275 lines
10 KiB
Rust
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||
// file at the top-level directory of this distribution and at
|
||
// http://rust-lang.org/COPYRIGHT.
|
||
//
|
||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||
// option. This file may not be copied, modified, or distributed
|
||
// except according to those terms.
|
||
|
||
use regex::{Regex, NoExpand};
|
||
|
||
#[test]
|
||
fn splitn() {
|
||
let re = regex!(r"\d+");
|
||
let text = "cauchy123plato456tyler789binx";
|
||
let subs: Vec<&str> = re.splitn(text, 2).collect();
|
||
assert_eq!(subs, vec!("cauchy", "plato456tyler789binx"));
|
||
}
|
||
|
||
#[test]
|
||
fn split() {
|
||
let re = regex!(r"\d+");
|
||
let text = "cauchy123plato456tyler789binx";
|
||
let subs: Vec<&str> = re.split(text).collect();
|
||
assert_eq!(subs, vec!("cauchy", "plato", "tyler", "binx"));
|
||
}
|
||
|
||
#[test]
|
||
fn empty_regex_empty_match() {
|
||
let re = regex!("");
|
||
let ms = re.find_iter("").collect::<Vec<_>>();
|
||
assert_eq!(ms, vec![(0, 0)]);
|
||
}
|
||
|
||
#[test]
|
||
fn empty_regex_nonempty_match() {
|
||
let re = regex!("");
|
||
let ms = re.find_iter("abc").collect::<Vec<_>>();
|
||
assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]);
|
||
}
|
||
|
||
#[test]
|
||
fn quoted_bracket_set() {
|
||
let re = regex!(r"([\x{5b}\x{5d}])");
|
||
let ms = re.find_iter("[]").collect::<Vec<_>>();
|
||
assert_eq!(ms, vec![(0, 1), (1, 2)]);
|
||
let re = regex!(r"([\[\]])");
|
||
let ms = re.find_iter("[]").collect::<Vec<_>>();
|
||
assert_eq!(ms, vec![(0, 1), (1, 2)]);
|
||
}
|
||
|
||
#[test]
|
||
fn first_range_starts_with_left_bracket() {
|
||
let re = regex!(r"([[-z])");
|
||
let ms = re.find_iter("[]").collect::<Vec<_>>();
|
||
assert_eq!(ms, vec![(0, 1), (1, 2)]);
|
||
}
|
||
|
||
#[test]
|
||
fn range_ends_with_escape() {
|
||
let re = regex!(r"([\[-\x{5d}])");
|
||
let ms = re.find_iter("[]").collect::<Vec<_>>();
|
||
assert_eq!(ms, vec![(0, 1), (1, 2)]);
|
||
}
|
||
|
||
#[test]
|
||
fn empty_match_find_iter() {
|
||
let re = regex!(r".*?");
|
||
let ms: Vec<_> = re.find_iter("abc").collect();
|
||
assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]);
|
||
}
|
||
|
||
#[test]
|
||
fn empty_match_captures_iter() {
|
||
let re = regex!(r".*?");
|
||
let ms: Vec<_> = re.captures_iter("abc")
|
||
.map(|c| c.pos(0).unwrap())
|
||
.collect();
|
||
assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]);
|
||
}
|
||
|
||
#[test]
|
||
fn empty_match_unicode_find_iter() {
|
||
let re = regex!(r".*?");
|
||
let ms: Vec<_> = re.find_iter("Ⅰ1Ⅱ2").collect();
|
||
assert_eq!(ms, vec![(0, 0), (3, 3), (4, 4), (7, 7), (8, 8)]);
|
||
}
|
||
|
||
#[test]
|
||
fn empty_match_unicode_captures_iter() {
|
||
let re = regex!(r".*?");
|
||
let ms: Vec<_> = re.captures_iter("Ⅰ1Ⅱ2")
|
||
.map(|c| c.pos(0).unwrap())
|
||
.collect();
|
||
assert_eq!(ms, vec![(0, 0), (3, 3), (4, 4), (7, 7), (8, 8)]);
|
||
}
|
||
|
||
macro_rules! replace(
|
||
($name:ident, $which:ident, $re:expr,
|
||
$search:expr, $replace:expr, $result:expr) => (
|
||
#[test]
|
||
fn $name() {
|
||
let re = regex!($re);
|
||
assert_eq!(re.$which($search, $replace), String::from_str($result));
|
||
}
|
||
);
|
||
);
|
||
|
||
replace!(rep_first, replace, r"\d", "age: 26", "Z", "age: Z6");
|
||
replace!(rep_plus, replace, r"\d+", "age: 26", "Z", "age: Z");
|
||
replace!(rep_all, replace_all, r"\d", "age: 26", "Z", "age: ZZ");
|
||
replace!(rep_groups, replace, r"(\S+)\s+(\S+)", "w1 w2", "$2 $1", "w2 w1");
|
||
replace!(rep_double_dollar, replace,
|
||
r"(\S+)\s+(\S+)", "w1 w2", "$2 $$1", "w2 $1");
|
||
replace!(rep_no_expand, replace,
|
||
r"(\S+)\s+(\S+)", "w1 w2", NoExpand("$2 $1"), "$2 $1");
|
||
replace!(rep_named, replace_all,
|
||
r"(?P<first>\S+)\s+(?P<last>\S+)(?P<space>\s*)",
|
||
"w1 w2 w3 w4", "$last $first$space", "w2 w1 w4 w3");
|
||
replace!(rep_trim, replace_all, "^[ \t]+|[ \t]+$", " \t trim me\t \t",
|
||
"", "trim me");
|
||
|
||
macro_rules! noparse(
|
||
($name:ident, $re:expr) => (
|
||
#[test]
|
||
fn $name() {
|
||
let re = $re;
|
||
match Regex::new(re) {
|
||
Err(_) => {},
|
||
Ok(_) => panic!("Regex '{}' should cause a parse error.", re),
|
||
}
|
||
}
|
||
);
|
||
);
|
||
|
||
noparse!(fail_double_repeat, "a**");
|
||
noparse!(fail_no_repeat_arg, "*");
|
||
noparse!(fail_no_repeat_arg_begin, "^*");
|
||
noparse!(fail_incomplete_escape, "\\");
|
||
noparse!(fail_class_incomplete, "[A-");
|
||
noparse!(fail_class_not_closed, "[A");
|
||
noparse!(fail_class_no_begin, r"[\A]");
|
||
noparse!(fail_class_no_end, r"[\z]");
|
||
noparse!(fail_class_no_boundary, r"[\b]");
|
||
noparse!(fail_open_paren, "(");
|
||
noparse!(fail_close_paren, ")");
|
||
noparse!(fail_invalid_range, "[a-Z]");
|
||
noparse!(fail_empty_capture_name, "(?P<>a)");
|
||
noparse!(fail_empty_capture_exp, "(?P<name>)");
|
||
noparse!(fail_bad_capture_name, "(?P<na-me>)");
|
||
noparse!(fail_bad_flag, "(?a)a");
|
||
noparse!(fail_empty_alt_before, "|a");
|
||
noparse!(fail_empty_alt_after, "a|");
|
||
noparse!(fail_counted_big_exact, "a{1001}");
|
||
noparse!(fail_counted_big_min, "a{1001,}");
|
||
noparse!(fail_counted_no_close, "a{1001");
|
||
noparse!(fail_unfinished_cap, "(?");
|
||
noparse!(fail_unfinished_escape, "\\");
|
||
noparse!(fail_octal_digit, r"\8");
|
||
noparse!(fail_hex_digit, r"\xG0");
|
||
noparse!(fail_hex_short, r"\xF");
|
||
noparse!(fail_hex_long_digits, r"\x{fffg}");
|
||
noparse!(fail_flag_bad, "(?a)");
|
||
noparse!(fail_flag_empty, "(?)");
|
||
noparse!(fail_double_neg, "(?-i-i)");
|
||
noparse!(fail_neg_empty, "(?i-)");
|
||
noparse!(fail_empty_group, "()");
|
||
noparse!(fail_dupe_named, "(?P<a>.)(?P<a>.)");
|
||
noparse!(fail_range_end_no_class, "[a-[:lower:]]");
|
||
noparse!(fail_range_end_no_begin, r"[a-\A]");
|
||
noparse!(fail_range_end_no_end, r"[a-\z]");
|
||
noparse!(fail_range_end_no_boundary, r"[a-\b]");
|
||
|
||
macro_rules! mat(
|
||
($name:ident, $re:expr, $text:expr, $($loc:tt)+) => (
|
||
#[test]
|
||
fn $name() {
|
||
let text = $text;
|
||
let expected: Vec<Option<_>> = vec!($($loc)+);
|
||
let r = regex!($re);
|
||
let got = match r.captures(text) {
|
||
Some(c) => c.iter_pos().collect::<Vec<Option<_>>>(),
|
||
None => vec!(None),
|
||
};
|
||
// The test set sometimes leave out capture groups, so truncate
|
||
// actual capture groups to match test set.
|
||
let mut sgot = got.as_slice();
|
||
if sgot.len() > expected.len() {
|
||
sgot = &sgot[0..expected.len()]
|
||
}
|
||
if expected != sgot {
|
||
panic!("For RE '{}' against '{:?}', expected '{:?}' but got '{:?}'",
|
||
$re, text, expected, sgot);
|
||
}
|
||
}
|
||
);
|
||
);
|
||
|
||
// Some crazy expressions from regular-expressions.info.
|
||
mat!(match_ranges,
|
||
r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
|
||
"num: 255", Some((5, 8)));
|
||
mat!(match_ranges_not,
|
||
r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
|
||
"num: 256", None);
|
||
mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3)));
|
||
mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3)));
|
||
mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4)));
|
||
mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None);
|
||
mat!(match_email, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
|
||
"mine is jam.slam@gmail.com ", Some((8, 26)));
|
||
mat!(match_email_not, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
|
||
"mine is jam.slam@gmail ", None);
|
||
mat!(match_email_big, r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
|
||
"mine is jam.slam@gmail.com ", Some((8, 26)));
|
||
mat!(match_date1,
|
||
r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
|
||
"1900-01-01", Some((0, 10)));
|
||
mat!(match_date2,
|
||
r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
|
||
"1900-00-01", None);
|
||
mat!(match_date3,
|
||
r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
|
||
"1900-13-01", None);
|
||
|
||
// Exercise the flags.
|
||
mat!(match_flag_case, "(?i)abc", "ABC", Some((0, 3)));
|
||
mat!(match_flag_weird_case, "(?i)a(?-i)bc", "Abc", Some((0, 3)));
|
||
mat!(match_flag_weird_case_not, "(?i)a(?-i)bc", "ABC", None);
|
||
mat!(match_flag_case_dotnl, "(?is)a.", "A\n", Some((0, 2)));
|
||
mat!(match_flag_case_dotnl_toggle, "(?is)a.(?-is)a.", "A\nab", Some((0, 4)));
|
||
mat!(match_flag_case_dotnl_toggle_not, "(?is)a.(?-is)a.", "A\na\n", None);
|
||
mat!(match_flag_case_dotnl_toggle_ok, "(?is)a.(?-is:a.)?", "A\na\n", Some((0, 2)));
|
||
mat!(match_flag_multi, "(?m)(?:^\\d+$\n?)+", "123\n456\n789", Some((0, 11)));
|
||
mat!(match_flag_ungreedy, "(?U)a+", "aa", Some((0, 1)));
|
||
mat!(match_flag_ungreedy_greedy, "(?U)a+?", "aa", Some((0, 2)));
|
||
mat!(match_flag_ungreedy_noop, "(?U)(?-U)a+", "aa", Some((0, 2)));
|
||
|
||
// Some Unicode tests.
|
||
// A couple of these are commented out because something in the guts of macro expansion is creating
|
||
// invalid byte strings.
|
||
//mat!(uni_literal, r"Ⅰ", "Ⅰ", Some((0, 3)))
|
||
mat!(uni_one, r"\pN", "Ⅰ", Some((0, 3)));
|
||
mat!(uni_mixed, r"\pN+", "Ⅰ1Ⅱ2", Some((0, 8)));
|
||
mat!(uni_not, r"\PN+", "abⅠ", Some((0, 2)));
|
||
mat!(uni_not_class, r"[\PN]+", "abⅠ", Some((0, 2)));
|
||
mat!(uni_not_class_neg, r"[^\PN]+", "abⅠ", Some((2, 5)));
|
||
mat!(uni_case, r"(?i)Δ", "δ", Some((0, 2)));
|
||
//mat!(uni_case_not, r"Δ", "δ", None)
|
||
mat!(uni_case_upper, r"\p{Lu}+", "ΛΘΓΔα", Some((0, 8)));
|
||
mat!(uni_case_upper_nocase_flag, r"(?i)\p{Lu}+", "ΛΘΓΔα", Some((0, 10)));
|
||
mat!(uni_case_upper_nocase, r"\p{L}+", "ΛΘΓΔα", Some((0, 10)));
|
||
mat!(uni_case_lower, r"\p{Ll}+", "ΛΘΓΔα", Some((8, 10)));
|
||
|
||
// Test the Unicode friendliness of Perl character classes.
|
||
mat!(uni_perl_w, r"\w+", "dδd", Some((0, 4)));
|
||
mat!(uni_perl_w_not, r"\w+", "⥡", None);
|
||
mat!(uni_perl_w_neg, r"\W+", "⥡", Some((0, 3)));
|
||
mat!(uni_perl_d, r"\d+", "1२३9", Some((0, 8)));
|
||
mat!(uni_perl_d_not, r"\d+", "Ⅱ", None);
|
||
mat!(uni_perl_d_neg, r"\D+", "Ⅱ", Some((0, 3)));
|
||
mat!(uni_perl_s, r"\s+", " ", Some((0, 3)));
|
||
mat!(uni_perl_s_not, r"\s+", "☃", None);
|
||
mat!(uni_perl_s_neg, r"\S+", "☃", Some((0, 3)));
|
||
|
||
// And do the same for word boundaries.
|
||
mat!(uni_boundary_none, r"\d\b", "6δ", None);
|
||
mat!(uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1)));
|
||
|
||
// A whole mess of tests from Glenn Fowler's regex test suite.
|
||
// Generated by the 'src/etc/regex-match-tests' program.
|
||
#[path = "matches.rs"]
|
||
mod matches;
|