Andrew Gallant 2d0e77a457 Divorce regex_macros from regex.
Fixes #31 and #33.

There are a number of related changes in this commit:

1. A script that generates the 'match' tests has been reintroduced.
2. The regex-dna shootout benchmark has been updated.
3. Running `cargo test` on the `regex` crate does not require
   `regex_macros`.
4. The documentation has been updated to use `Regex::new(...).unwrap()`
   instead of `regex!`. The emphasis on using `regex!` has been reduced,
   and a note about its unavailability in Rust 1.0 beta/stable has been
   added.
5. Updated Travis to test both `regex` and `regex_macros`.
2015-02-28 14:15:36 -05:00

275 lines
10 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use regex::{Regex, NoExpand};
#[test]
fn splitn() {
let re = regex!(r"\d+");
let text = "cauchy123plato456tyler789binx";
let subs: Vec<&str> = re.splitn(text, 2).collect();
assert_eq!(subs, vec!("cauchy", "plato456tyler789binx"));
}
#[test]
fn split() {
let re = regex!(r"\d+");
let text = "cauchy123plato456tyler789binx";
let subs: Vec<&str> = re.split(text).collect();
assert_eq!(subs, vec!("cauchy", "plato", "tyler", "binx"));
}
#[test]
fn empty_regex_empty_match() {
let re = regex!("");
let ms = re.find_iter("").collect::<Vec<_>>();
assert_eq!(ms, vec![(0, 0)]);
}
#[test]
fn empty_regex_nonempty_match() {
let re = regex!("");
let ms = re.find_iter("abc").collect::<Vec<_>>();
assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]);
}
#[test]
fn quoted_bracket_set() {
let re = regex!(r"([\x{5b}\x{5d}])");
let ms = re.find_iter("[]").collect::<Vec<_>>();
assert_eq!(ms, vec![(0, 1), (1, 2)]);
let re = regex!(r"([\[\]])");
let ms = re.find_iter("[]").collect::<Vec<_>>();
assert_eq!(ms, vec![(0, 1), (1, 2)]);
}
#[test]
fn first_range_starts_with_left_bracket() {
let re = regex!(r"([[-z])");
let ms = re.find_iter("[]").collect::<Vec<_>>();
assert_eq!(ms, vec![(0, 1), (1, 2)]);
}
#[test]
fn range_ends_with_escape() {
let re = regex!(r"([\[-\x{5d}])");
let ms = re.find_iter("[]").collect::<Vec<_>>();
assert_eq!(ms, vec![(0, 1), (1, 2)]);
}
#[test]
fn empty_match_find_iter() {
let re = regex!(r".*?");
let ms: Vec<_> = re.find_iter("abc").collect();
assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]);
}
#[test]
fn empty_match_captures_iter() {
let re = regex!(r".*?");
let ms: Vec<_> = re.captures_iter("abc")
.map(|c| c.pos(0).unwrap())
.collect();
assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]);
}
#[test]
fn empty_match_unicode_find_iter() {
let re = regex!(r".*?");
let ms: Vec<_> = re.find_iter("1Ⅱ2").collect();
assert_eq!(ms, vec![(0, 0), (3, 3), (4, 4), (7, 7), (8, 8)]);
}
#[test]
fn empty_match_unicode_captures_iter() {
let re = regex!(r".*?");
let ms: Vec<_> = re.captures_iter("1Ⅱ2")
.map(|c| c.pos(0).unwrap())
.collect();
assert_eq!(ms, vec![(0, 0), (3, 3), (4, 4), (7, 7), (8, 8)]);
}
macro_rules! replace(
($name:ident, $which:ident, $re:expr,
$search:expr, $replace:expr, $result:expr) => (
#[test]
fn $name() {
let re = regex!($re);
assert_eq!(re.$which($search, $replace), String::from_str($result));
}
);
);
replace!(rep_first, replace, r"\d", "age: 26", "Z", "age: Z6");
replace!(rep_plus, replace, r"\d+", "age: 26", "Z", "age: Z");
replace!(rep_all, replace_all, r"\d", "age: 26", "Z", "age: ZZ");
replace!(rep_groups, replace, r"(\S+)\s+(\S+)", "w1 w2", "$2 $1", "w2 w1");
replace!(rep_double_dollar, replace,
r"(\S+)\s+(\S+)", "w1 w2", "$2 $$1", "w2 $1");
replace!(rep_no_expand, replace,
r"(\S+)\s+(\S+)", "w1 w2", NoExpand("$2 $1"), "$2 $1");
replace!(rep_named, replace_all,
r"(?P<first>\S+)\s+(?P<last>\S+)(?P<space>\s*)",
"w1 w2 w3 w4", "$last $first$space", "w2 w1 w4 w3");
replace!(rep_trim, replace_all, "^[ \t]+|[ \t]+$", " \t trim me\t \t",
"", "trim me");
macro_rules! noparse(
($name:ident, $re:expr) => (
#[test]
fn $name() {
let re = $re;
match Regex::new(re) {
Err(_) => {},
Ok(_) => panic!("Regex '{}' should cause a parse error.", re),
}
}
);
);
noparse!(fail_double_repeat, "a**");
noparse!(fail_no_repeat_arg, "*");
noparse!(fail_no_repeat_arg_begin, "^*");
noparse!(fail_incomplete_escape, "\\");
noparse!(fail_class_incomplete, "[A-");
noparse!(fail_class_not_closed, "[A");
noparse!(fail_class_no_begin, r"[\A]");
noparse!(fail_class_no_end, r"[\z]");
noparse!(fail_class_no_boundary, r"[\b]");
noparse!(fail_open_paren, "(");
noparse!(fail_close_paren, ")");
noparse!(fail_invalid_range, "[a-Z]");
noparse!(fail_empty_capture_name, "(?P<>a)");
noparse!(fail_empty_capture_exp, "(?P<name>)");
noparse!(fail_bad_capture_name, "(?P<na-me>)");
noparse!(fail_bad_flag, "(?a)a");
noparse!(fail_empty_alt_before, "|a");
noparse!(fail_empty_alt_after, "a|");
noparse!(fail_counted_big_exact, "a{1001}");
noparse!(fail_counted_big_min, "a{1001,}");
noparse!(fail_counted_no_close, "a{1001");
noparse!(fail_unfinished_cap, "(?");
noparse!(fail_unfinished_escape, "\\");
noparse!(fail_octal_digit, r"\8");
noparse!(fail_hex_digit, r"\xG0");
noparse!(fail_hex_short, r"\xF");
noparse!(fail_hex_long_digits, r"\x{fffg}");
noparse!(fail_flag_bad, "(?a)");
noparse!(fail_flag_empty, "(?)");
noparse!(fail_double_neg, "(?-i-i)");
noparse!(fail_neg_empty, "(?i-)");
noparse!(fail_empty_group, "()");
noparse!(fail_dupe_named, "(?P<a>.)(?P<a>.)");
noparse!(fail_range_end_no_class, "[a-[:lower:]]");
noparse!(fail_range_end_no_begin, r"[a-\A]");
noparse!(fail_range_end_no_end, r"[a-\z]");
noparse!(fail_range_end_no_boundary, r"[a-\b]");
macro_rules! mat(
($name:ident, $re:expr, $text:expr, $($loc:tt)+) => (
#[test]
fn $name() {
let text = $text;
let expected: Vec<Option<_>> = vec!($($loc)+);
let r = regex!($re);
let got = match r.captures(text) {
Some(c) => c.iter_pos().collect::<Vec<Option<_>>>(),
None => vec!(None),
};
// The test set sometimes leave out capture groups, so truncate
// actual capture groups to match test set.
let mut sgot = got.as_slice();
if sgot.len() > expected.len() {
sgot = &sgot[0..expected.len()]
}
if expected != sgot {
panic!("For RE '{}' against '{:?}', expected '{:?}' but got '{:?}'",
$re, text, expected, sgot);
}
}
);
);
// Some crazy expressions from regular-expressions.info.
mat!(match_ranges,
r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
"num: 255", Some((5, 8)));
mat!(match_ranges_not,
r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
"num: 256", None);
mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3)));
mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3)));
mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4)));
mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None);
mat!(match_email, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
"mine is jam.slam@gmail.com ", Some((8, 26)));
mat!(match_email_not, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
"mine is jam.slam@gmail ", None);
mat!(match_email_big, r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
"mine is jam.slam@gmail.com ", Some((8, 26)));
mat!(match_date1,
r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
"1900-01-01", Some((0, 10)));
mat!(match_date2,
r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
"1900-00-01", None);
mat!(match_date3,
r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
"1900-13-01", None);
// Exercise the flags.
mat!(match_flag_case, "(?i)abc", "ABC", Some((0, 3)));
mat!(match_flag_weird_case, "(?i)a(?-i)bc", "Abc", Some((0, 3)));
mat!(match_flag_weird_case_not, "(?i)a(?-i)bc", "ABC", None);
mat!(match_flag_case_dotnl, "(?is)a.", "A\n", Some((0, 2)));
mat!(match_flag_case_dotnl_toggle, "(?is)a.(?-is)a.", "A\nab", Some((0, 4)));
mat!(match_flag_case_dotnl_toggle_not, "(?is)a.(?-is)a.", "A\na\n", None);
mat!(match_flag_case_dotnl_toggle_ok, "(?is)a.(?-is:a.)?", "A\na\n", Some((0, 2)));
mat!(match_flag_multi, "(?m)(?:^\\d+$\n?)+", "123\n456\n789", Some((0, 11)));
mat!(match_flag_ungreedy, "(?U)a+", "aa", Some((0, 1)));
mat!(match_flag_ungreedy_greedy, "(?U)a+?", "aa", Some((0, 2)));
mat!(match_flag_ungreedy_noop, "(?U)(?-U)a+", "aa", Some((0, 2)));
// Some Unicode tests.
// A couple of these are commented out because something in the guts of macro expansion is creating
// invalid byte strings.
//mat!(uni_literal, r"", "", Some((0, 3)))
mat!(uni_one, r"\pN", "", Some((0, 3)));
mat!(uni_mixed, r"\pN+", "1Ⅱ2", Some((0, 8)));
mat!(uni_not, r"\PN+", "ab", Some((0, 2)));
mat!(uni_not_class, r"[\PN]+", "ab", Some((0, 2)));
mat!(uni_not_class_neg, r"[^\PN]+", "ab", Some((2, 5)));
mat!(uni_case, r"(?i)Δ", "δ", Some((0, 2)));
//mat!(uni_case_not, r"Δ", "δ", None)
mat!(uni_case_upper, r"\p{Lu}+", "ΛΘΓΔα", Some((0, 8)));
mat!(uni_case_upper_nocase_flag, r"(?i)\p{Lu}+", "ΛΘΓΔα", Some((0, 10)));
mat!(uni_case_upper_nocase, r"\p{L}+", "ΛΘΓΔα", Some((0, 10)));
mat!(uni_case_lower, r"\p{Ll}+", "ΛΘΓΔα", Some((8, 10)));
// Test the Unicode friendliness of Perl character classes.
mat!(uni_perl_w, r"\w+", "dδd", Some((0, 4)));
mat!(uni_perl_w_not, r"\w+", "", None);
mat!(uni_perl_w_neg, r"\W+", "", Some((0, 3)));
mat!(uni_perl_d, r"\d+", "1२३9", Some((0, 8)));
mat!(uni_perl_d_not, r"\d+", "", None);
mat!(uni_perl_d_neg, r"\D+", "", Some((0, 3)));
mat!(uni_perl_s, r"\s+", "", Some((0, 3)));
mat!(uni_perl_s_not, r"\s+", "", None);
mat!(uni_perl_s_neg, r"\S+", "", Some((0, 3)));
// And do the same for word boundaries.
mat!(uni_boundary_none, r"\d\b", "", None);
mat!(uni_boundary_ogham, r"\d\b", "6", Some((0, 1)));
// A whole mess of tests from Glenn Fowler's regex test suite.
// Generated by the 'src/etc/regex-match-tests' program.
#[path = "matches.rs"]
mod matches;