Divorce regex_macros from regex.

Fixes #31 and #33.

There are a number of related changes in this commit:

1. A script that generates the 'match' tests has been reintroduced.
2. The regex-dna shootout benchmark has been updated.
3. Running `cargo test` on the `regex` crate does not require
   `regex_macros`.
4. The documentation has been updated to use `Regex::new(...).unwrap()`
   instead of `regex!`. The emphasis on using `regex!` has been reduced,
   and a note about its unavailability in Rust 1.0 beta/stable has been
   added.
5. Updated Travis to test both `regex` and `regex_macros`.
This commit is contained in:
Andrew Gallant 2015-02-28 14:15:36 -05:00
parent 8b44176a91
commit 2d0e77a457
18 changed files with 2084 additions and 135 deletions

View File

@ -4,6 +4,9 @@ script:
- cargo build --verbose
- cargo test --verbose
- cargo doc
- cargo bench --verbose
- cargo test --verbose --manifest-path=regex_macros/Cargo.toml
- cargo bench --verbose --manifest-path=regex_macros/Cargo.toml
after_success: |
[ $TRAVIS_BRANCH = master ] &&
[ $TRAVIS_PULL_REQUEST = false ] &&

View File

@ -1,5 +1,4 @@
[package]
name = "regex"
version = "0.1.15"
authors = ["The Rust Project Developers"]
@ -13,12 +12,14 @@ An implementation of regular expressions for Rust.
"""
[[test]]
path = "tests/mod.rs"
path = "regex_macros/tests/test_dynamic.rs"
name = "all"
[[bench]]
name = "all"
path = "regex_macros/benches/bench_dynamic.rs"
test = false
bench = true
[dev-dependencies]
rand = "0.1"
[dev-dependencies.regex_macros]
path = "regex_macros"
version = "0.1.0"

View File

@ -1,5 +1,4 @@
[package]
name = "regex_macros"
version = "0.1.8"
authors = ["The Rust Project Developers"]
@ -14,6 +13,23 @@ An implementation of statically compiled regular expressions for Rust.
name = "regex_macros"
plugin = true
[[test]]
path = "tests/test_native.rs"
name = "all"
[[test]]
path = "benches/shootout-regex-dna.rs"
name = "shootout-regex-dna"
[[bench]]
name = "all"
path = "benches/bench_native.rs"
test = false
bench = true
[dependencies.regex]
path = ".."
version = "0.1.0"
[dev-dependencies]
rand = "0.1"

View File

@ -10,8 +10,7 @@
#![allow(non_snake_case)]
use std::iter::repeat;
use stdtest::Bencher;
use test::Bencher;
use rand::{Rng, thread_rng};
use regex::{Regex, NoExpand};

View File

@ -8,19 +8,11 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![feature(core, plugin, test, collections)]
#![plugin(regex_macros)]
#![feature(core, test)]
extern crate rand;
extern crate regex;
extern crate "test" as stdtest;
#[path = "bench.rs"]
mod native_bench;
#[path = "tests.rs"]
mod native_tests;
mod native_static;
extern crate test;
// Due to macro scoping rules, this definition only applies for the modules
// defined below. Effectively, it allows us to use the same tests for both
@ -34,8 +26,4 @@ macro_rules! regex(
);
);
#[path = "bench.rs"]
mod dynamic_bench;
#[path = "tests.rs"]
mod dynamic_tests;
mod bench;

View File

@ -0,0 +1,18 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![feature(core, plugin, test)]
#![plugin(regex_macros)]
extern crate rand;
extern crate regex;
extern crate test;
mod bench;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,132 @@
// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/
//
// contributed by the Rust Project Developers
// Copyright (c) 2014 The Rust Project Developers
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// - Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in
// the documentation and/or other materials provided with the
// distribution.
//
// - Neither the name of "The Computer Language Benchmarks Game" nor
// the name of "The Computer Language Shootout Benchmarks" nor the
// names of its contributors may be used to endorse or promote
// products derived from this software without specific prior
// written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
// OF THE POSSIBILITY OF SUCH DAMAGE.
#![feature(old_io, plugin, std_misc)]
#![plugin(regex_macros)]
extern crate regex;
use std::old_io as io;
use std::sync::{Arc, Future};
use regex::{NoExpand, Regex};
/// Runs the benchmark over the bundled `regexdna-input.txt` and compares the
/// joined report with the known-good output.
#[test]
fn check() {
    // Expected report: one "<pattern> <count>" line per variant, an empty
    // separator line, then the three lengths in the order `run` pushes them.
    // The empty line is required because `run` pushes an empty string between
    // the counts and the lengths before the lines are joined with "\n".
    static ANSWER: &'static str = "\
agggtaaa|tttaccct 0
[cgt]gggtaaa|tttaccc[acg] 3
a[act]ggtaaa|tttacc[agt]t 9
ag[act]gtaaa|tttac[agt]ct 8
agg[act]taaa|ttta[agt]cct 10
aggg[acg]aaa|ttt[cgt]ccct 3
agggt[cgt]aa|tt[acg]accct 4
agggta[cgt]a|t[acg]taccct 3
agggtaa[cgt]|[acg]ttaccct 5

101745
100000
133640";
    static SEQ: &'static str = include_str!("regexdna-input.txt");
    let got = run(SEQ.to_string()).connect("\n");
    assert_eq!(ANSWER, got);
}
/// Benchmark entry point: reads the whole sequence from standard input and
/// prints the lines returned by `run`, separated by newlines.
// NOTE(review): `dead_code` is presumably allowed because `main` is unused
// when this file is built as a test target -- confirm.
#[allow(dead_code)]
fn main() {
    let input = io::stdin().read_to_string().unwrap();
    let report = run(input).connect("\n");
    println!("{}", report);
}
/// Builds the regex-dna report for `seq` and returns it as a list of lines.
///
/// The returned vector holds one `"<pattern> <count>"` line per variant
/// pattern, then an empty line, then three lengths: the input as given, the
/// input after the first replacement, and the input after all substitutions.
fn run(mut seq: String) -> Vec<String> {
// Length of the input exactly as it was passed in.
let ilen = seq.len();
// Delete every `>`-to-end-of-line span and every remaining newline.
seq = regex!(">[^\n]*\n|\n").replace_all(&seq, NoExpand(""));
let seq_arc = Arc::new(seq.clone()); // copy before it moves
// Length after the deletion above.
let clen = seq.len();
// `seq` is moved into this closure; the variant searches below use the
// `Arc` copy taken above instead.
let mut seqlen = Future::spawn(move|| {
// Each single-letter code is replaced by the alternation listed next to it.
let substs = vec![
(regex!("B"), "(c|g|t)"),
(regex!("D"), "(a|g|t)"),
(regex!("H"), "(a|c|t)"),
(regex!("K"), "(g|t)"),
(regex!("M"), "(a|c)"),
(regex!("N"), "(a|c|g|t)"),
(regex!("R"), "(a|g)"),
(regex!("S"), "(c|g)"),
(regex!("V"), "(a|c|g)"),
(regex!("W"), "(a|t)"),
(regex!("Y"), "(c|t)"),
];
let mut seq = seq;
// The substitutions are applied one after another, in list order.
for (re, replacement) in substs.into_iter() {
seq = re.replace_all(&seq, NoExpand(replacement));
}
// The future's value is the length after all substitutions.
seq.len()
});
// Patterns whose matches are counted in the stripped sequence.
let variants = vec![
regex!("agggtaaa|tttaccct"),
regex!("[cgt]gggtaaa|tttaccc[acg]"),
regex!("a[act]ggtaaa|tttacc[agt]t"),
regex!("ag[act]gtaaa|tttac[agt]ct"),
regex!("agg[act]taaa|ttta[agt]cct"),
regex!("aggg[acg]aaa|ttt[cgt]ccct"),
regex!("agggt[cgt]aa|tt[acg]accct"),
regex!("agggta[cgt]a|t[acg]taccct"),
regex!("agggtaa[cgt]|[acg]ttaccct"),
];
// `variant_strs` and `counts` are pushed in lockstep, so index `i` in one
// corresponds to index `i` in the other.
let (mut variant_strs, mut counts) = (vec!(), vec!());
for variant in variants.into_iter() {
let seq_arc_copy = seq_arc.clone();
// Record the pattern text before `variant` is moved into the closure.
variant_strs.push(variant.to_string());
counts.push(Future::spawn(move|| {
variant.find_iter(&seq_arc_copy).count()
}));
}
// Assemble the report; `.get()` retrieves each future's result.
let mut olines = Vec::new();
for (i, variant) in variant_strs.iter().enumerate() {
olines.push(format!("{} {}", variant, counts[i].get()));
}
// Empty entry separating the counts from the three lengths.
olines.push("".to_string());
olines.push(format!("{}", ilen));
olines.push(format!("{}", clen));
olines.push(format!("{}", seqlen.get()));
olines
}

View File

@ -8,10 +8,8 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// ignore-tidy-linelength
// DO NOT EDIT. Automatically generated by 'src/etc/regex-match-tests'
// on 2014-04-23 01:33:36.539280.
// DO NOT EDIT. Automatically generated by 'scripts/regex-match-tests.py'
// on 2015-02-28 11:00:00.161706.
// Tests from basic.dat
mat!(match_basic_3, r"abracadabra$", r"abracadabracadabra", Some((7, 18)));

View File

@ -0,0 +1,28 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![feature(collections, core, test)]
extern crate regex;
extern crate test;
// Due to macro scoping rules, this definition only applies for the modules
// defined below. Effectively, it allows us to use the same tests for both
// native and dynamic regexes.
// Compiles `$re` at runtime through `Regex::new`; if the expression is
// rejected, panics with the error's `Display` output.
macro_rules! regex {
    ($re:expr) => {
        ::regex::Regex::new($re).unwrap_or_else(|err| panic!("{}", err))
    };
}
mod tests;

View File

@ -0,0 +1,18 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![feature(collections, plugin, test)]
#![plugin(regex_macros)]
extern crate regex;
extern crate test;
mod tests;
mod native_static;

View File

@ -270,4 +270,5 @@ mat!(uni_boundary_ogham, r"\d\b", "6", Some((0, 1)));
// A whole mess of tests from Glenn Fowler's regex test suite.
// Generated by the 'scripts/regex-match-tests.py' program.
#[path = "matches.rs"]
mod matches;

107
scripts/regex-match-tests.py Executable file
View File

@ -0,0 +1,107 @@
#!/usr/bin/env python2
# Copyright 2014 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
from __future__ import absolute_import, division, print_function
import argparse
import datetime
import os.path as path
def print_tests(tests):
    """Print every test tuple as one generated `mat!` line."""
    rendered = map(test_tostr, tests)
    print('\n'.join(rendered))
def read_tests(f):
    """Parse one AT&T POSIX test file into a list of test tuples.

    Each line is split on tabs and empty fields are dropped. Lines are
    skipped unless they have 4 or 5 fields, the first field contains 'E'
    and it does not start with '#'.

    Returns a list of `(name, pattern, text, groups)` tuples where `name`
    is `<file basename>_<line number>` and `groups` is a list whose items
    are `(start, end)` integer pairs or `None`.
    """
    basename, _ = path.splitext(path.basename(f))
    tests = []
    # `with` closes the file even when a malformed line raises.
    with open(f) as handle:
        for lineno, line in enumerate(handle, 1):
            # A real list, so `len()` works even where `filter` is lazy.
            fields = [s for s in (part.strip() for part in line.split('\t'))
                      if s]
            if not (4 <= len(fields) <= 5) \
                    or 'E' not in fields[0] or fields[0][0] == '#':
                continue

            opts, pat, text, sgroups = fields[0:4]
            groups = []  # groups as integer ranges
            if sgroups == 'NOMATCH':
                groups = [None]
            elif ',' in sgroups:
                # "(a,b)(c,d)" -> ["a,b", "c,d"]
                noparen = map(lambda s: s.strip('()'), sgroups.split(')('))
                for g in noparen:
                    s, e = map(str.strip, g.split(','))
                    if s == '?' and e == '?':
                        groups.append(None)
                    else:
                        groups.append((int(s), int(e)))
            else:
                # This skips tests that should result in an error.
                # There aren't many, so I think we can just capture those
                # manually. Possibly fix this in future.
                continue

            # 'SAME' reuses the pattern of the most recently kept test.
            if pat == 'SAME':
                pat = tests[-1][1]
            if '$' in opts:
                # `string_escape` is a Python 2 codec; the script's shebang
                # selects python2.
                pat = pat.decode('string_escape')
                text = text.decode('string_escape')
            if 'i' in opts:
                pat = '(?i)%s' % pat

            name = '%s_%d' % (basename, lineno)
            tests.append((name, pat, text, groups))
    return tests
def test_tostr(t):
    """Render one (name, pattern, text, groups) tuple as a `mat!` line.

    A text of "NULL" is written out as the empty string.
    """
    name, pat, text, groups = t
    haystack = '' if text == "NULL" else text
    expected = ', '.join(group_tostr(g) for g in groups)
    return 'mat!(match_%s, r"%s", r"%s", %s);' \
        % (name, pat, haystack, expected)
def group_tostr(g):
    """Format one group as Rust source: `None` or `Some((start, end))`."""
    return 'None' if g is None else 'Some((%d, %d))' % (g[0], g[1])
if __name__ == '__main__':
    # Command line: one or more test files to convert.
    parser = argparse.ArgumentParser(
        description='Generate match tests from an AT&T POSIX test file.')
    parser.add_argument(
        'files', nargs='+',
        help='A list of dat AT&T POSIX test files. See src/testdata')
    args = parser.parse_args()

    # Parse every file exactly once, before anything is printed, so a bad
    # input fails before any output is written.
    parsed = [(f, read_tests(f)) for f in args.files]

    tpl = '''// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// DO NOT EDIT. Automatically generated by 'scripts/regex-match-tests.py'
// on {date}.
'''
    print(tpl.format(date=str(datetime.datetime.now())))

    # One section per input file, each followed by an empty line.
    for f, tests in parsed:
        print('// Tests from %s' % path.basename(f))
        print_tests(tests)
        print('')

View File

@ -7,8 +7,6 @@
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//
// ignore-lexer-test FIXME #15679
//! This crate provides a native implementation of regular expressions that is
//! heavily based on RE2 both in syntax and in implementation. Notably,
@ -21,6 +19,9 @@
//! support and exhaustively lists the supported syntax. For more specific
//! details on the API, please see the documentation for the `Regex` type.
//!
//! This crate is [on crates.io](https://crates.io/crates/regex) and can be
//! used by adding `regex` to your dependencies in your project's `Cargo.toml`.
//!
//! # First example: find a date
//!
//! General use of regular expressions in this package involves compiling an
@ -57,16 +58,10 @@
//! given expression to native Rust code, which makes it much faster for
//! searching text.
//!
//! Since `regex!` provides compiled regular expressions that are both safer
//! and faster to use, you should use them whenever possible. The only
//! requirement for using them is that you have a string literal corresponding
//! to your expression. Otherwise, it is indistinguishable from an expression
//! compiled at runtime with `Regex::new`.
//!
//! To use the `regex!` macro, you must enable the `phase` feature and import
//! To use the `regex!` macro, you must enable the `plugin` feature and import
//! the `regex_macros` crate as a syntax extension:
//!
//! ```rust
//! ```ignore
//! #![feature(plugin)]
//! #![plugin(regex_macros)]
//! extern crate regex;
@ -88,6 +83,21 @@
//! expressions, but 100+ calls to `regex!` will probably result in a
//! noticeably bigger binary.
//!
//! **NOTE**: This is implemented using a compiler plugin, which will not be
//! available on the Rust 1.0 beta/stable channels. Therefore, you'll only
//! be able to use `regex!` on the nightlies. If you want to retain the
//! `regex!` macro, you can cheat and define this:
//!
//! ```rust
//! macro_rules! regex(
//! ($s:expr) => (regex::Regex::new($s).unwrap());
//! );
//! ```
//!
//! But this just replaces native regexes with dynamic regexes under the hood.
//! Moreover, this will cause your program to panic *at runtime* if an invalid
//! regular expression is given.
//!
//! # Example: iterating over capture groups
//!
//! This crate provides convenient iterators for matching an expression
@ -96,10 +106,9 @@
//! them by their component pieces:
//!
//! ```rust
//! # #![feature(plugin)] #![plugin(regex_macros)]
//! # extern crate regex;
//! # extern crate regex; use regex::Regex;
//! # fn main() {
//! let re = regex!(r"(\d{4})-(\d{2})-(\d{2})");
//! let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
//! let text = "2012-03-14, 2013-01-01 and 2014-07-05";
//! for cap in re.captures_iter(text) {
//! println!("Month: {} Day: {} Year: {}",
@ -124,10 +133,9 @@
//! in our replacement text:
//!
//! ```rust
//! # #![feature(plugin)] #![plugin(regex_macros)]
//! # extern crate regex;
//! # extern crate regex; use regex::Regex;
//! # fn main() {
//! let re = regex!(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})");
//! let re = Regex::new(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})").unwrap();
//! let before = "2012-03-14, 2013-01-01 and 2014-07-05";
//! let after = re.replace_all(before, "$m/$d/$y");
//! assert_eq!(after.as_slice(), "03/14/2012, 01/01/2013 and 07/05/2014");
@ -171,10 +179,9 @@
//! directly in your expression:
//!
//! ```rust
//! # #![feature(plugin)] #![plugin(regex_macros)]
//! # extern crate regex;
//! # extern crate regex; use regex::Regex;
//! # fn main() {
//! let re = regex!(r"(?i)Δ+");
//! let re = Regex::new(r"(?i)Δ+").unwrap();
//! assert_eq!(re.find("ΔδΔ"), Some((0, 6)));
//! # }
//! ```
@ -184,10 +191,9 @@
//! Cherokee letters:
//!
//! ```rust
//! # #![feature(plugin)] #![plugin(regex_macros)]
//! # extern crate regex;
//! # extern crate regex; use regex::Regex;
//! # fn main() {
//! let re = regex!(r"[\pN\p{Greek}\p{Cherokee}]+");
//! let re = Regex::new(r"[\pN\p{Greek}\p{Cherokee}]+").unwrap();
//! assert_eq!(re.find("abcΔβγδⅡxyz"), Some((3, 23)));
//! # }
//! ```
@ -281,10 +287,9 @@
//! expression:
//!
//! ```rust
//! # #![feature(plugin)] #![plugin(regex_macros)]
//! # extern crate regex;
//! # extern crate regex; use regex::Regex;
//! # fn main() {
//! let re = regex!(r"(?i)a+(?-i)b+");
//! let re = Regex::new(r"(?i)a+(?-i)b+").unwrap();
//! let cap = re.captures("AaAaAbbBBBb").unwrap();
//! assert_eq!(cap.at(0), Some("AaAaAbb"));
//! # }
@ -401,8 +406,10 @@ pub mod native {
// On the bright side, `rustdoc` lets us hide this from the public API
// documentation.
pub use compile::Program;
pub use compile::Inst::{Match, OneChar, CharClass, Any, EmptyBegin, EmptyEnd,
EmptyWordBoundary, Save, Jump, Split};
pub use compile::Inst::{
Match, OneChar, CharClass, Any, EmptyBegin, EmptyEnd,
EmptyWordBoundary, Save, Jump, Split,
};
pub use parse::{
FLAG_EMPTY, FLAG_NOCASE, FLAG_MULTI, FLAG_DOTNL,
FLAG_SWAP_GREED, FLAG_NEGATED,
@ -411,5 +418,7 @@ pub mod native {
pub use re::Regex::{Dynamic, Native};
pub use vm::{CharReader, find_prefix};
pub use vm::MatchKind::{self, Exists, Location, Submatches};
pub use vm::StepState::{self, StepMatchEarlyReturn, StepMatch, StepContinue};
pub use vm::StepState::{
self, StepMatchEarlyReturn, StepMatch, StepContinue,
};
}

View File

@ -8,11 +8,6 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use self::Ast::*;
use self::Repeater::*;
use self::Greed::*;
use self::BuildAst::*;
use std::char;
use std::cmp;
use std::fmt;
@ -22,6 +17,11 @@ use std::num;
/// Static data containing Unicode ranges for general categories and scripts.
use unicode::regex::{UNICODE_CLASSES, PERLD, PERLS, PERLW};
use self::Ast::*;
use self::Repeater::*;
use self::Greed::*;
use self::BuildAst::*;
/// The maximum number of repetitions allowed with the `{n,m}` syntax.
static MAX_REPEAT: usize = 1000;

View File

@ -44,9 +44,6 @@ pub fn quote(text: &str) -> String {
///
/// To find submatches, split or replace text, you'll need to compile an
/// expression first.
///
/// Note that you should prefer the `regex!` macro when possible. For example,
/// `regex!("...").is_match("...")`.
pub fn is_match(regex: &str, text: &str) -> Result<bool, parse::Error> {
    // Compile first; a parse failure is handed back to the caller unchanged.
    match Regex::new(regex) {
        Ok(compiled) => Ok(compiled.is_match(text)),
        Err(err) => Err(err),
    }
}
@ -78,33 +75,9 @@ pub fn is_match(regex: &str, text: &str) -> Result<bool, parse::Error> {
///
/// ```rust
/// # use regex::Regex;
/// let re = match Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}") {
/// Ok(re) => re,
/// Err(err) => panic!("{}", err),
/// };
/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap();
/// assert_eq!(re.find("phone: 111-222-3333"), Some((7, 19)));
/// ```
///
/// You can also use the `regex!` macro to compile a regular expression when
/// you compile your program:
///
/// ```rust
/// #![feature(plugin)]
/// #![plugin(regex_macros)]
/// extern crate regex;
///
/// fn main() {
/// let re = regex!(r"\d+");
/// assert_eq!(re.find("123 abc"), Some((0, 3)));
/// }
/// ```
///
/// Given an incorrect regular expression, `regex!` will cause the Rust
/// compiler to produce a compile time error.
/// Note that `regex!` will compile the expression to native Rust code, which
/// makes it much faster when searching text.
/// More details about the `regex!` macro can be found in the `regex` crate
/// documentation.
#[derive(Clone)]
pub enum Regex {
// The representation of `Regex` is exported to support the `regex!`
@ -163,9 +136,6 @@ impl Regex {
/// Compiles a dynamic regular expression. Once compiled, it can be
/// used repeatedly to search, split or replace text in a string.
///
/// When possible, you should prefer the `regex!` macro since it is
/// safer and always faster.
///
/// If an invalid expression is given, then an error is returned.
pub fn new(re: &str) -> Result<Regex, parse::Error> {
let ast = try!(parse::parse(re));
@ -185,11 +155,10 @@ impl Regex {
/// characters:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let text = "I categorically deny having triskaidekaphobia.";
/// let matched = regex!(r"\b\w{13}\b").is_match(text);
/// let matched = Regex::new(r"\b\w{13}\b").unwrap().is_match(text);
/// assert!(matched);
/// # }
/// ```
@ -210,11 +179,10 @@ impl Regex {
/// characters:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let text = "I categorically deny having triskaidekaphobia.";
/// let pos = regex!(r"\b\w{13}\b").find(text);
/// let pos = Regex::new(r"\b\w{13}\b").unwrap().find(text);
/// assert_eq!(pos, Some((2, 15)));
/// # }
/// ```
@ -237,11 +205,10 @@ impl Regex {
/// characters:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let text = "Retroactively relinquishing remunerations is reprehensible.";
/// for pos in regex!(r"\b\w{13}\b").find_iter(text) {
/// for pos in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) {
/// println!("{:?}", pos);
/// }
/// // Output:
@ -276,10 +243,9 @@ impl Regex {
/// year separately.
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = regex!(r"'([^']+)'\s+\((\d{4})\)");
/// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
/// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
/// let caps = re.captures(text).unwrap();
/// assert_eq!(caps.at(1), Some("Citizen Kane"));
@ -294,10 +260,10 @@ impl Regex {
/// We can make this example a bit clearer by using *named* capture groups:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = regex!(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
/// .unwrap();
/// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
/// let caps = re.captures(text).unwrap();
/// assert_eq!(caps.name("title"), Some("Citizen Kane"));
@ -327,10 +293,10 @@ impl Regex {
/// some text, where the movie is formatted like "'Title' (xxxx)":
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = regex!(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
/// .unwrap();
/// let text = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
/// for caps in re.captures_iter(text) {
/// println!("Movie: {:?}, Released: {:?}", caps.name("title"), caps.name("year"));
@ -363,10 +329,9 @@ impl Regex {
/// To split a string delimited by arbitrary amounts of spaces or tabs:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = regex!(r"[ \t]+");
/// let re = Regex::new(r"[ \t]+").unwrap();
/// let fields: Vec<&str> = re.split("a b \t c\td e").collect();
/// assert_eq!(fields, vec!("a", "b", "c", "d", "e"));
/// # }
@ -393,10 +358,9 @@ impl Regex {
/// Get the first two words in some text:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = regex!(r"\W+");
/// let re = Regex::new(r"\W+").unwrap();
/// let fields: Vec<&str> = re.splitn("Hey! How are you?", 3).collect();
/// assert_eq!(fields, vec!("Hey", "How", "are you?"));
/// # }
@ -423,10 +387,9 @@ impl Regex {
/// In typical usage, this can just be a normal string:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = regex!("[^01]+");
/// let re = Regex::new("[^01]+").unwrap();
/// assert_eq!(re.replace("1078910", ""), "1010");
/// # }
/// ```
@ -437,10 +400,9 @@ impl Regex {
/// submatches easily:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # use regex::Captures; fn main() {
/// let re = regex!(r"([^,\s]+),\s+(\S+)");
/// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
/// let result = re.replace("Springsteen, Bruce", |caps: &Captures| {
/// format!("{} {}", caps.at(2).unwrap_or(""), caps.at(1).unwrap_or(""))
/// });
@ -454,10 +416,9 @@ impl Regex {
/// with named capture groups:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = regex!(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)");
/// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
/// let result = re.replace("Springsteen, Bruce", "$first $last");
/// assert_eq!(result, "Bruce Springsteen");
/// # }
@ -471,12 +432,11 @@ impl Regex {
/// `NoExpand`:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// use regex::NoExpand;
///
/// let re = regex!(r"(?P<last>[^,\s]+),\s+(\S+)");
/// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap();
/// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last"));
/// assert_eq!(result, "$2 $last");
/// # }

14
src/testdata/README vendored
View File

@ -1,4 +1,4 @@
Test data was taken from the Go distribution, which was in turn taken from the
Test data was taken from the Go distribution, which was in turn taken from the
testregex test suite:
http://www2.research.att.com/~astopen/testregex/testregex.html
@ -6,12 +6,12 @@ testregex test suite:
The LICENSE in this directory corresponds to the LICENSE that the data was
released under.
The tests themselves were modified for RE2/Go. A couple were modified further
by me (Andrew Gallant) (only in repetition.dat) so that RE2/Go would pass them.
(Yes, it seems like RE2/Go includes failing test cases.) This may or may not
have been a bad idea, but I think being consistent with an established Regex
The tests themselves were modified for RE2/Go. A couple were modified further
by me (Andrew Gallant) (only in repetition.dat) so that RE2/Go would pass them.
(Yes, it seems like RE2/Go includes failing test cases.) This may or may not
have been a bad idea, but I think being consistent with an established Regex
library is worth something.
Note that these files are read by 'src/etc/regexp-match-tests' and turned into
Rust tests found in 'src/libregexp/tests/matches.rs'.
Note that these files are read by 'scripts/regex-match-tests.py' and turned
into Rust tests found in 'regex_macros/tests/matches.rs'.