Divorce regex_macros from regex.

Fixes #31 and #33.

There are a number of related changes in this commit:

1. A script that generates the 'match' tests has been reintroduced.
2. The regex-dna shootout benchmark has been updated.
3. Running `cargo test` on the `regex` crate does not require
   `regex_macros`.
4. The documentation has been updated to use `Regex::new(...).unwrap()`
   instead of `regex!`. The emphasis on using `regex!` has been reduced,
   and a note about its unavailability in Rust 1.0 beta/stable has been
   added.
5. Updated Travis to test both `regex` and `regex_macros`.
This commit is contained in:
Andrew Gallant 2015-02-28 14:15:36 -05:00
parent 8b44176a91
commit 2d0e77a457
18 changed files with 2084 additions and 135 deletions

View File

@ -4,6 +4,9 @@ script:
- cargo build --verbose
- cargo test --verbose
- cargo doc
- cargo bench --verbose
- cargo test --verbose --manifest-path=regex_macros/Cargo.toml
- cargo bench --verbose --manifest-path=regex_macros/Cargo.toml
after_success: |
[ $TRAVIS_BRANCH = master ] &&
[ $TRAVIS_PULL_REQUEST = false ] &&

View File

@ -1,5 +1,4 @@
[package]
name = "regex"
version = "0.1.15"
authors = ["The Rust Project Developers"]
@ -13,12 +12,14 @@ An implementation of regular expressions for Rust.
"""
[[test]]
path = "tests/mod.rs"
path = "regex_macros/tests/test_dynamic.rs"
name = "all"
[[bench]]
name = "all"
path = "regex_macros/benches/bench_dynamic.rs"
test = false
bench = true
[dev-dependencies]
rand = "0.1"
[dev-dependencies.regex_macros]
path = "regex_macros"
version = "0.1.0"

View File

@ -1,5 +1,4 @@
[package]
name = "regex_macros"
version = "0.1.8"
authors = ["The Rust Project Developers"]
@ -14,6 +13,23 @@ An implementation of statically compiled regular expressions for Rust.
name = "regex_macros"
plugin = true
[[test]]
path = "tests/test_native.rs"
name = "all"
[[test]]
path = "benches/shootout-regex-dna.rs"
name = "shootout-regex-dna"
[[bench]]
name = "all"
path = "benches/bench_native.rs"
test = false
bench = true
[dependencies.regex]
path = ".."
version = "0.1.0"
[dev-dependencies]
rand = "0.1"

View File

@ -10,8 +10,7 @@
#![allow(non_snake_case)]
use std::iter::repeat;
use stdtest::Bencher;
use test::Bencher;
use rand::{Rng, thread_rng};
use regex::{Regex, NoExpand};

View File

@ -8,19 +8,11 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![feature(core, plugin, test, collections)]
#![plugin(regex_macros)]
#![feature(core, test)]
extern crate rand;
extern crate regex;
extern crate "test" as stdtest;
#[path = "bench.rs"]
mod native_bench;
#[path = "tests.rs"]
mod native_tests;
mod native_static;
extern crate test;
// Due to macro scoping rules, this definition only applies for the modules
// defined below. Effectively, it allows us to use the same tests for both
@ -34,8 +26,4 @@ macro_rules! regex(
);
);
#[path = "bench.rs"]
mod dynamic_bench;
#[path = "tests.rs"]
mod dynamic_tests;
mod bench;

View File

@ -0,0 +1,18 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![feature(core, plugin, test)]
#![plugin(regex_macros)]
extern crate rand;
extern crate regex;
extern crate test;
mod bench;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,132 @@
// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/
//
// contributed by the Rust Project Developers
// Copyright (c) 2014 The Rust Project Developers
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// - Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in
// the documentation and/or other materials provided with the
// distribution.
//
// - Neither the name of "The Computer Language Benchmarks Game" nor
// the name of "The Computer Language Shootout Benchmarks" nor the
// names of its contributors may be used to endorse or promote
// products derived from this software without specific prior
// written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
// OF THE POSSIBILITY OF SUCH DAMAGE.
#![feature(old_io, plugin, std_misc)]
#![plugin(regex_macros)]
extern crate regex;
use std::old_io as io;
use std::sync::{Arc, Future};
use regex::{NoExpand, Regex};
/// Runs the benchmark over the bundled input file and compares the
/// rendered report against the known-good output.
#[test]
fn check() {
    // NOTE: the lines of this literal must stay at column 0. Only the
    // whitespace directly after the leading `\` is skipped; indentation on
    // any later line would become part of the expected text.
    //
    // The empty line before the three totals is required: `run` pushes an
    // empty string between the per-variant counts and the lengths.
    static ANSWER: &'static str = "\
agggtaaa|tttaccct 0
[cgt]gggtaaa|tttaccc[acg] 3
a[act]ggtaaa|tttacc[agt]t 9
ag[act]gtaaa|tttac[agt]ct 8
agg[act]taaa|ttta[agt]cct 10
aggg[acg]aaa|ttt[cgt]ccct 3
agggt[cgt]aa|tt[acg]accct 4
agggta[cgt]a|t[acg]taccct 3
agggtaa[cgt]|[acg]ttaccct 5

101745
100000
133640";
    static SEQ: &'static str = include_str!("regexdna-input.txt");
    let got = run(SEQ.to_string()).connect("\n");
    assert_eq!(ANSWER, got);
}
/// Benchmark entry point: reads all of stdin, runs the benchmark on it and
/// prints the resulting report lines separated by newlines.
// `dead_code` is allowed, presumably because this file is also built as a
// test target, where `main` goes unused.
#[allow(dead_code)]
fn main() {
    let input = io::stdin().read_to_string().unwrap();
    let report = run(input).connect("\n");
    println!("{}", report);
}
/// Runs the regex-dna benchmark over `seq` and returns the report lines.
///
/// The report holds one `"<pattern> <match count>"` line per variant, then
/// an empty line, then three lengths (in bytes): the raw input, the input
/// after cleanup, and the input after every substitution has been applied.
fn run(mut seq: String) -> Vec<String> {
    let ilen = seq.len();

    // Cleanup pass: drop every `>`-prefixed line and every newline.
    seq = regex!(">[^\n]*\n|\n").replace_all(&seq, NoExpand(""));
    let clen = seq.len();

    // The variant counters below share one read-only copy of the cleaned
    // sequence. It has to be taken here, because `seq` itself is moved into
    // the substitution task.
    let shared = Arc::new(seq.clone());

    // Apply the substitutions in the background; only the final length is
    // reported.
    let mut seqlen = Future::spawn(move|| {
        let substs = vec![
            (regex!("B"), "(c|g|t)"),
            (regex!("D"), "(a|g|t)"),
            (regex!("H"), "(a|c|t)"),
            (regex!("K"), "(g|t)"),
            (regex!("M"), "(a|c)"),
            (regex!("N"), "(a|c|g|t)"),
            (regex!("R"), "(a|g)"),
            (regex!("S"), "(c|g)"),
            (regex!("V"), "(a|c|g)"),
            (regex!("W"), "(a|t)"),
            (regex!("Y"), "(c|t)"),
        ];
        let mut expanded = seq;
        for (re, replacement) in substs.into_iter() {
            expanded = re.replace_all(&expanded, NoExpand(replacement));
        }
        expanded.len()
    });

    let variants = vec![
        regex!("agggtaaa|tttaccct"),
        regex!("[cgt]gggtaaa|tttaccc[acg]"),
        regex!("a[act]ggtaaa|tttacc[agt]t"),
        regex!("ag[act]gtaaa|tttac[agt]ct"),
        regex!("agg[act]taaa|ttta[agt]cct"),
        regex!("aggg[acg]aaa|ttt[cgt]ccct"),
        regex!("agggt[cgt]aa|tt[acg]accct"),
        regex!("agggta[cgt]a|t[acg]taccct"),
        regex!("agggtaa[cgt]|[acg]ttaccct"),
    ];

    // One counting task per variant. The pattern text is recorded first,
    // since the regex itself moves into its task.
    let mut variant_strs = Vec::new();
    let mut counts = Vec::new();
    for variant in variants.into_iter() {
        let haystack = shared.clone();
        variant_strs.push(variant.to_string());
        counts.push(Future::spawn(move|| {
            variant.find_iter(&haystack).count()
        }));
    }

    // Collect results in the order the variants were declared.
    let mut olines = Vec::new();
    for (variant, count) in variant_strs.iter().zip(counts.iter_mut()) {
        olines.push(format!("{} {}", variant, count.get()));
    }
    olines.push("".to_string());
    olines.push(format!("{}", ilen));
    olines.push(format!("{}", clen));
    olines.push(format!("{}", seqlen.get()));
    olines
}

View File

@ -8,10 +8,8 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// ignore-tidy-linelength
// DO NOT EDIT. Automatically generated by 'src/etc/regex-match-tests'
// on 2014-04-23 01:33:36.539280.
// DO NOT EDIT. Automatically generated by 'scripts/regex-match-tests.py'
// on 2015-02-28 11:00:00.161706.
// Tests from basic.dat
mat!(match_basic_3, r"abracadabra$", r"abracadabracadabra", Some((7, 18)));

View File

@ -0,0 +1,28 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![feature(collections, core, test)]
extern crate regex;
extern crate test;
// Due to macro scoping rules, this definition only applies for the modules
// defined below. Effectively, it allows us to use the same tests for both
// native and dynamic regexes.
// Stand-in for the `regex!` plugin macro: compiles the expression at run
// time with `Regex::new` and panics with the parse error's message when the
// expression is invalid.
macro_rules! regex {
    ($pattern:expr) => {
        match ::regex::Regex::new($pattern) {
            Err(problem) => panic!("{}", problem),
            Ok(compiled) => compiled,
        }
    };
}
mod tests;

View File

@ -0,0 +1,18 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![feature(collections, plugin, test)]
#![plugin(regex_macros)]
extern crate regex;
extern crate test;
mod tests;
mod native_static;

View File

@ -270,4 +270,5 @@ mat!(uni_boundary_ogham, r"\d\b", "6", Some((0, 1)));
// A whole mess of tests from Glenn Fowler's regex test suite.
// Generated by the 'scripts/regex-match-tests.py' program.
#[path = "matches.rs"]
mod matches;

107
scripts/regex-match-tests.py Executable file
View File

@ -0,0 +1,107 @@
#!/usr/bin/env python2
# Copyright 2014 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
from __future__ import absolute_import, division, print_function
import argparse
import datetime
import os.path as path
def print_tests(tests):
    """Print every test in `tests` as one `mat!(...)` line on stdout."""
    rendered = []
    for t in tests:
        rendered.append(test_tostr(t))
    print('\n'.join(rendered))
def read_tests(f):
    """Parse the test file at path `f` into a list of test tuples.

    Each returned item is `(name, pattern, text, groups)`:

    * `name` is `<file basename without extension>_<1-based line number>`.
    * `groups` is a list whose items are either `None` (no match, or an
      unmatched group written as `(?,?)`) or a `(start, end)` tuple of ints.

    A line is skipped when it does not have 4 or 5 non-empty tab-separated
    fields, when its first field lacks an `E`, when it starts with `#`, or
    when its expected result is neither `NOMATCH` nor a list of ranges.
    """
    basename, _ = path.splitext(path.basename(f))
    tests = []
    # The context manager closes the file deterministically instead of
    # leaving it to the garbage collector.
    with open(f) as fh:
        for lineno, line in enumerate(fh, 1):
            # Build a real list: `len()` and slicing are applied below.
            fields = [s for s in (p.strip() for p in line.split('\t')) if s]
            if not (4 <= len(fields) <= 5) \
               or 'E' not in fields[0] or fields[0][0] == '#':
                continue

            opts, pat, text, sgroups = fields[0:4]
            groups = []  # groups as integer ranges
            if sgroups == 'NOMATCH':
                groups = [None]
            elif ',' in sgroups:
                # "(0,1)(?,?)" -> ["0,1", "?,?"]
                noparen = [s.strip('()') for s in sgroups.split(')(')]
                for g in noparen:
                    s, e = [x.strip() for x in g.split(',')]
                    if s == '?' and e == '?':
                        groups.append(None)
                    else:
                        groups.append((int(s), int(e)))
            else:
                # This skips tests that should result in an error.
                # There aren't many, so I think we can just capture those
                # manually. Possibly fix this in future.
                continue

            if pat == 'SAME':
                # Reuse the pattern of the most recently *kept* test.
                # NOTE(review): that pattern is already post-processed (it
                # may carry a `(?i)` prefix), and this raises IndexError if
                # no test has been kept yet -- confirm the inputs never
                # depend on either.
                pat = tests[-1][1]
            if '$' in opts:
                # NOTE: the 'string_escape' codec exists on Python 2 only,
                # matching the script's `python2` shebang.
                pat = pat.decode('string_escape')
                text = text.decode('string_escape')
            if 'i' in opts:
                pat = '(?i)%s' % pat

            name = '%s_%d' % (basename, lineno)
            tests.append((name, pat, text, groups))
    return tests
def test_tostr(t):
    """Render one `(name, pattern, text, groups)` tuple as a `mat!` line.

    A text of exactly "NULL" is emitted as the empty string.
    """
    name, pattern, haystack, groups = t
    if haystack == "NULL":
        haystack = ''
    expected = ', '.join(map(group_tostr, groups))
    return 'mat!(match_%s, r"%s", r"%s", %s);' \
        % (name, pattern, haystack, expected)
def group_tostr(g):
    """Render a capture group as Rust `Option` syntax.

    `None` becomes `None`; a `(start, end)` pair becomes
    `Some((start, end))`.
    """
    if g is not None:
        start, end = g[0], g[1]
        return 'Some((%d, %d))' % (start, end)
    return 'None'
if __name__ == '__main__':
    # Command-line driver: parse the given test files and write the
    # generated Rust test module to stdout.
    parser = argparse.ArgumentParser(
        description='Generate match tests from an AT&T POSIX test file.')
    aa = parser.add_argument
    aa('files', nargs='+',
       help='A list of dat AT&T POSIX test files. See src/testdata')
    args = parser.parse_args()

    # Parse each input exactly once, before anything is printed, so an
    # unreadable or malformed file fails before any output is produced.
    parsed = [(f, read_tests(f)) for f in args.files]

    # The header lines must stay at column 0: they are emitted verbatim.
    tpl = '''// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// DO NOT EDIT. Automatically generated by 'scripts/regex-match-tests.py'
// on {date}.
'''
    print(tpl.format(date=str(datetime.datetime.now())))

    for f, file_tests in parsed:
        print('// Tests from %s' % path.basename(f))
        print_tests(file_tests)
        print('')

View File

@ -7,8 +7,6 @@
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//
// ignore-lexer-test FIXME #15679
//! This crate provides a native implementation of regular expressions that is
//! heavily based on RE2 both in syntax and in implementation. Notably,
@ -21,6 +19,9 @@
//! support and exhaustively lists the supported syntax. For more specific
//! details on the API, please see the documentation for the `Regex` type.
//!
//! This crate is [on crates.io](https://crates.io/crates/regex) and can be
//! used by adding `regex` to the dependencies in your project's `Cargo.toml`.
//!
//! # First example: find a date
//!
//! General use of regular expressions in this package involves compiling an
@ -57,16 +58,10 @@
//! given expression to native Rust code, which makes it much faster for
//! searching text.
//!
//! Since `regex!` provides compiled regular expressions that are both safer
//! and faster to use, you should use them whenever possible. The only
//! requirement for using them is that you have a string literal corresponding
//! to your expression. Otherwise, it is indistinguishable from an expression
//! compiled at runtime with `Regex::new`.
//!
//! To use the `regex!` macro, you must enable the `phase` feature and import
//! To use the `regex!` macro, you must enable the `plugin` feature and import
//! the `regex_macros` crate as a syntax extension:
//!
//! ```rust
//! ```ignore
//! #![feature(plugin)]
//! #![plugin(regex_macros)]
//! extern crate regex;
@ -88,6 +83,21 @@
//! expressions, but 100+ calls to `regex!` will probably result in a
//! noticeably bigger binary.
//!
//! **NOTE**: This is implemented using a compiler plugin, which will not be
//! available on the Rust 1.0 beta/stable channels. Therefore, you'll only
//! be able to use `regex!` on the nightlies. If you want to retain the
//! `regex!` macro, you can cheat and define this:
//!
//! ```rust
//! macro_rules! regex(
//! ($s:expr) => (regex::Regex::new($s).unwrap());
//! );
//! ```
//!
//! But this just replaces native regexes with dynamic regexes under the hood.
//! Moreover, this will cause your program to panic *at runtime* if an invalid
//! regular expression is given.
//!
//! # Example: iterating over capture groups
//!
//! This crate provides convenient iterators for matching an expression
@ -96,10 +106,9 @@
//! them by their component pieces:
//!
//! ```rust
//! # #![feature(plugin)] #![plugin(regex_macros)]
//! # extern crate regex;
//! # extern crate regex; use regex::Regex;
//! # fn main() {
//! let re = regex!(r"(\d{4})-(\d{2})-(\d{2})");
//! let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
//! let text = "2012-03-14, 2013-01-01 and 2014-07-05";
//! for cap in re.captures_iter(text) {
//! println!("Month: {} Day: {} Year: {}",
@ -124,10 +133,9 @@
//! in our replacement text:
//!
//! ```rust
//! # #![feature(plugin)] #![plugin(regex_macros)]
//! # extern crate regex;
//! # extern crate regex; use regex::Regex;
//! # fn main() {
//! let re = regex!(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})");
//! let re = Regex::new(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})").unwrap();
//! let before = "2012-03-14, 2013-01-01 and 2014-07-05";
//! let after = re.replace_all(before, "$m/$d/$y");
//! assert_eq!(after.as_slice(), "03/14/2012, 01/01/2013 and 07/05/2014");
@ -171,10 +179,9 @@
//! directly in your expression:
//!
//! ```rust
//! # #![feature(plugin)] #![plugin(regex_macros)]
//! # extern crate regex;
//! # extern crate regex; use regex::Regex;
//! # fn main() {
//! let re = regex!(r"(?i)Δ+");
//! let re = Regex::new(r"(?i)Δ+").unwrap();
//! assert_eq!(re.find("ΔδΔ"), Some((0, 6)));
//! # }
//! ```
@ -184,10 +191,9 @@
//! Cherokee letters:
//!
//! ```rust
//! # #![feature(plugin)] #![plugin(regex_macros)]
//! # extern crate regex;
//! # extern crate regex; use regex::Regex;
//! # fn main() {
//! let re = regex!(r"[\pN\p{Greek}\p{Cherokee}]+");
//! let re = Regex::new(r"[\pN\p{Greek}\p{Cherokee}]+").unwrap();
//! assert_eq!(re.find("abcΔβγδⅡxyz"), Some((3, 23)));
//! # }
//! ```
@ -281,10 +287,9 @@
//! expression:
//!
//! ```rust
//! # #![feature(plugin)] #![plugin(regex_macros)]
//! # extern crate regex;
//! # extern crate regex; use regex::Regex;
//! # fn main() {
//! let re = regex!(r"(?i)a+(?-i)b+");
//! let re = Regex::new(r"(?i)a+(?-i)b+").unwrap();
//! let cap = re.captures("AaAaAbbBBBb").unwrap();
//! assert_eq!(cap.at(0), Some("AaAaAbb"));
//! # }
@ -401,8 +406,10 @@ pub mod native {
// On the bright side, `rustdoc` lets us hide this from the public API
// documentation.
pub use compile::Program;
pub use compile::Inst::{Match, OneChar, CharClass, Any, EmptyBegin, EmptyEnd,
EmptyWordBoundary, Save, Jump, Split};
pub use compile::Inst::{
Match, OneChar, CharClass, Any, EmptyBegin, EmptyEnd,
EmptyWordBoundary, Save, Jump, Split,
};
pub use parse::{
FLAG_EMPTY, FLAG_NOCASE, FLAG_MULTI, FLAG_DOTNL,
FLAG_SWAP_GREED, FLAG_NEGATED,
@ -411,5 +418,7 @@ pub mod native {
pub use re::Regex::{Dynamic, Native};
pub use vm::{CharReader, find_prefix};
pub use vm::MatchKind::{self, Exists, Location, Submatches};
pub use vm::StepState::{self, StepMatchEarlyReturn, StepMatch, StepContinue};
pub use vm::StepState::{
self, StepMatchEarlyReturn, StepMatch, StepContinue,
};
}

View File

@ -8,11 +8,6 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use self::Ast::*;
use self::Repeater::*;
use self::Greed::*;
use self::BuildAst::*;
use std::char;
use std::cmp;
use std::fmt;
@ -22,6 +17,11 @@ use std::num;
/// Static data containing Unicode ranges for general categories and scripts.
use unicode::regex::{UNICODE_CLASSES, PERLD, PERLS, PERLW};
use self::Ast::*;
use self::Repeater::*;
use self::Greed::*;
use self::BuildAst::*;
/// The maximum number of repetitions allowed with the `{n,m}` syntax.
static MAX_REPEAT: usize = 1000;

View File

@ -44,9 +44,6 @@ pub fn quote(text: &str) -> String {
///
/// To find submatches, split or replace text, you'll need to compile an
/// expression first.
///
/// Note that you should prefer the `regex!` macro when possible. For example,
/// `regex!("...").is_match("...")`.
pub fn is_match(regex: &str, text: &str) -> Result<bool, parse::Error> {
Regex::new(regex).map(|r| r.is_match(text))
}
@ -78,33 +75,9 @@ pub fn is_match(regex: &str, text: &str) -> Result<bool, parse::Error> {
///
/// ```rust
/// # use regex::Regex;
/// let re = match Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}") {
/// Ok(re) => re,
/// Err(err) => panic!("{}", err),
/// };
/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap();
/// assert_eq!(re.find("phone: 111-222-3333"), Some((7, 19)));
/// ```
///
/// You can also use the `regex!` macro to compile a regular expression when
/// you compile your program:
///
/// ```rust
/// #![feature(plugin)]
/// #![plugin(regex_macros)]
/// extern crate regex;
///
/// fn main() {
/// let re = regex!(r"\d+");
/// assert_eq!(re.find("123 abc"), Some((0, 3)));
/// }
/// ```
///
/// Given an incorrect regular expression, `regex!` will cause the Rust
/// compiler to produce a compile time error.
/// Note that `regex!` will compile the expression to native Rust code, which
/// makes it much faster when searching text.
/// More details about the `regex!` macro can be found in the `regex` crate
/// documentation.
#[derive(Clone)]
pub enum Regex {
// The representation of `Regex` is exported to support the `regex!`
@ -163,9 +136,6 @@ impl Regex {
/// Compiles a dynamic regular expression. Once compiled, it can be
/// used repeatedly to search, split or replace text in a string.
///
/// When possible, you should prefer the `regex!` macro since it is
/// safer and always faster.
///
/// If an invalid expression is given, then an error is returned.
pub fn new(re: &str) -> Result<Regex, parse::Error> {
let ast = try!(parse::parse(re));
@ -185,11 +155,10 @@ impl Regex {
/// characters:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let text = "I categorically deny having triskaidekaphobia.";
/// let matched = regex!(r"\b\w{13}\b").is_match(text);
/// let matched = Regex::new(r"\b\w{13}\b").unwrap().is_match(text);
/// assert!(matched);
/// # }
/// ```
@ -210,11 +179,10 @@ impl Regex {
/// characters:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let text = "I categorically deny having triskaidekaphobia.";
/// let pos = regex!(r"\b\w{13}\b").find(text);
/// let pos = Regex::new(r"\b\w{13}\b").unwrap().find(text);
/// assert_eq!(pos, Some((2, 15)));
/// # }
/// ```
@ -237,11 +205,10 @@ impl Regex {
/// characters:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let text = "Retroactively relinquishing remunerations is reprehensible.";
/// for pos in regex!(r"\b\w{13}\b").find_iter(text) {
/// for pos in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) {
/// println!("{:?}", pos);
/// }
/// // Output:
@ -276,10 +243,9 @@ impl Regex {
/// year separately.
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = regex!(r"'([^']+)'\s+\((\d{4})\)");
/// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
/// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
/// let caps = re.captures(text).unwrap();
/// assert_eq!(caps.at(1), Some("Citizen Kane"));
@ -294,10 +260,10 @@ impl Regex {
/// We can make this example a bit clearer by using *named* capture groups:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = regex!(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
/// .unwrap();
/// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
/// let caps = re.captures(text).unwrap();
/// assert_eq!(caps.name("title"), Some("Citizen Kane"));
@ -327,10 +293,10 @@ impl Regex {
/// some text, where the movie is formatted like "'Title' (xxxx)":
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = regex!(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
/// .unwrap();
/// let text = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
/// for caps in re.captures_iter(text) {
/// println!("Movie: {:?}, Released: {:?}", caps.name("title"), caps.name("year"));
@ -363,10 +329,9 @@ impl Regex {
/// To split a string delimited by arbitrary amounts of spaces or tabs:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = regex!(r"[ \t]+");
/// let re = Regex::new(r"[ \t]+").unwrap();
/// let fields: Vec<&str> = re.split("a b \t c\td e").collect();
/// assert_eq!(fields, vec!("a", "b", "c", "d", "e"));
/// # }
@ -393,10 +358,9 @@ impl Regex {
/// Get the first two words in some text:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = regex!(r"\W+");
/// let re = Regex::new(r"\W+").unwrap();
/// let fields: Vec<&str> = re.splitn("Hey! How are you?", 3).collect();
/// assert_eq!(fields, vec!("Hey", "How", "are you?"));
/// # }
@ -423,10 +387,9 @@ impl Regex {
/// In typical usage, this can just be a normal string:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = regex!("[^01]+");
/// let re = Regex::new("[^01]+").unwrap();
/// assert_eq!(re.replace("1078910", ""), "1010");
/// # }
/// ```
@ -437,10 +400,9 @@ impl Regex {
/// submatches easily:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # use regex::Captures; fn main() {
/// let re = regex!(r"([^,\s]+),\s+(\S+)");
/// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
/// let result = re.replace("Springsteen, Bruce", |caps: &Captures| {
/// format!("{} {}", caps.at(2).unwrap_or(""), caps.at(1).unwrap_or(""))
/// });
@ -454,10 +416,9 @@ impl Regex {
/// with named capture groups:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// let re = regex!(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)");
/// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
/// let result = re.replace("Springsteen, Bruce", "$first $last");
/// assert_eq!(result, "Bruce Springsteen");
/// # }
@ -471,12 +432,11 @@ impl Regex {
/// `NoExpand`:
///
/// ```rust
/// # #![feature(plugin)] #![plugin(regex_macros)]
/// # extern crate regex;
/// # extern crate regex; use regex::Regex;
/// # fn main() {
/// use regex::NoExpand;
///
/// let re = regex!(r"(?P<last>[^,\s]+),\s+(\S+)");
/// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap();
/// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last"));
/// assert_eq!(result, "$2 $last");
/// # }

4
src/testdata/README vendored
View File

@ -12,6 +12,6 @@ by me (Andrew Gallant) (only in repetition.dat) so that RE2/Go would pass them.
have been a bad idea, but I think being consistent with an established Regex
library is worth something.
Note that these files are read by 'src/etc/regexp-match-tests' and turned into
Rust tests found in 'src/libregexp/tests/matches.rs'.
Note that these files are read by 'scripts/regex-match-tests.py' and turned
into Rust tests found in 'regex_macros/tests/matches.rs'.