mirror of
https://gitee.com/openharmony/third_party_rust_regex
synced 2025-04-12 23:50:30 +00:00
Divorce regex_macros from regex.
Fixes #31 and #33. There are a number of related changes in this commit: 1. A script that generates the 'match' tests has been reintroduced. 2. The regex-dna shootout benchmark has been updated. 3. Running `cargo test` on the `regex` crate does not require `regex_macros`. 4. The documentation has been updated to use `Regex::new(...).unwrap()` instead of `regex!`. The emphasis on using `regex!` has been reduced, and a note about its unavailability in Rust 1.0 beta/stable has been added. 5. Updated Travis to test both `regex` and `regex_macros`.
This commit is contained in:
parent
8b44176a91
commit
2d0e77a457
@ -4,6 +4,9 @@ script:
|
||||
- cargo build --verbose
|
||||
- cargo test --verbose
|
||||
- cargo doc
|
||||
- cargo bench --verbose
|
||||
- cargo test --verbose --manifest-path=regex_macros/Cargo.toml
|
||||
- cargo bench --verbose --manifest-path=regex_macros/Cargo.toml
|
||||
after_success: |
|
||||
[ $TRAVIS_BRANCH = master ] &&
|
||||
[ $TRAVIS_PULL_REQUEST = false ] &&
|
||||
|
13
Cargo.toml
13
Cargo.toml
@ -1,5 +1,4 @@
|
||||
[package]
|
||||
|
||||
name = "regex"
|
||||
version = "0.1.15"
|
||||
authors = ["The Rust Project Developers"]
|
||||
@ -13,12 +12,14 @@ An implementation of regular expressions for Rust.
|
||||
"""
|
||||
|
||||
[[test]]
|
||||
path = "tests/mod.rs"
|
||||
path = "regex_macros/tests/test_dynamic.rs"
|
||||
name = "all"
|
||||
|
||||
[[bench]]
|
||||
name = "all"
|
||||
path = "regex_macros/benches/bench_dynamic.rs"
|
||||
test = false
|
||||
bench = true
|
||||
|
||||
[dev-dependencies]
|
||||
rand = "0.1"
|
||||
|
||||
[dev-dependencies.regex_macros]
|
||||
path = "regex_macros"
|
||||
version = "0.1.0"
|
||||
|
@ -1,5 +1,4 @@
|
||||
[package]
|
||||
|
||||
name = "regex_macros"
|
||||
version = "0.1.8"
|
||||
authors = ["The Rust Project Developers"]
|
||||
@ -14,6 +13,23 @@ An implementation of statically compiled regular expressions for Rust.
|
||||
name = "regex_macros"
|
||||
plugin = true
|
||||
|
||||
[[test]]
|
||||
path = "tests/test_native.rs"
|
||||
name = "all"
|
||||
|
||||
[[test]]
|
||||
path = "benches/shootout-regex-dna.rs"
|
||||
name = "shootout-regex-dna"
|
||||
|
||||
[[bench]]
|
||||
name = "all"
|
||||
path = "benches/bench_native.rs"
|
||||
test = false
|
||||
bench = true
|
||||
|
||||
[dependencies.regex]
|
||||
path = ".."
|
||||
version = "0.1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
rand = "0.1"
|
||||
|
@ -10,8 +10,7 @@
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use std::iter::repeat;
|
||||
use stdtest::Bencher;
|
||||
|
||||
use test::Bencher;
|
||||
use rand::{Rng, thread_rng};
|
||||
use regex::{Regex, NoExpand};
|
||||
|
@ -8,19 +8,11 @@
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![feature(core, plugin, test, collections)]
|
||||
#![plugin(regex_macros)]
|
||||
#![feature(core, test)]
|
||||
|
||||
extern crate rand;
|
||||
extern crate regex;
|
||||
extern crate "test" as stdtest;
|
||||
|
||||
#[path = "bench.rs"]
|
||||
mod native_bench;
|
||||
#[path = "tests.rs"]
|
||||
mod native_tests;
|
||||
|
||||
mod native_static;
|
||||
extern crate test;
|
||||
|
||||
// Due to macro scoping rules, this definition only applies for the modules
|
||||
// defined below. Effectively, it allows us to use the same tests for both
|
||||
@ -34,8 +26,4 @@ macro_rules! regex(
|
||||
);
|
||||
);
|
||||
|
||||
#[path = "bench.rs"]
|
||||
mod dynamic_bench;
|
||||
#[path = "tests.rs"]
|
||||
mod dynamic_tests;
|
||||
|
||||
mod bench;
|
18
regex_macros/benches/bench_native.rs
Normal file
18
regex_macros/benches/bench_native.rs
Normal file
@ -0,0 +1,18 @@
|
||||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![feature(core, plugin, test)]
|
||||
#![plugin(regex_macros)]
|
||||
|
||||
extern crate rand;
|
||||
extern crate regex;
|
||||
extern crate test;
|
||||
|
||||
mod bench;
|
1671
regex_macros/benches/regexdna-input.txt
Normal file
1671
regex_macros/benches/regexdna-input.txt
Normal file
File diff suppressed because it is too large
Load Diff
132
regex_macros/benches/shootout-regex-dna.rs
Normal file
132
regex_macros/benches/shootout-regex-dna.rs
Normal file
@ -0,0 +1,132 @@
|
||||
// The Computer Language Benchmarks Game
|
||||
// http://benchmarksgame.alioth.debian.org/
|
||||
//
|
||||
// contributed by the Rust Project Developers
|
||||
|
||||
// Copyright (c) 2014 The Rust Project Developers
|
||||
//
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in
|
||||
// the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
//
|
||||
// - Neither the name of "The Computer Language Benchmarks Game" nor
|
||||
// the name of "The Computer Language Shootout Benchmarks" nor the
|
||||
// names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior
|
||||
// written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
// OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#![feature(old_io, plugin, std_misc)]
|
||||
#![plugin(regex_macros)]
|
||||
extern crate regex;
|
||||
|
||||
use std::old_io as io;
|
||||
use std::sync::{Arc, Future};
|
||||
use regex::{NoExpand, Regex};
|
||||
|
||||
/// Runs the benchmark on the bundled `regexdna-input.txt` and compares the
/// newline-joined report with the expected text.
#[test]
|
||||
fn check() {
|
||||
// Expected report: one "<pattern> <count>" line per variant, an empty line,
// then the three lengths that `run` appends at the end of its output.
static ANSWER: &'static str = "\
|
||||
agggtaaa|tttaccct 0
|
||||
[cgt]gggtaaa|tttaccc[acg] 3
|
||||
a[act]ggtaaa|tttacc[agt]t 9
|
||||
ag[act]gtaaa|tttac[agt]ct 8
|
||||
agg[act]taaa|ttta[agt]cct 10
|
||||
aggg[acg]aaa|ttt[cgt]ccct 3
|
||||
agggt[cgt]aa|tt[acg]accct 4
|
||||
agggta[cgt]a|t[acg]taccct 3
|
||||
agggtaa[cgt]|[acg]ttaccct 5
|
||||
|
||||
101745
|
||||
100000
|
||||
133640";
|
||||
// The input is embedded at compile time, so the test needs no file I/O.
static SEQ: &'static str = include_str!("regexdna-input.txt");
|
||||
// `run` returns the report as separate lines; join them to match ANSWER.
let got = run(SEQ.to_string()).connect("\n");
|
||||
assert_eq!(ANSWER, got);
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn main() {
|
||||
println!("{}", run(io::stdin().read_to_string().unwrap()).connect("\n"));
|
||||
}
|
||||
|
||||
/// Produces the regex-dna report for `seq` as a list of output lines.
///
/// The lines are: one "<pattern> <match count>" entry per regex in
/// `variants`, an empty line, then three lengths -- the input as received,
/// the input after the cleanup replace below, and the cleaned input after
/// all of the single-letter substitutions in `substs` have been applied.
fn run(mut seq: String) -> Vec<String> {
|
||||
// Byte length of the input exactly as it was passed in.
let ilen = seq.len();
|
||||
|
||||
// Delete every '>' together with the rest of its line, and every other
// newline. NoExpand("") makes the replacement a literal empty string.
seq = regex!(">[^\n]*\n|\n").replace_all(&seq, NoExpand(""));
|
||||
let seq_arc = Arc::new(seq.clone()); // copy before it moves
|
||||
// Byte length after the cleanup above.
let clen = seq.len();
|
||||
|
||||
// The substitutions run inside a Future. The `move` closure takes ownership
// of `seq`, which is why `seq_arc` had to be cloned beforehand.
let mut seqlen = Future::spawn(move|| {
|
||||
let substs = vec![
|
||||
(regex!("B"), "(c|g|t)"),
|
||||
(regex!("D"), "(a|g|t)"),
|
||||
(regex!("H"), "(a|c|t)"),
|
||||
(regex!("K"), "(g|t)"),
|
||||
(regex!("M"), "(a|c)"),
|
||||
(regex!("N"), "(a|c|g|t)"),
|
||||
(regex!("R"), "(a|g)"),
|
||||
(regex!("S"), "(c|g)"),
|
||||
(regex!("V"), "(a|c|g)"),
|
||||
(regex!("W"), "(a|t)"),
|
||||
(regex!("Y"), "(c|t)"),
|
||||
];
|
||||
// Rebind as mutable so each replacement can overwrite the previous result.
let mut seq = seq;
|
||||
for (re, replacement) in substs.into_iter() {
|
||||
seq = re.replace_all(&seq, NoExpand(replacement));
|
||||
}
|
||||
// Only the final length is reported, not the substituted text.
seq.len()
|
||||
});
|
||||
|
||||
let variants = vec![
|
||||
regex!("agggtaaa|tttaccct"),
|
||||
regex!("[cgt]gggtaaa|tttaccc[acg]"),
|
||||
regex!("a[act]ggtaaa|tttacc[agt]t"),
|
||||
regex!("ag[act]gtaaa|tttac[agt]ct"),
|
||||
regex!("agg[act]taaa|ttta[agt]cct"),
|
||||
regex!("aggg[acg]aaa|ttt[cgt]ccct"),
|
||||
regex!("agggt[cgt]aa|tt[acg]accct"),
|
||||
regex!("agggta[cgt]a|t[acg]taccct"),
|
||||
regex!("agggtaa[cgt]|[acg]ttaccct"),
|
||||
];
|
||||
// `variant_strs` keeps each pattern's text for the report; `counts` keeps the
// Future computing its number of matches, in the same order.
let (mut variant_strs, mut counts) = (vec!(), vec!());
|
||||
for variant in variants.into_iter() {
|
||||
let seq_arc_copy = seq_arc.clone();
|
||||
// Record the pattern text first: `variant` is moved into the closure below.
variant_strs.push(variant.to_string());
|
||||
counts.push(Future::spawn(move|| {
|
||||
variant.find_iter(&seq_arc_copy).count()
|
||||
}));
|
||||
}
|
||||
|
||||
let mut olines = Vec::new();
|
||||
// Pair each pattern with the result of its Future (same index in both vectors).
for (i, variant) in variant_strs.iter().enumerate() {
|
||||
olines.push(format!("{} {}", variant, counts[i].get()));
|
||||
}
|
||||
// Empty separator line between the per-pattern counts and the three lengths.
olines.push("".to_string());
|
||||
olines.push(format!("{}", ilen));
|
||||
olines.push(format!("{}", clen));
|
||||
olines.push(format!("{}", seqlen.get()));
|
||||
olines
|
||||
}
|
@ -8,10 +8,8 @@
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// ignore-tidy-linelength
|
||||
|
||||
// DO NOT EDIT. Automatically generated by 'src/etc/regex-match-tests'
|
||||
// on 2014-04-23 01:33:36.539280.
|
||||
// DO NOT EDIT. Automatically generated by 'scripts/regex-match-tests.py'
|
||||
// on 2015-02-28 11:00:00.161706.
|
||||
|
||||
// Tests from basic.dat
|
||||
mat!(match_basic_3, r"abracadabra$", r"abracadabracadabra", Some((7, 18)));
|
28
regex_macros/tests/test_dynamic.rs
Normal file
28
regex_macros/tests/test_dynamic.rs
Normal file
@ -0,0 +1,28 @@
|
||||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![feature(collections, core, test)]
|
||||
|
||||
extern crate regex;
|
||||
extern crate test;
|
||||
|
||||
// Due to macro scoping rules, this definition only applies for the modules
|
||||
// defined below. Effectively, it allows us to use the same tests for both
|
||||
// native and dynamic regexes.
|
||||
// Stand-in for the `regex!` syntax extension: compiles the pattern at runtime
// with `Regex::new` and panics with the parse error's message if the pattern
// is invalid.
macro_rules! regex {
    ($re:expr) => {
        ::regex::Regex::new($re).unwrap_or_else(|err| panic!("{}", err))
    };
}
|
||||
|
||||
mod tests;
|
18
regex_macros/tests/test_native.rs
Normal file
18
regex_macros/tests/test_native.rs
Normal file
@ -0,0 +1,18 @@
|
||||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![feature(collections, plugin, test)]
|
||||
#![plugin(regex_macros)]
|
||||
|
||||
extern crate regex;
|
||||
extern crate test;
|
||||
|
||||
mod tests;
|
||||
mod native_static;
|
@ -270,4 +270,5 @@ mat!(uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1)));
|
||||
|
||||
// A whole mess of tests from Glenn Fowler's regex test suite.
|
||||
// Generated by the 'scripts/regex-match-tests.py' program.
|
||||
#[path = "matches.rs"]
|
||||
mod matches;
|
107
scripts/regex-match-tests.py
Executable file
107
scripts/regex-match-tests.py
Executable file
@ -0,0 +1,107 @@
|
||||
#!/usr/bin/env python2
|
||||
|
||||
# Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
# file at the top-level directory of this distribution and at
|
||||
# http://rust-lang.org/COPYRIGHT.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
import argparse
|
||||
import datetime
|
||||
import os.path as path
|
||||
|
||||
|
||||
def print_tests(tests):
    """Write the tests to standard output, one `mat!(...)` line per test.

    `tests` is a list of tuples in the form accepted by `test_tostr`.
    """
    rendered = []
    for t in tests:
        rendered.append(test_tostr(t))
    print('\n'.join(rendered))
|
||||
|
||||
|
||||
def read_tests(f):
    """Parse one AT&T POSIX regex test file into a list of test tuples.

    Args:
        f: Path of the test file to read.

    Returns:
        A list of `(name, pattern, text, groups)` tuples. `name` is
        `<file stem>_<line number>`. `groups` holds one entry per expected
        group: an `(start, end)` pair of ints, or `None` for `(?,?)`. A
        `NOMATCH` expectation is represented as `[None]`.

    A line is skipped when it does not have 4 or 5 non-empty tab-separated
    fields, when its option field lacks 'E' or starts with '#', or when its
    expected result is neither `NOMATCH` nor a list of offsets (i.e. tests
    that expect a compile error).
    """
    basename, _ = path.splitext(path.basename(f))
    tests = []
    # `with` guarantees the file is closed even if parsing raises part-way.
    with open(f) as fh:
        for lineno, line in enumerate(fh, 1):
            # Build a real list (not a lazy `filter` object) so that `len()`
            # and slicing below behave the same on Python 2 and Python 3.
            fields = [s for s in map(str.strip, line.split('\t')) if s]
            if not (4 <= len(fields) <= 5) \
               or 'E' not in fields[0] or fields[0][0] == '#':
                continue

            opts, pat, text, sgroups = fields[0:4]
            groups = []  # groups as integer ranges
            if sgroups == 'NOMATCH':
                groups = [None]
            elif ',' in sgroups:
                # "(0,3)(?,?)" -> ["0,3", "?,?"]
                noparen = [g.strip('()') for g in sgroups.split(')(')]
                for g in noparen:
                    s, e = [part.strip() for part in g.split(',')]
                    if s == '?' and e == '?':
                        groups.append(None)
                    else:
                        groups.append((int(s), int(e)))
            else:
                # This skips tests that should result in an error.
                # There aren't many, so I think we can just capture those
                # manually. Possibly fix this in future.
                continue

            if pat == 'SAME':
                # Reuses the pattern stored for the last *kept* test, which
                # already includes any '(?i)' prefix added for that test.
                pat = tests[-1][1]
            if '$' in opts:
                # NOTE: `str.decode('string_escape')` only exists on Python 2,
                # the interpreter named in this script's shebang.
                pat = pat.decode('string_escape')
                text = text.decode('string_escape')
            if 'i' in opts:
                pat = '(?i)%s' % pat

            name = '%s_%d' % (basename, lineno)
            tests.append((name, pat, text, groups))
    return tests
|
||||
|
||||
|
||||
def test_tostr(t):
    """Render one test tuple as a Rust `mat!` macro invocation.

    `t` is a `(name, pattern, text, groups)` tuple. A text equal to "NULL"
    is emitted as the empty string, and each group is rendered with
    `group_tostr`.
    """
    name, pat, text, groups = t
    if text == "NULL":
        text = ''
    rendered_groups = ', '.join(group_tostr(g) for g in groups)
    return 'mat!(match_%s, r"%s", r"%s", %s);' \
        % (name, pat, text, rendered_groups)
|
||||
|
||||
|
||||
def group_tostr(g):
    """Render one expected group as Rust source text.

    Returns 'None' when `g` is None, otherwise 'Some((start, end))' built
    from the first two items of `g`.
    """
    if g is not None:
        return 'Some((%d, %d))' % (g[0], g[1])
    return 'None'
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: parse each given test file and print the
    # generated Rust test source (license header plus `mat!` lines) to stdout.
    parser = argparse.ArgumentParser(
        description='Generate match tests from an AT&T POSIX test file.')
    aa = parser.add_argument
    aa('files', nargs='+',
       help='A list of dat AT&T POSIX test files. See src/testdata')
    args = parser.parse_args()

    # Parse every file exactly once, before anything is printed, so a bad
    # input still fails before any output is produced and no file is read
    # a second time when its tests are emitted.
    parsed = [(f, read_tests(f)) for f in args.files]

    tpl = '''// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// DO NOT EDIT. Automatically generated by 'scripts/regex-match-tests.py'
// on {date}.
'''
    print(tpl.format(date=str(datetime.datetime.now())))

    for f, file_tests in parsed:
        print('// Tests from %s' % path.basename(f))
        print_tests(file_tests)
        print('')
|
65
src/lib.rs
65
src/lib.rs
@ -7,8 +7,6 @@
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
//
|
||||
// ignore-lexer-test FIXME #15679
|
||||
|
||||
//! This crate provides a native implementation of regular expressions that is
|
||||
//! heavily based on RE2 both in syntax and in implementation. Notably,
|
||||
@ -21,6 +19,9 @@
|
||||
//! support and exhaustively lists the supported syntax. For more specific
|
||||
//! details on the API, please see the documentation for the `Regex` type.
|
||||
//!
|
||||
//! This crate is [on crates.io](https://crates.io/crates/regex) and can be
|
||||
//! used by adding `regex` to your dependencies in your project's `Cargo.toml`.
|
||||
//!
|
||||
//! # First example: find a date
|
||||
//!
|
||||
//! General use of regular expressions in this package involves compiling an
|
||||
@ -57,16 +58,10 @@
|
||||
//! given expression to native Rust code, which makes it much faster for
|
||||
//! searching text.
|
||||
//!
|
||||
//! Since `regex!` provides compiled regular expressions that are both safer
|
||||
//! and faster to use, you should use them whenever possible. The only
|
||||
//! requirement for using them is that you have a string literal corresponding
|
||||
//! to your expression. Otherwise, it is indistinguishable from an expression
|
||||
//! compiled at runtime with `Regex::new`.
|
||||
//!
|
||||
//! To use the `regex!` macro, you must enable the `phase` feature and import
|
||||
//! To use the `regex!` macro, you must enable the `plugin` feature and import
|
||||
//! the `regex_macros` crate as a syntax extension:
|
||||
//!
|
||||
//! ```rust
|
||||
//! ```ignore
|
||||
//! #![feature(plugin)]
|
||||
//! #![plugin(regex_macros)]
|
||||
//! extern crate regex;
|
||||
@ -88,6 +83,21 @@
|
||||
//! expressions, but 100+ calls to `regex!` will probably result in a
|
||||
//! noticeably bigger binary.
|
||||
//!
|
||||
//! **NOTE**: This is implemented using a compiler plugin, which will not be
|
||||
//! available on the Rust 1.0 beta/stable channels. Therefore, you'll only
|
||||
//! be able to use `regex!` on the nightlies. If you want to retain the
|
||||
//! `regex!` macro, you can cheat and define this:
|
||||
//!
|
||||
//! ```rust
|
||||
//! macro_rules! regex(
|
||||
//! ($s:expr) => (regex::Regex::new($s).unwrap());
|
||||
//! );
|
||||
//! ```
|
||||
//!
|
||||
//! But this just replaces native regexes with dynamic regexes under the hood.
|
||||
//! Moreover, this will cause your program to panic *at runtime* if an invalid
|
||||
//! regular expression is given.
|
||||
//!
|
||||
//! # Example: iterating over capture groups
|
||||
//!
|
||||
//! This crate provides convenient iterators for matching an expression
|
||||
@ -96,10 +106,9 @@
|
||||
//! them by their component pieces:
|
||||
//!
|
||||
//! ```rust
|
||||
//! # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
//! # extern crate regex;
|
||||
//! # extern crate regex; use regex::Regex;
|
||||
//! # fn main() {
|
||||
//! let re = regex!(r"(\d{4})-(\d{2})-(\d{2})");
|
||||
//! let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
|
||||
//! let text = "2012-03-14, 2013-01-01 and 2014-07-05";
|
||||
//! for cap in re.captures_iter(text) {
|
||||
//! println!("Month: {} Day: {} Year: {}",
|
||||
@ -124,10 +133,9 @@
|
||||
//! in our replacement text:
|
||||
//!
|
||||
//! ```rust
|
||||
//! # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
//! # extern crate regex;
|
||||
//! # extern crate regex; use regex::Regex;
|
||||
//! # fn main() {
|
||||
//! let re = regex!(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})");
|
||||
//! let re = Regex::new(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})").unwrap();
|
||||
//! let before = "2012-03-14, 2013-01-01 and 2014-07-05";
|
||||
//! let after = re.replace_all(before, "$m/$d/$y");
|
||||
//! assert_eq!(after.as_slice(), "03/14/2012, 01/01/2013 and 07/05/2014");
|
||||
@ -171,10 +179,9 @@
|
||||
//! directly in your expression:
|
||||
//!
|
||||
//! ```rust
|
||||
//! # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
//! # extern crate regex;
|
||||
//! # extern crate regex; use regex::Regex;
|
||||
//! # fn main() {
|
||||
//! let re = regex!(r"(?i)Δ+");
|
||||
//! let re = Regex::new(r"(?i)Δ+").unwrap();
|
||||
//! assert_eq!(re.find("ΔδΔ"), Some((0, 6)));
|
||||
//! # }
|
||||
//! ```
|
||||
@ -184,10 +191,9 @@
|
||||
//! Cherokee letters:
|
||||
//!
|
||||
//! ```rust
|
||||
//! # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
//! # extern crate regex;
|
||||
//! # extern crate regex; use regex::Regex;
|
||||
//! # fn main() {
|
||||
//! let re = regex!(r"[\pN\p{Greek}\p{Cherokee}]+");
|
||||
//! let re = Regex::new(r"[\pN\p{Greek}\p{Cherokee}]+").unwrap();
|
||||
//! assert_eq!(re.find("abcΔᎠβⅠᏴγδⅡxyz"), Some((3, 23)));
|
||||
//! # }
|
||||
//! ```
|
||||
@ -281,10 +287,9 @@
|
||||
//! expression:
|
||||
//!
|
||||
//! ```rust
|
||||
//! # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
//! # extern crate regex;
|
||||
//! # extern crate regex; use regex::Regex;
|
||||
//! # fn main() {
|
||||
//! let re = regex!(r"(?i)a+(?-i)b+");
|
||||
//! let re = Regex::new(r"(?i)a+(?-i)b+").unwrap();
|
||||
//! let cap = re.captures("AaAaAbbBBBb").unwrap();
|
||||
//! assert_eq!(cap.at(0), Some("AaAaAbb"));
|
||||
//! # }
|
||||
@ -401,8 +406,10 @@ pub mod native {
|
||||
// On the bright side, `rustdoc` lets us hide this from the public API
|
||||
// documentation.
|
||||
pub use compile::Program;
|
||||
pub use compile::Inst::{Match, OneChar, CharClass, Any, EmptyBegin, EmptyEnd,
|
||||
EmptyWordBoundary, Save, Jump, Split};
|
||||
pub use compile::Inst::{
|
||||
Match, OneChar, CharClass, Any, EmptyBegin, EmptyEnd,
|
||||
EmptyWordBoundary, Save, Jump, Split,
|
||||
};
|
||||
pub use parse::{
|
||||
FLAG_EMPTY, FLAG_NOCASE, FLAG_MULTI, FLAG_DOTNL,
|
||||
FLAG_SWAP_GREED, FLAG_NEGATED,
|
||||
@ -411,5 +418,7 @@ pub mod native {
|
||||
pub use re::Regex::{Dynamic, Native};
|
||||
pub use vm::{CharReader, find_prefix};
|
||||
pub use vm::MatchKind::{self, Exists, Location, Submatches};
|
||||
pub use vm::StepState::{self, StepMatchEarlyReturn, StepMatch, StepContinue};
|
||||
pub use vm::StepState::{
|
||||
self, StepMatchEarlyReturn, StepMatch, StepContinue,
|
||||
};
|
||||
}
|
||||
|
10
src/parse.rs
10
src/parse.rs
@ -8,11 +8,6 @@
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use self::Ast::*;
|
||||
use self::Repeater::*;
|
||||
use self::Greed::*;
|
||||
use self::BuildAst::*;
|
||||
|
||||
use std::char;
|
||||
use std::cmp;
|
||||
use std::fmt;
|
||||
@ -22,6 +17,11 @@ use std::num;
|
||||
/// Static data containing Unicode ranges for general categories and scripts.
|
||||
use unicode::regex::{UNICODE_CLASSES, PERLD, PERLS, PERLW};
|
||||
|
||||
use self::Ast::*;
|
||||
use self::Repeater::*;
|
||||
use self::Greed::*;
|
||||
use self::BuildAst::*;
|
||||
|
||||
/// The maximum number of repetitions allowed with the `{n,m}` syntax.
|
||||
static MAX_REPEAT: usize = 1000;
|
||||
|
||||
|
94
src/re.rs
94
src/re.rs
@ -44,9 +44,6 @@ pub fn quote(text: &str) -> String {
|
||||
///
|
||||
/// To find submatches, split or replace text, you'll need to compile an
|
||||
/// expression first.
|
||||
///
|
||||
/// Note that you should prefer the `regex!` macro when possible. For example,
|
||||
/// `regex!("...").is_match("...")`.
|
||||
pub fn is_match(regex: &str, text: &str) -> Result<bool, parse::Error> {
|
||||
Regex::new(regex).map(|r| r.is_match(text))
|
||||
}
|
||||
@ -78,33 +75,9 @@ pub fn is_match(regex: &str, text: &str) -> Result<bool, parse::Error> {
|
||||
///
|
||||
/// ```rust
|
||||
/// # use regex::Regex;
|
||||
/// let re = match Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}") {
|
||||
/// Ok(re) => re,
|
||||
/// Err(err) => panic!("{}", err),
|
||||
/// };
|
||||
/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap();
|
||||
/// assert_eq!(re.find("phone: 111-222-3333"), Some((7, 19)));
|
||||
/// ```
|
||||
///
|
||||
/// You can also use the `regex!` macro to compile a regular expression when
|
||||
/// you compile your program:
|
||||
///
|
||||
/// ```rust
|
||||
/// #![feature(plugin)]
|
||||
/// #![plugin(regex_macros)]
|
||||
/// extern crate regex;
|
||||
///
|
||||
/// fn main() {
|
||||
/// let re = regex!(r"\d+");
|
||||
/// assert_eq!(re.find("123 abc"), Some((0, 3)));
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// Given an incorrect regular expression, `regex!` will cause the Rust
|
||||
/// compiler to produce a compile time error.
|
||||
/// Note that `regex!` will compile the expression to native Rust code, which
|
||||
/// makes it much faster when searching text.
|
||||
/// More details about the `regex!` macro can be found in the `regex` crate
|
||||
/// documentation.
|
||||
#[derive(Clone)]
|
||||
pub enum Regex {
|
||||
// The representation of `Regex` is exported to support the `regex!`
|
||||
@ -163,9 +136,6 @@ impl Regex {
|
||||
/// Compiles a dynamic regular expression. Once compiled, it can be
|
||||
/// used repeatedly to search, split or replace text in a string.
|
||||
///
|
||||
/// When possible, you should prefer the `regex!` macro since it is
|
||||
/// safer and always faster.
|
||||
///
|
||||
/// If an invalid expression is given, then an error is returned.
|
||||
pub fn new(re: &str) -> Result<Regex, parse::Error> {
|
||||
let ast = try!(parse::parse(re));
|
||||
@ -185,11 +155,10 @@ impl Regex {
|
||||
/// characters:
|
||||
///
|
||||
/// ```rust
|
||||
/// # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
/// # extern crate regex;
|
||||
/// # extern crate regex; use regex::Regex;
|
||||
/// # fn main() {
|
||||
/// let text = "I categorically deny having triskaidekaphobia.";
|
||||
/// let matched = regex!(r"\b\w{13}\b").is_match(text);
|
||||
/// let matched = Regex::new(r"\b\w{13}\b").unwrap().is_match(text);
|
||||
/// assert!(matched);
|
||||
/// # }
|
||||
/// ```
|
||||
@ -210,11 +179,10 @@ impl Regex {
|
||||
/// characters:
|
||||
///
|
||||
/// ```rust
|
||||
/// # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
/// # extern crate regex;
|
||||
/// # extern crate regex; use regex::Regex;
|
||||
/// # fn main() {
|
||||
/// let text = "I categorically deny having triskaidekaphobia.";
|
||||
/// let pos = regex!(r"\b\w{13}\b").find(text);
|
||||
/// let pos = Regex::new(r"\b\w{13}\b").unwrap().find(text);
|
||||
/// assert_eq!(pos, Some((2, 15)));
|
||||
/// # }
|
||||
/// ```
|
||||
@ -237,11 +205,10 @@ impl Regex {
|
||||
/// characters:
|
||||
///
|
||||
/// ```rust
|
||||
/// # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
/// # extern crate regex;
|
||||
/// # extern crate regex; use regex::Regex;
|
||||
/// # fn main() {
|
||||
/// let text = "Retroactively relinquishing remunerations is reprehensible.";
|
||||
/// for pos in regex!(r"\b\w{13}\b").find_iter(text) {
|
||||
/// for pos in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) {
|
||||
/// println!("{:?}", pos);
|
||||
/// }
|
||||
/// // Output:
|
||||
@ -276,10 +243,9 @@ impl Regex {
|
||||
/// year separately.
|
||||
///
|
||||
/// ```rust
|
||||
/// # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
/// # extern crate regex;
|
||||
/// # extern crate regex; use regex::Regex;
|
||||
/// # fn main() {
|
||||
/// let re = regex!(r"'([^']+)'\s+\((\d{4})\)");
|
||||
/// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
|
||||
/// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
|
||||
/// let caps = re.captures(text).unwrap();
|
||||
/// assert_eq!(caps.at(1), Some("Citizen Kane"));
|
||||
@ -294,10 +260,10 @@ impl Regex {
|
||||
/// We can make this example a bit clearer by using *named* capture groups:
|
||||
///
|
||||
/// ```rust
|
||||
/// # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
/// # extern crate regex;
|
||||
/// # extern crate regex; use regex::Regex;
|
||||
/// # fn main() {
|
||||
/// let re = regex!(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
|
||||
/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
|
||||
/// .unwrap();
|
||||
/// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
|
||||
/// let caps = re.captures(text).unwrap();
|
||||
/// assert_eq!(caps.name("title"), Some("Citizen Kane"));
|
||||
@ -327,10 +293,10 @@ impl Regex {
|
||||
/// some text, where the movie is formatted like "'Title' (xxxx)":
|
||||
///
|
||||
/// ```rust
|
||||
/// # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
/// # extern crate regex;
|
||||
/// # extern crate regex; use regex::Regex;
|
||||
/// # fn main() {
|
||||
/// let re = regex!(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
|
||||
/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
|
||||
/// .unwrap();
|
||||
/// let text = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
|
||||
/// for caps in re.captures_iter(text) {
|
||||
/// println!("Movie: {:?}, Released: {:?}", caps.name("title"), caps.name("year"));
|
||||
@ -363,10 +329,9 @@ impl Regex {
|
||||
/// To split a string delimited by arbitrary amounts of spaces or tabs:
|
||||
///
|
||||
/// ```rust
|
||||
/// # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
/// # extern crate regex;
|
||||
/// # extern crate regex; use regex::Regex;
|
||||
/// # fn main() {
|
||||
/// let re = regex!(r"[ \t]+");
|
||||
/// let re = Regex::new(r"[ \t]+").unwrap();
|
||||
/// let fields: Vec<&str> = re.split("a b \t c\td e").collect();
|
||||
/// assert_eq!(fields, vec!("a", "b", "c", "d", "e"));
|
||||
/// # }
|
||||
@ -393,10 +358,9 @@ impl Regex {
|
||||
/// Get the first two words in some text:
|
||||
///
|
||||
/// ```rust
|
||||
/// # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
/// # extern crate regex;
|
||||
/// # extern crate regex; use regex::Regex;
|
||||
/// # fn main() {
|
||||
/// let re = regex!(r"\W+");
|
||||
/// let re = Regex::new(r"\W+").unwrap();
|
||||
/// let fields: Vec<&str> = re.splitn("Hey! How are you?", 3).collect();
|
||||
/// assert_eq!(fields, vec!("Hey", "How", "are you?"));
|
||||
/// # }
|
||||
@ -423,10 +387,9 @@ impl Regex {
|
||||
/// In typical usage, this can just be a normal string:
|
||||
///
|
||||
/// ```rust
|
||||
/// # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
/// # extern crate regex;
|
||||
/// # extern crate regex; use regex::Regex;
|
||||
/// # fn main() {
|
||||
/// let re = regex!("[^01]+");
|
||||
/// let re = Regex::new("[^01]+").unwrap();
|
||||
/// assert_eq!(re.replace("1078910", ""), "1010");
|
||||
/// # }
|
||||
/// ```
|
||||
@ -437,10 +400,9 @@ impl Regex {
|
||||
/// submatches easily:
|
||||
///
|
||||
/// ```rust
|
||||
/// # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
/// # extern crate regex;
|
||||
/// # extern crate regex; use regex::Regex;
|
||||
/// # use regex::Captures; fn main() {
|
||||
/// let re = regex!(r"([^,\s]+),\s+(\S+)");
|
||||
/// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
|
||||
/// let result = re.replace("Springsteen, Bruce", |caps: &Captures| {
|
||||
/// format!("{} {}", caps.at(2).unwrap_or(""), caps.at(1).unwrap_or(""))
|
||||
/// });
|
||||
@ -454,10 +416,9 @@ impl Regex {
|
||||
/// with named capture groups:
|
||||
///
|
||||
/// ```rust
|
||||
/// # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
/// # extern crate regex;
|
||||
/// # extern crate regex; use regex::Regex;
|
||||
/// # fn main() {
|
||||
/// let re = regex!(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)");
|
||||
/// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
|
||||
/// let result = re.replace("Springsteen, Bruce", "$first $last");
|
||||
/// assert_eq!(result, "Bruce Springsteen");
|
||||
/// # }
|
||||
@ -471,12 +432,11 @@ impl Regex {
|
||||
/// `NoExpand`:
|
||||
///
|
||||
/// ```rust
|
||||
/// # #![feature(plugin)] #![plugin(regex_macros)]
|
||||
/// # extern crate regex;
|
||||
/// # extern crate regex; use regex::Regex;
|
||||
/// # fn main() {
|
||||
/// use regex::NoExpand;
|
||||
///
|
||||
/// let re = regex!(r"(?P<last>[^,\s]+),\s+(\S+)");
|
||||
/// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap();
|
||||
/// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last"));
|
||||
/// assert_eq!(result, "$2 $last");
|
||||
/// # }
|
||||
|
14
src/testdata/README
vendored
14
src/testdata/README
vendored
@ -1,4 +1,4 @@
|
||||
Test data was taken from the Go distribution, which was in turn taken from the
|
||||
Test data was taken from the Go distribution, which was in turn taken from the
|
||||
testregex test suite:
|
||||
|
||||
http://www2.research.att.com/~astopen/testregex/testregex.html
|
||||
@ -6,12 +6,12 @@ testregex test suite:
|
||||
The LICENSE in this directory corresponds to the LICENSE that the data was
|
||||
released under.
|
||||
|
||||
The tests themselves were modified for RE2/Go. A couple were modified further
|
||||
by me (Andrew Gallant) (only in repetition.dat) so that RE2/Go would pass them.
|
||||
(Yes, it seems like RE2/Go includes failing test cases.) This may or may not
|
||||
have been a bad idea, but I think being consistent with an established Regex
|
||||
The tests themselves were modified for RE2/Go. A couple were modified further
|
||||
by me (Andrew Gallant) (only in repetition.dat) so that RE2/Go would pass them.
|
||||
(Yes, it seems like RE2/Go includes failing test cases.) This may or may not
|
||||
have been a bad idea, but I think being consistent with an established Regex
|
||||
library is worth something.
|
||||
|
||||
Note that these files are read by 'src/etc/regexp-match-tests' and turned into
|
||||
Rust tests found in 'src/libregexp/tests/matches.rs'.
|
||||
Note that these files are read by 'scripts/regex-match-tests.py' and turned
|
||||
into Rust tests found in 'regex_macros/tests/matches.rs'.
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user