third_party_rust_regex/scripts/regex-match-tests.py
Andrew Gallant 2d0e77a457 Divorce regex_macros from regex.
Fixes #31 and #33.

There are a number of related changes in this commit:

1. A script that generates the 'match' tests has been reintroduced.
2. The regex-dna shootout benchmark has been updated.
3. Running `cargo test` on the `regex` crate does not require
   `regex_macros`.
4. The documentation has been updated to use `Regex::new(...).unwrap()`
   instead of `regex!`. The emphasis on using `regex!` has been reduced,
   and a note about its unavailability in Rust 1.0 beta/stable has been
   added.
5. Updated Travis to test both `regex` and `regex_macros`.
2015-02-28 14:15:36 -05:00

108 lines
3.4 KiB
Python
Executable File

#!/usr/bin/env python2
# Copyright 2014 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
from __future__ import absolute_import, division, print_function
import argparse
import datetime
import os.path as path
def print_tests(tests):
print('\n'.join([test_tostr(t) for t in tests]))
def read_tests(f):
basename, _ = path.splitext(path.basename(f))
tests = []
for lineno, line in enumerate(open(f), 1):
fields = filter(None, map(str.strip, line.split('\t')))
if not (4 <= len(fields) <= 5) \
or 'E' not in fields[0] or fields[0][0] == '#':
continue
opts, pat, text, sgroups = fields[0:4]
groups = [] # groups as integer ranges
if sgroups == 'NOMATCH':
groups = [None]
elif ',' in sgroups:
noparen = map(lambda s: s.strip('()'), sgroups.split(')('))
for g in noparen:
s, e = map(str.strip, g.split(','))
if s == '?' and e == '?':
groups.append(None)
else:
groups.append((int(s), int(e)))
else:
# This skips tests that should result in an error.
# There aren't many, so I think we can just capture those
# manually. Possibly fix this in future.
continue
if pat == 'SAME':
pat = tests[-1][1]
if '$' in opts:
pat = pat.decode('string_escape')
text = text.decode('string_escape')
if 'i' in opts:
pat = '(?i)%s' % pat
name = '%s_%d' % (basename, lineno)
tests.append((name, pat, text, groups))
return tests
def test_tostr(t):
lineno, pat, text, groups = t
options = map(group_tostr, groups)
return 'mat!(match_%s, r"%s", r"%s", %s);' \
% (lineno, pat, '' if text == "NULL" else text, ', '.join(options))
def group_tostr(g):
if g is None:
return 'None'
else:
return 'Some((%d, %d))' % (g[0], g[1])
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='Generate match tests from an AT&T POSIX test file.')
aa = parser.add_argument
aa('files', nargs='+',
help='A list of dat AT&T POSIX test files. See src/testdata')
args = parser.parse_args()
tests = []
for f in args.files:
tests += read_tests(f)
tpl = '''// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// DO NOT EDIT. Automatically generated by 'scripts/regex-match-tests.py'
// on {date}.
'''
print(tpl.format(date=str(datetime.datetime.now())))
for f in args.files:
print('// Tests from %s' % path.basename(f))
print_tests(read_tests(f))
print('')