#!/usr/bin/env python2
"""Generate ``mat!(...)`` match tests from AT&T POSIX regex test files.

Each input file is tab-separated; every usable line is turned into one
``mat!(match_<file>_<lineno>, r"<pattern>", r"<text>", <groups>);`` line
that is printed to standard output.  The code is written so that it runs
unchanged under both Python 2 and Python 3.
"""
from __future__ import absolute_import, division, print_function

import argparse
import datetime
import os.path as path


def print_tests(tests):
    """Print one ``mat!(...)`` line per test tuple in *tests*."""
    print('\n'.join([test_tostr(t) for t in tests]))


def _unescape(s):
    """Interpret backslash escapes (``\\n``, ``\\x41``, ...) inside *s*."""
    if isinstance(s, bytes):
        # Python 2: native strings are byte strings and have this codec.
        return s.decode('string_escape')
    # Python 3: 'string_escape' no longer exists.  Round-trip through
    # latin-1 so 'unicode_escape' sees the characters unchanged; anything
    # outside latin-1 is written as a \uXXXX escape and decoded back.
    return s.encode('latin-1', 'backslashreplace').decode('unicode_escape')


def _parse_groups(sgroups):
    """Convert the expected-groups field into a list of spans.

    Returns ``[None]`` for ``NOMATCH``, a list with one entry per
    ``(start,end)`` group (``None`` for ``(?,?)``, otherwise a tuple of two
    ints), or ``None`` when the field has neither form and the line should
    be skipped.
    """
    if sgroups == 'NOMATCH':
        return [None]
    if ',' not in sgroups:
        # This skips tests that should result in an error.
        # There aren't many, so I think we can just capture those
        # manually. Possibly fix this in future.
        return None

    groups = []  # groups as integer ranges
    for chunk in sgroups.split(')('):
        s, e = [part.strip() for part in chunk.strip('()').split(',')]
        if s == '?' and e == '?':
            groups.append(None)
        else:
            groups.append((int(s), int(e)))
    return groups


def read_tests(f):
    """Parse the test file at path *f* into a list of test tuples.

    A line is used only if it has 4 or 5 non-empty tab-separated fields
    (options, pattern, text, groups; a fifth field is ignored), its options
    contain ``E`` and do not start with ``#``, and its groups field can be
    parsed.  Everything else is silently skipped.

    Returns a list of ``(name, pat, text, groups)`` tuples where *name* is
    ``'<basename of f without extension>_<line number>'`` and *groups* is
    the list produced for the groups field.
    """
    basename, _ = path.splitext(path.basename(f))
    tests = []
    # 'with' guarantees the file is closed even if parsing raises.
    with open(f) as handle:
        for lineno, line in enumerate(handle, 1):
            stripped = (raw.strip() for raw in line.split('\t'))
            fields = [field for field in stripped if field]
            if not 4 <= len(fields) <= 5:
                continue

            opts, pat, text, sgroups = fields[:4]
            if 'E' not in opts or opts[0] == '#':
                continue

            groups = _parse_groups(sgroups)
            if groups is None:
                continue

            if pat == 'SAME':
                # NOTE(review): this reuses the pattern of the most recently
                # *kept* test, exactly as stored (i.e. after any unescaping
                # or '(?i)' prefix was applied to it); skipped lines are not
                # considered.  Confirm this matches the data files' intent.
                pat = tests[-1][1]
            if '$' in opts:
                pat = _unescape(pat)
                text = _unescape(text)
            if 'i' in opts:
                pat = '(?i)%s' % pat

            name = '%s_%d' % (basename, lineno)
            tests.append((name, pat, text, groups))
    return tests


def test_tostr(t):
    """Render one ``(name, pat, text, groups)`` tuple as a ``mat!`` line.

    A text of ``NULL`` is emitted as the empty string.
    """
    name, pat, text, groups = t
    options = [group_tostr(g) for g in groups]
    return 'mat!(match_%s, r"%s", r"%s", %s);' \
        % (name, pat, '' if text == "NULL" else text, ', '.join(options))


# Despite its name this is not a unit test; keep pytest/nose-style
# collectors from trying to run it.
test_tostr.__test__ = False


def group_tostr(g):
    """Render a group span: ``None`` or ``Some((start, end))``."""
    if g is None:
        return 'None'
    return 'Some((%d, %d))' % (g[0], g[1])


def main():
    """Parse the command line and print the generated tests."""
    parser = argparse.ArgumentParser(
        description='Generate match tests from an AT&T POSIX test file.')
    parser.add_argument(
        'files', nargs='+',
        help='A list of dat AT&T POSIX test files. See src/testdata')
    args = parser.parse_args()

    # Parse every file exactly once, and before anything is printed, so a
    # bad input fails without leaving partial output behind.
    parsed = [(f, read_tests(f)) for f in args.files]

    tpl = '''
// DO NOT EDIT. Automatically generated by 'scripts/regex-match-tests.py'
// on {date}.
'''.lstrip()
    print(tpl.format(date=str(datetime.datetime.now())))

    for f, tests in parsed:
        print('// Tests from %s' % path.basename(f))
        print_tests(tests)
        print('')


if __name__ == '__main__':
    main()