From cd59accbf56cee1fb108f7bb2e8f3823f785be1f Mon Sep 17 00:00:00 2001 From: Howard Hinnant Date: Tue, 23 Jul 2013 16:18:04 +0000 Subject: [PATCH] Bill Fisher: This patch fixes a bug where std::regex in ECMAScript mode was ignoring capture groups inside lookahead assertions. For example, matching /(?=(a))(a)/ to "a" should yield two captures: \1 = "a", \2 = "a" git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@186954 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/regex | 23 +++-- .../re.alg.match/lookahead_capture.pass.cpp | 98 +++++++++++++++++++ 2 files changed, 114 insertions(+), 7 deletions(-) create mode 100644 test/re/re.alg/re.alg.match/lookahead_capture.pass.cpp diff --git a/include/regex b/include/regex index 3ec2ff92e..bde3af7e4 100644 --- a/include/regex +++ b/include/regex @@ -2769,7 +2769,7 @@ private: void __push_end_marked_subexpression(unsigned); void __push_empty(); void __push_word_boundary(bool); - void __push_lookahead(const basic_regex&, bool); + void __push_lookahead(const basic_regex&, bool, unsigned); template bool @@ -2907,6 +2907,7 @@ class __lookahead typedef __owns_one_state<_CharT> base; basic_regex<_CharT, _Traits> __exp_; + unsigned __mexp_; bool __invert_; __lookahead(const __lookahead&); @@ -2915,8 +2916,8 @@ public: typedef _VSTD::__state<_CharT> __state; _LIBCPP_INLINE_VISIBILITY - __lookahead(const basic_regex<_CharT, _Traits>& __exp, bool __invert, __node<_CharT>* __s) - : base(__s), __exp_(__exp), __invert_(__invert) {} + __lookahead(const basic_regex<_CharT, _Traits>& __exp, bool __invert, __node<_CharT>* __s, unsigned __mexp) + : base(__s), __exp_(__exp), __invert_(__invert), __mexp_(__mexp) {} virtual void __exec(__state&) const; }; @@ -2935,6 +2936,9 @@ __lookahead<_CharT, _Traits>::__exec(__state& __s) const { __s.__do_ = __state::__accept_but_not_consume; __s.__node_ = this->first(); + for (unsigned __i = 1; __i < __m.size(); ++__i) { + __s.__sub_matches_[__mexp_ + __i - 1] = __m.__matches_[__i]; + } } else { 
@@ -4168,7 +4172,9 @@ basic_regex<_CharT, _Traits>::__parse_assertion(_ForwardIterator __first, basic_regex __exp; __exp.__flags_ = __flags_; __temp = __exp.__parse(++__temp, __last); - __push_lookahead(_VSTD::move(__exp), false); + unsigned __mexp = __exp.__marked_count_; + __push_lookahead(_VSTD::move(__exp), false, __marked_count_); + __marked_count_ += __mexp; #ifndef _LIBCPP_NO_EXCEPTIONS if (__temp == __last || *__temp != ')') throw regex_error(regex_constants::error_paren); @@ -4181,7 +4187,9 @@ basic_regex<_CharT, _Traits>::__parse_assertion(_ForwardIterator __first, basic_regex __exp; __exp.__flags_ = __flags_; __temp = __exp.__parse(++__temp, __last); - __push_lookahead(_VSTD::move(__exp), true); + unsigned __mexp = __exp.__marked_count_; + __push_lookahead(_VSTD::move(__exp), true, __marked_count_); + __marked_count_ += __mexp; #ifndef _LIBCPP_NO_EXCEPTIONS if (__temp == __last || *__temp != ')') throw regex_error(regex_constants::error_paren); @@ -4759,10 +4767,11 @@ basic_regex<_CharT, _Traits>::__start_matching_list(bool __negate) template <class _CharT, class _Traits> void basic_regex<_CharT, _Traits>::__push_lookahead(const basic_regex& __exp, - bool __invert) + bool __invert, + unsigned __mexp) { __end_->first() = new __lookahead<_CharT, _Traits>(__exp, __invert, - __end_->first()); + __end_->first(), __mexp); __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first()); } diff --git a/test/re/re.alg/re.alg.match/lookahead_capture.pass.cpp b/test/re/re.alg/re.alg.match/lookahead_capture.pass.cpp new file mode 100644 index 000000000..78e1e65c3 --- /dev/null +++ b/test/re/re.alg/re.alg.match/lookahead_capture.pass.cpp @@ -0,0 +1,98 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +// <regex> + +// template <class BidirectionalIterator, class Allocator, class charT, class traits> +// bool +// regex_match(BidirectionalIterator first, BidirectionalIterator last, +// match_results<BidirectionalIterator, Allocator>& m, +// const basic_regex<charT, traits>& e, +// regex_constants::match_flag_type flags = regex_constants::match_default); + +// std::regex in ECMAScript mode should not ignore capture groups inside lookahead assertions. +// For example, matching /(?=(a))(a)/ to "a" should yield two captures: \1 = "a", \2 = "a" + +#include <regex> +#include <cassert> + +#include "test_iterators.h" + +int main() +{ + { + std::regex re{"^(?=(.))a$"}; + assert(re.mark_count() == 1); + + std::string s{"a"}; + std::smatch m; + assert(std::regex_match(s, m, re)); + assert(m.size() == 2); + assert(m[0] == "a"); + assert(m[1] == "a"); + } + + { + std::regex re{"^(a)(?=(.))(b)$"}; + assert(re.mark_count() == 3); + + std::string s{"ab"}; + std::smatch m; + assert(std::regex_match(s, m, re)); + assert(m.size() == 4); + assert(m[0] == "ab"); + assert(m[1] == "a"); + assert(m[2] == "b"); + assert(m[3] == "b"); + } + + { + std::regex re{"^(.)(?=(.)(?=.(.)))(...)$"}; + assert(re.mark_count() == 4); + + std::string s{"abcd"}; + std::smatch m; + assert(std::regex_match(s, m, re)); + assert(m.size() == 5); + assert(m[0] == "abcd"); + assert(m[1] == "a"); + assert(m[2] == "b"); + assert(m[3] == "d"); + assert(m[4] == "bcd"); + } + + { + std::regex re{"^(a)(?!([^b]))(.c)$"}; + assert(re.mark_count() == 3); + + std::string s{"abc"}; + std::smatch m; + assert(std::regex_match(s, m, re)); + assert(m.size() == 4); + assert(m[0] == "abc"); + assert(m[1] == "a"); + assert(m[2] == ""); + assert(m[3] == "bc"); + } + + { + std::regex re{"^(?!((b)))(?=(.))(?!(abc)).b$"}; + assert(re.mark_count() == 4); + + std::string s{"ab"}; + std::smatch m; + assert(std::regex_match(s, m, re)); + assert(m.size() == 5); + assert(m[0] == "ab"); + assert(m[1] == ""); + assert(m[2] == ""); + assert(m[3] == "a"); + assert(m[4] == ""); + } +}