mirror of
https://github.com/openharmony/third_party_re2.git
synced 2026-07-01 09:20:39 -04:00
!49 merge master into master
upgrade: re2 to 2025-08-12 release Created-by: wang_jun_long Commit-by: wang_jun_long Merged-by: openharmony_ci Description: 描述 将 re2 库升级到上游 2025-08-12 版本。 变更摘要 替换核心源码文件(re2/、util/、lib/)为新版本 将 ABSL_FALLTHROUGH_INTENDED 替换为 C++17 标准 [[fallthrough]] re2.cc 中增加 NULL 模式处理 变更文件 文件 变更内容 re2/re2.h absl::optional → std::optional re2/re2.cc 增加 NULL 模式处理 re2/bitmap256.cc ABSL_FALLTHROUGH_INTENDED → [[fallthrough]] re2/dfa.cc ABSL_FALLTHROUGH_INTENDED → [[fallthrough]] re2/parse.cc ABSL_FALLTHROUGH_INTENDED → [[fallthrough]] re2/prog.cc/h 常规更新 re2/set.cc/h 常规更新 re2/walker-inl.h 常规更新 收益 减少依赖(不再需要 abseil-cpp) 包含上游安全修复和 bug 修复 更好地符合 C++17 标准规范 关联 Issue: #26 Co-Authored-By: Agent Signed-off-by: 王俊龙 wangjunlong8@h-partners.com See merge request: openharmony/third_party_re2!49
This commit is contained in:
+65
-56
@@ -3,13 +3,15 @@
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
# https://github.com/google/oss-policies-info/blob/main/foundational-cxx-support-matrix.md
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
cmake_minimum_required(VERSION 3.22)
|
||||
|
||||
project(RE2 CXX)
|
||||
include(CMakePackageConfigHelpers)
|
||||
include(CTest)
|
||||
include(GNUInstallDirs)
|
||||
|
||||
set(RE2_CXX_VERSION cxx_std_17)
|
||||
|
||||
option(BUILD_SHARED_LIBS "build shared libraries" OFF)
|
||||
option(RE2_USE_ICU "build against ICU for full Unicode properties support" OFF)
|
||||
|
||||
@@ -23,7 +25,11 @@ option(RE2_BUILD_FRAMEWORK "build RE2 as a framework" OFF)
|
||||
|
||||
# CMake seems to have no way to enable/disable testing per subproject,
|
||||
# so we provide an option similar to BUILD_TESTING, but just for RE2.
|
||||
option(RE2_BUILD_TESTING "enable testing for RE2" OFF)
|
||||
# RE2_BUILD_TESTING builds and runs tests, and builds benchmarks
|
||||
# RE2_TEST and RE2_BENCHMARK provide more fine-grained control.
|
||||
option(RE2_TEST "build and run RE2 tests" OFF)
|
||||
option(RE2_BENCHMARK "build RE2 benchmarks" OFF)
|
||||
option(RE2_BUILD_TESTING "build and run RE2 tests; build RE2 benchmarks" OFF)
|
||||
|
||||
# The pkg-config Requires: field.
|
||||
set(REQUIRES)
|
||||
@@ -34,7 +40,7 @@ set(SONAME 11)
|
||||
|
||||
set(EXTRA_TARGET_LINK_LIBRARIES)
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
|
||||
if(MSVC)
|
||||
if(MSVC_VERSION LESS 1920)
|
||||
message(FATAL_ERROR "you need Visual Studio 2019 or later")
|
||||
endif()
|
||||
@@ -133,7 +139,7 @@ set(RE2_HEADERS
|
||||
)
|
||||
|
||||
add_library(re2 ${RE2_SOURCES})
|
||||
target_compile_features(re2 PUBLIC cxx_std_14)
|
||||
target_compile_features(re2 PUBLIC ${RE2_CXX_VERSION})
|
||||
target_include_directories(re2 PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)
|
||||
# CMake gives "set_target_properties called with incorrect number of arguments."
|
||||
# errors if we don't quote ${RE2_HEADERS}, so quote it despite prevailing style.
|
||||
@@ -163,14 +169,7 @@ if(RE2_USE_ICU)
|
||||
target_link_libraries(re2 PUBLIC ICU::uc)
|
||||
endif()
|
||||
|
||||
if(RE2_BUILD_TESTING)
|
||||
if(NOT TARGET GTest::gtest)
|
||||
find_package(GTest REQUIRED)
|
||||
endif()
|
||||
if(NOT TARGET benchmark::benchmark)
|
||||
find_package(benchmark REQUIRED)
|
||||
endif()
|
||||
|
||||
if(RE2_BUILD_TESTING OR RE2_TEST OR RE2_BENCHMARK)
|
||||
set(TESTING_SOURCES
|
||||
re2/testing/backtrack.cc
|
||||
re2/testing/dump.cc
|
||||
@@ -186,55 +185,65 @@ if(RE2_BUILD_TESTING)
|
||||
if(BUILD_SHARED_LIBS AND WIN32)
|
||||
target_compile_definitions(testing PRIVATE -DRE2_BUILD_TESTING_DLL)
|
||||
endif()
|
||||
target_compile_features(testing PUBLIC cxx_std_14)
|
||||
target_compile_features(testing PUBLIC ${RE2_CXX_VERSION})
|
||||
target_link_libraries(testing PUBLIC re2 GTest::gtest)
|
||||
|
||||
set(TEST_TARGETS
|
||||
charclass_test
|
||||
compile_test
|
||||
filtered_re2_test
|
||||
mimics_pcre_test
|
||||
parse_test
|
||||
possible_match_test
|
||||
re2_test
|
||||
re2_arg_test
|
||||
regexp_test
|
||||
required_prefix_test
|
||||
search_test
|
||||
set_test
|
||||
simplify_test
|
||||
string_generator_test
|
||||
|
||||
dfa_test
|
||||
exhaustive1_test
|
||||
exhaustive2_test
|
||||
exhaustive3_test
|
||||
exhaustive_test
|
||||
random_test
|
||||
)
|
||||
|
||||
set(BENCHMARK_TARGETS
|
||||
regexp_benchmark
|
||||
)
|
||||
|
||||
foreach(target ${TEST_TARGETS})
|
||||
add_executable(${target} re2/testing/${target}.cc)
|
||||
if(BUILD_SHARED_LIBS AND WIN32)
|
||||
target_compile_definitions(${target} PRIVATE -DRE2_CONSUME_TESTING_DLL)
|
||||
if(RE2_BUILD_TESTING OR RE2_TEST)
|
||||
if(NOT TARGET GTest::gtest)
|
||||
find_package(GTest REQUIRED)
|
||||
endif()
|
||||
target_compile_features(${target} PUBLIC cxx_std_14)
|
||||
target_link_libraries(${target} PUBLIC testing GTest::gtest_main ${EXTRA_TARGET_LINK_LIBRARIES})
|
||||
add_test(NAME ${target} COMMAND ${target})
|
||||
endforeach()
|
||||
|
||||
foreach(target ${BENCHMARK_TARGETS})
|
||||
add_executable(${target} re2/testing/${target}.cc)
|
||||
if(BUILD_SHARED_LIBS AND WIN32)
|
||||
target_compile_definitions(${target} PRIVATE -DRE2_CONSUME_TESTING_DLL)
|
||||
set(TEST_TARGETS
|
||||
charclass_test
|
||||
compile_test
|
||||
filtered_re2_test
|
||||
mimics_pcre_test
|
||||
parse_test
|
||||
possible_match_test
|
||||
re2_test
|
||||
re2_arg_test
|
||||
regexp_test
|
||||
required_prefix_test
|
||||
search_test
|
||||
set_test
|
||||
simplify_test
|
||||
string_generator_test
|
||||
|
||||
dfa_test
|
||||
exhaustive1_test
|
||||
exhaustive2_test
|
||||
exhaustive3_test
|
||||
exhaustive_test
|
||||
random_test
|
||||
)
|
||||
|
||||
foreach(target ${TEST_TARGETS})
|
||||
add_executable(${target} re2/testing/${target}.cc)
|
||||
if(BUILD_SHARED_LIBS AND WIN32)
|
||||
target_compile_definitions(${target} PRIVATE -DRE2_CONSUME_TESTING_DLL)
|
||||
endif()
|
||||
target_compile_features(${target} PUBLIC ${RE2_CXX_VERSION})
|
||||
target_link_libraries(${target} PUBLIC re2 testing GTest::gtest_main ${EXTRA_TARGET_LINK_LIBRARIES})
|
||||
add_test(NAME ${target} COMMAND ${target})
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
if(RE2_BUILD_TESTING OR RE2_BENCHMARK)
|
||||
if(NOT TARGET benchmark::benchmark)
|
||||
find_package(benchmark REQUIRED)
|
||||
endif()
|
||||
target_compile_features(${target} PUBLIC cxx_std_14)
|
||||
target_link_libraries(${target} PUBLIC testing benchmark::benchmark_main ${EXTRA_TARGET_LINK_LIBRARIES})
|
||||
endforeach()
|
||||
set(BENCHMARK_TARGETS
|
||||
regexp_benchmark
|
||||
)
|
||||
foreach(target ${BENCHMARK_TARGETS})
|
||||
add_executable(${target} re2/testing/${target}.cc)
|
||||
if(BUILD_SHARED_LIBS AND WIN32)
|
||||
target_compile_definitions(${target} PRIVATE -DRE2_CONSUME_TESTING_DLL)
|
||||
endif()
|
||||
target_compile_features(${target} PUBLIC ${RE2_CXX_VERSION})
|
||||
target_link_libraries(${target} PUBLIC testing re2 benchmark::benchmark_main ${EXTRA_TARGET_LINK_LIBRARIES})
|
||||
endforeach()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
install(TARGETS re2
|
||||
|
||||
+10
-10
@@ -6,24 +6,24 @@
|
||||
|
||||
module(
|
||||
name = "re2",
|
||||
version = "2024-07-02",
|
||||
version = "2025-08-12",
|
||||
compatibility_level = 1,
|
||||
)
|
||||
|
||||
bazel_dep(name = "platforms", version = "0.0.10")
|
||||
bazel_dep(name = "apple_support", version = "1.15.1")
|
||||
bazel_dep(name = "rules_cc", version = "0.0.9")
|
||||
bazel_dep(name = "abseil-cpp", version = "20240116.2")
|
||||
bazel_dep(name = "rules_python", version = "0.33.2")
|
||||
bazel_dep(name = "pybind11_bazel", version = "2.12.0")
|
||||
bazel_dep(name = "platforms", version = "1.0.0")
|
||||
bazel_dep(name = "apple_support", version = "1.22.1")
|
||||
bazel_dep(name = "rules_cc", version = "0.1.4")
|
||||
bazel_dep(name = "abseil-cpp", version = "20250512.1")
|
||||
bazel_dep(name = "rules_python", version = "1.5.1")
|
||||
bazel_dep(name = "pybind11_bazel", version = "2.13.6")
|
||||
|
||||
# This is a temporary hack for `x64_x86_windows`.
|
||||
# TODO(junyer): Remove whenever no longer needed.
|
||||
cc_configure = use_extension("@bazel_tools//tools/cpp:cc_configure.bzl", "cc_configure_extension")
|
||||
cc_configure = use_extension("@rules_cc//cc:extensions.bzl", "cc_configure_extension", dev_dependency = True)
|
||||
use_repo(cc_configure, "local_config_cc")
|
||||
|
||||
# These dependencies will be ignored when the `re2` module is not
|
||||
# the root module (or when `--ignore_dev_dependency` is enabled).
|
||||
bazel_dep(name = "google_benchmark", version = "1.8.4", dev_dependency = True)
|
||||
bazel_dep(name = "googletest", version = "1.14.0.bcr.1", dev_dependency = True)
|
||||
bazel_dep(name = "google_benchmark", version = "1.9.4", dev_dependency = True)
|
||||
bazel_dep(name = "googletest", version = "1.17.0", dev_dependency = True)
|
||||
bazel_dep(name = "abseil-py", version = "2.1.0", dev_dependency = True)
|
||||
|
||||
@@ -49,7 +49,7 @@ CXX?=g++
|
||||
CXXFLAGS?=-O3 -g
|
||||
LDFLAGS?=
|
||||
# required
|
||||
RE2_CXXFLAGS?=-pthread -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCABSL) $(CCICU) $(CCGTEST) $(CCBENCHMARK) $(CCPCRE)
|
||||
RE2_CXXFLAGS?=-std=c++17 -pthread -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCABSL) $(CCICU) $(CCGTEST) $(CCBENCHMARK) $(CCPCRE)
|
||||
RE2_LDFLAGS?=-pthread $(LDABSL) $(LDICU) $(LDGTEST) $(LDBENCHMARK) $(LDPCRE)
|
||||
AR?=ar
|
||||
ARFLAGS?=rsc
|
||||
|
||||
+1
-1
@@ -3,7 +3,7 @@
|
||||
"Name": "re2",
|
||||
"License": "BSD 3-Clause License",
|
||||
"License File": "LICENSE",
|
||||
"Version Number": "2024-07-02",
|
||||
"Version Number": "2025-08-12",
|
||||
"Upstream URL": "https://github.com/google/re2",
|
||||
"Description": "RE2 is a fast, safe, thread-friendly alternative to backtracking regular expression engines like those used in PCRE, Perl, and Python. It is a C++ library.",
|
||||
"Dependencies": [ "abseil-cpp" ]
|
||||
|
||||
@@ -0,0 +1,259 @@
|
||||
# RE2, a regular expression library
|
||||
|
||||
RE2 is an efficient, principled regular expression library
|
||||
that has been used in production at Google and many other places
|
||||
since 2006.
|
||||
|
||||
_**Safety is RE2's primary goal.**_
|
||||
|
||||
RE2 was designed and implemented with an explicit goal of being able
|
||||
to handle regular expressions from untrusted users without risk.
|
||||
One of its primary guarantees is that the match time is linear in the
|
||||
length of the input string. It was also written with production concerns in mind:
|
||||
the parser, the compiler and the execution engines limit their memory usage
|
||||
by working within a configurable budget—failing gracefully when exhausted—and
|
||||
they avoid stack overflow by eschewing recursion.
|
||||
|
||||
It is not a goal to be faster than all other engines under all circumstances.
|
||||
Although RE2 guarantees a running time that is asymptotically linear in
|
||||
the length of the input, more complex expressions may incur larger constant factors;
|
||||
longer expressions increase the overhead required to handle those expressions safely.
|
||||
In a sense, RE2 is pessimistic where a backtracking engine is optimistic:
|
||||
A backtracking engine tests each alternative sequentially, making it fast when the first alternative is common.
|
||||
By contrast RE2 evaluates all alternatives in parallel, avoiding the performance penalty for the last alternative,
|
||||
at the cost of some overhead. This pessimism is what makes RE2 secure.
|
||||
|
||||
It is also not a goal to implement all of the features offered by Perl, PCRE and other engines.
|
||||
As a matter of principle, RE2 does not support constructs for which only backtracking solutions are known to exist.
|
||||
Thus, backreferences and look-around assertions are not supported.
|
||||
|
||||
For more information, please refer to Russ Cox's articles on regular expression theory and practice:
|
||||
|
||||
* [Regular Expression Matching Can Be Simple And Fast](https://swtch.com/~rsc/regexp/regexp1.html)
|
||||
* [Regular Expression Matching: the Virtual Machine Approach](https://swtch.com/~rsc/regexp/regexp2.html)
|
||||
* [Regular Expression Matching in the Wild](https://swtch.com/~rsc/regexp/regexp3.html)
|
||||
|
||||
### Syntax
|
||||
|
||||
In POSIX mode, RE2 accepts standard POSIX (egrep) syntax regular expressions.
|
||||
In Perl mode, RE2 accepts most Perl operators. The only excluded ones are
|
||||
those that require backtracking (and its potential for exponential runtime)
|
||||
to implement. These include backreferences (submatching is still okay)
|
||||
and generalized assertions.
|
||||
The [Syntax wiki page](https://github.com/google/re2/wiki/Syntax)
|
||||
documents the supported Perl-mode syntax in detail.
|
||||
The default is Perl mode.
|
||||
|
||||
### C++ API
|
||||
|
||||
RE2's native language is C++, although there are [ports and wrappers](#ports-and-wrappers) listed below.
|
||||
|
||||
#### Matching Interface
|
||||
|
||||
There are two basic operators:
|
||||
`RE2::FullMatch` requires the regexp to match the entire input text, and
|
||||
`RE2::PartialMatch` looks for a match for a substring of the input text,
|
||||
returning the leftmost-longest match in POSIX mode and the
|
||||
same match that Perl would have chosen in Perl mode.
|
||||
|
||||
Examples:
|
||||
|
||||
```cpp
|
||||
assert(RE2::FullMatch("hello", "h.*o"));
|
||||
assert(!RE2::FullMatch("hello", "e"));
|
||||
|
||||
assert(RE2::PartialMatch("hello", "h.*o"));
|
||||
assert(RE2::PartialMatch("hello", "e"));
|
||||
```
|
||||
|
||||
#### Submatch Extraction
|
||||
|
||||
Both matching functions take additional arguments in which submatches will be stored.
|
||||
Each argument can be a `string*`, a pointer to an integer type, or an `absl::string_view*`.
|
||||
(The `absl::string_view` type is very similar to the `std::string_view` type,
|
||||
but for historical reasons, RE2 uses the former.)
|
||||
A `string_view` is a pointer to the original input text, along with a count.
|
||||
It behaves like a string but doesn't carry its own storage.
|
||||
As with a raw pointer, when using a `string_view`
|
||||
you must be careful not to use it once the original text has been deleted or gone out of scope.
|
||||
|
||||
Examples:
|
||||
|
||||
```cpp
|
||||
// Successful parsing.
|
||||
int i;
|
||||
string s;
|
||||
assert(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
|
||||
assert(s == "ruby");
|
||||
assert(i == 1234);
|
||||
|
||||
// Fails: "ruby" cannot be parsed as an integer.
|
||||
assert(!RE2::FullMatch("ruby", "(.+)", &i));
|
||||
|
||||
// Success; does not extract the number.
|
||||
assert(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
|
||||
|
||||
// Success; skips NULL argument.
|
||||
assert(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", (void*)NULL, &i));
|
||||
|
||||
// Fails: integer overflow keeps value from being stored in i.
|
||||
assert(!RE2::FullMatch("ruby:123456789123", "(\\w+):(\\d+)", &s, &i));
|
||||
```
|
||||
|
||||
#### Pre-Compiled Regular Expressions
|
||||
|
||||
The examples above all recompile the regular expression on each call.
|
||||
Instead, you can compile it once to an RE2 object and reuse that object for each call.
|
||||
|
||||
Example:
|
||||
```cpp
|
||||
RE2 re("(\\w+):(\\d+)");
|
||||
assert(re.ok()); // compiled; if not, see re.error();
|
||||
|
||||
assert(RE2::FullMatch("ruby:1234", re, &s, &i));
|
||||
assert(RE2::FullMatch("ruby:1234", re, &s));
|
||||
assert(RE2::FullMatch("ruby:1234", re, (void*)NULL, &i));
|
||||
assert(!RE2::FullMatch("ruby:123456789123", re, &s, &i));
|
||||
```
|
||||
|
||||
#### Options
|
||||
|
||||
The constructor takes an optional second argument that can
|
||||
be used to change RE2's default options.
|
||||
For example, `RE2::Quiet` silences the error messages that are
|
||||
usually printed when a regular expression fails to parse:
|
||||
|
||||
```cpp
|
||||
RE2 re("(ab", RE2::Quiet); // don't write to stderr for parser failure
|
||||
assert(!re.ok()); // can check re.error() for details
|
||||
```
|
||||
|
||||
Other useful predefined options are `Latin1` (disable UTF-8) and `POSIX`
|
||||
(use POSIX syntax and leftmost longest matching).
|
||||
|
||||
You can also declare your own `RE2::Options` object and then configure it as you like.
|
||||
See the [header](https://github.com/google/re2/blob/main/re2/re2.h) for the full set of options.
|
||||
|
||||
#### Unicode Normalization
|
||||
|
||||
RE2 operates on Unicode code points: it makes no attempt at normalization.
|
||||
For example, the regular expression /ü/ (U+00FC, u with diaeresis)
|
||||
does not match the input "ü" (U+0075 U+0308, u followed by combining diaeresis).
|
||||
Normalization is a long, involved topic.
|
||||
The simplest solution, if you need such matches, is to normalize both the regular expressions
|
||||
and the input in a preprocessing step before using RE2.
|
||||
For more details on the general topic, see <https://www.unicode.org/reports/tr15/>.
|
||||
|
||||
#### Additional Tips and Tricks
|
||||
|
||||
For advanced usage, like constructing your own argument lists,
|
||||
or using RE2 as a lexer, or parsing hex, octal, and C-radix numbers,
|
||||
see [re2.h](https://github.com/google/re2/blob/main/re2/re2.h).
|
||||
|
||||
### Installation
|
||||
|
||||
RE2 can be built and installed using GNU make, CMake, or Bazel.
|
||||
The simplest installation instructions are:
|
||||
|
||||
make
|
||||
make test
|
||||
make benchmark
|
||||
make install
|
||||
make testinstall
|
||||
|
||||
Building RE2 requires a C++17 compiler and the [Abseil](https://github.com/abseil/abseil-cpp) library.
|
||||
Building the tests and benchmarks requires
|
||||
[GoogleTest](https://github.com/google/googletest)
|
||||
and [Benchmark](https://github.com/google/benchmark).
|
||||
To obtain those:
|
||||
|
||||
- Linux: `apt install libabsl-dev libgtest-dev libbenchmark-dev`
|
||||
- macOS: `brew install abseil googletest google-benchmark pkg-config-wrapper`
|
||||
- Windows: `vcpkg install abseil gtest benchmark` \
|
||||
or `vcpkg add port abseil gtest benchmark`
|
||||
|
||||
Once those are installed, the build has to be able to find them.
|
||||
If the standard Makefile has trouble, then switching to CMake can help:
|
||||
|
||||
rm -rf build
|
||||
cmake -DRE2_TEST=ON -DRE2_BENCHMARK=ON -S . -B build
|
||||
cd build
|
||||
make
|
||||
make test
|
||||
make install
|
||||
|
||||
When using CMake, with benchmarks enabled, `make test` builds and runs test binaries
|
||||
and builds a `regexp_benchmark` binary but does not run it.
|
||||
If you don't need the tests or benchmarks at all, you can omit the corresponding `-D` arguments,
|
||||
and then you don't need the GoogleTest or Benchmark dependencies either.
|
||||
|
||||
Another useful option is `-DRE2_USE_ICU=ON`, which adds a dependency on the
|
||||
ICU Unicode library but also extends the list of property names available in the `\p` and `\P` patterns.
|
||||
|
||||
CMake can also be used to generate Visual Studio and Xcode projects, as well as
|
||||
Cygwin, MinGW, and MSYS makefiles.
|
||||
|
||||
- Visual Studio users: You need Visual Studio 2019 or later.
|
||||
- Cygwin users: You must run CMake from the Cygwin command line, not the Windows command line.
|
||||
|
||||
If you are adding RE2 to your own CMake project,
|
||||
CMake has two ways to use a dependency: `add_subdirectory()`,
|
||||
which is when the dependency's **_sources_** are in a subdirectory of your project;
|
||||
and `find_package()`, which is when the dependency's
|
||||
**_binaries_** have been built and installed somewhere on your system.
|
||||
The Abseil documentation walks through the former [here](https://abseil.io/docs/cpp/quickstart-cmake)
|
||||
versus the latter [here](https://abseil.io/docs/cpp/tools/cmake-installs).
|
||||
Once you get Abseil working, getting RE2 working will be a very similar process and,
|
||||
either way, `target_link_libraries(… re2::re2)` should Just Work™.
|
||||
|
||||
If you are using [Bazel](https://bazel.io), it will handle the dependencies for you,
|
||||
although you still need to download Bazel,
|
||||
which you can do with [Bazelisk](https://github.com/bazelbuild/bazelisk).
|
||||
|
||||
go install github.com/bazelbuild/bazelisk@latest
|
||||
# or on mac: brew install bazelisk
|
||||
|
||||
bazelisk build :all
|
||||
bazelisk test :all
|
||||
|
||||
If you are using RE2 from another project, you need to make sure you are
|
||||
using at least C++17.
|
||||
See the RE2 [.bazelrc](https://github.com/google/re2/blob/main/.bazelrc) file for an example.
|
||||
|
||||
### Ports and Wrappers
|
||||
|
||||
RE2 is implemented in C++.
|
||||
|
||||
The official Python wrapper is [in the `python` directory](https://github.com/google/re2/tree/main/python)
|
||||
and [published on PyPI as `google-re2`](https://pypi.org/project/google-re2/).
|
||||
Note that there is also a PyPI `re2` but it is not by the RE2 authors and is unmaintained. Use `google-re2`.
|
||||
|
||||
There are also other unofficial wrappers:
|
||||
|
||||
- A C wrapper is at <https://github.com/marcomaggi/cre2/>.
|
||||
- A D wrapper is at <https://github.com/ShigekiKarita/re2d/> and [on DUB](https://code.dlang.org/packages/re2d).
|
||||
- An Erlang wrapper is at <https://github.com/dukesoferl/re2/> and [on Hex](https://hex.pm/packages/re2).
|
||||
- An Inferno wrapper is at <https://github.com/powerman/inferno-re2/>.
|
||||
- A Node.js wrapper is at <https://github.com/uhop/node-re2/> and [on NPM](https://www.npmjs.com/package/re2).
|
||||
- An OCaml wrapper is at <https://github.com/janestreet/re2/> and [on OPAM](https://opam.ocaml.org/packages/re2/).
|
||||
- A Perl wrapper is at <https://github.com/dgl/re-engine-RE2/> and [on CPAN](https://metacpan.org/pod/re::engine::RE2).
|
||||
- An R wrapper is at <https://github.com/girishji/re2/> and [on CRAN](https://cran.r-project.org/web/packages/re2/index.html).
|
||||
- A Ruby wrapper is at <https://github.com/mudge/re2/> and on RubyGems (rubygems.org).
|
||||
- A WebAssembly wrapper is at <https://github.com/google/re2-wasm/> and on NPM (npmjs.com).
|
||||
|
||||
[RE2J](https://github.com/google/re2j) is a port of the RE2 C++ code to pure Java,
|
||||
and [RE2JS](https://github.com/le0pard/re2js) is a port of RE2J to JavaScript.
|
||||
|
||||
The [Go `regexp` package](https://go.dev/pkg/regexp)
|
||||
and [Rust `regex` crate](https://docs.rs/regex)
|
||||
do not share code with RE2, but they follow the same principles,
|
||||
accept the same syntax, and provide the same efficiency guarantees.
|
||||
|
||||
### Contact
|
||||
|
||||
The [issue tracker](https://github.com/google/re2/issues) is the best place for discussions.
|
||||
|
||||
There is a [mailing list](https://groups.google.com/group/re2-dev) for keeping up with code changes.
|
||||
|
||||
Please read the [contribution guide](https://github.com/google/re2/wiki/Contribute) before sending changes.
|
||||
In particular, note that RE2 does not use GitHub pull requests.
|
||||
@@ -70,3 +70,11 @@ platform(
|
||||
"@platforms//os:windows",
|
||||
],
|
||||
)
|
||||
|
||||
platform(
|
||||
name = "arm64_windows",
|
||||
constraint_values = [
|
||||
"@platforms//cpu:arm64",
|
||||
"@platforms//os:windows",
|
||||
],
|
||||
)
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
../LICENSE
|
||||
+1
-1
@@ -64,7 +64,7 @@ def compile(pattern, options=None):
|
||||
if options:
|
||||
raise error('pattern is already compiled, so '
|
||||
'options may not be specified')
|
||||
pattern = pattern._pattern
|
||||
return pattern
|
||||
options = options or Options()
|
||||
values = tuple(getattr(options, name) for name in Options.NAMES)
|
||||
return _Regexp._make(pattern, values)
|
||||
|
||||
@@ -315,6 +315,12 @@ class Re2RegexpTest(ReRegexpTest):
|
||||
re2.purge()
|
||||
self.assertEqual(re2._Regexp._make.cache_info().currsize, 0)
|
||||
|
||||
def test_options(self):
|
||||
opt = re2.Options()
|
||||
opt.case_sensitive = False
|
||||
r = re2.compile('test', opt)
|
||||
self.assertIsNotNone(r.search('TEST'))
|
||||
self.assertIsNotNone(re2.search(r, 'TEST'))
|
||||
|
||||
class Re2EscapeTest(parameterized.TestCase):
|
||||
"""Contains tests that apply to the re2 module only.
|
||||
|
||||
+3
-3
@@ -129,7 +129,7 @@ try:
|
||||
|
||||
setuptools.setup(
|
||||
name='google-re2',
|
||||
version='1.1.20240702',
|
||||
version='1.1.20250812',
|
||||
description='RE2 Python bindings',
|
||||
long_description=long_description,
|
||||
long_description_content_type='text/plain',
|
||||
@@ -144,11 +144,11 @@ try:
|
||||
'Intended Audience :: Developers',
|
||||
'License :: OSI Approved :: BSD License',
|
||||
'Programming Language :: C++',
|
||||
'Programming Language :: Python :: 3.8',
|
||||
'Programming Language :: Python :: 3.9',
|
||||
],
|
||||
options=options(),
|
||||
cmdclass={'build_ext': BuildExt},
|
||||
python_requires='~=3.8',
|
||||
python_requires='~=3.9',
|
||||
)
|
||||
except:
|
||||
raise
|
||||
|
||||
+3
-4
@@ -6,7 +6,6 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "absl/base/attributes.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
|
||||
namespace re2 {
|
||||
@@ -27,15 +26,15 @@ int Bitmap256::FindNextSetBit(int c) const {
|
||||
case 1:
|
||||
if (words_[1] != 0)
|
||||
return (1 * 64) + FindLSBSet(words_[1]);
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
case 2:
|
||||
if (words_[2] != 0)
|
||||
return (2 * 64) + FindLSBSet(words_[2]);
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
case 3:
|
||||
if (words_[3] != 0)
|
||||
return (3 * 64) + FindLSBSet(words_[3]);
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
|
||||
+1
-2
@@ -34,7 +34,6 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/attributes.h"
|
||||
#include "absl/base/call_once.h"
|
||||
#include "absl/base/thread_annotations.h"
|
||||
#include "absl/container/flat_hash_map.h"
|
||||
@@ -650,7 +649,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
|
||||
absl::FPrintF(stderr, " -> FullMatchState\n");
|
||||
return FullMatchState;
|
||||
}
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
default:
|
||||
// Record iff id is the head of its list, which must
|
||||
// be the case if id-1 is the last of *its* list. :)
|
||||
|
||||
+3
-4
@@ -24,7 +24,6 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/attributes.h"
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/ascii.h"
|
||||
@@ -304,7 +303,7 @@ Rune ApplyFold(const CaseFold* f, Rune r) {
|
||||
case EvenOddSkip: // even <-> odd but only applies to every other
|
||||
if ((r - f->lo) % 2)
|
||||
return r;
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
case EvenOdd: // even <-> odd
|
||||
if (r%2 == 0)
|
||||
return r + 1;
|
||||
@@ -313,7 +312,7 @@ Rune ApplyFold(const CaseFold* f, Rune r) {
|
||||
case OddEvenSkip: // odd <-> even but only applies to every other
|
||||
if ((r - f->lo) % 2)
|
||||
return r;
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
case OddEven: // odd <-> even
|
||||
if (r%2 == 1)
|
||||
return r + 1;
|
||||
@@ -1525,7 +1524,7 @@ static bool ParseEscape(absl::string_view* s, Rune* rp,
|
||||
// Single non-zero octal digit is a backreference; not supported.
|
||||
if (s->empty() || (*s)[0] < '0' || (*s)[0] > '7')
|
||||
goto BadEscape;
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
case '0':
|
||||
// consume up to three octal digits; already have one.
|
||||
code = c - '0';
|
||||
|
||||
+1
-2
@@ -15,7 +15,6 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/attributes.h"
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
@@ -820,7 +819,7 @@ void Prog::EmitList(int root, SparseArray<int>* rootmap,
|
||||
flat->back().set_opcode(kInstAltMatch);
|
||||
flat->back().set_out(static_cast<int>(flat->size()));
|
||||
flat->back().out1_ = static_cast<uint32_t>(flat->size())+1;
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
|
||||
case kInstAlt:
|
||||
stk->push_back(ip->out1());
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
@@ -143,6 +143,11 @@ static std::string trunc(absl::string_view pattern) {
|
||||
|
||||
|
||||
RE2::RE2(const char* pattern) {
|
||||
// If absl::string_view becomes an alias for std::string_view,
|
||||
// it will stop allowing NULL to be converted.
|
||||
// Handle NULL explicitly to keep callers working no matter what.
|
||||
if (pattern == NULL)
|
||||
pattern = "";
|
||||
Init(pattern, DefaultOptions);
|
||||
}
|
||||
|
||||
|
||||
@@ -87,7 +87,7 @@
|
||||
// ABSL_CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
|
||||
//
|
||||
// Example: extracts "ruby" into "s" and no value into "i"
|
||||
// absl::optional<int> i;
|
||||
// std::optional<int> i;
|
||||
// std::string s;
|
||||
// ABSL_CHECK(RE2::FullMatch("ruby", "(\\w+)(?::(\\d+))?", &s, &i));
|
||||
//
|
||||
@@ -213,13 +213,13 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/call_once.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "absl/types/optional.h"
|
||||
#include "re2/stringpiece.h"
|
||||
|
||||
#if defined(__APPLE__)
|
||||
@@ -385,7 +385,7 @@ class RE2 {
|
||||
// type, or one of:
|
||||
// std::string (matched piece is copied to string)
|
||||
// absl::string_view (string_view is mutated to point to matched piece)
|
||||
// absl::optional<T> (T is a supported numeric or string type as above)
|
||||
// std::optional<T> (T is a supported numeric or string type as above)
|
||||
// T ("bool T::ParseFrom(const char*, size_t)" must exist)
|
||||
// (void*)NULL (the corresponding matched sub-pattern is not copied)
|
||||
//
|
||||
@@ -406,7 +406,7 @@ class RE2 {
|
||||
// int number;
|
||||
// RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number);
|
||||
//
|
||||
// Use absl::optional<int> instead to handle this case correctly.
|
||||
// Use std::optional<int> instead to handle this case correctly.
|
||||
template <typename... A>
|
||||
static bool FullMatch(absl::string_view text, const RE2& re, A&&... a) {
|
||||
return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...);
|
||||
@@ -842,12 +842,12 @@ template <> struct Parse4ary<unsigned long long> : public std::true_type {};
|
||||
template <typename T>
|
||||
bool Parse(const char* str, size_t n, T* dest, int radix);
|
||||
|
||||
// Support absl::optional<T> for all T with a stock parser.
|
||||
template <typename T> struct Parse3ary<absl::optional<T>> : public Parse3ary<T> {};
|
||||
template <typename T> struct Parse4ary<absl::optional<T>> : public Parse4ary<T> {};
|
||||
// Support std::optional<T> for all T with a stock parser.
|
||||
template <typename T> struct Parse3ary<std::optional<T>> : public Parse3ary<T> {};
|
||||
template <typename T> struct Parse4ary<std::optional<T>> : public Parse4ary<T> {};
|
||||
|
||||
template <typename T>
|
||||
bool Parse(const char* str, size_t n, absl::optional<T>* dest) {
|
||||
bool Parse(const char* str, size_t n, std::optional<T>* dest) {
|
||||
if (str == NULL) {
|
||||
if (dest != NULL)
|
||||
dest->reset();
|
||||
@@ -863,7 +863,7 @@ bool Parse(const char* str, size_t n, absl::optional<T>* dest) {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool Parse(const char* str, size_t n, absl::optional<T>* dest, int radix) {
|
||||
bool Parse(const char* str, size_t n, std::optional<T>* dest, int radix) {
|
||||
if (str == NULL) {
|
||||
if (dest != NULL)
|
||||
dest->reset();
|
||||
|
||||
@@ -55,6 +55,12 @@ RE2::Set& RE2::Set::operator=(Set&& other) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
int RE2::Set::Size() const {
|
||||
if (!compiled_)
|
||||
return static_cast<int>(elem_.size());
|
||||
return size_;
|
||||
}
|
||||
|
||||
int RE2::Set::Add(absl::string_view pattern, std::string* error) {
|
||||
if (compiled_) {
|
||||
ABSL_LOG(DFATAL) << "RE2::Set::Add() called after compiling";
|
||||
|
||||
@@ -53,6 +53,10 @@ class RE2::Set {
|
||||
// the error message from the parser.
|
||||
int Add(absl::string_view pattern, std::string* error);
|
||||
|
||||
// Returns the number of patterns in the set.
|
||||
// Can be called before or after Compile().
|
||||
int Size() const;
|
||||
|
||||
// Compiles the set in preparation for matching.
|
||||
// Returns false if the compiler runs out of memory.
|
||||
// Add() must not be called again after Compile().
|
||||
@@ -62,6 +66,7 @@ class RE2::Set {
|
||||
// Returns true if text matches at least one of the regexps in the set.
|
||||
// Fills v (if not NULL) with the indices of the matching regexps.
|
||||
// Callers must not expect v to be sorted.
|
||||
// The indices are in the half-open interval [0, Size()).
|
||||
bool Match(absl::string_view text, std::vector<int>* v) const;
|
||||
|
||||
// As above, but populates error_info (if not NULL) when none of the regexps
|
||||
|
||||
@@ -9,10 +9,10 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <optional>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/types/optional.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "re2/re2.h"
|
||||
|
||||
@@ -158,7 +158,7 @@ TEST(RE2ArgTest, ParseFromTest) {
|
||||
}
|
||||
|
||||
TEST(RE2ArgTest, OptionalDoubleTest) {
|
||||
absl::optional<double> opt;
|
||||
std::optional<double> opt;
|
||||
RE2::Arg arg(&opt);
|
||||
EXPECT_TRUE(arg.Parse(NULL, 0));
|
||||
EXPECT_FALSE(opt.has_value());
|
||||
@@ -169,7 +169,7 @@ TEST(RE2ArgTest, OptionalDoubleTest) {
|
||||
}
|
||||
|
||||
TEST(RE2ArgTest, OptionalIntWithCRadixTest) {
|
||||
absl::optional<int> opt;
|
||||
std::optional<int> opt;
|
||||
RE2::Arg arg = RE2::CRadix(&opt);
|
||||
EXPECT_TRUE(arg.Parse(NULL, 0));
|
||||
EXPECT_FALSE(opt.has_value());
|
||||
|
||||
@@ -1688,4 +1688,12 @@ TEST(RE2, Issue477) {
|
||||
ASSERT_EQ(s, "\x61\x63");
|
||||
}
|
||||
|
||||
TEST(RE2, InitNULL) {
|
||||
// RE2::RE2 accepts NULL. Make sure it keeps doing that.
|
||||
RE2 re(NULL);
|
||||
ASSERT_TRUE(re.ok());
|
||||
ASSERT_TRUE(RE2::FullMatch("", re));
|
||||
ASSERT_TRUE(RE2::FullMatch("", NULL));
|
||||
}
|
||||
|
||||
} // namespace re2
|
||||
|
||||
@@ -17,10 +17,15 @@ namespace re2 {
|
||||
TEST(Set, Unanchored) {
|
||||
RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
|
||||
|
||||
ASSERT_EQ(s.Size(), 0);
|
||||
ASSERT_EQ(s.Add("foo", NULL), 0);
|
||||
ASSERT_EQ(s.Size(), 1);
|
||||
ASSERT_EQ(s.Add("(", NULL), -1);
|
||||
ASSERT_EQ(s.Size(), 1);
|
||||
ASSERT_EQ(s.Add("bar", NULL), 1);
|
||||
ASSERT_EQ(s.Size(), 2);
|
||||
ASSERT_EQ(s.Compile(), true);
|
||||
ASSERT_EQ(s.Size(), 2);
|
||||
|
||||
ASSERT_EQ(s.Match("foobar", NULL), true);
|
||||
ASSERT_EQ(s.Match("fooba", NULL), true);
|
||||
|
||||
+1
-1
@@ -192,7 +192,7 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
|
||||
s->child_args = &s->child_arg;
|
||||
else if (re->nsub_ > 1)
|
||||
s->child_args = new T[re->nsub_];
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
[[fallthrough]];
|
||||
}
|
||||
default: {
|
||||
if (re->nsub_ > 0) {
|
||||
|
||||
Reference in New Issue
Block a user