mirror of
https://github.com/openharmony/third_party_pcre2.git
synced 2026-07-01 10:05:26 -04:00
feat: update pcre2 to version of 10.46
Signed-off-by: Haryslee <lihao189@huawei.com>
This commit is contained in:
@@ -13,7 +13,7 @@ action("copy_generic_files") {
|
||||
inputs = [
|
||||
"$PCRE2_LIB_DIR/src/config.h.generic",
|
||||
"$PCRE2_LIB_DIR/src/pcre2.h.generic",
|
||||
"$PCRE2_LIB_DIR/src/pcre2_chartables.c",
|
||||
"$PCRE2_LIB_DIR/src/pcre2_chartables.c.dist",
|
||||
]
|
||||
outputs = [
|
||||
"${target_gen_dir}/src/pcre2_chartables.c",
|
||||
@@ -51,6 +51,7 @@ pcre2_sources = [
|
||||
"$PCRE2_LIB_DIR/src/pcre2_auto_possess.c",
|
||||
"$PCRE2_LIB_DIR/src/pcre2_chkdint.c",
|
||||
"$PCRE2_LIB_DIR/src/pcre2_compile.c",
|
||||
"$PCRE2_LIB_DIR/src/pcre2_compile_class.c",
|
||||
"$PCRE2_LIB_DIR/src/pcre2_config.c",
|
||||
"$PCRE2_LIB_DIR/src/pcre2_context.c",
|
||||
"$PCRE2_LIB_DIR/src/pcre2_convert.c",
|
||||
@@ -97,7 +98,7 @@ ohos_shared_library("libpcre2") {
|
||||
"ramdisk",
|
||||
"updater",
|
||||
]
|
||||
license_file = "$PCRE2_LIB_DIR/LICENCE"
|
||||
license_file = "$PCRE2_LIB_DIR/LICENCE.md"
|
||||
innerapi_tags = [
|
||||
"platformsdk_indirect",
|
||||
"chipsetsdk_sp_indirect",
|
||||
@@ -119,7 +120,7 @@ ohos_static_library("libpcre2_static") {
|
||||
"-DPCRE2_CODE_UNIT_WIDTH=8",
|
||||
"-w",
|
||||
]
|
||||
license_file = "$PCRE2_LIB_DIR/LICENCE"
|
||||
license_file = "$PCRE2_LIB_DIR/LICENCE.md"
|
||||
part_name = "pcre2"
|
||||
subsystem_name = "thirdparty"
|
||||
}
|
||||
@@ -139,7 +140,7 @@ ohos_static_library("libpcre2_static_16") {
|
||||
"-DPCRE2_CODE_UNIT_WIDTH=16",
|
||||
"-w",
|
||||
]
|
||||
license_file = "$PCRE2_LIB_DIR/LICENCE"
|
||||
license_file = "$PCRE2_LIB_DIR/LICENCE.md"
|
||||
part_name = "pcre2"
|
||||
subsystem_name = "thirdparty"
|
||||
}
|
||||
|
||||
@@ -112,6 +112,8 @@
|
||||
<filteritem type="filepath" name="pcre2/maint/pcre2_chartables.c.non-standard" desc="InvalidCopyright"/>
|
||||
<filteritem type="filepath" name="pcre2/autogen.sh" desc="InvalidCopyright"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/utf8.c" desc="InvalidCopyright"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/CleanTxt" desc="InvalidCopyright"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/132html" desc="InvalidCopyright"/>
|
||||
<filteritem type="filepath" name="pcre2/pcre2_fuzzer.dict" desc="InvalidCopyright"/>
|
||||
<filteritem type="filepath" name="pcre2/pcre2_fuzzer.options" desc="InvalidCopyright"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/ucptestdata/testinput1" desc="InvalidCopyright"/>
|
||||
@@ -123,9 +125,27 @@
|
||||
<filteritem type="filepath" name="pcre2/WORKSPACE.bazel" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/BUILD.bazel" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/.bazelrc" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/.gitmodules" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="BUILD.gn" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/pcre2_fuzzer_32.dict" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/GenerateUcpTables.py" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/UpdateDates.py" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/GenerateTest.py" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/CheckMan" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/RunManifestTest" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/RunPerlTest" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/CheckTxt" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/PrepareRelease" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/manifest-cmakeinstall-windows" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/manifest-cmakeinstall-linux" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/UpdateRelease.py" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/Detrail" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/manifest-cmakeinstall-macos" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/manifest-makeinstall-freebsd" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/UpdateCommon.py" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/manifest-makeinstall-linux" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/RunManifestTest.ps1" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/maint/manifest-tarball" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/pcre2_fuzzer_32.options" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="copy_generic_files.sh" desc="Copyright Header Invalid: NULL"/>
|
||||
<filteritem type="filepath" name="pcre2/build.zig" desc="Copyright Header Invalid: NULL"/>
|
||||
@@ -172,6 +192,9 @@
|
||||
<filteritem type="filepath" name="pcre2/testdata/testoutput10" desc="already checked"/>
|
||||
<filteritem type="filepath" name="pcre2/testdata/testbtables" desc="already checked"/>
|
||||
<filteritem type="filepath" name="pcre2/testdata/testinput10" desc="already checked"/>
|
||||
<filteritem type="filepath" name="pcre2/testdata/grepoutputUN" desc="already checked"/>
|
||||
<filteritem type="filepath" name="pcre2/testdata/grepinputBad8" desc="already checked"/>
|
||||
<filteritem type="filepath" name="pcre2/testdata/grepinputBad8_Trail" desc="already checked"/>
|
||||
</filefilter>
|
||||
</filefilterlist>
|
||||
|
||||
|
||||
+3
-3
@@ -1,9 +1,9 @@
|
||||
[
|
||||
{
|
||||
"Name": "PCRE2",
|
||||
"License": "BSD 3-Clause License WITH pcre2 exception",
|
||||
"License File": "pcre2/LICENSE",
|
||||
"Version Number": "pcre2-10.44",
|
||||
"License": "BSD-3-Clause WITH PCRE2-exception",
|
||||
"License File": "pcre2/LICENSE.md",
|
||||
"Version Number": "pcre2-10.46",
|
||||
"Owner": "maliang34@huawei.com",
|
||||
"Upstream URL": "https://github.com/PhilipHazel/pcre2.git",
|
||||
"Description": "pcre2 is a re-working of the original PCRE1 library to provide an entirely new API."
|
||||
|
||||
@@ -12,7 +12,7 @@ function check_md5_and_copy() {
|
||||
fi
|
||||
}
|
||||
|
||||
mkdir -P $pcre2_lib_dir/src
|
||||
mkdir -P $pcre2_gen_dir/src
|
||||
check_md5_and_copy $pcre2_lib_dir/src/config.h.generic $pcre2_gen_dir/src/config.h
|
||||
check_md5_and_copy $pcre2_lib_dir/src/pcre2.h.generic $pcre2_gen_dir/src/pcre2.h
|
||||
check_md5_and_copy $pcre2_lib_dir/src/pcre2_chartables.c.dist $pcre2_gen_dir/src/pcre2_chartables.c
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
common --experimental_enable_bzlmod
|
||||
build --incompatible_enable_cc_toolchain_resolution
|
||||
build --incompatible_strict_action_env
|
||||
@@ -0,0 +1,2 @@
|
||||
testdata/* -text
|
||||
maint/manifest-* -text
|
||||
Vendored
+49
@@ -0,0 +1,49 @@
|
||||
codecov:
|
||||
strict_yaml_branch: default
|
||||
require_ci_to_pass: false
|
||||
notify:
|
||||
wait_for_ci: false
|
||||
notify_error: true
|
||||
|
||||
coverage:
|
||||
range: 75..90
|
||||
round: nearest
|
||||
precision: 2
|
||||
|
||||
status:
|
||||
project: false
|
||||
|
||||
patch:
|
||||
default:
|
||||
target: 100%
|
||||
threshold: 5%
|
||||
|
||||
github_checks:
|
||||
annotations: false
|
||||
|
||||
comment: false
|
||||
# layout: "condensed_header, condensed_files, condensed_footer"
|
||||
# hide_project_coverage: true
|
||||
# require_head: true
|
||||
# require_base: true
|
||||
# require_changes: "coverage_drop OR uncovered_patch"
|
||||
|
||||
component_management:
|
||||
individual_components:
|
||||
- component_id: library
|
||||
name: "Core library"
|
||||
paths:
|
||||
- '!src/(pcre2test|pcre2grep|pcre2_jit_test|pcre2posix_test|pcre2_printint)\.c'
|
||||
statuses:
|
||||
- type: project
|
||||
target: auto
|
||||
threshold: 0.5%
|
||||
|
||||
- component_id: test_binaries
|
||||
name: "Test binaries"
|
||||
paths:
|
||||
- 'src/(pcre2test|pcre2grep|pcre2_jit_test|pcre2posix_test|pcre2_printint)\.c'
|
||||
statuses:
|
||||
- type: project
|
||||
target: auto
|
||||
threshold: 2%
|
||||
Vendored
+6
@@ -0,0 +1,6 @@
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: github-actions
|
||||
directory: /
|
||||
schedule:
|
||||
interval: monthly
|
||||
Vendored
+281
-50
@@ -1,23 +1,37 @@
|
||||
|
||||
name: Build
|
||||
on: [push, pull_request]
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches: [ master, "release/**" ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
linux:
|
||||
name: Linux
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Setup
|
||||
run: |
|
||||
sudo apt-get -qq update
|
||||
sudo apt-get -qq install zlib1g-dev libbz2-dev
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Prepare
|
||||
run: ./autogen.sh
|
||||
|
||||
- name: Configure
|
||||
run: ./configure CPPFLAGS='-Wall -Wextra' --enable-jit --enable-pcre2-16 --enable-pcre2-32
|
||||
run: ./configure --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-pcre2grep-libz --enable-pcre2grep-libbz2
|
||||
|
||||
- name: Build
|
||||
run: make -j2
|
||||
run: make -j3 CPPFLAGS='-Wall -Wextra -Werror'
|
||||
|
||||
- name: Test (main test script)
|
||||
run: ./RunTest
|
||||
@@ -31,25 +45,32 @@ jobs:
|
||||
- name: Test (pcre2posix program)
|
||||
run: ./pcre2posix_test -v
|
||||
|
||||
- name: Install
|
||||
run: |
|
||||
make install "DESTDIR=`pwd`/install-dir"
|
||||
maint/RunManifestTest install-dir maint/manifest-makeinstall-linux
|
||||
|
||||
alpine:
|
||||
name: alpine
|
||||
runs-on: ubuntu-latest
|
||||
container: alpine
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup
|
||||
run: apk add --no-cache automake autoconf gcc libtool make musl-dev #musl-locales
|
||||
run: apk add --no-cache automake autoconf gcc libtool make musl-dev git zlib zlib-dev bzip2 bzip2-dev #musl-locales
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Prepare
|
||||
run: ./autogen.sh
|
||||
|
||||
- name: Configure
|
||||
run: ./configure CPPFLAGS='-Wall -Wextra' --enable-jit --enable-pcre2-16 --enable-pcre2-32
|
||||
run: ./configure --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-pcre2grep-libz --enable-pcre2grep-libbz2
|
||||
|
||||
- name: Build
|
||||
run: make -j2
|
||||
run: make -j3 CPPFLAGS='-Wall -Wextra -Werror'
|
||||
|
||||
- name: Test (main test script)
|
||||
run: ./RunTest
|
||||
@@ -63,54 +84,264 @@ jobs:
|
||||
- name: Test (pcre2posix program)
|
||||
run: ./pcre2posix_test -v
|
||||
|
||||
- name: Install
|
||||
run: |
|
||||
make install "DESTDIR=`pwd`/install-dir"
|
||||
maint/RunManifestTest install-dir maint/manifest-makeinstall-linux
|
||||
|
||||
macos:
|
||||
name: macOS universal
|
||||
runs-on: macos-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Configure
|
||||
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DCMAKE_OSX_ARCHITECTURES='arm64;x86_64' -DCMAKE_C_FLAGS='-Wall -Wextra' -B build
|
||||
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2_SUPPORT_LIBZ=ON -DPCRE2_SUPPORT_LIBBZ2=ON -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON -DCMAKE_OSX_ARCHITECTURES='arm64;x86_64' -DCMAKE_C_FLAGS='-Wall -Wextra' -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_BUILD_TYPE=Release -B build
|
||||
|
||||
- name: Build
|
||||
run: cmake --build build
|
||||
|
||||
- name: Test (main test script)
|
||||
run: |
|
||||
cd build
|
||||
../RunTest
|
||||
|
||||
- name: Test (JIT test program)
|
||||
run: |
|
||||
cd build
|
||||
./pcre2_jit_test
|
||||
|
||||
- name: Test (pcre2grep test script)
|
||||
run: |
|
||||
cd build
|
||||
../RunGrepTest
|
||||
|
||||
- name: Test (pcre2posix program)
|
||||
run: |
|
||||
cd build
|
||||
./pcre2posix_test -v
|
||||
|
||||
windows:
|
||||
name: 32bit Windows
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Configure
|
||||
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DCMAKE_IGNORE_PREFIX_PATH=C:/Strawberry/c -B build -A Win32
|
||||
|
||||
- name: Build
|
||||
run: cmake --build build
|
||||
run: cd build && make -j3
|
||||
|
||||
- name: Test
|
||||
run: cd build && ctest -j3 --output-on-failure
|
||||
|
||||
- name: Install
|
||||
run: |
|
||||
cd build\Debug
|
||||
..\..\RunTest.bat
|
||||
./pcre2posix_test -v
|
||||
cd build
|
||||
cmake --install . --prefix install-dir
|
||||
../maint/RunManifestTest install-dir ../maint/manifest-cmakeinstall-macos
|
||||
|
||||
windows:
|
||||
name: Windows
|
||||
runs-on: windows-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
arch: ["Win32", "x64"]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Configure
|
||||
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -B build -A ${{ matrix.arch }}
|
||||
|
||||
- name: Build
|
||||
run: cmake --build build --config Release
|
||||
|
||||
- name: Test
|
||||
run: cd build && ctest -C Release -j3 --output-on-failure
|
||||
|
||||
- name: Install
|
||||
run: |
|
||||
cd build
|
||||
cmake --install . --config Release --prefix install-dir
|
||||
../maint/RunManifestTest.ps1 install-dir ../maint/manifest-cmakeinstall-windows
|
||||
|
||||
freebsd:
|
||||
name: FreeBSD
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'pull_request'
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Prepare
|
||||
run: ./autogen.sh
|
||||
|
||||
- name: Build & test
|
||||
uses: vmactions/freebsd-vm@debf37ca7b7fa40e19c542ef7ba30d6054a706a4 # v1.1.5
|
||||
with:
|
||||
usesh: true
|
||||
run: |
|
||||
set -e
|
||||
./configure --enable-jit --enable-pcre2-16 --enable-pcre2-32
|
||||
make -j3 CPPFLAGS='-Wall -Wextra -Werror'
|
||||
make check
|
||||
|
||||
make install "DESTDIR=`pwd`/install-dir"
|
||||
maint/RunManifestTest install-dir maint/manifest-makeinstall-freebsd
|
||||
|
||||
solaris:
|
||||
name: Solaris
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'pull_request'
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Transfer Oracle Studio certificates
|
||||
env:
|
||||
PKG_ORACLE_COM_CERTIFICATE_PEM: ${{ secrets.PKG_ORACLE_COM_CERTIFICATE_PEM }}
|
||||
PKG_ORACLE_COM_KEY_PEM: ${{ secrets.PKG_ORACLE_COM_KEY_PEM }}
|
||||
run: |
|
||||
printenv PKG_ORACLE_COM_CERTIFICATE_PEM > pkg.oracle.com.certificate.pem
|
||||
printenv PKG_ORACLE_COM_KEY_PEM > pkg.oracle.com.key.pem
|
||||
|
||||
- name: Prepare
|
||||
run: ./autogen.sh
|
||||
|
||||
- name: Build & test
|
||||
uses: vmactions/solaris-vm@a89b9438868c70db27e41625f0a5de6ff5e90809 # v1.1.0
|
||||
with:
|
||||
usesh: true
|
||||
# Seriously! Solaris is the only OS to actually ship without a C
|
||||
# compiler, and not even to provide a simple download to get one!
|
||||
# You have to actually register with Oracle to get an X.509
|
||||
# certificate before you can even download their compiler. Whatever.
|
||||
prepare: |
|
||||
cp "$GITHUB_WORKSPACE/pkg.oracle.com.key.pem" /root/pkg.oracle.com.key.pem
|
||||
cp "$GITHUB_WORKSPACE/pkg.oracle.com.certificate.pem" /root/pkg.oracle.com.certificate.pem
|
||||
sudo pkg set-publisher \
|
||||
-k /root/pkg.oracle.com.key.pem \
|
||||
-c /root/pkg.oracle.com.certificate.pem \
|
||||
-G "*" -g https://pkg.oracle.com/solarisstudio/release solarisstudio
|
||||
pkg install developer/build/make system/header
|
||||
pkg install --accept developerstudio-126/cc
|
||||
|
||||
run: |
|
||||
set -e
|
||||
PATH=/opt/developerstudio12.6/bin:"$PATH"
|
||||
export PATH
|
||||
CC=cc
|
||||
export CC
|
||||
|
||||
./configure --enable-jit --enable-pcre2-16 --enable-pcre2-32
|
||||
make CPPFLAGS='-Wall -Wextra -Werror'
|
||||
make check
|
||||
|
||||
make install "DESTDIR=`pwd`/install-dir"
|
||||
maint/RunManifestTest install-dir maint/manifest-makeinstall-linux
|
||||
|
||||
distcheck:
|
||||
name: Build & verify distribution
|
||||
runs-on: ubuntu-24.04 # TODO: Update to ubuntu-latest when that switches to 24.04
|
||||
permissions:
|
||||
id-token: write # Needed to make calls to the Sigstore service
|
||||
attestations: write # Needed to write the attestation to GitHub's database
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
./autogen.sh
|
||||
|
||||
# Workaround for incorrect filesystem permissions on /usr/share/aclocal, which
|
||||
# causes the m4 macros to be copied with incorrect permissions.
|
||||
chmod u=rw,go=r m4/*.m4
|
||||
|
||||
- name: Configure
|
||||
run: ./configure
|
||||
|
||||
- name: Distcheck
|
||||
run: make distcheck -j3
|
||||
|
||||
- name: Manifest
|
||||
run: |
|
||||
mkdir tarball-dir
|
||||
tar -C tarball-dir -xzf pcre2-*.tar.gz
|
||||
# Budge the directory, so we don't bake the version number into the
|
||||
# `manifest-tarball` file:
|
||||
mv tarball-dir/pcre2-* tarball-dir/pcre2-SNAPSHOT
|
||||
maint/RunManifestTest tarball-dir maint/manifest-tarball
|
||||
|
||||
- name: Upload to GitHub artifacts
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
|
||||
with:
|
||||
name: "Distribution release"
|
||||
path: |
|
||||
pcre2-*.tar.bz2
|
||||
pcre2-*.tar.gz
|
||||
pcre2-*.zip
|
||||
if-no-files-found: error
|
||||
|
||||
- name: Attest
|
||||
uses: actions/attest-build-provenance@7668571508540a607bdfd90a87a560489fe372eb # v2.1.0
|
||||
if: |
|
||||
github.event_name != 'pull_request' &&
|
||||
(startsWith(github.ref, 'refs/heads/release/') ||
|
||||
startsWith(github.ref, 'refs/tags/pcre2-'))
|
||||
with:
|
||||
subject-path: 'pcre2-*.tar.bz2, pcre2-*.tar.gz, pcre2-*.zip'
|
||||
|
||||
coverage:
|
||||
name: Code coverage
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Setup
|
||||
run: |
|
||||
sudo apt-get -qq update
|
||||
sudo apt-get -qq install zlib1g-dev libbz2-dev libedit-dev
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Configure
|
||||
run: CC="clang -fprofile-instr-generate -fcoverage-mapping" cmake -DCMAKE_BUILD_TYPE=Debug -DPCRE2_DEBUG=OFF -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2_SUPPORT_LIBZ=ON -DPCRE2_SUPPORT_LIBBZ2=ON -DPCRE2_SUPPORT_LIBEDIT=ON -DPCRE2_SUPPORT_LIBREADLINE=OFF -B build
|
||||
|
||||
- name: Build
|
||||
run: cd build && make -j3
|
||||
|
||||
- name: Test
|
||||
run: cd build && LLVM_PROFILE_FILE="coverage-%m.profraw" ctest -j1 --output-on-failure
|
||||
|
||||
- name: Report
|
||||
run: |
|
||||
LLVM_VER=`clang --version | head -n1 | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+' | cut -d. -f1`
|
||||
echo "Using LLVM version $LLVM_VER"
|
||||
|
||||
# Merge the profiles gathered
|
||||
cd build
|
||||
llvm-profdata-$LLVM_VER merge -sparse coverage-*.profraw -o coverage.profdata
|
||||
|
||||
# Output HTML, for archiving and browsing later
|
||||
llvm-cov-$LLVM_VER show \
|
||||
-format=html -output-dir=coverage-report -show-line-counts-or-regions -show-branches=percent \
|
||||
-instr-profile=coverage.profdata \
|
||||
./pcre2test -object ./pcre2grep -object ./pcre2posix_test -object ./pcre2_jit_test \
|
||||
../src/ ./
|
||||
|
||||
# Output LCOV-compatible output, for downstream tools
|
||||
llvm-cov-$LLVM_VER export \
|
||||
-format=lcov \
|
||||
-instr-profile=coverage.profdata \
|
||||
./pcre2test -object ./pcre2grep -object ./pcre2posix_test -object ./pcre2_jit_test \
|
||||
../src/ ./ \
|
||||
> ./coverage-lcov.info
|
||||
|
||||
# Output text summary to build log
|
||||
echo '```' > "$GITHUB_STEP_SUMMARY"
|
||||
llvm-cov-$LLVM_VER report \
|
||||
-instr-profile=coverage.profdata \
|
||||
./pcre2test -object ./pcre2grep -object ./pcre2posix_test -object ./pcre2_jit_test \
|
||||
../src/ ./ \
|
||||
>> "$GITHUB_STEP_SUMMARY"
|
||||
echo '```' >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
- name: Upload report to GitHub artifacts
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
|
||||
with:
|
||||
name: "Coverage report"
|
||||
path: './build/coverage-report'
|
||||
if-no-files-found: error
|
||||
|
||||
- name: Upload report to Codecov
|
||||
uses: codecov/codecov-action@7f8b4b4bde536c465e797be725718b88c5d95e0e # v5.1.1
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
fail_ci_if_error: true
|
||||
disable_search: true
|
||||
files: ./build/coverage-lcov.info
|
||||
|
||||
Vendored
+11
-4
@@ -1,23 +1,30 @@
|
||||
name: CIFuzz
|
||||
on: [pull_request]
|
||||
on:
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
Fuzzing:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Build Fuzzers
|
||||
id: build
|
||||
uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
|
||||
uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@3d38acd485bc848e33396e7523b9a4f2aff9027e # master
|
||||
with:
|
||||
oss-fuzz-project-name: 'pcre2'
|
||||
dry-run: false
|
||||
- name: Run Fuzzers
|
||||
uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
|
||||
uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@3d38acd485bc848e33396e7523b9a4f2aff9027e # master
|
||||
with:
|
||||
oss-fuzz-project-name: 'pcre2'
|
||||
fuzz-seconds: 300
|
||||
dry-run: false
|
||||
- name: Upload Crash
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
|
||||
if: failure() && steps.build.outcome == 'success'
|
||||
with:
|
||||
name: artifacts
|
||||
|
||||
+71
@@ -0,0 +1,71 @@
|
||||
name: Clang Static Analyzer
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches: [ master, "release/**" ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
Analyze:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
permissions:
|
||||
# Needed to upload the results to code-scanning dashboard.
|
||||
security-events: write
|
||||
contents: read
|
||||
|
||||
env:
|
||||
# The @microsoft/sarif-multitool tool actually uses DotnetCore, which in
|
||||
# turn aborts when it finds that GitHub's CI machine doesn't have ICU.
|
||||
# Just turn off localisation. A future version of the ubuntu-24.04 or
|
||||
# ubuntu-latest runners might not need this workaround.
|
||||
DOTNET_SYSTEM_GLOBALIZATION_INVARIANT: 1
|
||||
|
||||
steps:
|
||||
- name: Setup
|
||||
run: |
|
||||
sudo apt-get -qq update
|
||||
sudo apt-get -qq install ninja-build clang-tools
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Configure
|
||||
run: |
|
||||
mkdir build
|
||||
cd build
|
||||
scan-build cmake -G Ninja -DPCRE2_SUPPORT_JIT=ON -DCMAKE_BUILD_TYPE=Debug ..
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
# Inefficiently run clang scan twice; once to generate HTML, and secondly
|
||||
# to generate SARIF files. Ideally we would have some way to scan once and
|
||||
# generate one of those outputs from the other, but I don't know a good way
|
||||
# to do that.
|
||||
cd build
|
||||
scan-build -o clang-report/ ninja
|
||||
|
||||
ninja clean
|
||||
scan-build -o clang-sarif -sarif ninja
|
||||
# Work around issue in GitHub's SARIF ingestion - merge all SARIF files into one
|
||||
npx -y @microsoft/sarif-multitool merge clang-sarif/*/*.sarif --output-file=clang.sarif
|
||||
|
||||
# Upload the browsable HTML report as an artifact.
|
||||
- name: Upload report
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
|
||||
with:
|
||||
name: "Clang Static Analyzer report"
|
||||
path: './build/clang-report'
|
||||
|
||||
# Upload the results to GitHub's code scanning dashboard.
|
||||
- name: "Upload to code-scanning"
|
||||
uses: github/codeql-action/upload-sarif@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6
|
||||
with:
|
||||
sarif_file: build/clang.sarif
|
||||
category: clang-analyzer
|
||||
Vendored
+13
-9
@@ -13,39 +13,43 @@ name: "CodeQL"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
branches: [ master, "release/**" ]
|
||||
pull_request:
|
||||
# The branches below must be a subset of the branches above
|
||||
branches: [ master ]
|
||||
schedule:
|
||||
- cron: '27 6 * * 4'
|
||||
|
||||
# Declare default permissions as read only.
|
||||
permissions: read-all
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
permissions:
|
||||
# Needed to upload the results to code-scanning dashboard.
|
||||
security-events: write
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: [ 'cpp', 'python' ]
|
||||
language: [ 'cpp' ]
|
||||
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
|
||||
# Learn more about CodeQL language support at https://git.io/codeql-language-support
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
uses: github/codeql-action/init@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
@@ -56,7 +60,7 @@ jobs:
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v2
|
||||
uses: github/codeql-action/autobuild@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6
|
||||
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 https://git.io/JvXDl
|
||||
@@ -70,4 +74,4 @@ jobs:
|
||||
# make release
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
uses: github/codeql-action/analyze@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6
|
||||
|
||||
Vendored
+312
-15
@@ -1,24 +1,36 @@
|
||||
name: Dev
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches: [ master, "release/**" ]
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
branches: [ master ]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
|
||||
canary:
|
||||
name: gcc
|
||||
# Tests with: Debug & assertions; link-size=4; libedit
|
||||
name: GCC -O0
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Setup
|
||||
run: |
|
||||
sudo apt-get -qq update
|
||||
sudo apt-get -qq install libedit-dev
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Prepare
|
||||
run: ./autogen.sh
|
||||
|
||||
- name: Configure
|
||||
run: ./configure CC='gcc -O0 -fsanitize=undefined,address -fsanitize-undefined-trap-on-error' CPPFLAGS='-Wall -Wextra -Werror -Wno-error=unused-but-set-parameter' --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-debug --with-link-size=4
|
||||
run: ./configure CC='gcc -fsanitize=undefined,address -fsanitize-undefined-trap-on-error' CFLAGS='-O0 -Wall -Wextra -Werror -Wno-error=unused-but-set-parameter' --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-debug --enable-pcre2test-libedit --with-link-size=4
|
||||
|
||||
- name: Build
|
||||
run: make -j3
|
||||
@@ -36,23 +48,32 @@ jobs:
|
||||
run: ./pcre2posix_test -v
|
||||
|
||||
dragon:
|
||||
name: clang
|
||||
# Tests with: clang address/undefined-behaviour sanitizers (ASan/UBSan); link-size=3
|
||||
name: Clang
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
opt: ["-O0", "-O2"]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Prepare
|
||||
run: ./autogen.sh
|
||||
|
||||
- name: Configure
|
||||
run: ./configure CC='clang -fsanitize=undefined,address,integer -fno-sanitize=unsigned-integer-overflow' CPPFLAGS='-Wall -Wextra -Werror -Wno-error=unused-but-set-parameter -Wno-error=deprecated-declarations -Wno-error=incompatible-library-redeclaration' --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-debug --with-link-size=3
|
||||
run: ./configure CC='clang -fsanitize=undefined,address,integer -fno-sanitize-recover=undefined,integer -fno-sanitize=unsigned-integer-overflow,unsigned-shift-base,function' CFLAGS='${{ matrix.opt }} -Wall -Wextra -Werror -Wno-error=unused-but-set-parameter -Wno-error=deprecated-declarations -Wno-error=incompatible-library-redeclaration -Wno-error=incompatible-pointer-types-discards-qualifiers' --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-debug --with-link-size=3
|
||||
|
||||
- name: Build
|
||||
run: make -j3
|
||||
|
||||
- name: Test (main test script)
|
||||
run: ./RunTest
|
||||
run: |
|
||||
ulimit -S -s 49152 # Raise stack limit; ASAN with -O0 is very stack-hungry
|
||||
./RunTest
|
||||
|
||||
- name: Test (JIT test program)
|
||||
run: ./pcre2_jit_test
|
||||
@@ -63,19 +84,295 @@ jobs:
|
||||
- name: Test (pcre2posix program)
|
||||
run: ./pcre2posix_test -v
|
||||
|
||||
greatawk:
|
||||
# Tests with: GCC, -O3, oldest supported Ubuntu (in non-extended support)
|
||||
name: GCC -O3
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Configure
|
||||
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON -DPCRE2_DEBUG=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_BUILD_TYPE=Release -B build
|
||||
|
||||
- name: Build
|
||||
run: cd build && make -j3
|
||||
|
||||
- name: Test
|
||||
run: cd build && ctest -j3 --output-on-failure
|
||||
|
||||
- name: Install
|
||||
run: |
|
||||
cd build
|
||||
cmake --install . --prefix install-dir
|
||||
../maint/RunManifestTest install-dir ../maint/manifest-cmakeinstall-linux
|
||||
|
||||
dodo:
|
||||
# Tests with: Autoconf on oldest supported Ubuntu (in non-extended support)
|
||||
name: GCC -Os, old Autotools
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Prepare
|
||||
run: ./autogen.sh
|
||||
|
||||
- name: Configure
|
||||
run: ./configure CFLAGS='-Os -Wall -Wextra -Werror -Wno-error=unused-but-set-parameter' --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-debug
|
||||
|
||||
- name: Build
|
||||
run: make -j3
|
||||
|
||||
- name: Test
|
||||
run: make check
|
||||
|
||||
- name: Install
|
||||
run: |
|
||||
make install "DESTDIR=`pwd`/install-dir"
|
||||
maint/RunManifestTest install-dir maint/manifest-makeinstall-linux
|
||||
|
||||
wasp:
|
||||
# Tests with: French locale; oldest supported CMake; no JIT; -Os; libreadline
|
||||
name: GCC -Os, CMake+ninja, no JIT
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
CMAKE_VER: "3.15.7"
|
||||
steps:
|
||||
- name: Setup
|
||||
run: |
|
||||
sudo apt-get -qq update
|
||||
sudo apt-get -qq install language-pack-fr ninja-build libreadline-dev
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Cache CMake
|
||||
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
|
||||
with:
|
||||
key: cmake-${{ env.CMAKE_VER }}-Linux-x86_64
|
||||
path: cmake-${{ env.CMAKE_VER }}-Linux-x86_64.tar.gz
|
||||
|
||||
- name: Install CMake
|
||||
run: |
|
||||
[ -f cmake-${CMAKE_VER}-Linux-x86_64.tar.gz ] || curl -L -S -O "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VER}/cmake-${CMAKE_VER}-Linux-x86_64.tar.gz"
|
||||
tar -xz -f cmake-${CMAKE_VER}-Linux-x86_64.tar.gz
|
||||
realpath "cmake-${CMAKE_VER}-Linux-x86_64/bin" >> "$GITHUB_PATH"
|
||||
|
||||
- name: Configure
|
||||
run: |
|
||||
cmake --version | grep "version ${CMAKE_VER}" || (echo "CMake version mismatch" && exit 1)
|
||||
CC='clang' CFLAGS='-fsanitize=undefined,address,integer -fno-sanitize-recover=undefined,integer -fno-sanitize=unsigned-shift-base,function -pedantic -Wall -Wextra -Wpedantic -Wdeclaration-after-statement -Wshadow -Wno-overlength-strings -Werror -Wno-error=incompatible-pointer-types-discards-qualifiers' cmake -G Ninja -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON -DPCRE2_DEBUG=ON -DPCRE2_SUPPORT_LIBREADLINE=ON -DCMAKE_BUILD_TYPE=MinSizeRel -B build
|
||||
|
||||
- name: Build
|
||||
run: ninja -C build
|
||||
|
||||
- name: Test
|
||||
run: cd build && ctest -j3 --output-on-failure
|
||||
|
||||
- name: Install
|
||||
run: |
|
||||
cd build
|
||||
cmake --install . --prefix install-dir
|
||||
../maint/RunManifestTest install-dir ../maint/manifest-cmakeinstall-linux
|
||||
|
||||
bat:
|
||||
# Tests with: MSVC 32-bit, and a variety of CMake options
|
||||
name: Windows (Win32)
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Configure
|
||||
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2GREP_SUPPORT_CALLOUT_FORK=OFF -DPCRE2_DEBUG=ON -DPCRE2_NEWLINE=ANYCRLF -DPCRE2_STATIC_PIC=ON -DPCRE2_STATIC_RUNTIME=ON -DPCRE2_SUPPORT_BSR_ANYCRLF=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -B build -A Win32
|
||||
|
||||
- name: Build
|
||||
run: cmake --build build --config RelWithDebInfo
|
||||
|
||||
- name: Test
|
||||
run: cd build && ctest -C RelWithDebInfo -j3 --output-on-failure
|
||||
|
||||
pterodactyl:
|
||||
# Tests with: MSVC 64-bit, Debug, shared libraries
|
||||
name: Windows (x64)
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Configure
|
||||
run: cmake -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2_DEBUG=ON -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=OFF -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -B build -A x64
|
||||
|
||||
- name: Build
|
||||
run: cmake --build build --config Debug
|
||||
|
||||
- name: Test
|
||||
run: cd build && ctest -C Debug -j3 --output-on-failure
|
||||
|
||||
bigbird:
|
||||
# Job to execute ManyConfigTests
|
||||
name: manyconfig
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Prepare
|
||||
- name: Setup
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y valgrind
|
||||
sudo apt-get -qq update
|
||||
sudo apt-get -qq install -y valgrind
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Run
|
||||
run: |
|
||||
./autogen.sh
|
||||
./maint/ManyConfigTests
|
||||
|
||||
camel:
|
||||
# Job to execute RunPerlTest
|
||||
name: perl
|
||||
runs-on: ubuntu-latest
|
||||
container: perl:devel
|
||||
steps:
|
||||
- name: Setup
|
||||
run: |
|
||||
apt-get -qq update
|
||||
apt-get -qq install cmake ninja-build
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: yes
|
||||
|
||||
- name: Configure
|
||||
run: cmake -G Ninja -B build -DPCRE2_BUILD_PCRE2_8=OFF -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2_NEVER_BACKSLASH_C=ON -DPCRE2_DEBUG=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo
|
||||
|
||||
- name: Build
|
||||
run: ninja -C build
|
||||
|
||||
- name: Test
|
||||
run: |
|
||||
cd build
|
||||
ctest -j3 --output-on-failure
|
||||
cd ..
|
||||
perl -v
|
||||
maint/RunPerlTest
|
||||
|
||||
chaffinch:
|
||||
# Job to verify that the CMake "unity" build (single-file / jumbo build) passes.
|
||||
# If this fails, it's usually because two different files define some file-static
|
||||
# functions or macros which collide.
|
||||
name: CMake unity build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Configure
|
||||
run: cmake -DCMAKE_UNITY_BUILD=ON -DCMAKE_UNITY_BUILD_BATCH_SIZE=0 -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2_DEBUG=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_BUILD_TYPE=Release -B build
|
||||
|
||||
- name: Build
|
||||
run: cd build && make -j3
|
||||
|
||||
- name: Test
|
||||
run: cd build && ctest -j3 --output-on-failure
|
||||
|
||||
zebrilus:
|
||||
# Tests with: Zig compiler
|
||||
name: Zig
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'pull_request'
|
||||
steps:
|
||||
- name: Setup
|
||||
run: |
|
||||
sudo snap install zig --classic --beta
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Build
|
||||
run: zig build
|
||||
|
||||
- name: Test
|
||||
run: |
|
||||
# Zig does something weird with the stack - it uses more space than the
|
||||
# equivalent plain C program.
|
||||
ulimit -S -s 16384
|
||||
srcdir=`pwd` pcre2test=`pwd`/zig-out/bin/pcre2test ./RunTest
|
||||
|
||||
bazel:
|
||||
# Tests with: Bazel build system
|
||||
name: Bazel
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: ["ubuntu-latest", "windows-latest"]
|
||||
runs-on: ${{ matrix.os }}
|
||||
if: github.event_name != 'pull_request'
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Build
|
||||
run: bazelisk build //... --enable_runfiles --incompatible_strict_action_env
|
||||
|
||||
- name: Test
|
||||
run: bazelisk test //... --enable_runfiles --incompatible_strict_action_env --test_output=all
|
||||
|
||||
heron:
|
||||
# Job to verify that the tasks performed by PrepareRelease have been done. It is
|
||||
# the committer's responsibility (currently) to run PrepareRelease themselves when
|
||||
# making a PR, so that everything is kept in-sync.
|
||||
name: Check autogenerated file freshness
|
||||
runs-on: ubuntu-24.04 # TODO: Update to ubuntu-latest when that switches to 24.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: false
|
||||
fetch-depth: 0
|
||||
fetch-tags: false
|
||||
|
||||
- name: PrepareRelease
|
||||
run: maint/PrepareRelease
|
||||
|
||||
- name: 'Rebuild *.h.generic'
|
||||
run: |
|
||||
./autogen.sh && ./configure
|
||||
rm -f src/*.generic
|
||||
make src/config.h.generic src/pcre2.h.generic
|
||||
|
||||
# Workaround for incorrect filesystem permissions on /usr/share/aclocal, which
|
||||
# causes the m4 macros to be copied with incorrect permissions.
|
||||
chmod u=rw,go=r m4/*.m4
|
||||
|
||||
- name: Working directory clean
|
||||
run: |
|
||||
if [ -n "`git status --porcelain`" ] ; then
|
||||
(
|
||||
echo "Dirty working tree! Affected files:"
|
||||
git status --porcelain || true
|
||||
echo ""
|
||||
echo "Diff:"
|
||||
git diff || true
|
||||
) >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
+11
-8
@@ -1,5 +1,6 @@
|
||||
name: Scorecards supply-chain security
|
||||
on:
|
||||
workflow_dispatch:
|
||||
# Only the default branch is supported.
|
||||
branch_protection_rule:
|
||||
schedule:
|
||||
@@ -7,33 +8,34 @@ on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
|
||||
# Declare default permissions as read only.
|
||||
permissions: read-all
|
||||
|
||||
jobs:
|
||||
analysis:
|
||||
name: Scorecards analysis
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
permissions:
|
||||
# Needed to upload the results to code-scanning dashboard.
|
||||
security-events: write
|
||||
# Needed to publish the results to Scorecard's service.
|
||||
id-token: write
|
||||
actions: read
|
||||
contents: read
|
||||
|
||||
steps:
|
||||
- name: "Checkout code"
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
submodules: true
|
||||
persist-credentials: false
|
||||
|
||||
- name: "Run analysis"
|
||||
uses: ossf/scorecard-action@3e15ea8318eee9b333819ec77a36aca8d39df13e # tag=v1.1.1
|
||||
uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # tag=v2.4.0
|
||||
with:
|
||||
results_file: results.sarif
|
||||
results_format: sarif
|
||||
# Read-only PAT token. To create it,
|
||||
# follow the steps in https://github.com/ossf/scorecard-action#pat-token-creation.
|
||||
repo_token: ${{ secrets.SCORECARD_READ_TOKEN }}
|
||||
# repo_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
# Publish the results to enable scorecard badges. For more details, see
|
||||
# https://github.com/ossf/scorecard-action#publishing-results.
|
||||
# For private repositories, `publish_results` will automatically be set to `false`,
|
||||
@@ -42,7 +44,7 @@ jobs:
|
||||
|
||||
# Upload the results as artifacts (optional).
|
||||
- name: "Upload artifact"
|
||||
uses: actions/upload-artifact@82c141cc518b40d92cc801eee768e7aafc9c2fa2 # v2.3.1
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
|
||||
with:
|
||||
name: SARIF file
|
||||
path: results.sarif
|
||||
@@ -50,6 +52,7 @@ jobs:
|
||||
|
||||
# Upload the results to GitHub's code scanning dashboard.
|
||||
- name: "Upload to code-scanning"
|
||||
uses: github/codeql-action/upload-sarif@5f532563584d71fdef14ee64d17bafb34f751ce5 # v1.0.26
|
||||
uses: github/codeql-action/upload-sarif@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6
|
||||
with:
|
||||
sarif_file: results.sarif
|
||||
category: ossf-scorecard
|
||||
|
||||
+6
-5
@@ -1,6 +1,7 @@
|
||||
# Public .gitignore file for PCRE2
|
||||
|
||||
build/
|
||||
build-*/
|
||||
|
||||
*.a
|
||||
*.gcda
|
||||
@@ -17,7 +18,6 @@ __pycache__
|
||||
.deps
|
||||
.libs
|
||||
|
||||
INSTALL
|
||||
Makefile
|
||||
Makefile.in
|
||||
RunGrepTest.log
|
||||
@@ -74,6 +74,7 @@ testtemp1grep
|
||||
testtemp2
|
||||
testtemp2grep
|
||||
testtry
|
||||
testtry2
|
||||
testtrygrep
|
||||
testSinput
|
||||
testbtables
|
||||
@@ -86,19 +87,19 @@ m4/ltsugar.m4
|
||||
m4/ltversion.m4
|
||||
m4/lt~obsolete.m4
|
||||
|
||||
maint/ucptest
|
||||
maint/utf8
|
||||
|
||||
src/.deps
|
||||
src/.dirstamp
|
||||
src/config.h
|
||||
src/config.h.in
|
||||
src/pcre2.h
|
||||
src/pcre2_chartables.c
|
||||
src/stamp-h1
|
||||
|
||||
/bazel-*
|
||||
*.bazel.lock
|
||||
|
||||
zig-out/
|
||||
zig-cache/
|
||||
.zig-cache/
|
||||
|
||||
# End
|
||||
# End
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
[submodule "deps/sljit"]
|
||||
path = deps/sljit
|
||||
url = https://github.com/zherczeg/sljit.git
|
||||
@@ -1,36 +0,0 @@
|
||||
THE MAIN PCRE2 LIBRARY CODE
|
||||
---------------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
|
||||
Retired from University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2024 University of Cambridge
|
||||
All rights reserved
|
||||
|
||||
|
||||
PCRE2 JUST-IN-TIME COMPILATION SUPPORT
|
||||
--------------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2024 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
STACK-LESS JUST-IN-TIME COMPILER
|
||||
--------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2024 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
####
|
||||
@@ -0,0 +1,200 @@
|
||||
PCRE2 Authorship and Contributors
|
||||
=================================
|
||||
|
||||
COPYRIGHT
|
||||
---------
|
||||
|
||||
Please see the file [LICENCE](./LICENCE.md) in the PCRE2 distribution for
|
||||
copyright details.
|
||||
|
||||
|
||||
MAINTAINERS
|
||||
-----------
|
||||
|
||||
The PCRE and PCRE2 libraries were authored and maintained by Philip Hazel.
|
||||
|
||||
Since 2024, the contributors with administrator access to the project are now
|
||||
Nicholas Wilson and Zoltán Herczeg. See the file [SECURITY](./SECURITY.md) for
|
||||
GPG keys.
|
||||
|
||||
Both administrators are volunteers acting in a personal capacity.
|
||||
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Role</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>
|
||||
|
||||
Nicholas Wilson<br/>
|
||||
`nicholas@nicholaswilson.me.uk`<br/>
|
||||
Currently of Microsoft Research Cambridge, UK
|
||||
|
||||
</td>
|
||||
<td>
|
||||
|
||||
* General project administration & maintenance
|
||||
* Release management
|
||||
* Code maintenance
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
|
||||
Zoltán Herczeg<br/>
|
||||
`hzmester@freemail.hu`<br/>
|
||||
Currently of the University of Szeged, Hungary
|
||||
|
||||
</td>
|
||||
<td>
|
||||
|
||||
* Code maintenance
|
||||
* Ownership of `sljit` and PCRE2's JIT
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
|
||||
CONTRIBUTORS
|
||||
------------
|
||||
|
||||
Many others have participated and contributed to PCRE2 over its history.
|
||||
|
||||
The maintainers are grateful for all contributions and participation over the
|
||||
years. We apologise for any names we have forgotten.
|
||||
|
||||
We are especially grateful to Philip Hazel, creator of PCRE and PCRE2, and
|
||||
maintainer from 1997 to 2024.
|
||||
|
||||
All names listed alphabetically.
|
||||
|
||||
### Contributors to PCRE2
|
||||
|
||||
This list includes names up until the PCRE2 10.44 release. New names will be
|
||||
added from the Git history on each release.
|
||||
|
||||
Scott Bell
|
||||
Carlo Marcelo Arenas Belón
|
||||
Edward Betts
|
||||
Jan-Willem Blokland
|
||||
Ross Burton
|
||||
Dmitry Cherniachenko
|
||||
Alexey Chupahin
|
||||
Jessica Clarke
|
||||
Alejandro Colomar
|
||||
Jeremie Courreges-Anglas
|
||||
Addison Crump
|
||||
Alex Dowad
|
||||
Daniel Engberg
|
||||
Daniel Richard G
|
||||
David Gaussmann
|
||||
Andrey Gorbachev
|
||||
Jordan Griege
|
||||
Jason Hood
|
||||
Bumsu Hyeon
|
||||
Roy Ivy
|
||||
Martin Joerg
|
||||
Guillem Jover
|
||||
Ralf Junker
|
||||
Ayesh Karunaratne
|
||||
Michael Kaufmann
|
||||
Yunho Kim
|
||||
Joshua Kinard
|
||||
David Korczynski
|
||||
Uwe Korn
|
||||
Jonas Kvinge
|
||||
Kristian Larsson
|
||||
Kai Lu
|
||||
Behzod Mansurov
|
||||
B. Scott Michel
|
||||
Nathan Moinvaziri
|
||||
Mike Munday
|
||||
Marc Mutz
|
||||
Fabio Pagani
|
||||
Christian Persch
|
||||
Tristan Ross
|
||||
William A Rowe Jr
|
||||
David Seifert
|
||||
Yaakov Selkowitz
|
||||
Rich Siegel
|
||||
Karl Skomski
|
||||
Maciej Sroczyński
|
||||
Wolfgang Stöggl
|
||||
Thomas Tempelmann
|
||||
Greg Thain
|
||||
Lucas Trzesniewski
|
||||
Theodore Tsirpanis
|
||||
Matthew Vernon
|
||||
Rémi Verschelde
|
||||
Thomas Voss
|
||||
Ezekiel Warren
|
||||
Carl Weaver
|
||||
Chris Wilson
|
||||
Amin Yahyaabadi
|
||||
Joe Zhang
|
||||
|
||||
### Contributors to PCRE1
|
||||
|
||||
These people contributed either by sending patches or reporting serious issues.
|
||||
|
||||
Irfan Adilovic
|
||||
Alexander Barkov
|
||||
Daniel Bergström
|
||||
David Burgess
|
||||
Ross Burton
|
||||
David Byron
|
||||
Fred Cox
|
||||
Christian Ehrlicher
|
||||
Tom Fortmann
|
||||
Lionel Fourquaux
|
||||
Mike Frysinger
|
||||
Daniel Richard G
|
||||
Dair Gran
|
||||
"Graycode" (Red Hat Product Security)
|
||||
Viktor Griph
|
||||
Wen Guanxing
|
||||
Robin Houston
|
||||
Martin Jerabek
|
||||
Peter Kankowski
|
||||
Stephen Kelly
|
||||
Yunho Kim
|
||||
Joshua Kinard
|
||||
Carsten Klein
|
||||
Evgeny Kotkov
|
||||
Ronald Landheer-Cieslak
|
||||
Alan Lehotsky
|
||||
Dmitry V. Levin
|
||||
Nuno Lopes
|
||||
Kai Lu
|
||||
Giuseppe Maxia
|
||||
Dan Mooney
|
||||
Marc Mutz
|
||||
Markus Oberhumer
|
||||
Sheri Pierce
|
||||
Petr Pisar
|
||||
Ari Pollak
|
||||
Bob Rossi
|
||||
Ruiger Rill
|
||||
Michael Shigorin
|
||||
Rich Siegel
|
||||
Craig Silverstein (C++ wrapper)
|
||||
Karl Skomski
|
||||
Paul Sokolovsky
|
||||
Stan Switzer
|
||||
Ian Taylor
|
||||
Mark Tetrode
|
||||
Jeff Trawick
|
||||
Steven Van Ingelgem
|
||||
Lawrence Velazquez
|
||||
Jiong Wang
|
||||
Stefan Weber
|
||||
Chris Wilson
|
||||
|
||||
Thanks go to Jeffrey Friedl for testing and debugging assistance.
|
||||
+105
-8
@@ -1,5 +1,6 @@
|
||||
load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
|
||||
load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
|
||||
load("@bazel_skylib//rules:native_binary.bzl", "native_test")
|
||||
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
|
||||
|
||||
copy_file(
|
||||
name = "config_h_generic",
|
||||
@@ -19,7 +20,7 @@ copy_file(
|
||||
out = "src/pcre2_chartables.c",
|
||||
)
|
||||
|
||||
# Removed src/pcre2_ucptables.c below because it is #included in
|
||||
# Removed src/pcre2_ucptables.c below because it is #included in
|
||||
# src/pcre2_tables.c. Also fixed typo: ckdint should be chkdint.
|
||||
# PH, 22-March-2023.
|
||||
cc_library(
|
||||
@@ -28,6 +29,7 @@ cc_library(
|
||||
"src/pcre2_auto_possess.c",
|
||||
"src/pcre2_chkdint.c",
|
||||
"src/pcre2_compile.c",
|
||||
"src/pcre2_compile_class.c",
|
||||
"src/pcre2_config.c",
|
||||
"src/pcre2_context.c",
|
||||
"src/pcre2_convert.c",
|
||||
@@ -35,6 +37,7 @@ cc_library(
|
||||
"src/pcre2_error.c",
|
||||
"src/pcre2_extuni.c",
|
||||
"src/pcre2_find_bracket.c",
|
||||
"src/pcre2_jit_compile.c",
|
||||
"src/pcre2_maketables.c",
|
||||
"src/pcre2_match.c",
|
||||
"src/pcre2_match_data.c",
|
||||
@@ -52,24 +55,118 @@ cc_library(
|
||||
"src/pcre2_valid_utf.c",
|
||||
"src/pcre2_xclass.c",
|
||||
":pcre2_chartables_c",
|
||||
],
|
||||
hdrs = glob(["src/*.h"]) + [
|
||||
"src/pcre2_compile.h",
|
||||
"src/pcre2_internal.h",
|
||||
"src/pcre2_intmodedep.h",
|
||||
"src/pcre2_ucp.h",
|
||||
"src/pcre2_util.h",
|
||||
":config_h_generic",
|
||||
],
|
||||
textual_hdrs = [
|
||||
"src/pcre2_jit_match.c",
|
||||
"src/pcre2_jit_misc.c",
|
||||
"src/pcre2_ucptables.c",
|
||||
],
|
||||
hdrs = [
|
||||
":pcre2_h_generic",
|
||||
],
|
||||
defines = [
|
||||
local_defines = [
|
||||
"HAVE_CONFIG_H",
|
||||
"HAVE_MEMMOVE",
|
||||
"PCRE2_CODE_UNIT_WIDTH=8",
|
||||
"PCRE2_STATIC",
|
||||
"SUPPORT_UNICODE",
|
||||
],
|
||||
includes = ["src"],
|
||||
strip_include_prefix = "src",
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "pcre2demo",
|
||||
srcs = ["src/pcre2demo.c"],
|
||||
cc_library(
|
||||
name = "pcre2-posix",
|
||||
srcs = [
|
||||
"src/pcre2posix.c",
|
||||
":config_h_generic",
|
||||
],
|
||||
hdrs = [
|
||||
"src/pcre2posix.h",
|
||||
],
|
||||
local_defines = [
|
||||
"HAVE_CONFIG_H",
|
||||
"HAVE_MEMMOVE",
|
||||
"PCRE2_CODE_UNIT_WIDTH=8",
|
||||
"PCRE2_STATIC",
|
||||
"SUPPORT_UNICODE",
|
||||
],
|
||||
includes = ["src"],
|
||||
strip_include_prefix = "src",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":pcre2"],
|
||||
)
|
||||
|
||||
# Totally weird issue in Bazel. It won't let you #include any files unless they
|
||||
# are declared to the build system. OK, fair enough. But - for a cc_binary it
|
||||
# uses the file extension to determine whether it's a header or a compilation
|
||||
# unit. But... we have several .c files which are #included, rather than treated
|
||||
# as a compilation unit.
|
||||
#
|
||||
# For cc_library() above, we can overcome this with textual_hdrs. But that
|
||||
# doesn't work for cc_binary(). Here's our workaround.
|
||||
#
|
||||
# https://github.com/bazelbuild/bazel/issues/680
|
||||
cc_library(
|
||||
name = "pcre2test_dotc_headers",
|
||||
hdrs = [
|
||||
"src/pcre2_chkdint.c",
|
||||
"src/pcre2_printint.c",
|
||||
"src/pcre2_tables.c",
|
||||
"src/pcre2_ucd.c",
|
||||
"src/pcre2_valid_utf.c",
|
||||
],
|
||||
strip_include_prefix = "src",
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "pcre2test",
|
||||
srcs = [
|
||||
"src/pcre2test.c",
|
||||
":config_h_generic",
|
||||
],
|
||||
local_defines = [
|
||||
"HAVE_CONFIG_H",
|
||||
"HAVE_MEMMOVE",
|
||||
"HAVE_STRERROR",
|
||||
"PCRE2_STATIC",
|
||||
"SUPPORT_UNICODE",
|
||||
"SUPPORT_PCRE2_8",
|
||||
] + select({
|
||||
"@platforms//os:windows": [],
|
||||
"//conditions:default": ["HAVE_UNISTD_H"],
|
||||
}),
|
||||
linkopts = select({
|
||||
"@platforms//os:windows": ["-STACK:2500000"],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":pcre2test_dotc_headers", ":pcre2", ":pcre2-posix"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "testdata",
|
||||
srcs = glob(["testdata/*"]),
|
||||
)
|
||||
|
||||
native_test(
|
||||
name = "pcre2_test",
|
||||
src = select({
|
||||
"@platforms//os:windows": "RunTest.bat",
|
||||
"//conditions:default": "RunTest",
|
||||
}),
|
||||
out = select({
|
||||
"@platforms//os:windows": "RunTest.bat",
|
||||
"//conditions:default": "RunTest",
|
||||
}),
|
||||
data = [":pcre2test", ":testdata"],
|
||||
size = "small",
|
||||
)
|
||||
+950
-780
File diff suppressed because it is too large
Load Diff
+204
@@ -4,6 +4,210 @@ Change Log for PCRE2
|
||||
Before the move to GitHub, this was the only record of changes to PCRE2. Now
|
||||
there is also the log of commit messages.
|
||||
|
||||
Internal changes which are not visible to clients of the library are mostly not
|
||||
listed here.
|
||||
|
||||
Version 10.46 27-August-2025
|
||||
----------------------------
|
||||
|
||||
1. (#771) (CVE-2025-58050) Security fix to prevent a read-past-the-end memory
|
||||
error, of arbitrary length. An attacker-controlled regex pattern is required,
|
||||
and it cannot be triggered by providing crafted subject (match) text. The
|
||||
(*ACCEPT) and (*scs:) pattern features must be used together.
|
||||
|
||||
Release 10.44 and earlier are not affected.
|
||||
|
||||
This could have implications of denial-of-service or information disclosure,
|
||||
and could potentially be used to escalate other vulnerabilities in a system
|
||||
(such as information disclosure being used to escalate the severity of an
|
||||
unrelated bug in another system).
|
||||
|
||||
|
||||
Version 10.45 05-February-2025
|
||||
------------------------------
|
||||
|
||||
1. (#418) Change 6 of 10.44 broke 32-bit tests because pcre2test's reporting of
|
||||
memory size was changed to the entire compiled data block, instead of just the
|
||||
pattern and tables data, so as to align with the new length restriction.
|
||||
Because the block's header contains pointers, this meant the pcre2test output
|
||||
was different in 32-bit mode. A patch by Carlo reverts to the previous state
|
||||
and makes sure that any limit set by pcre2_set_max_pattern_compiled_length()
|
||||
also avoids the internal struct overhead.
|
||||
|
||||
2. (#416, #622) Updates to build.zig.
|
||||
|
||||
3. (#427, et al.) Various fixes to pacify static analyzers.
|
||||
|
||||
4. (#428) Add --posix-pattern-file to pcre2grep to allow processing of empty
|
||||
patterns through the -f option, as well as patterns that end in space
|
||||
characters, for compatibility with other grep tools.
|
||||
|
||||
5. (4fa5b8bd) Fix a bug in the fuzz support quantifier-limiting code. It ignores
|
||||
strings of more than 5 digits because they are necessarily numbers greater than
|
||||
65535, the largest legal quantifier. However, it wasn't ignoring non-significant
|
||||
leading zeros.
|
||||
|
||||
6. (6d82f0cd) The case-independent processing of the letter-matching Unicode
|
||||
properties Ll, Lt, and Lu has been changed to match Perl (which changed a while
|
||||
ago). When caseless matching is in force, all three of these properties are now
|
||||
treated as Lc (cased letter).
|
||||
|
||||
7. (#433) The pcre2_jit_compile() function was updated by the addition of a new
|
||||
option PCRE2_JIT_TEST_ALLOC which, if called with a NULL first argument, tests
|
||||
not only the availability of JIT, but also its ability to allocate executable
|
||||
memory. Update pcre2test to use this support to extend the -C option.
|
||||
|
||||
8. (75b1025a) The code for parsing Unicode property descriptions for \p and \P
|
||||
has been changed as follows:
|
||||
|
||||
. White space etc. before ^ in a negated value such as \p{ ^L } was not being
|
||||
ignored.
|
||||
|
||||
. The code wouldn't have worked if PCRE2 was compiled for UTF-8 support
|
||||
within an EBCDIC environment. Possibly nobody does this any more, but it
|
||||
should now work.
|
||||
|
||||
. The documentation of the syntax of what can follow \p and \P has been
|
||||
updated.
|
||||
|
||||
9. (1c24ba01) There was an error in the table of lengths for parsed items for
|
||||
the OPTIONS item, but fortuitously it could never have actually bitten. While
|
||||
fixing this, some other code that could never be obeyed was discovered and
|
||||
removed.
|
||||
|
||||
10. (674b6640) Removed some incorrect optimization code from DFA matching that
|
||||
has been there since PCRE1, but has just been found to cause a no match return
|
||||
instead of a partial match in some cases. It involves partial matching when (*F)
|
||||
is present so is unlikely to have actually affected anyone.
|
||||
|
||||
11. (b0f4ac17) Tidy the wording and formatting of some pcre2test error messages
|
||||
concerned with bad modifiers. Also restrict single-letter modifier sequences to
|
||||
the first item in a modifier list, as documented and always intended.
|
||||
|
||||
12. (1415565c) An iterator at the end of many assertions can always be
|
||||
auto-possessified, but not at the end of variable-length lookbehinds. There was
|
||||
a bug in the code that checks for such a lookbehind; it was looking only at the
|
||||
first branch, which is wrong because some branches can be fixed length when
|
||||
others are not, for example (?<=AB|CD?). Now all branches are checked for
|
||||
variability.
|
||||
|
||||
13. (ead08288) Matching with pcre2_match() could give an incorrect result if a
|
||||
variable-length lookbehind was used as the condition in a conditional group.
|
||||
The condition could erroneously be treated as true if a branch matched but
|
||||
overran the current position. This bug was in the interpreter only; matching
|
||||
with JIT was correct.
|
||||
|
||||
14. (#443) Split out the sljit sub-project into a "Git submodule". Git users
|
||||
must now run `git submodule init; git submodule update` after a Git checkout, or
|
||||
the build will fail due to missing files in deps/sljit.
|
||||
|
||||
15. (#441) Add a new error code (PCRE2_ERROR_JIT_UNSUPPORTED) which is yielded
|
||||
for unsupported JIT features.
|
||||
|
||||
16. (#444) Fix bug in 'first code unit' and 'last code unit' optimization
|
||||
combined with lookahead assertions.
|
||||
|
||||
17. (#445, #447, #449, #451, #452, #459, #563) Add a new feature called scan
|
||||
substring. This feature is a new type of assertion which matches the content of
|
||||
a capturing block to a sub-pattern.
|
||||
|
||||
18. (#450) Improvements to 'first code unit' / 'starting code units'
|
||||
optimisation.
|
||||
|
||||
19. (#455) Many, many improvements to the JIT compiler.
|
||||
|
||||
20. Item 43 of 10.43 was incomplete because it addressed only \z and not \Z,
|
||||
which was still misbehaving when matching fragments inside invalid UTF strings.
|
||||
|
||||
21. (d29e7290) Octal escapes of the form \045 or \111 were not being recognized
|
||||
in substitution strings, and if encountered gave an error, though the \o{...}
|
||||
form was recognized. This bug is now fixed.
|
||||
|
||||
22. (#463, #487) Fix 1 byte out-of-bounds read when parsing malformed limits
|
||||
(e.g. LIMIT_HEAP)
|
||||
|
||||
23. Many improvements to test infrastructure. Many more platforms and
|
||||
configurations are now run in Continuous Integration, and all the platforms now
|
||||
run the full test suite, rather than a partial subset.
|
||||
|
||||
24. (#475) Implement title casing in substitution strings using Perl syntax.
|
||||
|
||||
25. (#478, #504) Disallow \x if not followed by { or a hex digit.
|
||||
|
||||
26. (#473) Implements Python-style backrefs in substitutions.
|
||||
|
||||
27. (#472) Fix error reporting for certain over-large octal escapes.
|
||||
|
||||
28. (#482) Fix parsing of named captures in replacement strings, allowing
|
||||
non-ASCII capture names to be used.
|
||||
|
||||
29. (#477, #474, #488, #494, #496, #506, #508, #511, #518, #524, #540) Many
|
||||
improvements to parsing and optimising of character classes.
|
||||
|
||||
30. (#483, #498) Add support for \g<n> and $<name> to replacement strings.
|
||||
|
||||
31. (#470) Add option flags PCRE2_EXTRA_NO_BS0 and PCRE2_EXTRA_PYTHON_OCTAL.
|
||||
|
||||
32. (#471) Add new API function pcre2_set_optimize() for controlling which
|
||||
optimizations are enabled.
|
||||
|
||||
33. (#491) Adds $& $` $' and $_ to substitution replacements, as well as
|
||||
interpreting \b and \v as characters.
|
||||
|
||||
34. (#499) Add option PCRE2_EXTRA_NEVER_CALLOUT to disable callouts.
|
||||
|
||||
35. (#503, #513) Update Unicode support to UCD 16.
|
||||
|
||||
36. (#512, #618, #638) Add new function pcre2_set_substitute_case_callout() to
|
||||
allow clients to provide a custom callback with locale-aware case
|
||||
transformation.
|
||||
|
||||
37. (#516) Fix case-insensitive matching of backreferences when using the
|
||||
PCRE2_EXTRA_CASELESS_RESTRICT option.
|
||||
|
||||
38. (#519) In pcre2grep, add $& as an alias for $0
|
||||
|
||||
39. (c9bf8339, #534) Updated perltest.sh to enable locale setting.
|
||||
|
||||
40. (#521) Add support for Turkish I casefolding, using new options
|
||||
PCRE2_EXTRA_TURKISH_CASING, and added pre-pattern flags (*TURKISH_CASING) and
|
||||
(*CASELESS_RESTRICT).
|
||||
|
||||
41. (#523, #546, #547) Add support for UTS#18 compatible character classes,
|
||||
using the new option PCRE2_ALT_EXTENDED_CLASS. This adds '[' as a metacharacter
|
||||
within character classes and the operators '&&', '--' and '~~', allowing
|
||||
subtractions and intersections of character classes to be easily expressed.
|
||||
|
||||
42. (#553, #586, #596, #597) Add support for Perl-style extended character
|
||||
classes, using the syntax (?[...]). This also allows expressing subtractions and
|
||||
intersections of character classes, but using a different syntax to UTS#18.
|
||||
|
||||
43. (#554) Fixed a bug in JIT affecting greedy bounded repeats. The upper limit
|
||||
of repeats inside a repeated bracket might be incorrectly checked.
|
||||
|
||||
44. (#556) Fixed a bug in JIT affecting caseful matching of backreferences. When
|
||||
utf is disabled, and dupnames is enabled, caseless matching was used even
|
||||
if caseful matching was needed.
|
||||
|
||||
45. (f34fc0a3) Fixed a bug in pcre2grep reported by Alejandro Colomar
|
||||
<alx@kernel.org> (GitHub issue #577). In certain cases, when lines of above and
|
||||
below context were contiguous, a separator line was incorrectly being inserted.
|
||||
|
||||
46. (#594) Fix a small (one/two byte) out-of-bounds read on invalid UTF-8 input
|
||||
in pcre2grep.
|
||||
|
||||
47. (#370) Fix the INSTALL_MSVC_PDB CMake flag.
|
||||
|
||||
48. (#366) Install cmake files in prefix/lib/cmake/pcre2 rather than
|
||||
prefix/cmake. The new CMake flag PCRE2_INSTALL_CMAKEDIR allows customising this
|
||||
location.
|
||||
|
||||
49. (#624, #626, #628, #632, #639, #641) Reduce code size of generated JIT code
|
||||
for repeated character classes.
|
||||
|
||||
50. (#623) Update the Bazel build files.
|
||||
|
||||
|
||||
Version 10.44 07-June-2024
|
||||
--------------------------
|
||||
|
||||
|
||||
+135
-64
@@ -21,41 +21,27 @@ form, and were quite restricted in what they could do by comparison with Perl.
|
||||
The interesting part about the algorithm was that the amount of space required
|
||||
to hold the compiled form of an expression was known in advance. The code to
|
||||
apply an expression did not operate by backtracking, as the original Henry
|
||||
Spencer code and current PCRE2 and Perl code does, but instead checked all
|
||||
possibilities simultaneously by keeping a list of current states and checking
|
||||
all of them as it advanced through the subject string. In the terminology of
|
||||
Jeffrey Friedl's book, it was a "DFA algorithm", though it was not a
|
||||
traditional Finite State Machine (FSM). When the pattern was all used up, all
|
||||
remaining states were possible matches, and the one matching the longest subset
|
||||
of the subject string was chosen. This did not necessarily maximize the
|
||||
individual wild portions of the pattern, as is expected in Unix and Perl-style
|
||||
regular expressions.
|
||||
Spencer code and the current PCRE2 pcre2_match() function and Perl code do, but
|
||||
instead checked all possibilities simultaneously by keeping a list of current
|
||||
states and checking all of them as it advanced through the subject string. In
|
||||
the terminology of Jeffrey Friedl's book, it was a "DFA algorithm", though it
|
||||
was not a traditional Finite State Machine (FSM). When the pattern was all used
|
||||
up, all remaining states were possible matches, and the one matching the
|
||||
longest subset of the subject string was chosen. This did not necessarily
|
||||
maximize the individual wild portions of the pattern, as is expected in Unix
|
||||
and Perl-style regular expressions.
|
||||
|
||||
|
||||
Historical note 2
|
||||
-----------------
|
||||
|
||||
By contrast, the code originally written by Henry Spencer (which was
|
||||
subsequently heavily modified for Perl) compiles the expression twice: once in
|
||||
a dummy mode in order to find out how much store will be needed, and then for
|
||||
real. (The Perl version may or may not still do this; I'm talking about the
|
||||
original library.) The execution function operates by backtracking and
|
||||
maximizing (or, optionally, minimizing, in Perl) the amount of the subject that
|
||||
matches individual wild portions of the pattern. This is an "NFA algorithm" in
|
||||
Friedl's terminology.
|
||||
|
||||
|
||||
OK, here's the real stuff
|
||||
-------------------------
|
||||
|
||||
For the set of functions that formed the original PCRE1 library in 1997 (which
|
||||
are unrelated to those mentioned above), I tried at first to invent an
|
||||
algorithm that used an amount of store bounded by a multiple of the number of
|
||||
characters in the pattern, to save on compiling time. However, because of the
|
||||
greater complexity in Perl regular expressions, I couldn't do this, even though
|
||||
the then current Perl 5.004 patterns were much simpler than those supported
|
||||
nowadays. In any case, a first pass through the pattern is helpful for other
|
||||
reasons.
|
||||
The code originally written by Henry Spencer (which was subsequently heavily
|
||||
modified for Perl) compiles the expression twice: once in a dummy mode in order
|
||||
to find out how much store will be needed, and then for real. (The Perl version
|
||||
may or may not still do this; I'm talking about the original library.) The
|
||||
execution function operates by backtracking and maximizing (or, optionally,
|
||||
minimizing, in Perl) the amount of the subject that matches individual wild
|
||||
portions of the pattern. This is an "NFA algorithm" in Friedl's terminology.
|
||||
|
||||
|
||||
Support for 16-bit and 32-bit data strings
|
||||
@@ -98,8 +84,8 @@ were also present in the 7.0 release).
|
||||
A side effect of this work was that the previous limit of 200 on the nesting
|
||||
depth of parentheses was removed. However, there was a downside: compiling ran
|
||||
more slowly than before (30% or more, depending on the pattern) because it now
|
||||
did a full analysis of the pattern. My hope was that this would not be a big
|
||||
issue, and in the event, nobody has commented on it.
|
||||
did a full analysis of the pattern twice. My hope was that this would not be a
|
||||
big issue, and in the event, nobody has commented on it.
|
||||
|
||||
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
|
||||
(default 250, settable at build time) so as to put a limit on the amount of
|
||||
@@ -119,7 +105,7 @@ memory.) The use of duplicate group numbers (the (?| facility) also caused
|
||||
issues.
|
||||
|
||||
To get around these problems I adopted a new approach by adding a third pass
|
||||
over the pattern (really a "pre-pass"), which did nothing other than identify
|
||||
over the pattern (really a "pre-pass"), which does nothing other than identify
|
||||
all the named subpatterns and their corresponding group numbers. This means
|
||||
that the actual compile (both the memory-computing dummy run and the real
|
||||
compile) has full knowledge of group names and numbers throughout. Several
|
||||
@@ -154,17 +140,21 @@ assumption is made that there will be a callout for each pattern code unit
|
||||
at the end. A default parsed pattern vector is defined on the system stack, to
|
||||
minimize memory handling, but if this is not big enough, heap memory is used.
|
||||
|
||||
As before, the actual compiling function is run twice, the first time to
|
||||
determine the amount of memory needed for the final compiled pattern. It
|
||||
now processes the parsed pattern vector, not the pattern itself, although some
|
||||
of the parsed items refer to strings in the pattern - for example, group
|
||||
names. As escapes and comments have already been processed, the code is a bit
|
||||
simpler than before.
|
||||
If there are any lookbehinds in the pattern, the parsed pattern is scanned in
|
||||
order to work out their lengths. Then the actual compiling function is run
|
||||
twice, the first time to determine the amount of memory needed for the final
|
||||
compiled pattern. The compiling function processes the parsed pattern vector,
|
||||
not the pattern itself, although some of the parsed items refer to strings in
|
||||
the pattern - for example, group names.
|
||||
|
||||
Most errors can be diagnosed during the parsing scan. For those that cannot
|
||||
(for example, "lookbehind assertion is not fixed length"), the parsed code
|
||||
contains offsets into the pattern so that the actual compiling code can
|
||||
report where errors are.
|
||||
Some post-processing of the compiled pattern takes place. If there are any
|
||||
recursion or subroutine calls, there is a scan to convert them into offsets.
|
||||
Then there are other scans to apply certain optimizations, some of which can be
|
||||
disabled by setting appropriate options.
|
||||
|
||||
Most errors can be diagnosed during the parsing scan. For those that cannot,
|
||||
the parsed code contains offsets into the pattern so that the actual compiling
|
||||
code can report where the errors are.
|
||||
|
||||
|
||||
The elements of the parsed pattern vector
|
||||
@@ -209,6 +199,11 @@ META_RANGE_ESCAPED hyphen in class range with at least one escape
|
||||
META_RANGE_LITERAL hyphen in class range defined literally
|
||||
META_SKIP (*SKIP) - no argument (see below for with argument)
|
||||
META_THEN (*THEN) - no argument (see below for with argument)
|
||||
META_ECLASS_AND && (or &) in an extended character class
|
||||
META_ECLASS_OR || (or |, +) in an extended character class
|
||||
META_ECLASS_SUB -- (or -) in an extended character class
|
||||
META_ECLASS_XOR ~~ (or ^) in an extended character class
|
||||
META_ECLASS_NOT ! in an extended character class
|
||||
|
||||
The two RANGE values occur only in character classes. They are positioned
|
||||
between two literals that define the start and end of the range. In an EBCDIC
|
||||
@@ -240,11 +235,11 @@ occurrence is useful). On 64-bit systems this avoids using more than two parsed
|
||||
pattern elements for items such as \3. The offset is used when an error occurs
|
||||
because the reference is to a non-existent group.
|
||||
|
||||
META_ESCAPE has an ESC_xxx value as its data. For ESC_P and ESC_p, the next
|
||||
element contains the 16-bit type and data property values, packed together.
|
||||
ESC_g and ESC_k are used only for named references - numerical ones are turned
|
||||
into META_RECURSE or META_BACKREF as appropriate. ESC_g and ESC_k are followed
|
||||
by a length and an offset into the pattern to specify the name.
|
||||
META_ESCAPE is used for escapes such as \d that match a character. It has an
|
||||
ESC_xxx value as its data. For ESC_P and ESC_p, the next element contains the
|
||||
16-bit type and data property values, packed together. Escape sequences such as
|
||||
\g and \k are turned into other items like META_RECURSE or META_BACKREF and
|
||||
their ESC_xxx values never occur with META_ESCAPE.
|
||||
|
||||
The following have one data item that follows in the next vector element:
|
||||
|
||||
@@ -268,15 +263,17 @@ META_COND_NAME (?(<name>) or (?('name') or (?(name)
|
||||
META_COND_RNAME (?(R&name)
|
||||
META_COND_RNUMBER (?(Rdigits)
|
||||
META_RECURSE_BYNAME (?&name)
|
||||
META_BACKREF_BYNAME \k'name'
|
||||
META_BACKREF_BYNAME \k'name' or \k<name> or \k{name} or \g{name}
|
||||
META_SCS_NAME (*scs:(<name>)...)
|
||||
|
||||
META_COND_RNUMBER is used for names that start with R and continue with digits,
|
||||
because this is an ambiguous case. It could be a back reference to a group with
|
||||
that name, or it could be a recursion test on a numbered group.
|
||||
|
||||
This one is followed by an offset, for use in error messages, then a number:
|
||||
These are followed by an offset, for use in error messages, then a number:
|
||||
|
||||
META_COND_NUMBER (?([+-]digits)
|
||||
META_SCS_NUMBER (*scs:(digits)...)
|
||||
|
||||
The following is followed just by an offset, for use in error messages:
|
||||
|
||||
@@ -286,7 +283,7 @@ The following are at first also followed just by an offset for use in error
|
||||
messages. After the lengths of the branches of a lookbehind group have been
|
||||
checked the error offset is no longer needed. The lower 16 bits of the main
|
||||
word are now set to the maximum length of the first branch of the lookbehind
|
||||
group, and the second word is set to the mimimum matching length for a
|
||||
group, and the second word is set to the minimum matching length for a
|
||||
variable-length lookbehind group, or to LOOKBEHIND_MAX for a group whose
|
||||
branches are all of fixed length. These values are used when generating
|
||||
OP_REVERSE or OP_VREVERSE for the first branch. The minimum value is also used
|
||||
@@ -336,16 +333,28 @@ will use most of the time. If PCRE2 is compiled with just-in-time (JIT)
|
||||
support, and studying a compiled pattern with JIT is successful, the JIT code
|
||||
is run instead of the normal pcre2_match() code, but the result is the same.
|
||||
|
||||
The interpreter used to implement backtracking by means of recursive function
|
||||
calls, but this gave rise to regular complaints when patterns with large search
|
||||
trees ran out of stack. There was for a while a fudge that used the heap
|
||||
instead, but this was inefficient and slow. In 2017 I re-wrote pcre2_match() as
|
||||
a single, non-recursive function that implements backtracking via a vector of
|
||||
"frames" on the heap, each frame representing a backtracking point. As well as
|
||||
standard information such as the position in the pattern and position in the
|
||||
subject, each frame has a number of unassigned variables that can be used
|
||||
locally to preserve values at a backtracking point. C macros are used
|
||||
extensively to implement all of this.
|
||||
|
||||
|
||||
Supplementary matching function
|
||||
-------------------------------
|
||||
|
||||
There is also a supplementary matching function called pcre2_dfa_match(). This
|
||||
There is a supplementary matching function called pcre2_dfa_match() that
|
||||
implements a DFA matching algorithm that searches simultaneously for all
|
||||
possible matches that start at one point in the subject string. (Going back to
|
||||
my roots: see Historical Note 1 above.) This function interprets the same
|
||||
compiled pattern data as pcre2_match(); however, not all the facilities are
|
||||
available, and those that are do not always work in quite the same way. See the
|
||||
available, and those that are do not always work in quite the same way. In
|
||||
particular, capturing parentheses and backreferences are not supported. See the
|
||||
user documentation for details.
|
||||
|
||||
The algorithm that is used for pcre2_dfa_match() is not a traditional FSM,
|
||||
@@ -361,8 +370,10 @@ Changeable options
|
||||
The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL) and
|
||||
some others may be changed in the middle of patterns by items such as (?i).
|
||||
Their processing is handled entirely at compile time by generating different
|
||||
opcodes for the different settings. The runtime functions do not need to keep
|
||||
track of an option's state.
|
||||
opcodes for the different settings. Some options are copied into the opcode's
|
||||
data, for opcodes such as OP_REFI which depends on the (?r)
|
||||
(PCRE2_EXTRA_CASELESS_RESTRICT) option. The runtime functions do not need to
|
||||
keep track of an option's state.
|
||||
|
||||
PCRE2_DUPNAMES, PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE
|
||||
are tracked and processed during the parsing pre-pass. The others are handled
|
||||
@@ -383,10 +394,10 @@ within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
|
||||
default value for LINK_SIZE is 2, except for the 32-bit library, where it can
|
||||
only be 4. The 8-bit library can be compiled to use 3-byte or 4-byte values,
|
||||
and the 16-bit library can be compiled to use 4-byte values, though this
|
||||
impairs performance. Specifying a LINK_SIZE larger than 2 for these libraries is
|
||||
necessary only when patterns whose compiled length is greater than 65535 code
|
||||
units are going to be processed. When a LINK_SIZE value uses more than one code
|
||||
unit, the most significant unit is first.
|
||||
impairs performance. Specifying a LINK_SIZE larger than 2 for these libraries
|
||||
is necessary only when patterns whose compiled length is greater than 65535
|
||||
code units are going to be processed. When a LINK_SIZE value uses more than one
|
||||
code unit, the most significant unit is first.
|
||||
|
||||
In this description, we assume the "normal" compilation options. Data values
|
||||
that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode
|
||||
@@ -396,7 +407,7 @@ that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode
|
||||
Opcodes with no following data
|
||||
------------------------------
|
||||
|
||||
These items are all just one unit long:
|
||||
These items are all just one code unit long:
|
||||
|
||||
OP_END end of pattern
|
||||
OP_ANY match any one character other than newline
|
||||
@@ -594,9 +605,13 @@ do.
|
||||
|
||||
For classes containing characters with values greater than 255 or that contain
|
||||
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any acceptable
|
||||
code points are less than 256, followed by a list of pairs (for a range) and/or
|
||||
single characters and/or properties. In caseless mode, all equivalent
|
||||
characters are explicitly listed.
|
||||
code points are less than 256. After the bit map, the properties of the
|
||||
character class are listed, if they are present. The items in the list
|
||||
follow the declaration order of the pattern string. The property list
|
||||
is followed by single characters and/or character ranges, if they are
|
||||
present. The characters/ranges are sorted in ascending order, and at
|
||||
least one non-matching character must be present between any two of
|
||||
them. In caseless mode, all equivalent characters are explicitly listed.
|
||||
|
||||
OP_XCLASS is followed by a LINK_SIZE value containing the total length of the
|
||||
opcode and its data. This is followed by a code unit containing flag bits:
|
||||
@@ -618,6 +633,42 @@ When XCL_NOT is set, the bit map, if present, contains bits for characters that
|
||||
are allowed (exactly as for OP_NCLASS), but the list of items that follow it
|
||||
specifies characters and properties that are not allowed.
|
||||
|
||||
The meaning of the bitmap indicated by XCL_MAP is that, if one is present, then
|
||||
it fully describes which code points < 256 match the class (without needing to
|
||||
invert the check according to XCL_NOT); the other items in the OP_XCLASS need
|
||||
not be consulted. However, if a bitmap is not present, then code points < 256
|
||||
may still match, so the other items in the OP_XCLASS must be consulted.
|
||||
|
||||
For classes containing logical expressions, such as "[\p{Greek} && \p{Lu}]" for
|
||||
"uppercase Greek letters", OP_ECLASS is used. The expression is encoded as a
|
||||
stack-based series of operands and operators, in Reverse Polish Notation. Like
|
||||
an OP_XCLASS, the OP_ECLASS is first followed by a LINK_SIZE value containing
|
||||
the total length of the opcode and its data. That is followed by a code unit
|
||||
containing flags: currently just ECL_MAP indicating that a bit map is present.
|
||||
There follows the bit map, if ECL_MAP is set. Finally, there is a sequence of
|
||||
items that are either an operand or operator. Each item starts with a single
|
||||
code unit containing its type:
|
||||
|
||||
ECL_AND AND; no additional data
|
||||
ECL_OR OR; no additional data
|
||||
ECL_XOR XOR; no additional data
|
||||
ECL_NOT NOT; no additional data
|
||||
ECL_XCLASS The additional data which follows ECL_XCLASS is the same as for
|
||||
an OP_XCLASS, except that this data is preceded by ECL_XCLASS
|
||||
rather than OP_XCLASS.
|
||||
Because the OP_ECLASS has its own bitmap (if required), an
|
||||
ECL_XCLASS should not contain a bitmap.
|
||||
|
||||
Additionally, there are two intermediate values used during compilation, but
|
||||
these are folded away during generation of the opcode, and so never appear
|
||||
inside an OP_ECLASS at match time. They are:
|
||||
|
||||
ECL_ANY match all characters; no additional data
|
||||
ECL_NONE match no characters; no additional data
|
||||
|
||||
The meaning of the bitmap indicated by ECL_MAP is the same as XCL_MAP.
|
||||
If the bitmap is present, all code points < 256 are checked against the bitmap.
|
||||
|
||||
|
||||
Back references
|
||||
---------------
|
||||
@@ -631,6 +682,9 @@ generates OP_DNREF or OP_DNREFI. These are followed by two counts: the index
|
||||
required name, followed by the number of groups with the same name. The
|
||||
matching code can then search for the first one that is set.
|
||||
|
||||
OP_REFI and OP_DNREFI are further followed by an item containing any
|
||||
case-insensitivity flags.
|
||||
|
||||
|
||||
Repeating character classes and back references
|
||||
-----------------------------------------------
|
||||
@@ -750,6 +804,16 @@ In ASCII or UTF-32 mode, the character counts in OP_REVERSE and OP_VREVERSE are
|
||||
also the number of code units, but in UTF-8/16 mode each character may occupy
|
||||
more than one code unit.
|
||||
|
||||
The "scan substring" assertion compiles as OP_ASSERT_SCS. This opcode is
|
||||
followed by a list of arguments. Each argument is either an OP_CREF or
|
||||
OP_DNCREF byte code sequence. The details of these sequences are described
|
||||
in the next section.
|
||||
|
||||
For example (*scs:(1,'NAME')...PATTERN...) is translated to:
|
||||
[OP_ASSERT_SCS] [OP_CREF] [OP_CREF] ...PATTERN... [OP_KET]
|
||||
|
||||
If 'NAME' is a duplicated name, the second [OP_CREF] is [OP_DNCREF] instead.
|
||||
|
||||
|
||||
Conditional subpatterns
|
||||
-----------------------
|
||||
@@ -849,5 +913,12 @@ The last opcode that is defined in pcre2_internal.h is OP_TABLE_LENGTH. This is
|
||||
not a real opcode, but is used to check at compile time that tables indexed by
|
||||
opcode are the correct length, in order to catch updating errors.
|
||||
|
||||
|
||||
See also
|
||||
--------
|
||||
|
||||
The file maint/README contains additional information.
|
||||
|
||||
|
||||
Philip Hazel
|
||||
November 2023
|
||||
August 2024
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
PCRE2 LICENCE
|
||||
-------------
|
||||
PCRE2 License
|
||||
=============
|
||||
|
||||
| SPDX-License-Identifier: | BSD-3-Clause WITH PCRE2-exception |
|
||||
|---------|-------|
|
||||
|
||||
PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
@@ -16,40 +19,46 @@ optimize pattern matching. This is an optional feature that can be omitted when
|
||||
the library is built.
|
||||
|
||||
|
||||
THE BASIC LIBRARY FUNCTIONS
|
||||
---------------------------
|
||||
COPYRIGHT
|
||||
---------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
### The basic library functions
|
||||
|
||||
Retired from University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
Written by: Philip Hazel
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
|
||||
Copyright (c) 1997-2024 University of Cambridge
|
||||
All rights reserved.
|
||||
Retired from University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
Copyright (c) 2007-2024 Philip Hazel
|
||||
All rights reserved.
|
||||
|
||||
PCRE2 JUST-IN-TIME COMPILATION SUPPORT
|
||||
--------------------------------------
|
||||
### PCRE2 Just-In-Time compilation support
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2024 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
Copyright (c) 2010-2024 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
### Stack-less Just-In-Time compiler
|
||||
|
||||
STACK-LESS JUST-IN-TIME COMPILER
|
||||
--------------------------------
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
Copyright (c) 2009-2024 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
Copyright(c) 2009-2024 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
### All other contributions
|
||||
|
||||
Many other contributors have participated in the authorship of PCRE2. As PCRE2
|
||||
has never required a Contributor Licensing Agreement, or other copyright
|
||||
assignment agreement, all contributions have copyright retained by each
|
||||
original contributor or their employer.
|
||||
|
||||
|
||||
THE "BSD" LICENCE
|
||||
@@ -58,16 +67,16 @@ THE "BSD" LICENCE
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notices,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions of source code must retain the above copyright notices,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notices, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notices, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of any
|
||||
contributors may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
* Neither the name of the University of Cambridge nor the names of any
|
||||
contributors may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
+2
-1
@@ -1,8 +1,9 @@
|
||||
module(
|
||||
name = "pcre2",
|
||||
version = "10.40",
|
||||
version = "10.46",
|
||||
compatibility_level = 1,
|
||||
)
|
||||
|
||||
bazel_dep(name = "rules_cc", version = "0.0.1")
|
||||
bazel_dep(name = "bazel_skylib", version = "1.2.1")
|
||||
bazel_dep(name = "platforms", version = "0.0.4")
|
||||
|
||||
+58
-47
@@ -10,12 +10,13 @@ AM_CPPFLAGS="-I$(srcdir)/src"
|
||||
## Specify the documentation files that are distributed.
|
||||
|
||||
dist_doc_DATA = \
|
||||
AUTHORS \
|
||||
AUTHORS.md \
|
||||
COPYING \
|
||||
ChangeLog \
|
||||
LICENCE \
|
||||
LICENCE.md \
|
||||
NEWS \
|
||||
README \
|
||||
SECURITY.md \
|
||||
doc/pcre2.txt \
|
||||
doc/pcre2-config.txt \
|
||||
doc/pcre2grep.txt \
|
||||
@@ -86,11 +87,13 @@ dist_html_DATA = \
|
||||
doc/html/pcre2_set_max_pattern_length.html \
|
||||
doc/html/pcre2_set_max_varlookbehind.html \
|
||||
doc/html/pcre2_set_offset_limit.html \
|
||||
doc/html/pcre2_set_optimize.html \
|
||||
doc/html/pcre2_set_newline.html \
|
||||
doc/html/pcre2_set_parens_nest_limit.html \
|
||||
doc/html/pcre2_set_recursion_limit.html \
|
||||
doc/html/pcre2_set_recursion_memory_management.html \
|
||||
doc/html/pcre2_set_substitute_callout.html \
|
||||
doc/html/pcre2_set_substitute_case_callout.html \
|
||||
doc/html/pcre2_substitute.html \
|
||||
doc/html/pcre2_substring_copy_byname.html \
|
||||
doc/html/pcre2_substring_copy_bynumber.html \
|
||||
@@ -185,11 +188,13 @@ dist_man_MANS = \
|
||||
doc/pcre2_set_max_pattern_length.3 \
|
||||
doc/pcre2_set_max_varlookbehind.3 \
|
||||
doc/pcre2_set_offset_limit.3 \
|
||||
doc/pcre2_set_optimize.3 \
|
||||
doc/pcre2_set_newline.3 \
|
||||
doc/pcre2_set_parens_nest_limit.3 \
|
||||
doc/pcre2_set_recursion_limit.3 \
|
||||
doc/pcre2_set_recursion_memory_management.3 \
|
||||
doc/pcre2_set_substitute_callout.3 \
|
||||
doc/pcre2_set_substitute_case_callout.3 \
|
||||
doc/pcre2_substitute.3 \
|
||||
doc/pcre2_substring_copy_byname.3 \
|
||||
doc/pcre2_substring_copy_bynumber.3 \
|
||||
@@ -272,6 +277,14 @@ EXTRA_DIST += \
|
||||
NON-AUTOTOOLS-BUILD \
|
||||
HACKING
|
||||
|
||||
# These are support files for building with Bazel or Zig
|
||||
|
||||
EXTRA_DIST += \
|
||||
BUILD.bazel \
|
||||
MODULE.bazel \
|
||||
WORKSPACE.bazel \
|
||||
build.zig
|
||||
|
||||
# These are support files for building under VMS
|
||||
|
||||
EXTRA_DIST += \
|
||||
@@ -280,16 +293,6 @@ EXTRA_DIST += \
|
||||
vms/pcre2.h_patch \
|
||||
vms/stdint.h
|
||||
|
||||
# These files are used in the preparation of a release
|
||||
|
||||
EXTRA_DIST += \
|
||||
PrepareRelease \
|
||||
CheckMan \
|
||||
CleanTxt \
|
||||
Detrail \
|
||||
132html \
|
||||
doc/index.html.src
|
||||
|
||||
# These files are usable versions of pcre2.h and config.h that are distributed
|
||||
# for the benefit of people who are building PCRE2 manually, without the
|
||||
# Autotools support.
|
||||
@@ -374,6 +377,8 @@ COMMON_SOURCES = \
|
||||
src/pcre2_auto_possess.c \
|
||||
src/pcre2_chkdint.c \
|
||||
src/pcre2_compile.c \
|
||||
src/pcre2_compile.h \
|
||||
src/pcre2_compile_class.c \
|
||||
src/pcre2_config.c \
|
||||
src/pcre2_context.c \
|
||||
src/pcre2_convert.c \
|
||||
@@ -383,6 +388,7 @@ COMMON_SOURCES = \
|
||||
src/pcre2_find_bracket.c \
|
||||
src/pcre2_internal.h \
|
||||
src/pcre2_intmodedep.h \
|
||||
src/pcre2_jit_char_inc.h \
|
||||
src/pcre2_jit_compile.c \
|
||||
src/pcre2_jit_neon_inc.h \
|
||||
src/pcre2_jit_simd_inc.h \
|
||||
@@ -401,6 +407,7 @@ COMMON_SOURCES = \
|
||||
src/pcre2_tables.c \
|
||||
src/pcre2_ucd.c \
|
||||
src/pcre2_ucp.h \
|
||||
src/pcre2_util.h \
|
||||
src/pcre2_valid_utf.c \
|
||||
src/pcre2_xclass.c
|
||||
|
||||
@@ -460,39 +467,39 @@ CLEANFILES += src/pcre2_chartables.c
|
||||
# when pcre2_jit_compile.c is processed, so they must be distributed.
|
||||
|
||||
EXTRA_DIST += \
|
||||
src/sljit/sljitConfig.h \
|
||||
src/sljit/sljitConfigCPU.h \
|
||||
src/sljit/sljitConfigInternal.h \
|
||||
src/sljit/sljitLir.c \
|
||||
src/sljit/sljitLir.h \
|
||||
src/sljit/sljitNativeARM_32.c \
|
||||
src/sljit/sljitNativeARM_64.c \
|
||||
src/sljit/sljitNativeARM_T2_32.c \
|
||||
src/sljit/sljitNativeLOONGARCH_64.c \
|
||||
src/sljit/sljitNativeMIPS_32.c \
|
||||
src/sljit/sljitNativeMIPS_64.c \
|
||||
src/sljit/sljitNativeMIPS_common.c \
|
||||
src/sljit/sljitNativePPC_32.c \
|
||||
src/sljit/sljitNativePPC_64.c \
|
||||
src/sljit/sljitNativePPC_common.c \
|
||||
src/sljit/sljitNativeRISCV_32.c \
|
||||
src/sljit/sljitNativeRISCV_64.c \
|
||||
src/sljit/sljitNativeRISCV_common.c \
|
||||
src/sljit/sljitNativeS390X.c \
|
||||
src/sljit/sljitNativeX86_32.c \
|
||||
src/sljit/sljitNativeX86_64.c \
|
||||
src/sljit/sljitNativeX86_common.c \
|
||||
src/sljit/sljitSerialize.c \
|
||||
src/sljit/sljitUtils.c \
|
||||
src/sljit/allocator_src/sljitExecAllocatorApple.c \
|
||||
src/sljit/allocator_src/sljitExecAllocatorCore.c \
|
||||
src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c \
|
||||
src/sljit/allocator_src/sljitExecAllocatorPosix.c \
|
||||
src/sljit/allocator_src/sljitExecAllocatorWindows.c \
|
||||
src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c \
|
||||
src/sljit/allocator_src/sljitProtExecAllocatorPosix.c \
|
||||
src/sljit/allocator_src/sljitWXExecAllocatorPosix.c \
|
||||
src/sljit/allocator_src/sljitWXExecAllocatorWindows.c
|
||||
deps/sljit/sljit_src/sljitConfig.h \
|
||||
deps/sljit/sljit_src/sljitConfigCPU.h \
|
||||
deps/sljit/sljit_src/sljitConfigInternal.h \
|
||||
deps/sljit/sljit_src/sljitLir.c \
|
||||
deps/sljit/sljit_src/sljitLir.h \
|
||||
deps/sljit/sljit_src/sljitNativeARM_32.c \
|
||||
deps/sljit/sljit_src/sljitNativeARM_64.c \
|
||||
deps/sljit/sljit_src/sljitNativeARM_T2_32.c \
|
||||
deps/sljit/sljit_src/sljitNativeLOONGARCH_64.c \
|
||||
deps/sljit/sljit_src/sljitNativeMIPS_32.c \
|
||||
deps/sljit/sljit_src/sljitNativeMIPS_64.c \
|
||||
deps/sljit/sljit_src/sljitNativeMIPS_common.c \
|
||||
deps/sljit/sljit_src/sljitNativePPC_32.c \
|
||||
deps/sljit/sljit_src/sljitNativePPC_64.c \
|
||||
deps/sljit/sljit_src/sljitNativePPC_common.c \
|
||||
deps/sljit/sljit_src/sljitNativeRISCV_32.c \
|
||||
deps/sljit/sljit_src/sljitNativeRISCV_64.c \
|
||||
deps/sljit/sljit_src/sljitNativeRISCV_common.c \
|
||||
deps/sljit/sljit_src/sljitNativeS390X.c \
|
||||
deps/sljit/sljit_src/sljitNativeX86_32.c \
|
||||
deps/sljit/sljit_src/sljitNativeX86_64.c \
|
||||
deps/sljit/sljit_src/sljitNativeX86_common.c \
|
||||
deps/sljit/sljit_src/sljitSerialize.c \
|
||||
deps/sljit/sljit_src/sljitUtils.c \
|
||||
deps/sljit/sljit_src/allocator_src/sljitExecAllocatorApple.c \
|
||||
deps/sljit/sljit_src/allocator_src/sljitExecAllocatorCore.c \
|
||||
deps/sljit/sljit_src/allocator_src/sljitExecAllocatorFreeBSD.c \
|
||||
deps/sljit/sljit_src/allocator_src/sljitExecAllocatorPosix.c \
|
||||
deps/sljit/sljit_src/allocator_src/sljitExecAllocatorWindows.c \
|
||||
deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorNetBSD.c \
|
||||
deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorPosix.c \
|
||||
deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorPosix.c \
|
||||
deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorWindows.c
|
||||
|
||||
# Some of the JIT sources are also in separate files that are #included.
|
||||
|
||||
@@ -710,9 +717,12 @@ EXTRA_DIST += \
|
||||
testdata/grepinput \
|
||||
testdata/grepinput3 \
|
||||
testdata/grepinput8 \
|
||||
testdata/grepinputBad8 \
|
||||
testdata/grepinputBad8_Trail \
|
||||
testdata/grepinputC.bz2 \
|
||||
testdata/grepinputC.gz \
|
||||
testdata/grepinputM \
|
||||
testdata/grepinputUN \
|
||||
testdata/grepinputv \
|
||||
testdata/grepinputx \
|
||||
testdata/greplist \
|
||||
@@ -755,6 +765,7 @@ EXTRA_DIST += \
|
||||
testdata/testinput24 \
|
||||
testdata/testinput25 \
|
||||
testdata/testinput26 \
|
||||
testdata/testinput27 \
|
||||
testdata/testinputEBC \
|
||||
testdata/testinputheap \
|
||||
testdata/testoutput1 \
|
||||
@@ -799,6 +810,7 @@ EXTRA_DIST += \
|
||||
testdata/testoutput24 \
|
||||
testdata/testoutput25 \
|
||||
testdata/testoutput26 \
|
||||
testdata/testoutput27 \
|
||||
testdata/testoutputEBC \
|
||||
testdata/testoutputheap-16 \
|
||||
testdata/testoutputheap-32 \
|
||||
@@ -819,7 +831,7 @@ CLEANFILES += \
|
||||
test3outputB \
|
||||
testtry \
|
||||
teststdout \
|
||||
teststderr \
|
||||
teststderr \
|
||||
teststderrgrep \
|
||||
testtemp1grep \
|
||||
testtemp2grep \
|
||||
@@ -957,7 +969,6 @@ endif # WITH_GCOV
|
||||
EXTRA_DIST += \
|
||||
cmake/COPYING-CMAKE-SCRIPTS \
|
||||
cmake/FindEditline.cmake \
|
||||
cmake/FindPackageHandleStandardArgs.cmake \
|
||||
cmake/FindReadline.cmake \
|
||||
cmake/pcre2-config-version.cmake.in \
|
||||
cmake/pcre2-config.cmake.in \
|
||||
|
||||
+107
-2
@@ -1,6 +1,111 @@
|
||||
News about PCRE2 releases
|
||||
-------------------------
|
||||
|
||||
Version 10.46 27-August-2025
|
||||
----------------------------
|
||||
|
||||
This is a security-only release, to address CVE-2025-58050.
|
||||
|
||||
Compared to 10.45, this release has only a minimal code change to prevent a
|
||||
read-past-the-end memory error, of arbitrary length. An attacker-controlled
|
||||
regex pattern is required, and it cannot be triggered by providing crafted
|
||||
subject (match) text. The (*ACCEPT) and (*scs:) pattern features must be used
|
||||
together.
|
||||
|
||||
Release 10.44 and earlier are not affected.
|
||||
|
||||
This could have implications of denial-of-service or information disclosure,
|
||||
and could potentially be used to escalate other vulnerabilities in a system
|
||||
(such as information disclosure being used to escalate the severity of an
|
||||
unrelated bug in another system).
|
||||
|
||||
|
||||
Version 10.45 05-February-2025
|
||||
------------------------------
|
||||
|
||||
This is a comparatively large release, incorporating new features, some
|
||||
bugfixes, and a few changes with slight backwards compatibility implications.
|
||||
Please see the ChangeLog and Git log for further details.
|
||||
|
||||
Only changes to behaviour, changes to the API, and major changes to the pattern
|
||||
syntax are described here.
|
||||
|
||||
This release is the first to be available as a (signed) Git tag, or
|
||||
alternatively as a (signed) tarball of the Git tag.
|
||||
|
||||
This is also the first release to be made by the new maintainers of PCRE2, and
|
||||
we would like to thank Philip Hazel, creator and maintainer of PCRE and PCRE2.
|
||||
|
||||
* (Git change) The sljit project has been split out into a separate Git
|
||||
repository. Git users must now run `git submodule init; git submodule update`
|
||||
after a Git checkout.
|
||||
|
||||
* (Behaviour change) Update Unicode support to UCD 16.
|
||||
|
||||
* (Match behaviour change) Case-insensitive matching of Unicode properties
|
||||
Ll, Lt, and Lu has been changed to match Perl. Previously, /\p{Ll}/i would
|
||||
match only lower-case characters (even though case-insensitive matching was
|
||||
specified). This also affects case-insensitive matching of POSIX classes such
|
||||
as [:lower:].
|
||||
|
||||
* (Minor match behaviour change) Case-insensitive matching of backreferences now
|
||||
respects the PCRE2_EXTRA_CASELESS_RESTRICT option.
|
||||
|
||||
* (Minor pattern syntax change) Parsing of the \x escape is stricter, and is
|
||||
no longer parsed as an escape for the NUL character if not followed by '{' or
|
||||
a hexadecimal digit. Use \x00 instead.
|
||||
|
||||
* (Major new feature) Add a new feature called scan substring. This is a new
|
||||
type of assertion which matches the content of a capturing block to a
|
||||
sub-pattern.
|
||||
|
||||
Example: to find a word that contains the rare (in English) sequence of
|
||||
letters "rh" not at the start:
|
||||
|
||||
\b(\w++)(*scan_substring:(1).+rh)
|
||||
|
||||
The first group captures a word which is then scanned by the
|
||||
(*scan_substring:(1) ... ) assertion, which tests whether the pattern ".+rh"
|
||||
matches the capture group "(1)".
|
||||
|
||||
* (Major new feature) Add support for UTS#18 compatible character classes,
|
||||
using the new option PCRE2_ALT_EXTENDED_CLASS. This adds '[' as a
|
||||
metacharacter within character classes and the operators '&&', '--' and '~~',
|
||||
allowing subtractions and intersections of character classes to be easily
|
||||
expressed.
|
||||
|
||||
Example: to match Thai or Greek letters (but not letters of other scripts, nor
|
||||
non-letter characters in those scripts), use [\p{L}&&[\p{Thai}||\p{Greek}]].
|
||||
|
||||
* (Major new feature) Add support for Perl-style extended character classes,
|
||||
using the syntax (?[...]). This also allows expressing subtractions and
|
||||
intersections of character classes, but using a different syntax to UTS#18.
|
||||
|
||||
Example: to match Thai or Greek letters (but not letters of other scripts, nor
|
||||
non-letter characters in those scripts), use (?[\p{L} & (\p{Thai} + \p{Greek})]).
|
||||
|
||||
* (Minor feature) Significant improvements to the character class match engine.
|
||||
Compiled character classes are now more compact, and have faster matching
|
||||
for large or complex character sets, using binary search through the set.
|
||||
|
||||
* JIT compilation now fails with the new error code PCRE2_ERROR_JIT_UNSUPPORTED
|
||||
for patterns which use features not supported by the JIT compiler.
|
||||
|
||||
* (Minor feature) New options PCRE2_EXTRA_NO_BS0 (disallow \0 as an escape for
|
||||
the NUL character); PCRE2_EXTRA_PYTHON_OCTAL (use Python disambiguation rules
|
||||
for deciding whether \12 is a backreference or an octal escape);
|
||||
PCRE2_EXTRA_NEVER_CALLOUT (disable callout syntax entirely);
|
||||
PCRE2_EXTRA_TURKISH_CASING (use Turkish rules for case-insensitive matching).
|
||||
|
||||
* (Minor feature) Add new API function pcre2_set_optimize() for controlling
|
||||
which optimizations are enabled.
|
||||
|
||||
* (Minor new features) A variety of extensions have been made to
|
||||
pcre2_substitute() and its syntax for replacement strings. These now support:
|
||||
\123 octal escapes; titlecasing \u\L; \1 backreferences; \g<1> and $<NAME>
|
||||
backreferences; $& $` $' and $_; new function
|
||||
pcre2_set_substitute_case_callout() to allow locale-aware case transformation.
|
||||
|
||||
|
||||
Version 10.44 07-June-2024
|
||||
--------------------------
|
||||
@@ -13,7 +118,7 @@ increased to 128. Some auxiliary files for building under VMS are added.
|
||||
Version 10.43 16-February-2024
|
||||
------------------------------
|
||||
|
||||
There are quite a lot of changes in this release (see ChangeLog and git log for
|
||||
There are quite a lot of changes in this release (see ChangeLog and Git log for
|
||||
a list). Those that are not bugfixes or code tidies are:
|
||||
|
||||
* The JIT code no longer supports ARMv5 architecture.
|
||||
@@ -52,7 +157,7 @@ a list). Those that are not bugfixes or code tidies are:
|
||||
matches the "fullwidth" versions of hex digits. PCRE2_EXTRA_ASCII_DIGIT can
|
||||
be used to keep it ASCII only.
|
||||
|
||||
* Make PCRE2_UCP the default in UTF mode in pcre2grep and add -no_ucp,
|
||||
* Make PCRE2_UCP the default in UTF mode in pcre2grep and add --no-ucp,
|
||||
--case-restrict and --posix-digit.
|
||||
|
||||
* Add --group-separator and --no-group-separator to pcre2grep.
|
||||
|
||||
+54
-42
@@ -105,6 +105,7 @@ example.
|
||||
pcre2_chkdint.c
|
||||
pcre2_chartables.c
|
||||
pcre2_compile.c
|
||||
pcre2_compile_class.c
|
||||
pcre2_config.c
|
||||
pcre2_context.c
|
||||
pcre2_convert.c
|
||||
@@ -138,7 +139,7 @@ example.
|
||||
Note that you must compile pcre2_jit_compile.c, even if you have not
|
||||
defined SUPPORT_JIT in src/config.h, because when JIT support is not
|
||||
configured, dummy functions are compiled. When JIT support IS configured,
|
||||
pcre2_jit_compile.c #includes other files from the sljit subdirectory,
|
||||
pcre2_jit_compile.c #includes other files from the sljit dependency,
|
||||
all of whose names begin with "sljit". It also #includes
|
||||
src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile
|
||||
those yourself.
|
||||
@@ -301,56 +302,66 @@ Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
|
||||
spaces in the names for your CMake installation and your PCRE2 source and build
|
||||
directories.
|
||||
|
||||
The following instructions were contributed by a PCRE1 user, but they should
|
||||
also work for PCRE2. If they are not followed exactly, errors may occur. In the
|
||||
event that errors do occur, it is recommended that you delete the CMake cache
|
||||
before attempting to repeat the CMake build process. In the CMake GUI, the
|
||||
cache can be deleted by selecting "File > Delete Cache".
|
||||
If you are using CMake and encounter errors, deleting the CMake cache and
|
||||
restarting from a fresh build may fix the error. In the CMake GUI, the cache can
|
||||
be deleted by selecting "File > Delete Cache"; or the file "CMakeCache.txt" in
|
||||
the build directory can be deleted.
|
||||
|
||||
1. Install the latest CMake version available from http://www.cmake.org/, and
|
||||
ensure that cmake\bin is on your path.
|
||||
1. Install the latest CMake version available from http://www.cmake.org/, and
|
||||
ensure that cmake\bin is on your path.
|
||||
|
||||
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
|
||||
directory such as C:\pcre2. You should ensure your local date and time
|
||||
is not earlier than the file dates in your source dir if the release is
|
||||
very new.
|
||||
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
|
||||
directory such as C:\pcre2. You should ensure your local date and time
|
||||
is not earlier than the file dates in your source dir if the release is
|
||||
very new.
|
||||
|
||||
3. Create a new, empty build directory, preferably a subdirectory of the
|
||||
source dir. For example, C:\pcre2\pcre2-xx\build.
|
||||
3. Create a new, empty build directory, preferably a subdirectory of the
|
||||
source dir. For example, C:\pcre2\pcre2-xx\build.
|
||||
|
||||
4. Run cmake-gui from the Shell environment of your build tool, for example,
|
||||
Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
|
||||
to start Cmake from the Windows Start menu, as this can lead to errors.
|
||||
4. Run CMake.
|
||||
|
||||
5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
|
||||
build directories, respectively.
|
||||
- Using the CLI, simply run `cmake ..` inside the `build/` directory. You can
|
||||
use the `ccmake` ncurses GUI to select and configure PCRE2 features.
|
||||
|
||||
6. Hit the "Configure" button.
|
||||
- Using the CMake GUI:
|
||||
|
||||
7. Select the particular IDE / build tool that you are using (Visual
|
||||
Studio, MSYS makefiles, MinGW makefiles, etc.)
|
||||
a) Run cmake-gui from the Shell environment of your build tool, for
|
||||
example, Msys for Msys/MinGW or Visual Studio Command Prompt for
|
||||
VC/VC++.
|
||||
|
||||
8. The GUI will then list several configuration options. This is where
|
||||
you can disable Unicode support or select other PCRE2 optional features.
|
||||
b) Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
|
||||
build directories, respectively.
|
||||
|
||||
9. Hit "Configure" again. The adjacent "Generate" button should now be
|
||||
active.
|
||||
c) Press the "Configure" button.
|
||||
|
||||
10. Hit "Generate".
|
||||
d) Select the particular IDE / build tool that you are using (Visual
|
||||
Studio, MSYS makefiles, MinGW makefiles, etc.)
|
||||
|
||||
11. The build directory should now contain a usable build system, be it a
|
||||
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
|
||||
cmake-gui and use the generated build system with your compiler or IDE.
|
||||
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
|
||||
solution, select the desired configuration (Debug, or Release, etc.) and
|
||||
build the ALL_BUILD project.
|
||||
e) The GUI will then list several configuration options. This is where
|
||||
you can disable Unicode support or select other PCRE2 optional features.
|
||||
|
||||
12. If during configuration with cmake-gui you've elected to build the test
|
||||
programs, you can execute them by building the test project. E.g., for
|
||||
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
|
||||
most recent build configuration is targeted by the tests. A summary of
|
||||
test results is presented. Complete test output is subsequently
|
||||
available for review in Testing\Temporary under your build dir.
|
||||
f) Press "Configure" again. The adjacent "Generate" button should now be
|
||||
active.
|
||||
|
||||
g) Press "Generate".
|
||||
|
||||
5. The build directory should now contain a usable build system, be it a
|
||||
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
|
||||
cmake-gui and use the generated build system with your compiler or IDE.
|
||||
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
|
||||
solution, select the desired configuration (Debug, or Release, etc.) and
|
||||
build the ALL_BUILD project.
|
||||
|
||||
Regardless of build system used, `cmake --build .` will build it.
|
||||
|
||||
6. If during configuration with cmake-gui you've elected to build the test
|
||||
programs, you can execute them by building the test project. E.g., for
|
||||
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
|
||||
most recent build configuration is targeted by the tests. A summary of
|
||||
test results is presented. Complete test output is subsequently
|
||||
available for review in Testing\Temporary under your build dir.
|
||||
|
||||
Regardless of build system used, `ctest` will run the tests.
|
||||
|
||||
|
||||
BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO
|
||||
@@ -425,6 +436,7 @@ OpenVMS. They are in the "vms" directory in the distribution tarball. Please
|
||||
read the file called vms/openvms_readme.txt. The pcre2test and pcre2grep
|
||||
programs contain some VMS-specific code.
|
||||
|
||||
===========================
|
||||
Last Updated: 16 April 2024
|
||||
===========================
|
||||
==============================
|
||||
Last updated: 26 December 2024
|
||||
==============================
|
||||
|
||||
|
||||
+86
-72
@@ -385,7 +385,7 @@ library. They are also documented in the pcre2build man page.
|
||||
|
||||
If this is done, when pcre2test's input is from a terminal, it reads it using
|
||||
the readline() function. This provides line-editing and history facilities.
|
||||
Note that libreadline is GPL-licenced, so if you distribute a binary of
|
||||
Note that libreadline is GPL-licensed, so if you distribute a binary of
|
||||
pcre2test linked in this way, there may be licensing issues. These can be
|
||||
avoided by linking with libedit (which has a BSD licence) instead.
|
||||
|
||||
@@ -411,20 +411,19 @@ library. They are also documented in the pcre2build man page.
|
||||
Instead of %td or %zu, %lu is used, with a cast for size_t values.
|
||||
|
||||
. There is a special option called --enable-fuzz-support for use by people who
|
||||
want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
|
||||
library. If set, it causes an extra library called libpcre2-fuzzsupport.a to
|
||||
be built, but not installed. This contains a single function called
|
||||
LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the
|
||||
length of the string. When called, this function tries to compile the string
|
||||
as a pattern, and if that succeeds, to match it. This is done both with no
|
||||
options and with some random options bits that are generated from the string.
|
||||
Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
|
||||
be created. This is normally run under valgrind or used when PCRE2 is
|
||||
compiled with address sanitizing enabled. It calls the fuzzing function and
|
||||
outputs information about what it is doing. The input strings are specified
|
||||
by arguments: if an argument starts with "=" the rest of it is a literal
|
||||
input string. Otherwise, it is assumed to be a file name, and the contents
|
||||
of the file are the test string.
|
||||
want to run fuzzing tests on PCRE2. If set, it causes an extra library
|
||||
called libpcre2-fuzzsupport.a to be built, but not installed. This contains
|
||||
a single function called LLVMFuzzerTestOneInput() whose arguments are a
|
||||
pointer to a string and the length of the string. When called, this function
|
||||
tries to compile the string as a pattern, and if that succeeds, to match
|
||||
it. This is done both with no options and with some random option bits that
|
||||
are generated from the string. Setting --enable-fuzz-support also causes an
|
||||
executable called pcre2fuzzcheck-{8,16,32} to be created. This is normally
|
||||
run under valgrind or used when PCRE2 is compiled with address sanitizing
|
||||
enabled. It calls the fuzzing function and outputs information about what it
|
||||
is doing. The input strings are specified by arguments: if an argument
|
||||
starts with "=" the rest of it is a literal input string. Otherwise, it is
|
||||
assumed to be a file name, and the contents of the file are the test string.
|
||||
|
||||
. Releases before 10.30 could be compiled with --disable-stack-for-recursion,
|
||||
which caused pcre2_match() to use individual blocks on the heap for
|
||||
@@ -510,6 +509,7 @@ system. The following are installed (file names are all relative to the
|
||||
LICENCE
|
||||
NEWS
|
||||
README
|
||||
SECURITY
|
||||
pcre2.txt (a concatenation of the man(3) pages)
|
||||
pcre2test.txt the pcre2test man page
|
||||
pcre2grep.txt the pcre2grep man page
|
||||
@@ -607,8 +607,9 @@ zip formats. The command "make distcheck" does the same, but then does a trial
|
||||
build of the new distribution to ensure that it works.
|
||||
|
||||
If you have modified any of the man page sources in the doc directory, you
|
||||
should first run the PrepareRelease script before making a distribution. This
|
||||
script creates the .txt and HTML forms of the documentation from the man pages.
|
||||
should first run the maint/PrepareRelease script before making a distribution.
|
||||
This script creates the .txt and HTML forms of the documentation from the man
|
||||
pages.
|
||||
|
||||
|
||||
Testing PCRE2
|
||||
@@ -822,37 +823,38 @@ The distribution should contain the files listed below.
|
||||
ASCII coding; unless --enable-rebuild-chartables is
|
||||
specified, used by copying to pcre2_chartables.c
|
||||
|
||||
src/pcre2posix.c )
|
||||
src/pcre2_auto_possess.c )
|
||||
src/pcre2_chkdint.c )
|
||||
src/pcre2_compile.c )
|
||||
src/pcre2_config.c )
|
||||
src/pcre2_context.c )
|
||||
src/pcre2_convert.c )
|
||||
src/pcre2_dfa_match.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_extuni.c )
|
||||
src/pcre2_find_bracket.c )
|
||||
src/pcre2_jit_compile.c )
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||
src/pcre2_maketables.c )
|
||||
src/pcre2_match.c )
|
||||
src/pcre2_match_data.c )
|
||||
src/pcre2_newline.c )
|
||||
src/pcre2_ord2utf.c )
|
||||
src/pcre2_pattern_info.c )
|
||||
src/pcre2_script_run.c )
|
||||
src/pcre2_serialize.c )
|
||||
src/pcre2_string_utils.c )
|
||||
src/pcre2_study.c )
|
||||
src/pcre2_substitute.c )
|
||||
src/pcre2_substring.c )
|
||||
src/pcre2_tables.c )
|
||||
src/pcre2_ucd.c )
|
||||
src/pcre2_ucptables.c )
|
||||
src/pcre2_valid_utf.c )
|
||||
src/pcre2_xclass.c )
|
||||
src/pcre2posix.c )
|
||||
src/pcre2_auto_possess.c )
|
||||
src/pcre2_chkdint.c )
|
||||
src/pcre2_compile.c )
|
||||
src/pcre2_compile_class.c )
|
||||
src/pcre2_config.c )
|
||||
src/pcre2_context.c )
|
||||
src/pcre2_convert.c )
|
||||
src/pcre2_dfa_match.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_extuni.c )
|
||||
src/pcre2_find_bracket.c )
|
||||
src/pcre2_jit_compile.c )
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||
src/pcre2_maketables.c )
|
||||
src/pcre2_match.c )
|
||||
src/pcre2_match_data.c )
|
||||
src/pcre2_newline.c )
|
||||
src/pcre2_ord2utf.c )
|
||||
src/pcre2_pattern_info.c )
|
||||
src/pcre2_script_run.c )
|
||||
src/pcre2_serialize.c )
|
||||
src/pcre2_string_utils.c )
|
||||
src/pcre2_study.c )
|
||||
src/pcre2_substitute.c )
|
||||
src/pcre2_substring.c )
|
||||
src/pcre2_tables.c )
|
||||
src/pcre2_ucd.c )
|
||||
src/pcre2_ucptables.c )
|
||||
src/pcre2_valid_utf.c )
|
||||
src/pcre2_xclass.c )
|
||||
|
||||
src/pcre2_printint.c debugging function that is used by pcre2test,
|
||||
src/pcre2_fuzzsupport.c function for (optional) fuzzing support
|
||||
@@ -860,13 +862,16 @@ The distribution should contain the files listed below.
|
||||
src/config.h.in template for config.h, when built by "configure"
|
||||
src/pcre2.h.in template for pcre2.h when built by "configure"
|
||||
src/pcre2posix.h header for the external POSIX wrapper API
|
||||
src/pcre2_compile.h header for internal use
|
||||
src/pcre2_internal.h header for internal use
|
||||
src/pcre2_intmodedep.h a mode-specific internal header
|
||||
src/pcre2_jit_char_inc.h header used by JIT
|
||||
src/pcre2_jit_neon_inc.h header used by JIT
|
||||
src/pcre2_jit_simd_inc.h header used by JIT
|
||||
src/pcre2_ucp.h header for Unicode property handling
|
||||
src/pcre2_util.h header for internal utils
|
||||
|
||||
sljit/* source files for the JIT compiler
|
||||
deps/sljit/sljit_src/* source files for the JIT compiler
|
||||
|
||||
(B) Source files for programs that use PCRE2:
|
||||
|
||||
@@ -878,48 +883,49 @@ The distribution should contain the files listed below.
|
||||
|
||||
(C) Auxiliary files:
|
||||
|
||||
132html script to turn "man" pages into HTML
|
||||
AUTHORS information about the author of PCRE2
|
||||
AUTHORS.md information about the authors of PCRE2
|
||||
ChangeLog log of changes to the code
|
||||
CleanTxt script to clean nroff output for txt man pages
|
||||
Detrail script to remove trailing spaces
|
||||
HACKING some notes about the internals of PCRE2
|
||||
INSTALL generic installation instructions
|
||||
LICENCE conditions for the use of PCRE2
|
||||
LICENCE.md conditions for the use of PCRE2
|
||||
COPYING the same, using GNU's standard name
|
||||
SECURITY.md information on reporting vulnerabilities
|
||||
Makefile.in ) template for Unix Makefile, which is built by
|
||||
) "configure"
|
||||
Makefile.am ) the automake input that was used to create
|
||||
) Makefile.in
|
||||
NEWS important changes in this release
|
||||
NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools
|
||||
PrepareRelease script to make preparations for "make dist"
|
||||
README this file
|
||||
RunTest a Unix shell script for running tests
|
||||
RunGrepTest a Unix shell script for pcre2grep tests
|
||||
RunTest.bat a Windows batch file for running tests
|
||||
RunGrepTest.bat a Windows batch file for pcre2grep tests
|
||||
aclocal.m4 m4 macros (generated by "aclocal")
|
||||
config.guess ) files used by libtool,
|
||||
config.sub ) used only when building a shared library
|
||||
m4/* m4 macros (used by autoconf)
|
||||
configure a configuring shell script (built by autoconf)
|
||||
configure.ac ) the autoconf input that was used to build
|
||||
) "configure" and config.h
|
||||
depcomp ) script to find program dependencies, generated by
|
||||
) automake
|
||||
doc/*.3 man page sources for PCRE2
|
||||
doc/*.1 man page sources for pcre2grep and pcre2test
|
||||
doc/index.html.src the base HTML page
|
||||
doc/html/* HTML documentation
|
||||
doc/pcre2.txt plain text version of the man pages
|
||||
doc/pcre2-config.txt plain text documentation of pcre2-config script
|
||||
doc/pcre2grep.txt plain text documentation of grep utility program
|
||||
doc/pcre2test.txt plain text documentation of test program
|
||||
install-sh a shell script for installing files
|
||||
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
|
||||
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
|
||||
libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
|
||||
libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config
|
||||
ltmain.sh file used to build a libtool script
|
||||
missing ) common stub for a few missing GNU programs while
|
||||
) installing, generated by automake
|
||||
mkinstalldirs script for making install directories
|
||||
ar-lib )
|
||||
config.guess )
|
||||
config.sub )
|
||||
depcomp ) helper tools generated by libtool and
|
||||
compile ) automake, used internally by ./configure
|
||||
install-sh )
|
||||
ltmain.sh )
|
||||
missing )
|
||||
test-driver )
|
||||
perltest.sh Script for running a Perl test program
|
||||
pcre2-config.in source of script which retains PCRE2 information
|
||||
testdata/testinput* test data for main library tests
|
||||
@@ -927,12 +933,13 @@ The distribution should contain the files listed below.
|
||||
testdata/grep* input and output for pcre2grep tests
|
||||
testdata/* other supporting test files
|
||||
|
||||
(D) Auxiliary files for cmake support
|
||||
(D) Auxiliary files for CMake support
|
||||
|
||||
cmake/COPYING-CMAKE-SCRIPTS
|
||||
cmake/FindPackageHandleStandardArgs.cmake
|
||||
cmake/FindEditline.cmake
|
||||
cmake/FindReadline.cmake
|
||||
cmake/pcre2-config-version.cmake.in
|
||||
cmake/pcre2-config.cmake.in
|
||||
CMakeLists.txt
|
||||
config-cmake.h.in
|
||||
|
||||
@@ -943,14 +950,21 @@ The distribution should contain the files listed below.
|
||||
src/config.h.generic ) a version of config.h for use in non-"configure"
|
||||
) environments
|
||||
|
||||
(F) Auxiliary files for building PCRE2 under OpenVMS
|
||||
(F) Auxiliary files for building PCRE2 using other build systems
|
||||
|
||||
BUILD.bazel )
|
||||
MODULE.bazel ) files used by the Bazel build system
|
||||
WORKSPACE.bazel )
|
||||
build.zig file used by Zig's build system
|
||||
|
||||
(G) Auxiliary files for building PCRE2 under OpenVMS
|
||||
|
||||
vms/configure.com )
|
||||
vms/openvms_readme.txt ) These files were contributed by a PCRE2 user.
|
||||
vms/pcre2.h_patch )
|
||||
vms/stdint.h )
|
||||
|
||||
Philip Hazel
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
Last updated: 15 April 2024
|
||||
==============================
|
||||
Last updated: 18 December 2024
|
||||
==============================
|
||||
|
||||
|
||||
+115
-25
@@ -25,8 +25,8 @@ unset cp ls mv rm
|
||||
# valgrind settings when requested.
|
||||
|
||||
builddir=`pwd`
|
||||
pcre2grep=$builddir/pcre2grep
|
||||
pcre2test=$builddir/pcre2test
|
||||
: ${pcre2grep:=$builddir/pcre2grep}
|
||||
: ${pcre2test:=$builddir/pcre2test}
|
||||
|
||||
if [ ! -x $pcre2grep ] ; then
|
||||
echo "** $pcre2grep does not exist or is not executable."
|
||||
@@ -41,22 +41,17 @@ fi
|
||||
valgrind=
|
||||
while [ $# -gt 0 ] ; do
|
||||
case $1 in
|
||||
valgrind) valgrind="valgrind -q --leak-check=no --smc-check=all-non-file";;
|
||||
valgrind|-valgrind) valgrind="valgrind -q --leak-check=no --smc-check=all-non-file --error-exitcode=70";;
|
||||
*) echo "RunGrepTest: Unknown argument $1"; exit 1;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
vjs=
|
||||
pcre2grep_version=`$pcre2grep -V`
|
||||
if [ "$valgrind" = "" ] ; then
|
||||
echo "Testing $pcre2grep_version"
|
||||
else
|
||||
echo "Testing $pcre2grep_version using valgrind"
|
||||
$pcre2test -C jit >/dev/null
|
||||
if [ $? -ne 0 ]; then
|
||||
vjs="--suppressions=./testdata/valgrind-jit.supp"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Set up a suitable "diff" command for comparison. Some systems have a diff
|
||||
@@ -105,6 +100,16 @@ if [ -z "$srcdir" -o ! -d "$srcdir/testdata" ] ; then
|
||||
fi
|
||||
fi
|
||||
|
||||
# Set up the path to the valgrind JIT suppressions
|
||||
|
||||
vjs=
|
||||
if [ "$valgrind" != "" ] ; then
|
||||
$pcre2test -C jit >/dev/null
|
||||
if [ $? -ne 0 ]; then
|
||||
vjs="--suppressions=`realpath "$srcdir"`/testdata/valgrind-jit.supp"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check for the availability of UTF-8 support
|
||||
|
||||
$pcre2test -C unicode >/dev/null
|
||||
@@ -275,7 +280,7 @@ echo "---------------------------- Test 35 -----------------------------" >>test
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 36 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include='grepinput[^C]' --exclude 'grepinput$' --exclude=grepinput8 --exclude=grepinputM --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include='grepinput[^C]' --exclude 'grepinput$' --exclude='grepinput(Bad)?8' --exclude=grepinputM --exclude=grepinputUN --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 37 -----------------------------" >>testtrygrep
|
||||
@@ -318,8 +323,11 @@ echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 46 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -e 'unopened)' -e abc ./testdata/grepinput) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e '(unclosed' ./testdata/grepinput) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e xyz -e '[unclosed' ./testdata/grepinput) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep --regex=123 -eabc -e xyz -e '[unclosed' ./testdata/grepinput) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
@@ -530,25 +538,28 @@ echo "---------------------------- Test 95 -----------------------------" >>test
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 96 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' --exclude=grepinput[MC] 'fox' ./test* | sort) >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' --exclude=grepinput[MCU] 'fox' ./test* | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 97 -----------------------------" >>testtrygrep
|
||||
echo "grepinput$" >testtemp1grep
|
||||
echo "grepinput8" >>testtemp1grep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude=grepinput[MC] --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "grepinputBad8" >>testtemp1grep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude=grepinput[MCU] --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 98 -----------------------------" >>testtrygrep
|
||||
echo "grepinput$" >testtemp1grep
|
||||
echo "grepinput8" >>testtemp1grep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --exclude=grepinput[MC] --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "grepinputBad8" >>testtemp1grep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --exclude=grepinput[MCU] --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 99 -----------------------------" >>testtrygrep
|
||||
echo "grepinput$" >testtemp1grep
|
||||
echo "grepinput8" >testtemp2grep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude=grepinput[MC] --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "grepinputBad8" >>testtemp1grep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude=grepinput[MCU] --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 100 ------------------------------" >>testtrygrep
|
||||
@@ -618,7 +629,7 @@ echo "---------------------------- Test 115 -----------------------------" >>tes
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 116 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep --exclude=grepinput[MC] -th 'the' testdata/grepinput*) >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep --exclude=grepinput[MCU] -th 'the' testdata/grepinput*) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 117 -----------------------------" >>testtrygrep
|
||||
@@ -637,6 +648,8 @@ echo "RC=$?" >>testtrygrep
|
||||
echo "---------------------------- Test 120 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -HO '$0:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -HO '$&:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -m 1 -O '$0:$a$b$e$f$r$t$v' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -HO '${X}' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1
|
||||
@@ -761,7 +774,7 @@ echo "---------------------------- Test 140 -----------------------------" >>tes
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 141 -----------------------------" >>testtrygrep
|
||||
printf "$srcdir/testdata/grepinputv\n-\n" >testtemp1grep
|
||||
printf "%s/testdata/grepinputv\n-\n" "$srcdir" >testtemp1grep
|
||||
printf 'This is a line from stdin.' >testtemp2grep
|
||||
$valgrind $vjs $pcre2grep --file-list testtemp1grep "line from stdin" <testtemp2grep >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
@@ -842,16 +855,17 @@ echo "RC=$?" >>testtrygrep
|
||||
echo "---------------------------- Test 150 -----------------------------" >>testtrygrep
|
||||
which locale >/dev/null 2>&1
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "pcre2grep: Failed to set locale badlocale (obtained from LC_CTYPE)" >>testtrygrep
|
||||
echo "pcre2grep: Failed to set locale locale.bad (obtained from LC_CTYPE)" >>testtrygrep
|
||||
echo "RC=2" >>testtrygrep
|
||||
else
|
||||
|
||||
(cd $srcdir; unset LC_ALL; env LC_CTYPE=badlocale $valgrind $vjs $pcre2grep abc /dev/null) >>testtrygrep 2>&1
|
||||
(cd $srcdir; unset LC_ALL; LC_CTYPE=locale.bad $valgrind $vjs $pcre2grep abc /dev/null) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
fi
|
||||
|
||||
echo "---------------------------- Test 151 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep --colour=always -e this -e The -e 'The wo' testdata/grepinputv) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 152 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -nA3 --group-separator='++' 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
@@ -861,6 +875,42 @@ echo "---------------------------- Test 153 -----------------------------" >>tes
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -nA3 --no-group-separator 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 154 -----------------------------" >>testtrygrep
|
||||
>testtemp1grep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -f $builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 155 -----------------------------" >>testtrygrep
|
||||
echo "" >testtemp1grep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -f $builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 156 -----------------------------" >>testtrygrep
|
||||
echo "" >testtemp1grep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep --posix-pattern-file --file $builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 157 -----------------------------" >>testtrygrep
|
||||
echo "spaces " >testtemp1grep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -o --posix-pattern-file --file=$builddir/testtemp1grep ./testdata/grepinputv >$builddir/testtemp2grep && $valgrind $vjs $pcre2grep -q "s " $builddir/testtemp2grep) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 158 -----------------------------" >>testtrygrep
|
||||
echo "spaces." >testtemp1grep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -f $builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 159 -----------------------------" >>testtrygrep
|
||||
printf "spaces.\r\n" >testtemp1grep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep --posix-pattern-file -f$builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 160 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -nC3 '^(ert|jkl)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -n -B4 -A2 '^(ert|dfg)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
|
||||
# Now compare the results.
|
||||
|
||||
@@ -886,13 +936,11 @@ if [ $utf8 -ne 0 ] ; then
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test U4 ------------------------------" >>testtrygrep
|
||||
printf 'A\341\200\200\200CD\342\200\200Z\n' >testtemp1grep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -u -o '....' $builddir/testtemp1grep) >>testtrygrep 2>&1
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -u -o '....' ./testdata/grepinputBad8) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test U5 ------------------------------" >>testtrygrep
|
||||
printf 'A\341\200\200\200CD\342\200\200Z\n' >testtemp1grep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -U -o '....' $builddir/testtemp1grep) >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -U -o '....' ./testdata/grepinputBad8) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test U6 -----------------------------" >>testtrygrep
|
||||
@@ -935,31 +983,48 @@ printf 'abc\rdef\r\nghi\njkl' >testNinputgrep
|
||||
|
||||
printf '%c--------------------------- Test N1 ------------------------------\r\n' - >testtrygrep
|
||||
$valgrind $vjs $pcre2grep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep -B1 -n -N CR "^def" testNinputgrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
printf '%c--------------------------- Test N2 ------------------------------\r\n' - >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep -B1 -n -N CRLF "^ghi" testNinputgrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
printf '%c--------------------------- Test N3 ------------------------------\r\n' - >>testtrygrep
|
||||
pattern=`printf 'def\rjkl'`
|
||||
$valgrind $vjs $pcre2grep -n --newline=cr -F "$pattern" testNinputgrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
printf '%c--------------------------- Test N4 ------------------------------\r\n' - >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinputgrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
printf '%c--------------------------- Test N5 ------------------------------\r\n' - >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep -B1 -n --newline=any "^def" testNinputgrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep -B1 -n --newline=anycrlf "^jkl" testNinputgrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
|
||||
printf 'xyz\0abc\0def' >testNinputgrep
|
||||
$valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep -B1 -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
printf '%c--------------------------- Test N8 ------------------------------\r\n' - >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep -na --newline=anycrlf "^a" $srcdir/testdata/grepinputBad8_Trail >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "" >>testtrygrep
|
||||
|
||||
$cf $srcdir/testdata/grepoutputN testtrygrep
|
||||
@@ -972,8 +1037,13 @@ if [ $utf8 -ne 0 ] ; then
|
||||
echo "Testing pcre2grep newline settings with UTF-8 features"
|
||||
|
||||
printf '%c--------------------------- Test UN1 ------------------------------\r\n' - >testtrygrep
|
||||
printf 'abc\341\210\264def\nxyz' >testNinputgrep
|
||||
$valgrind $vjs $pcre2grep -nau --newline=anycrlf "^(abc|def)" testNinputgrep >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep -nau --newline=anycrlf "^(abc|def)" $srcdir/testdata/grepinputUN >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
printf '%c--------------------------- Test UN2 ------------------------------\r\n' - >testtrygrep
|
||||
$valgrind $vjs $pcre2grep -nauU --newline=anycrlf "^a" $srcdir/testdata/grepinputBad8_Trail >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "" >>testtrygrep
|
||||
|
||||
$cf $srcdir/testdata/grepoutputUN testtrygrep
|
||||
@@ -990,12 +1060,24 @@ fi
|
||||
|
||||
if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scripts in patterns are supported'; then
|
||||
echo "Testing pcre2grep script callouts"
|
||||
$valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep
|
||||
echo "--- Test 1 ---" >testtrygrep
|
||||
$valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
echo "--- Test 2 ---" >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
echo "--- Test 3 ---" >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
echo "--- Test 4 ---" >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep '(T)(?C"/bin/echo|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
echo "--- Test 5 ---" >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
echo "--- Test 6 ---" >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep -m1 '(T)(?C"|$0:$1:$x{41}$o{101}$n")' $srcdir/testdata/grepinputv >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Non-fork callout scripts in patterns are supported'; then
|
||||
nonfork=1
|
||||
@@ -1010,8 +1092,12 @@ if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scri
|
||||
|
||||
if [ $utf8 -ne 0 ] ; then
|
||||
echo "Testing pcre2grep script callout with UTF-8 features"
|
||||
$valgrind $vjs $pcre2grep -u '(T)(?C"|$0:$x{a6}$n")' $srcdir/testdata/grepinputv >testtrygrep
|
||||
echo "--- Test 1 ---" >testtrygrep
|
||||
$valgrind $vjs $pcre2grep -u '(T)(?C"|$0:$x{a6}$n")' $srcdir/testdata/grepinputv >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
echo "--- Test 2 ---" >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep -u '(T)(?C"/bin/echo|$0:$x{a6}$n")' $srcdir/testdata/grepinputv >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
if [ $nonfork = 1 ] ; then
|
||||
$cf $srcdir/testdata/grepoutputCNU testtrygrep
|
||||
@@ -1019,7 +1105,11 @@ if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scri
|
||||
$cf $srcdir/testdata/grepoutputCU testtrygrep
|
||||
fi
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else
|
||||
echo "Skipping pcre2grep script callout UTF-8 tests: no UTF-8 support in PCRE2 library"
|
||||
fi
|
||||
|
||||
unset nonfork
|
||||
else
|
||||
echo "Script callouts are not supported"
|
||||
fi
|
||||
|
||||
+443
-33
@@ -19,8 +19,9 @@ set GREP_COLOR=
|
||||
:: Remember the current (build) directory and set the program to be tested.
|
||||
|
||||
set builddir="%CD%"
|
||||
set pcre2grep=%builddir%\pcre2grep.exe
|
||||
set pcre2test=%builddir%\pcre2test.exe
|
||||
|
||||
if [%pcre2grep%]==[] set pcre2grep=%builddir%\pcre2grep.exe
|
||||
if [%pcre2test%]==[] set pcre2test=%builddir%\pcre2test.exe
|
||||
|
||||
if NOT exist %pcre2grep% (
|
||||
echo ** %pcre2grep% does not exist.
|
||||
@@ -81,11 +82,16 @@ if NOT "%nl%" == "LF" if NOT "%nl%" == "ANY" if NOT "%nl%" == "ANYCRLF" (
|
||||
)
|
||||
|
||||
:: Create a simple printf via cscript/JScript (an actual printf may translate
|
||||
:: LF to CRLF, which this one does not).
|
||||
:: LF to CRLF, which this one does not). We only support the barebones we need:
|
||||
:: \r, \n, \0, and %s (but only once).
|
||||
|
||||
echo WScript.StdOut.Write(WScript.Arguments(0).replace(/\\r/g, "\r").replace(/\\n/g, "\n")) >printf.js
|
||||
echo WScript.StdOut.Write(WScript.Arguments(0).replace(/\\r/g, "\r").replace(/\\n/g, "\n").replace(/\\0/g, "\x00").replace(/%%s/g, function() { return WScript.Arguments(1) })) >printf.js
|
||||
set printf=cscript //nologo printf.js
|
||||
|
||||
:: Create a simple 'tr' via cscript/JScript.
|
||||
echo WScript.StdOut.Write(WScript.StdIn.ReadAll().replace(/\x00/g, "@")) >trnull.js
|
||||
set trnull=cscript //nologo trnull.js
|
||||
|
||||
:: ------ Normal tests ------
|
||||
|
||||
echo Testing pcre2grep main features
|
||||
@@ -232,7 +238,7 @@ echo ---------------------------- Test 35 ----------------------------->>testtry
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 36 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude "grepinput$" --exclude=grepinput8 --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -L -r --include="grepinput[^C]" --exclude "grepinput$" --exclude="grepinput(Bad)?8" --exclude=grepinputM --exclude=grepinputUN --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 37 ----------------------------->>testtrygrep
|
||||
@@ -274,8 +280,14 @@ echo ---------------------------- Test 45 ------------------------------>>testtr
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 46 ------------------------------>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -e "unopened)" -e abc ./testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -eabc -e "(unclosed" ./testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -eabc -e xyz -e "[unclosed" ./testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% --regex=123 -eabc -e xyz -e "[unclosed" ./testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 47 ------------------------------>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -Fx AB.VE^
|
||||
@@ -320,11 +332,11 @@ echo ---------------------------- Test 55 ----------------------------->>testtry
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 56 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -c lazy ./testdata/grepinput* & popd) >>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -c --exclude=grepinputC lazy ./testdata/grepinput* & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 57 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -c -l lazy ./testdata/grepinput* & popd) >>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -c -l --exclude=grepinputC lazy ./testdata/grepinput* & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 58 ----------------------------->>testtrygrep
|
||||
@@ -378,6 +390,12 @@ echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
echo ---------------------------- Test 70 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% --color=always -M -n "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -M "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -M -n "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 71 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -o "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep
|
||||
@@ -481,25 +499,28 @@ echo ---------------------------- Test 95 ----------------------------->>testtry
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 96 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -L -r --include-dir=testdata --exclude "^^(?^!grepinput)" "fox" ./test* | sort & popd) >>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -L -r --include-dir=testdata --exclude "^^(?^!grepinput)" --exclude=grepinput[MCU] "fox" ./test* | sort & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 97 ----------------------------->>testtrygrep
|
||||
echo grepinput$>testtemp1grep
|
||||
echo grepinput8>>testtemp1grep
|
||||
(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
|
||||
echo grepinputBad8>>testtemp1grep
|
||||
(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude=grepinput[MCU] --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 98 ----------------------------->>testtrygrep
|
||||
echo grepinput$>testtemp1grep
|
||||
echo grepinput8>>testtemp1grep
|
||||
(pushd %srcdir% & %pcre2grep% -L -r --exclude=grepinput3 --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
|
||||
echo grepinputBad8>>testtemp1grep
|
||||
(pushd %srcdir% & %pcre2grep% -L -r --exclude=grepinput3 --exclude=grepinput[MCU] --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 99 ----------------------------->>testtrygrep
|
||||
echo grepinput$>testtemp1grep
|
||||
echo grepinput8>testtemp2grep
|
||||
(pushd %srcdir% & %pcre2grep% -L -r --include grepinput --exclude-from %builddir%\testtemp1grep --exclude-from=%builddir%\testtemp2grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
|
||||
echo grepinputBad8>>testtemp1grep
|
||||
(pushd %srcdir% & %pcre2grep% -L -r --include grepinput --exclude=grepinput[MCU] --exclude-from %builddir%\testtemp1grep --exclude-from=%builddir%\testtemp2grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 100 ------------------------------>>testtrygrep
|
||||
@@ -533,7 +554,7 @@ echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
echo ---------------------------- Test 107 ----------------------------->>testtrygrep
|
||||
echo a>testtemp1grep
|
||||
echo aaaaa>>testtemp1grep
|
||||
(pushd %srcdir% & %pcre2grep% --line-offsets "(?<=\Ka)" %builddir%\testtemp1grep & popd) >>testtrygrep 2>&1
|
||||
(pushd %srcdir% & %pcre2grep% --line-offsets --allow-lookaround-bsk "(?<=\Ka)" %builddir%\testtemp1grep & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 108 ------------------------------>>testtrygrep
|
||||
@@ -541,7 +562,7 @@ echo ---------------------------- Test 108 ------------------------------>>testt
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 109 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -cq lazy ./testdata/grepinput* & popd) >>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -cq --exclude=grepinputC lazy ./testdata/grepinput* & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 110 ----------------------------->>testtrygrep
|
||||
@@ -557,27 +578,27 @@ echo ---------------------------- Test 112 ----------------------------->>testtr
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 113 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% --total-count "the" testdata/grepinput* & popd) >>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% --total-count --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 114 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -tc "the" testdata/grepinput* & popd) >>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -tc --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 115 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -tlc "the" testdata/grepinput* & popd) >>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -tlc --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 116 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -th "the" testdata/grepinput* & popd) >>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% --exclude=grepinput[MCU] -th "the" testdata/grepinput* & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 117 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -tch "the" testdata/grepinput* & popd) >>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -tch --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 118 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -tL "the" testdata/grepinput* & popd) >>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -tL --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 119 ----------------------------->>testtrygrep
|
||||
@@ -588,6 +609,266 @@ echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
echo ---------------------------- Test 120 ------------------------------>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -HO "$0:$2$1$3" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -HO "$&:$2$1$3" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -m 1 -O "$0:$a$b$e$f$r$t$v" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -HO "${X}" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -HO "XX$" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -O "$x{12345678}" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -O "$x{123Z" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% --output "$x{1234}" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 121 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -F "\E and (regex)" testdata/grepinputv & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 122 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -w "cat|dog" testdata/grepinputv & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 123 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -w "dog|cat" testdata/grepinputv & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 124 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -Mn --colour=always "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -Mn --colour=always -A2 "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -Mn "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -Mn -A2 "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 125 ----------------------------->>testtrygrep
|
||||
%printf% "abcd\n" >testNinputgrep
|
||||
%pcre2grep% --colour=always --allow-lookaround-bsk "(?<=\K.)" testNinputgrep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% --colour=always --allow-lookaround-bsk "(?=.\K)" testNinputgrep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% --colour=always --allow-lookaround-bsk "(?<=\K[ac])" testNinputgrep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% --colour=always --allow-lookaround-bsk "(?=[ac]\K)" testNinputgrep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
set GREP_COLORS=ms=1;20
|
||||
%pcre2grep% --colour=always --allow-lookaround-bsk "(?=[ac]\K)" testNinputgrep >>testtrygrep
|
||||
set GREP_COLORS=
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 126 ----------------------------->>testtrygrep
|
||||
%printf% "Next line pattern has binary zero\nABC\0XYZ\n" >testtemp1grep
|
||||
%printf% "ABC\0XYZ\nABCDEF\nDEFABC\n" >testtemp2grep
|
||||
%pcre2grep% -a -f testtemp1grep testtemp2grep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%printf% "Next line pattern is erroneous.\n^abc)(xy" >testtemp1grep
|
||||
%pcre2grep% -a -f testtemp1grep testtemp2grep >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 127 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -o --om-capture=0 "pattern()()()()" testdata/grepinput & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 128 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -m1M -o1 --om-capture=0 "pattern()()()()" testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 129 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -m 2 "fox" testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 130 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -o -m2 "fox" testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 131 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -oc -m2 "fox" testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 132 ----------------------------->>testtrygrep
|
||||
:: The Unix tests use fd3 here, but Windows only has StdIn/StdOut/StdErr (which, at the kernel
|
||||
:: level, are not even numbered). Use a subshell instead.
|
||||
(pushd %srcdir% & (%pcre2grep% -m1 -A3 "^match" & echo ---& head -1) <testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 133 ----------------------------->>testtrygrep
|
||||
:: The Unix tests use fd3 here, but Windows only has StdIn/StdOut/StdErr (which, at the kernel
|
||||
:: level, are not even numbered). Use a subshell instead.
|
||||
(pushd %srcdir% & (%pcre2grep% -m1 -A3 "^match" & echo ---& %pcre2grep% -m1 -A3 "^match") <testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 134 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% --max-count=1 -nH -O "=$x{41}$x423$o{103}$o1045=" "fox" - & popd) <%srcdir%\testdata\grepinputv >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 135 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -HZ "word" ./testdata/grepinputv & popd) | %trnull% >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -lZ "word" ./testdata/grepinputv ./testdata/grepinputv & popd) | %trnull% >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -A 1 -B 1 -HZ "word" ./testdata/grepinputv & popd) | %trnull% >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -MHZn "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 136 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -m1MK -o1 --om-capture=0 "pattern()()()()" testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% --max-count=1MK -o1 --om-capture=0 "pattern()()()()" testdata/grepinput & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 137 ----------------------------->>testtrygrep
|
||||
%printf% "Last line\nhas no newline" >testtemp1grep
|
||||
%pcre2grep% -A1 Last testtemp1grep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 138 ----------------------------->>testtrygrep
|
||||
%printf% "AbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\n" >testtemp1grep
|
||||
%pcre2grep% --no-jit --heap-limit=0 b testtemp1grep >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 139 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% --line-buffered "fox" testdata/grepinputv & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 140 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% --buffer-size=10 -A1 "brown" testdata/grepinputv & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 141 ----------------------------->>testtrygrep
|
||||
%printf% "%%s\testdata\grepinputv\n-\n" "%srcdir%" >testtemp1grep
|
||||
%printf% "This is a line from stdin." >testtemp2grep
|
||||
%pcre2grep% --file-list testtemp1grep "line from stdin" <testtemp2grep >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 142 ----------------------------->>testtrygrep
|
||||
%printf% "/does/not/exist\n" >testtemp1grep
|
||||
%printf% "This is a line from stdin." >testtemp2grep
|
||||
%pcre2grep% --file-list testtemp1grep "line from stdin" >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 143 ----------------------------->>testtrygrep
|
||||
%printf% "fox|cat" >testtemp1grep
|
||||
%pcre2grep% -f - %srcdir%\testdata\grepinputv <testtemp1grep >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 144 ----------------------------->>testtrygrep
|
||||
%pcre2grep% -f /non/exist %srcdir%\testdata\grepinputv >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 145 ----------------------------->>testtrygrep
|
||||
%printf% "*meta*\rdog." >testtemp1grep
|
||||
%pcre2grep% -Ncr -F -f testtemp1grep %srcdir%\testdata\grepinputv >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 146 ----------------------------->>testtrygrep
|
||||
%printf% "A123B" >testtemp1grep
|
||||
%pcre2grep% -H -e "123|fox" - <testtemp1grep >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% -h -e "123|fox" - %srcdir%\testdata\grepinputv <testtemp1grep >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% - %srcdir%\testdata\grepinputv <testtemp1grep >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 147 ----------------------------->>testtrygrep
|
||||
%pcre2grep% -e "123|fox" -- -nonfile >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 148 ----------------------------->>testtrygrep
|
||||
%pcre2grep% --nonexist >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% -n-n-bad >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% --context >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% --only-matching --output=xx >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% --colour=badvalue >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% --newline=badvalue >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% -d badvalue >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% -D badvalue >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% --buffer-size=0 >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% --exclude "(badpat" abc /dev/null >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% --exclude-from /non/exist abc /dev/null >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% --include-from /non/exist abc /dev/null >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% --file-list=/non/exist abc /dev/null >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 149 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% --binary-files=binary "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% --binary-files=wrong "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 150 ----------------------------->>testtrygrep
|
||||
:: The Unix version of this test checks whether locales are supported. On Windows,
|
||||
:: we assume they always are.
|
||||
set LC_ALL=
|
||||
set LC_CTYPE=locale.bad
|
||||
(pushd %srcdir% & %pcre2grep% abc /dev/null & popd) >>testtrygrep 2>&1
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
set LC_CTYPE=
|
||||
|
||||
echo ---------------------------- Test 151 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% --colour=always -e this -e The -e "The wo" testdata/grepinputv & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 152 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -nA3 --group-separator="++" "four" ./testdata/grepinputx & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 153 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -nA3 --no-group-separator "four" ./testdata/grepinputx & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 154 ----------------------------->>testtrygrep
|
||||
echo. >nul 2>testtemp1grep
|
||||
(pushd %srcdir% & %pcre2grep% -f %builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 155 ----------------------------->>testtrygrep
|
||||
echo. >testtemp1grep
|
||||
(pushd %srcdir% & %pcre2grep% -f %builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 156 ----------------------------->>testtrygrep
|
||||
%printf% "\n" >testtemp1grep
|
||||
(pushd %srcdir% & %pcre2grep% --posix-pattern-file --file %builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 157 ----------------------------->>testtrygrep
|
||||
%printf% "spaces \n" >testtemp1grep
|
||||
(pushd %srcdir% & %pcre2grep% -o --posix-pattern-file --file=%builddir%\testtemp1grep ./testdata/grepinputv >%builddir%\testtemp2grep && %pcre2grep% -q "s " %builddir%\testtemp2grep & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 158 ----------------------------->>testtrygrep
|
||||
%printf% "spaces.\n" >testtemp1grep
|
||||
(pushd %srcdir% & %pcre2grep% -f %builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 159 ----------------------------->>testtrygrep
|
||||
%printf% "spaces.\r\n" >testtemp1grep
|
||||
(pushd %srcdir% & %pcre2grep% --posix-pattern-file -f%builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 160 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -nC3 "^(ert|jkl)" ./testdata/grepinput & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -n -B4 -A2 "^(ert|dfg)" ./testdata/grepinput & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
:: Now compare the results.
|
||||
|
||||
@@ -602,15 +883,43 @@ if %utf8% neq 0 (
|
||||
|
||||
echo ---------------------------- Test U1 ------------------------------>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -n -u --newline=any "^X" ./testdata/grepinput8 & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test U2 ------------------------------>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -n -u -C 3 --newline=any "Match" ./testdata/grepinput8 & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test U3 ------------------------------>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% --line-offsets -u --newline=any "(?<=\K\x{17f})" ./testdata/grepinput8 & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% --line-offsets -u --newline=any --allow-lookaround-bsk "(?<=\K\x{17f})" ./testdata/grepinput8 & popd) >>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test U4 ------------------------------>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -u -o "...." ./testdata/grepinputBad8 & popd) >>testtrygrep 2>&1
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test U5 ------------------------------>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -U -o "...." ./testdata/grepinputBad8 & popd) >>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test U6 ----------------------------->>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -u -m1 -O "=$x{1d3}$o{744}=" "fox" & popd) <%srcdir%\testdata\grepinputv >>testtrygrep 2>&1
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test U7 ------------------------------>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -ui --colour=always "k+|\babc\b" ./testdata/grepinput8 & popd) >>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test U8 ------------------------------>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -UiEP --colour=always "k+|\babc\b" ./testdata/grepinput8 & popd) >>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test U9 ------------------------------>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -u --colour=always "A\d" ./testdata/grepinput8 & popd) >>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test U10 ------------------------------>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -u --posix-digit --colour=always "A\d" ./testdata/grepinput8 & popd) >>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
|
||||
%cf% %srcdir%\testdata\grepoutput8 testtrygrep %cfout%
|
||||
if ERRORLEVEL 1 exit /b 1
|
||||
@@ -631,58 +940,159 @@ echo Testing pcre2grep newline settings
|
||||
|
||||
echo ---------------------------- Test N1 ------------------------------>testtrygrep
|
||||
%pcre2grep% -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% -B1 -n -N CR "^def" testNinputgrep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test N2 ------------------------------>>testtrygrep
|
||||
%pcre2grep% -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% -B1 -n -N CRLF "^ghi" testNinputgrep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test N3 ------------------------------>>testtrygrep
|
||||
for /f %%a in ('%printf% "def\rjkl"') do set pattern=%%a
|
||||
%pcre2grep% -n --newline=cr -F "!pattern!" testNinputgrep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test N4 ------------------------------>>testtrygrep
|
||||
%pcre2grep% -n --newline=crlf -F -f %srcdir%/testdata/greppatN4 testNinputgrep >>testtrygrep
|
||||
%pcre2grep% -n --newline=crlf -F -f %srcdir%\testdata\greppatN4 testNinputgrep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test N5 ------------------------------>>testtrygrep
|
||||
%pcre2grep% -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% -B1 -n --newline=any "^def" testNinputgrep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test N6 ------------------------------>>testtrygrep
|
||||
%pcre2grep% -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% -B1 -n --newline=anycrlf "^jkl" testNinputgrep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test N7 ------------------------------>>testtrygrep
|
||||
%printf% "xyz\0abc\0def" >testNinputgrep
|
||||
%pcre2grep% -na --newline=nul "^(abc|def)" testNinputgrep | %trnull% >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
%pcre2grep% -B1 -na --newline=nul "^(abc|def)" testNinputgrep | %trnull% >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test N8 ------------------------------>>testtrygrep
|
||||
%pcre2grep% -na --newline=anycrlf "^a" %srcdir%\testdata\grepinputBad8_Trail >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
%printf% "\n" >>testtrygrep
|
||||
|
||||
%cf% %srcdir%\testdata\grepoutputN testtrygrep %cfout%
|
||||
if ERRORLEVEL 1 exit /b 1
|
||||
|
||||
:: If pcre2grep supports script callouts, run some tests on them.
|
||||
|
||||
:: These newline tests need UTF support.
|
||||
|
||||
if %utf8% neq 0 (
|
||||
echo Testing pcre2grep newline settings with UTF-8 features
|
||||
|
||||
echo ---------------------------- Test UN1 ------------------------------>testtrygrep
|
||||
%pcre2grep% -nau --newline=anycrlf "^(abc|def)" %srcdir%\testdata\grepinputUN >>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test UN2 ------------------------------>testtrygrep
|
||||
%pcre2grep% -nauU --newline=anycrlf "^a" %srcdir%\testdata\grepinputBad8_Trail >>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
|
||||
%printf% "\n" >>testtrygrep
|
||||
|
||||
%cf% %srcdir%\testdata\grepoutputUN testtrygrep %cfout%
|
||||
if ERRORLEVEL 1 exit /b 1
|
||||
|
||||
) else (
|
||||
echo Skipping pcre2grep newline UTF-8 tests: no UTF-8 support in PCRE2 library
|
||||
)
|
||||
|
||||
|
||||
:: If pcre2grep supports script callouts, run some tests on them. It is possible
|
||||
:: to restrict these callouts to the non-fork case, either for security, or for
|
||||
:: environments that do not support fork(). This is handled by comparing to a
|
||||
:: different output.
|
||||
|
||||
%pcre2grep% --help | %pcre2grep% -q "callout scripts in patterns are supported"
|
||||
if %ERRORLEVEL% equ 0 (
|
||||
echo Testing pcre2grep script callouts
|
||||
%pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep
|
||||
%pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep
|
||||
%pcre2grep% "(T)(?C'|$0:$1$n')" %srcdir%/testdata/grepinputv >>testtrygrep
|
||||
%pcre2grep% "(T)(?C'|$1$n')(*F)" %srcdir%/testdata/grepinputv >>testtrygrep
|
||||
%pcre2grep% --help | %pcre2grep% -q "Non-script callout scripts in patterns are supported"
|
||||
if %ERRORLEVEL% equ 0 (
|
||||
|
||||
echo --- Test 1 --->testtrygrep
|
||||
%pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%\testdata\grepinputv >>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
echo --- Test 2 --->>testtrygrep
|
||||
%pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%\testdata\grepinputv >>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
echo --- Test 3 --->>testtrygrep
|
||||
%pcre2grep% "(T)(?C'|$0:$1$n')" %srcdir%\testdata\grepinputv >>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
echo --- Test 4 --->>testtrygrep
|
||||
%pcre2grep% "(T)(?C'cscript|//nologo|printf.js|%%s\r\n|$0:$1$n')" %srcdir%\testdata\grepinputv >>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
echo --- Test 5 --->>testtrygrep
|
||||
%pcre2grep% "(T)(?C'|$1$n')(*F)" %srcdir%\testdata\grepinputv >>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
echo --- Test 6 --->>testtrygrep
|
||||
%pcre2grep% -m1 "(T)(?C'|$0:$1:$x{41}$o{101}$n')" %srcdir%\testdata\grepinputv >>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
|
||||
%pcre2grep% --help | %pcre2grep% -q "Non-fork callout scripts in patterns are supported"
|
||||
if ^!ERRORLEVEL! equ 0 (
|
||||
set nonfork=1
|
||||
%cf% %srcdir%\testdata\grepoutputCN testtrygrep %cfout%
|
||||
) else (
|
||||
set nonfork=0
|
||||
%cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout%
|
||||
)
|
||||
if ERRORLEVEL 1 exit /b 1
|
||||
|
||||
@REM These callout tests need UTF support.
|
||||
|
||||
if %utf8% neq 0 (
|
||||
echo Testing pcre2grep script callout with UTF-8 features
|
||||
|
||||
echo --- Test 1 --->testtrygrep
|
||||
%pcre2grep% -u "(T)(?C'|$0:$x{a6}$n')" %srcdir%\testdata\grepinputv >>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
echo --- Test 2 --->>testtrygrep
|
||||
%pcre2grep% -u "(T)(?C'cscript|//nologo|printf.js|%%s\r\n|$0:$x{a6}$n')" %srcdir%\testdata\grepinputv >>testtrygrep
|
||||
echo RC=^!ERRORLEVEL!>>testtrygrep
|
||||
|
||||
if ^!nonfork! equ 1 (
|
||||
%cf% %srcdir%\testdata\grepoutputCNU testtrygrep %cfout%
|
||||
) else (
|
||||
%cf% %srcdir%\testdata\grepoutputCU testtrygrep %cfout%
|
||||
)
|
||||
if ERRORLEVEL 1 exit /b 1
|
||||
|
||||
) else (
|
||||
echo Skipping pcre2grep script callout UTF-8 tests: no UTF-8 support in PCRE2 library
|
||||
)
|
||||
|
||||
) else (
|
||||
echo Script callouts are not supported
|
||||
)
|
||||
|
||||
|
||||
:: Finally, some tests to exercise code that is not tested above, just to be
|
||||
:: sure that it runs OK. Doing this improves the coverage statistics. The output
|
||||
:: is not checked.
|
||||
|
||||
echo Testing miscellaneous pcre2grep arguments (unchecked)
|
||||
%printf% "" >testtrygrep
|
||||
echo. >nul 2>testtrygrep
|
||||
call :checkspecial "-xxxxx" 2 || exit /b 1
|
||||
call :checkspecial "--help" 0 || exit /b 1
|
||||
call :checkspecial "--line-buffered --colour=auto abc nul" 1 || exit /b 1
|
||||
call :checkspecial "--line-buffered --color abc nul" 1 || exit /b 1
|
||||
call :checkspecial "-dskip abc ." 1 || exit /b 1
|
||||
call :checkspecial "-Dread -Dskip abc nul" 1 || exit /b 1
|
||||
|
||||
|
||||
:: Clean up local working files
|
||||
del testcf printf.js testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep
|
||||
del testcf printf.js trnull.js testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep
|
||||
|
||||
exit /b 0
|
||||
|
||||
|
||||
+96
-65
@@ -88,8 +88,9 @@ title22="Test 22: \C tests with UTF (not supported for DFA matching)"
|
||||
title23="Test 23: \C disabled test"
|
||||
title24="Test 24: Non-UTF pattern conversion tests"
|
||||
title25="Test 25: UTF pattern conversion tests"
|
||||
title26="Test 26: Auto-generated unicode property tests"
|
||||
maxtest=26
|
||||
title26="Test 26: Unicode property tests (compatible with Perl >= 5.38)"
|
||||
title27="Test 27: Auto-generated unicode property tests"
|
||||
maxtest=27
|
||||
titleheap="Test 'heap': Environment-specific heap tests"
|
||||
|
||||
if [ $# -eq 1 -a "$1" = "list" ]; then
|
||||
@@ -120,6 +121,7 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
|
||||
echo $title24
|
||||
echo $title25
|
||||
echo $title26
|
||||
echo $title27
|
||||
echo ""
|
||||
echo $titleheap
|
||||
echo ""
|
||||
@@ -183,7 +185,7 @@ checkresult()
|
||||
|
||||
checkspecial()
|
||||
{
|
||||
$valgrind $vjs ./pcre2test $1 >>testtry
|
||||
$sim $valgrind $vjs $pcre2test $1 >>testtry
|
||||
if [ $? -ne 0 ] ; then
|
||||
echo "** pcre2test $1 failed - check testtry"
|
||||
exit 1
|
||||
@@ -191,24 +193,7 @@ checkspecial()
|
||||
}
|
||||
|
||||
|
||||
# ------ Special EBCDIC Test -------
|
||||
|
||||
if [ $# -eq 1 -a "$1" = "ebcdic" ]; then
|
||||
$valgrind ./pcre2test -C ebcdic >/dev/null
|
||||
ebcdic=$?
|
||||
if [ $ebcdic -ne 1 ] ; then
|
||||
echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
|
||||
exit 1
|
||||
fi
|
||||
for opt in "" "-dfa"; do
|
||||
./pcre2test -q $opt $testdata/testinputEBC >testtry
|
||||
checkresult $? EBC "$opt"
|
||||
done
|
||||
exit 0
|
||||
fi
|
||||
|
||||
|
||||
# ------ Normal Tests ------
|
||||
# ------ Test setup ------
|
||||
|
||||
# Default values
|
||||
|
||||
@@ -221,10 +206,16 @@ sim=
|
||||
skip=
|
||||
valgrind=
|
||||
vjs=
|
||||
: ${pcre2test:=./pcre2test}
|
||||
|
||||
# This is in case the caller has set aliases (as I do - PH)
|
||||
unset cp ls mv rm
|
||||
|
||||
if [ ! -x $pcre2test ] ; then
|
||||
echo "** $pcre2test does not exist or is not executable."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Process options and select which tests to run; for those that are explicitly
|
||||
# requested, check that the necessary optional facilities are available.
|
||||
|
||||
@@ -255,7 +246,9 @@ do23=no
|
||||
do24=no
|
||||
do25=no
|
||||
do26=no
|
||||
do27=no
|
||||
doheap=no
|
||||
doebcdic=no
|
||||
|
||||
while [ $# -gt 0 ] ; do
|
||||
case $1 in
|
||||
@@ -286,14 +279,16 @@ while [ $# -gt 0 ] ; do
|
||||
24) do24=yes;;
|
||||
25) do25=yes;;
|
||||
26) do26=yes;;
|
||||
heap) doheap=yes;;
|
||||
-8) arg8=yes;;
|
||||
27) do27=yes;;
|
||||
heap) doheap=yes;;
|
||||
ebcdic) doebcdic=yes;;
|
||||
-8) arg8=yes;;
|
||||
-16) arg16=yes;;
|
||||
-32) arg32=yes;;
|
||||
bigstack|-bigstack) bigstack=yes;;
|
||||
nojit|-nojit) nojit=yes;;
|
||||
sim|-sim) shift; sim=$1;;
|
||||
valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all-non-file";;
|
||||
valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all-non-file --error-exitcode=70";;
|
||||
valgrind-log|-valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all-non-file --log-file=report.%p ";;
|
||||
~*)
|
||||
if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then
|
||||
@@ -325,7 +320,7 @@ done
|
||||
|
||||
# Find which optional facilities are available.
|
||||
|
||||
$sim ./pcre2test -C linksize >/dev/null
|
||||
$sim $pcre2test -C linksize >/dev/null
|
||||
link_size=$?
|
||||
if [ $link_size -lt 2 ] ; then
|
||||
echo "RunTest: Failed to find internal link size"
|
||||
@@ -339,10 +334,10 @@ fi
|
||||
# If it is possible to set the system stack size and -bigstack was given,
|
||||
# set up a large stack.
|
||||
|
||||
$sim ./pcre2test -S 64 /dev/null /dev/null
|
||||
$sim $pcre2test -S 32 /dev/null /dev/null
|
||||
support_setstack=$?
|
||||
if [ $support_setstack -eq 0 -a "$bigstack" != "" ] ; then
|
||||
setstack="-S 64"
|
||||
setstack="-S 32"
|
||||
else
|
||||
setstack=""
|
||||
fi
|
||||
@@ -350,16 +345,16 @@ fi
|
||||
# All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only
|
||||
# one need be.
|
||||
|
||||
$sim ./pcre2test -C pcre2-8 >/dev/null
|
||||
$sim $pcre2test -C pcre2-8 >/dev/null
|
||||
support8=$?
|
||||
$sim ./pcre2test -C pcre2-16 >/dev/null
|
||||
$sim $pcre2test -C pcre2-16 >/dev/null
|
||||
support16=$?
|
||||
$sim ./pcre2test -C pcre2-32 >/dev/null
|
||||
$sim $pcre2test -C pcre2-32 >/dev/null
|
||||
support32=$?
|
||||
|
||||
# \C may be disabled
|
||||
|
||||
$sim ./pcre2test -C backslash-C >/dev/null
|
||||
$sim $pcre2test -C backslash-C >/dev/null
|
||||
supportBSC=$?
|
||||
|
||||
# Initialize all bitsizes skipped
|
||||
@@ -411,7 +406,7 @@ fi
|
||||
# sizes if both are supported; we can't have UTF-8 support without UTF-16 or
|
||||
# UTF-32 support.
|
||||
|
||||
$sim ./pcre2test -C unicode >/dev/null
|
||||
$sim $pcre2test -C unicode >/dev/null
|
||||
utf=$?
|
||||
|
||||
# When JIT is used with valgrind, we need to set up valgrind suppressions as
|
||||
@@ -419,7 +414,7 @@ utf=$?
|
||||
# the hardware supports SSE2.
|
||||
|
||||
jitopt=
|
||||
$sim ./pcre2test -C jit >/dev/null
|
||||
$sim $pcre2test -C jit >/dev/null
|
||||
jit=$?
|
||||
if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then
|
||||
jitopt=-jit
|
||||
@@ -437,7 +432,8 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
|
||||
$do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
|
||||
$do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \
|
||||
$do20 = no -a $do21 = no -a $do22 = no -a $do23 = no -a \
|
||||
$do24 = no -a $do25 = no -a $do26 = no -a $doheap = no \
|
||||
$do24 = no -a $do25 = no -a $do26 = no -a $do27 = no -a \
|
||||
$doheap = no -a $doebcdic = no \
|
||||
]; then
|
||||
do0=yes
|
||||
do1=yes
|
||||
@@ -466,6 +462,7 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
|
||||
do24=yes
|
||||
do25=yes
|
||||
do26=yes
|
||||
do27=yes
|
||||
fi
|
||||
|
||||
# Handle any explicit skips at this stage, so that an argument list may consist
|
||||
@@ -477,9 +474,12 @@ for i in $skip; do eval do$i=no; done
|
||||
|
||||
echo ""
|
||||
echo PCRE2 C library tests using test data from $testdata
|
||||
$sim ./pcre2test /dev/null
|
||||
$sim $pcre2test /dev/null
|
||||
echo ""
|
||||
|
||||
|
||||
# ------ Normal Tests ------
|
||||
|
||||
for bmode in "$test8" "$test16" "$test32"; do
|
||||
case "$bmode" in
|
||||
skip) continue;;
|
||||
@@ -512,7 +512,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
if [ $do1 = yes ] ; then
|
||||
echo $title1
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput1 testtry
|
||||
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput1 testtry
|
||||
checkresult $? 1 "$opt"
|
||||
done
|
||||
fi
|
||||
@@ -524,10 +524,10 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
echo $title2 "(excluding UTF-$bits)"
|
||||
cp $testdata/testbtables .
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
|
||||
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
|
||||
saverc=$?
|
||||
if [ $saverc = 0 ] ; then
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -70,-62,-2,-1,0,100,101,191,300 >>testtry
|
||||
$sim $valgrind ${opt:+$vjs} $pcre2test -q $bmode $opt -error -80,-62,-2,-1,0,100,101,191,300 >>testtry
|
||||
checkresult $? 2 "$opt"
|
||||
else
|
||||
checkresult $saverc 2 "$opt"
|
||||
@@ -553,7 +553,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
locale -a | grep "^$loc\$" >/dev/null
|
||||
if [ $? -eq 0 ] ; then
|
||||
echo "/a/locale=$loc" | \
|
||||
$sim $valgrind ./pcre2test -q $bmode | \
|
||||
$sim $valgrind $pcre2test -q $bmode | \
|
||||
grep "Failed to set locale" >/dev/null
|
||||
if [ $? -ne 0 ] ; then
|
||||
locale=$loc
|
||||
@@ -580,7 +580,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
if [ "$locale" != "" ] ; then
|
||||
echo $title3 "(using '$locale' locale)"
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $infile testtry
|
||||
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $infile testtry
|
||||
if [ $? = 0 ] ; then
|
||||
case "$opt" in
|
||||
-jit) with=" with JIT";;
|
||||
@@ -617,7 +617,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput4 testtry
|
||||
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput4 testtry
|
||||
checkresult $? 4 "$opt"
|
||||
done
|
||||
fi
|
||||
@@ -629,7 +629,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput5 testtry
|
||||
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput5 testtry
|
||||
checkresult $? 5 "$opt"
|
||||
done
|
||||
fi
|
||||
@@ -639,7 +639,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
|
||||
if [ $do6 = yes ] ; then
|
||||
echo $title6
|
||||
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput6 testtry
|
||||
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput6 testtry
|
||||
checkresult $? 6 ""
|
||||
fi
|
||||
|
||||
@@ -648,7 +648,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
if [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $setstack $bmode $opt $testdata/testinput7 testtry
|
||||
$sim $valgrind $pcre2test -q $setstack $bmode $opt $testdata/testinput7 testtry
|
||||
checkresult $? 7 ""
|
||||
fi
|
||||
fi
|
||||
@@ -666,7 +666,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
if [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput8 testtry
|
||||
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput8 testtry
|
||||
checkresult $? 8-$bits-$link_size ""
|
||||
fi
|
||||
fi
|
||||
@@ -679,7 +679,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
echo " Skipped when running 16/32-bit tests"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput9 testtry
|
||||
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput9 testtry
|
||||
checkresult $? 9 "$opt"
|
||||
done
|
||||
fi
|
||||
@@ -695,7 +695,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput10 testtry
|
||||
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput10 testtry
|
||||
checkresult $? 10 "$opt"
|
||||
done
|
||||
fi
|
||||
@@ -709,7 +709,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
echo " Skipped when running 8-bit tests"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput11 testtry
|
||||
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput11 testtry
|
||||
checkresult $? 11-$bits "$opt"
|
||||
done
|
||||
fi
|
||||
@@ -726,7 +726,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput12 testtry
|
||||
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput12 testtry
|
||||
checkresult $? 12-$bits "$opt"
|
||||
done
|
||||
fi
|
||||
@@ -739,7 +739,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
if [ "$bits" = "8" ] ; then
|
||||
echo " Skipped when running 8-bit tests"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput13 testtry
|
||||
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput13 testtry
|
||||
checkresult $? 13 ""
|
||||
fi
|
||||
fi
|
||||
@@ -751,7 +751,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
if [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $setstack $bmode $opt $testdata/testinput14 testtry
|
||||
$sim $valgrind $pcre2test -q $setstack $bmode $opt $testdata/testinput14 testtry
|
||||
checkresult $? 14-$bits ""
|
||||
fi
|
||||
fi
|
||||
@@ -760,7 +760,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
|
||||
if [ $do15 = yes ] ; then
|
||||
echo $title15
|
||||
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput15 testtry
|
||||
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput15 testtry
|
||||
checkresult $? 15 ""
|
||||
fi
|
||||
|
||||
@@ -771,7 +771,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
if [ $jit -ne 0 ] ; then
|
||||
echo " Skipped because JIT is available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput16 testtry
|
||||
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput16 testtry
|
||||
checkresult $? 16 ""
|
||||
fi
|
||||
fi
|
||||
@@ -783,7 +783,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
|
||||
echo " Skipped because JIT is not available or nojit was specified"
|
||||
else
|
||||
$sim $valgrind $vjs ./pcre2test -q $setstack $bmode $testdata/testinput17 testtry
|
||||
$sim $valgrind $vjs $pcre2test -q $setstack $bmode $testdata/testinput17 testtry
|
||||
checkresult $? 17 ""
|
||||
fi
|
||||
fi
|
||||
@@ -795,7 +795,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
echo " Skipped when running 16/32-bit tests"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput18 testtry
|
||||
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput18 testtry
|
||||
checkresult $? 18 ""
|
||||
fi
|
||||
fi
|
||||
@@ -809,7 +809,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
elif [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput19 testtry
|
||||
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput19 testtry
|
||||
checkresult $? 19 ""
|
||||
fi
|
||||
fi
|
||||
@@ -818,7 +818,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
|
||||
if [ $do20 = yes ] ; then
|
||||
echo $title20
|
||||
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput20 testtry
|
||||
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput20 testtry
|
||||
checkresult $? 20 ""
|
||||
fi
|
||||
|
||||
@@ -830,7 +830,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
echo " Skipped because \C is disabled"
|
||||
else
|
||||
for opt in "" $jitopt -dfa; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput21 testtry
|
||||
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput21 testtry
|
||||
checkresult $? 21 "$opt"
|
||||
done
|
||||
fi
|
||||
@@ -846,7 +846,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput22 testtry
|
||||
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput22 testtry
|
||||
checkresult $? 22-$bits "$opt"
|
||||
done
|
||||
fi
|
||||
@@ -859,7 +859,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
if [ $supportBSC -ne 0 ] ; then
|
||||
echo " Skipped because \C is not disabled"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput23 testtry
|
||||
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput23 testtry
|
||||
checkresult $? 23 ""
|
||||
fi
|
||||
fi
|
||||
@@ -868,7 +868,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
|
||||
if [ "$do24" = yes ] ; then
|
||||
echo $title24
|
||||
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput24 testtry
|
||||
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput24 testtry
|
||||
checkresult $? 24 ""
|
||||
fi
|
||||
|
||||
@@ -879,12 +879,12 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
if [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput25 testtry
|
||||
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput25 testtry
|
||||
checkresult $? 25 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Auto-generated unicode property tests
|
||||
# Unicode property tests
|
||||
|
||||
if [ $do26 = yes ] ; then
|
||||
echo $title26
|
||||
@@ -892,24 +892,55 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput26 testtry
|
||||
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput26 testtry
|
||||
checkresult $? 26 "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Auto-generated Unicode property tests
|
||||
|
||||
if [ $do27 = yes ] ; then
|
||||
echo $title27
|
||||
if [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput27 testtry
|
||||
checkresult $? 27 "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Manually selected heap tests - output may vary in different environments,
|
||||
# which is why they are not automatically run.
|
||||
|
||||
if [ $doheap = yes ] ; then
|
||||
echo $titleheap
|
||||
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinputheap testtry
|
||||
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinputheap testtry
|
||||
checkresult $? heap-$bits ""
|
||||
fi
|
||||
|
||||
# End of loop for 8/16/32-bit tests
|
||||
done
|
||||
|
||||
|
||||
# ------ Special EBCDIC Test -------
|
||||
|
||||
if [ $doebcdic = yes ] ; then
|
||||
$sim $valgrind $pcre2test -C ebcdic >/dev/null
|
||||
ebcdic=$?
|
||||
if [ $ebcdic -ne 1 ] ; then
|
||||
echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
|
||||
exit 1
|
||||
fi
|
||||
for opt in "" "-dfa"; do
|
||||
$sim $valgrind $pcre2test -q $opt $testdata/testinputEBC >testtry
|
||||
checkresult $? EBC "$opt"
|
||||
done
|
||||
fi
|
||||
|
||||
|
||||
# Clean up local working files
|
||||
rm -f testbtables testSinput test3input testsaved1 testsaved2 test3output test3outputA test3outputB teststdout teststderr testtry
|
||||
|
||||
|
||||
+68
-32
@@ -13,7 +13,7 @@
|
||||
@rem line. Added argument validation and added error reporting.
|
||||
@rem
|
||||
@rem Sheri Pierce added logic to skip feature dependent tests
|
||||
@rem tests 4 5 7 10 12 14 19 and 22 require Unicode support
|
||||
@rem tests 4 5 7 10 12 14 19 22 25 26 and 27 require Unicode support
|
||||
@rem 8 requires Unicode and link size 2
|
||||
@rem 16 requires absence of jit support
|
||||
@rem 17 requires presence of jit support
|
||||
@@ -27,8 +27,8 @@
|
||||
@rem Tidied and updated for new tests 21, 22, 23 by PH, October 2015.
|
||||
@rem PH added missing "set type" for test 22, April 2016.
|
||||
@rem PH added copy command for new testbtables file, November 2020
|
||||
@rem PH caused it to show comparison output when comparison faile, July 2023
|
||||
@rem PH updated unknown error number in test
|
||||
@rem PH caused it to show comparison output when comparison failed, July 2023
|
||||
@rem PH updated unknown error number in test
|
||||
|
||||
|
||||
setlocal enabledelayedexpansion
|
||||
@@ -39,7 +39,7 @@ if exist ..\testdata\ set srcdir=..)
|
||||
if [%srcdir%]==[] (
|
||||
if exist ..\..\testdata\ set srcdir=..\..)
|
||||
if NOT exist %srcdir%\testdata\ (
|
||||
Error: echo distribution testdata folder not found!
|
||||
echo Error: distribution testdata folder not found!
|
||||
call :conferror
|
||||
exit /b 1
|
||||
goto :eof
|
||||
@@ -82,7 +82,7 @@ if not exist testout16 md testout16
|
||||
if not exist testoutjit16 md testoutjit16
|
||||
)
|
||||
|
||||
if %support16% EQU 1 (
|
||||
if %support32% EQU 1 (
|
||||
if not exist testout32 md testout32
|
||||
if not exist testoutjit32 md testoutjit32
|
||||
)
|
||||
@@ -110,20 +110,24 @@ set do20=no
|
||||
set do21=no
|
||||
set do22=no
|
||||
set do23=no
|
||||
set do24=no
|
||||
set do25=no
|
||||
set do26=no
|
||||
set do27=no
|
||||
set all=yes
|
||||
|
||||
for %%a in (%*) do (
|
||||
set valid=no
|
||||
for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) do if %%v == %%a set valid=yes
|
||||
for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27) do if %%v == %%a set valid=yes
|
||||
if "!valid!" == "yes" (
|
||||
set do%%a=yes
|
||||
set all=no
|
||||
) else (
|
||||
) else (
|
||||
echo Invalid test number - %%a!
|
||||
echo Usage %0 [ test_number ] ...
|
||||
echo Where test_number is one or more optional test numbers 1 through 23, default is all tests.
|
||||
exit /b 1
|
||||
)
|
||||
echo Usage %0 [ test_number ] ...
|
||||
echo Where test_number is one or more optional test numbers 1 through 27, default is all tests.
|
||||
exit /b 1
|
||||
)
|
||||
)
|
||||
set failed="no"
|
||||
|
||||
@@ -137,9 +141,9 @@ if "%all%" == "yes" (
|
||||
set do7=yes
|
||||
set do8=yes
|
||||
set do9=yes
|
||||
set do10=no
|
||||
set do10=yes
|
||||
set do11=yes
|
||||
set do12=no
|
||||
set do12=yes
|
||||
set do13=yes
|
||||
set do14=yes
|
||||
set do15=yes
|
||||
@@ -151,6 +155,10 @@ if "%all%" == "yes" (
|
||||
set do21=yes
|
||||
set do22=yes
|
||||
set do23=yes
|
||||
set do24=yes
|
||||
set do25=yes
|
||||
set do26=yes
|
||||
set do27=yes
|
||||
)
|
||||
|
||||
@echo RunTest.bat's pcre2test output is written to newly created subfolders
|
||||
@@ -202,6 +210,10 @@ if "%do20%" == "yes" call :do20
|
||||
if "%do21%" == "yes" call :do21
|
||||
if "%do22%" == "yes" call :do22
|
||||
if "%do23%" == "yes" call :do23
|
||||
if "%do24%" == "yes" call :do24
|
||||
if "%do25%" == "yes" call :do25
|
||||
if "%do26%" == "yes" call :do26
|
||||
if "%do27%" == "yes" call :do27
|
||||
:modeSkip
|
||||
if "%mode%" == "" (
|
||||
set mode=-16
|
||||
@@ -247,7 +259,15 @@ if [%3] == [] (
|
||||
)
|
||||
|
||||
if %1 == 8 (
|
||||
set outnum=8-%bits%-%link_size%
|
||||
set outnum=%1-%bits%-%link_size%
|
||||
) else if %1 == 11 (
|
||||
set outnum=%1-%bits%
|
||||
) else if %1 == 12 (
|
||||
set outnum=%1-%bits%
|
||||
) else if %1 == 14 (
|
||||
set outnum=%1-%bits%
|
||||
) else if %1 == 22 (
|
||||
set outnum=%1-%bits%
|
||||
) else (
|
||||
set outnum=%1
|
||||
)
|
||||
@@ -266,24 +286,10 @@ if errorlevel 1 (
|
||||
set failed="yes"
|
||||
goto :eof
|
||||
) else if [%1]==[2] (
|
||||
%pcre2test% %mode% %4 %5 %6 %7 %8 %9 -error -70,-62,-2,-1,0,100,101,191,300 >>%2%bits%\%testoutput%
|
||||
%pcre2test% %mode% %4 %5 %6 %7 %8 %9 -error -80,-62,-2,-1,0,100,101,191,300 >>%2%bits%\%testoutput%
|
||||
)
|
||||
|
||||
set type=
|
||||
if [%1]==[11] (
|
||||
set type=-%bits%
|
||||
)
|
||||
if [%1]==[12] (
|
||||
set type=-%bits%
|
||||
)
|
||||
if [%1]==[14] (
|
||||
set type=-%bits%
|
||||
)
|
||||
if [%1]==[22] (
|
||||
set type=-%bits%
|
||||
)
|
||||
|
||||
fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput% >NUL
|
||||
fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput% >NUL
|
||||
|
||||
if errorlevel 1 (
|
||||
echo. failed comparison: fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput%
|
||||
@@ -294,7 +300,7 @@ if errorlevel 1 (
|
||||
echo.
|
||||
goto :eof
|
||||
)
|
||||
fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput%
|
||||
fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput%
|
||||
|
||||
set failed="yes"
|
||||
goto :eof
|
||||
@@ -309,7 +315,7 @@ if %jit% EQU 1 call :runsub 1 testoutjit "Test with JIT Override" -q -jit
|
||||
goto :eof
|
||||
|
||||
:do2
|
||||
copy /y %srcdir%\testdata\testbtables testbtables
|
||||
copy /y %srcdir%\testdata\testbtables testbtables
|
||||
call :runsub 2 testout "API, errors, internals, and non-Perl stuff" -q
|
||||
if %jit% EQU 1 call :runsub 2 testoutjit "Test with JIT Override" -q -jit
|
||||
goto :eof
|
||||
@@ -504,6 +510,36 @@ if %supportBSC% EQU 1 (
|
||||
call :runsub 23 testout "Backslash-C disabled test" -q
|
||||
goto :eof
|
||||
|
||||
:do24
|
||||
call :runsub 24 testout "Non-UTF pattern conversion tests" -q
|
||||
goto :eof
|
||||
|
||||
:do25
|
||||
if %unicode% EQU 0 (
|
||||
echo Test 25 Skipped due to absence of Unicode support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 25 testout "UTF pattern conversion tests" -q
|
||||
goto :eof
|
||||
|
||||
:do26
|
||||
if %unicode% EQU 0 (
|
||||
echo Test 26 Skipped due to absence of Unicode support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 26 testout "Unicode property tests (Compatible with Perl >= 5.38)" -q
|
||||
if %jit% EQU 1 call :runsub 26 testoutjit "Test with JIT Override" -q -jit
|
||||
goto :eof
|
||||
|
||||
:do27
|
||||
if %unicode% EQU 0 (
|
||||
echo Test 27 Skipped due to absence of Unicode support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 27 testout "Auto-generated unicode property tests" -q
|
||||
if %jit% EQU 1 call :runsub 27 testoutjit "Test with JIT Override" -q -jit
|
||||
goto :eof
|
||||
|
||||
:conferror
|
||||
@echo.
|
||||
@echo Either your build is incomplete or you have a configuration error.
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
# Security policies
|
||||
|
||||
## Release security
|
||||
|
||||
The PCRE2 project provides source-only releases, with no binaries.
|
||||
|
||||
These source releases can be downloaded from the
|
||||
[GitHub Releases](https://github.com/PCRE2Project/pcre2/releases) page. Each
|
||||
release file is GPG-signed.
|
||||
|
||||
* Releases up to and including 10.44 are signed by Philip Hazel (GPG key:
|
||||
<kbd>45F68D54BBE23FB3039B46E59766E084FB0F43D8</kbd>)
|
||||
* Releases from 10.45 onwards will be signed by Nicholas Wilson (GPG key:
|
||||
<kbd>A95536204A3BB489715231282A98E77EB6F24CA8</kbd>, cross-signed by Philip
|
||||
Hazel's key for release continuity)
|
||||
|
||||
From releases 10.45 onwards, the source code will additionally be provided via
|
||||
Git checkout of the (GPG-signed) release tag.
|
||||
|
||||
Please contact the maintainers for any queries about release integrity or the
|
||||
project's supply-chain.
|
||||
|
||||
## Previous vulnerabilities
|
||||
|
||||
* CVE-2025-58050 (August 2025). Affects 10.45 only (not earlier), and is fixed
|
||||
in 10.46.
|
||||
|
||||
## Reporting vulnerabilities
|
||||
|
||||
The PCRE2 project prioritises security. We appreciate third-party testing and
|
||||
security research, and would be grateful if you could responsibly disclose your
|
||||
findings to us. We will make every effort to acknowledge your contributions.
|
||||
|
||||
To report a security issue, please use the GitHub Security Advisory
|
||||
["Report a Vulnerability"](https://github.com/PCRE2Project/pcre2/security/advisories/new)
|
||||
tab. (Alternatively, if you prefer you may send a GPG-encrypted email to one of
|
||||
the maintainers.)
|
||||
|
||||
### Timeline
|
||||
|
||||
As a very small volunteer team, we cannot guarantee rapid response, but would
|
||||
aim to respond within 1 week, or perhaps 2 during holidays.
|
||||
|
||||
### Response procedure
|
||||
|
||||
PCRE2 has in the past made at least one rapid release in response to
|
||||
security incidents.
|
||||
|
||||
We have never produced an embargoed release, or provided preferential
|
||||
access to security fixes to any clients.
|
||||
|
||||
We would aim to notify security managers from trusted downstream distributors,
|
||||
such as major Linux distributions, via the `pcre2-dev` mailing list, by
|
||||
publicly signalling an upcoming security release before disclosing the
|
||||
vulnerability publicly, where advance notification is possible.
|
||||
+107
-17
@@ -12,9 +12,39 @@ pub fn build(b: *std.Build) !void {
|
||||
const linkage = b.option(std.builtin.LinkMode, "linkage", "whether to statically or dynamically link the library") orelse @as(std.builtin.LinkMode, if (target.result.isGnuLibC()) .dynamic else .static);
|
||||
const codeUnitWidth = b.option(CodeUnitWidth, "code-unit-width", "Sets the code unit width") orelse .@"8";
|
||||
|
||||
const copyFiles = b.addWriteFiles();
|
||||
_ = copyFiles.addCopyFile(.{ .path = "src/config.h.generic" }, "config.h");
|
||||
_ = copyFiles.addCopyFile(.{ .path = "src/pcre2.h.generic" }, "pcre2.h");
|
||||
const pcre2_header_dir = b.addWriteFiles();
|
||||
const pcre2_header = pcre2_header_dir.addCopyFile(b.path("src/pcre2.h.generic"), "pcre2.h");
|
||||
|
||||
const config_header = b.addConfigHeader(
|
||||
.{
|
||||
.style = .{ .cmake = b.path("config-cmake.h.in") },
|
||||
.include_path = "config.h",
|
||||
},
|
||||
.{
|
||||
.HAVE_ASSERT_H = true,
|
||||
.HAVE_UNISTD_H = (target.result.os.tag != .windows),
|
||||
.HAVE_WINDOWS_H = (target.result.os.tag == .windows),
|
||||
|
||||
.HAVE_MEMMOVE = true,
|
||||
.HAVE_STRERROR = true,
|
||||
|
||||
.SUPPORT_PCRE2_8 = codeUnitWidth == CodeUnitWidth.@"8",
|
||||
.SUPPORT_PCRE2_16 = codeUnitWidth == CodeUnitWidth.@"16",
|
||||
.SUPPORT_PCRE2_32 = codeUnitWidth == CodeUnitWidth.@"32",
|
||||
.SUPPORT_UNICODE = true,
|
||||
|
||||
.PCRE2_EXPORT = null,
|
||||
.PCRE2_LINK_SIZE = 2,
|
||||
.PCRE2_HEAP_LIMIT = 20000000,
|
||||
.PCRE2_MATCH_LIMIT = 10000000,
|
||||
.PCRE2_MATCH_LIMIT_DEPTH = "MATCH_LIMIT",
|
||||
.PCRE2_MAX_VARLOOKBEHIND = 255,
|
||||
.NEWLINE_DEFAULT = 2,
|
||||
.PCRE2_PARENS_NEST_LIMIT = 250,
|
||||
},
|
||||
);
|
||||
|
||||
// pcre2-8/16/32.so
|
||||
|
||||
const lib = std.Build.Step.Compile.create(b, .{
|
||||
.name = b.fmt("pcre2-{s}", .{@tagName(codeUnitWidth)}),
|
||||
@@ -27,27 +57,26 @@ pub fn build(b: *std.Build) !void {
|
||||
.linkage = linkage,
|
||||
});
|
||||
|
||||
lib.defineCMacro("HAVE_CONFIG_H", null);
|
||||
lib.defineCMacro("PCRE2_CODE_UNIT_WIDTH", @tagName(codeUnitWidth));
|
||||
if (linkage == .static) {
|
||||
try lib.root_module.c_macros.append(b.allocator, "-DPCRE2_STATIC");
|
||||
lib.defineCMacro("PCRE2_STATIC", null);
|
||||
}
|
||||
|
||||
lib.root_module.addCMacro("PCRE2_CODE_UNIT_WIDTH", @tagName(codeUnitWidth));
|
||||
lib.addConfigHeader(config_header);
|
||||
lib.addIncludePath(pcre2_header_dir.getDirectory());
|
||||
lib.addIncludePath(b.path("src"));
|
||||
|
||||
lib.addCSourceFile(.{
|
||||
.file = copyFiles.addCopyFile(.{ .path = "src/pcre2_chartables.c.dist" }, "pcre2_chartables.c"),
|
||||
.flags = &.{
|
||||
"-DHAVE_CONFIG_H",
|
||||
},
|
||||
.file = b.addWriteFiles().addCopyFile(b.path("src/pcre2_chartables.c.dist"), "pcre2_chartables.c"),
|
||||
});
|
||||
|
||||
lib.addIncludePath(.{ .path = b.pathFromRoot("src") });
|
||||
lib.addIncludePath(copyFiles.getDirectory());
|
||||
|
||||
lib.addCSourceFiles(.{
|
||||
.files = &.{
|
||||
"src/pcre2_auto_possess.c",
|
||||
"src/pcre2_chkdint.c",
|
||||
"src/pcre2_compile.c",
|
||||
"src/pcre2_compile_class.c",
|
||||
"src/pcre2_config.c",
|
||||
"src/pcre2_context.c",
|
||||
"src/pcre2_convert.c",
|
||||
@@ -55,6 +84,7 @@ pub fn build(b: *std.Build) !void {
|
||||
"src/pcre2_error.c",
|
||||
"src/pcre2_extuni.c",
|
||||
"src/pcre2_find_bracket.c",
|
||||
"src/pcre2_jit_compile.c",
|
||||
"src/pcre2_maketables.c",
|
||||
"src/pcre2_match.c",
|
||||
"src/pcre2_match_data.c",
|
||||
@@ -72,12 +102,72 @@ pub fn build(b: *std.Build) !void {
|
||||
"src/pcre2_valid_utf.c",
|
||||
"src/pcre2_xclass.c",
|
||||
},
|
||||
.flags = &.{
|
||||
"-DHAVE_CONFIG_H",
|
||||
"-DPCRE2_STATIC",
|
||||
},
|
||||
});
|
||||
|
||||
lib.installHeader(.{ .path = "src/pcre2.h.generic" }, "pcre2.h");
|
||||
lib.installHeader(pcre2_header, "pcre2.h");
|
||||
b.installArtifact(lib);
|
||||
|
||||
|
||||
// pcre2test
|
||||
|
||||
const pcre2test = b.addExecutable(.{
|
||||
.name = "pcre2test",
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
});
|
||||
|
||||
|
||||
// pcre2-posix.so
|
||||
|
||||
if (codeUnitWidth == CodeUnitWidth.@"8") {
|
||||
const posixLib = std.Build.Step.Compile.create(b, .{
|
||||
.name = "pcre2-posix",
|
||||
.root_module = .{
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
.link_libc = true,
|
||||
},
|
||||
.kind = .lib,
|
||||
.linkage = linkage,
|
||||
});
|
||||
|
||||
posixLib.defineCMacro("HAVE_CONFIG_H", null);
|
||||
posixLib.defineCMacro("PCRE2_CODE_UNIT_WIDTH", @tagName(codeUnitWidth));
|
||||
if (linkage == .static) {
|
||||
posixLib.defineCMacro("PCRE2_STATIC", null);
|
||||
}
|
||||
|
||||
posixLib.addConfigHeader(config_header);
|
||||
posixLib.addIncludePath(pcre2_header_dir.getDirectory());
|
||||
posixLib.addIncludePath(b.path("src"));
|
||||
|
||||
posixLib.addCSourceFiles(.{
|
||||
.files = &.{
|
||||
"src/pcre2posix.c",
|
||||
},
|
||||
});
|
||||
|
||||
posixLib.installHeader(b.path("src/pcre2posix.h"), "pcre2posix.h");
|
||||
b.installArtifact(posixLib);
|
||||
|
||||
pcre2test.linkLibrary(posixLib);
|
||||
}
|
||||
|
||||
|
||||
// pcre2test (again)
|
||||
|
||||
pcre2test.defineCMacro("HAVE_CONFIG_H", null);
|
||||
|
||||
pcre2test.addConfigHeader(config_header);
|
||||
pcre2test.addIncludePath(pcre2_header_dir.getDirectory());
|
||||
pcre2test.addIncludePath(b.path("src"));
|
||||
|
||||
pcre2test.addCSourceFile(.{
|
||||
.file = b.path("src/pcre2test.c"),
|
||||
});
|
||||
|
||||
pcre2test.linkLibC();
|
||||
pcre2test.linkLibrary(lib);
|
||||
|
||||
b.installArtifact(pcre2test);
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@ are met:
|
||||
2. Redistributions in binary form must reproduce the copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. The name of the author may not be used to endorse or promote products
|
||||
3. The name of the author may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
|
||||
@@ -2,15 +2,12 @@
|
||||
|
||||
if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
|
||||
set(EDITLINE_FOUND TRUE)
|
||||
else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
|
||||
FIND_PATH(EDITLINE_INCLUDE_DIR readline.h PATH_SUFFIXES
|
||||
editline
|
||||
edit/readline
|
||||
)
|
||||
|
||||
FIND_LIBRARY(EDITLINE_LIBRARY NAMES edit)
|
||||
include(FindPackageHandleStandardArgs)
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
|
||||
else()
|
||||
find_path(EDITLINE_INCLUDE_DIR readline.h PATH_SUFFIXES editline edit/readline)
|
||||
|
||||
MARK_AS_ADVANCED(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
|
||||
endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
|
||||
find_library(EDITLINE_LIBRARY NAMES edit)
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
|
||||
|
||||
mark_as_advanced(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
|
||||
endif()
|
||||
|
||||
@@ -1,58 +0,0 @@
|
||||
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME (DEFAULT_MSG|"Custom failure message") VAR1 ... )
|
||||
# This macro is intended to be used in FindXXX.cmake modules files.
|
||||
# It handles the REQUIRED and QUIET argument to FIND_PACKAGE() and
|
||||
# it also sets the <UPPERCASED_NAME>_FOUND variable.
|
||||
# The package is found if all variables listed are TRUE.
|
||||
# Example:
|
||||
#
|
||||
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR)
|
||||
#
|
||||
# LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and
|
||||
# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE.
|
||||
# If it is not found and REQUIRED was used, it fails with FATAL_ERROR,
|
||||
# independent whether QUIET was used or not.
|
||||
# If it is found, the location is reported using the VAR1 argument, so
|
||||
# here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out.
|
||||
# If the second argument is DEFAULT_MSG, the message in the failure case will
|
||||
# be "Could NOT find LibXml2", if you don't like this message you can specify
|
||||
# your own custom failure message there.
|
||||
|
||||
MACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1 )
|
||||
|
||||
IF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
|
||||
IF (${_NAME}_FIND_REQUIRED)
|
||||
SET(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}")
|
||||
ELSE (${_NAME}_FIND_REQUIRED)
|
||||
SET(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}")
|
||||
ENDIF (${_NAME}_FIND_REQUIRED)
|
||||
ELSE("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
|
||||
SET(_FAIL_MESSAGE "${_FAIL_MSG}")
|
||||
ENDIF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
|
||||
|
||||
STRING(TOUPPER ${_NAME} _NAME_UPPER)
|
||||
|
||||
SET(${_NAME_UPPER}_FOUND TRUE)
|
||||
IF(NOT ${_VAR1})
|
||||
SET(${_NAME_UPPER}_FOUND FALSE)
|
||||
ENDIF(NOT ${_VAR1})
|
||||
|
||||
FOREACH(_CURRENT_VAR ${ARGN})
|
||||
IF(NOT ${_CURRENT_VAR})
|
||||
SET(${_NAME_UPPER}_FOUND FALSE)
|
||||
ENDIF(NOT ${_CURRENT_VAR})
|
||||
ENDFOREACH(_CURRENT_VAR)
|
||||
|
||||
IF (${_NAME_UPPER}_FOUND)
|
||||
IF (NOT ${_NAME}_FIND_QUIETLY)
|
||||
MESSAGE(STATUS "Found ${_NAME}: ${${_VAR1}}")
|
||||
ENDIF (NOT ${_NAME}_FIND_QUIETLY)
|
||||
ELSE (${_NAME_UPPER}_FOUND)
|
||||
IF (${_NAME}_FIND_REQUIRED)
|
||||
MESSAGE(FATAL_ERROR "${_FAIL_MESSAGE}")
|
||||
ELSE (${_NAME}_FIND_REQUIRED)
|
||||
IF (NOT ${_NAME}_FIND_QUIETLY)
|
||||
MESSAGE(STATUS "${_FAIL_MESSAGE}")
|
||||
ENDIF (NOT ${_NAME}_FIND_QUIETLY)
|
||||
ENDIF (${_NAME}_FIND_REQUIRED)
|
||||
ENDIF (${_NAME_UPPER}_FOUND)
|
||||
ENDMACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS)
|
||||
@@ -5,25 +5,23 @@
|
||||
# GNU Readline library finder
|
||||
if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
|
||||
set(READLINE_FOUND TRUE)
|
||||
else(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
|
||||
FIND_PATH(READLINE_INCLUDE_DIR readline/readline.h
|
||||
/usr/include/readline
|
||||
)
|
||||
|
||||
# 2008-04-22 The next clause used to read like this:
|
||||
#
|
||||
# FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
|
||||
# FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses )
|
||||
# include(FindPackageHandleStandardArgs)
|
||||
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY )
|
||||
#
|
||||
# I was advised to modify it such that it will find an ncurses library if
|
||||
# required, but not if one was explicitly given, that is, it allows the
|
||||
# default to be overridden. PH
|
||||
else()
|
||||
find_path(READLINE_INCLUDE_DIR readline/readline.h /usr/include/readline)
|
||||
|
||||
FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
|
||||
include(FindPackageHandleStandardArgs)
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY )
|
||||
# 2008-04-22 The next clause used to read like this:
|
||||
#
|
||||
# FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
|
||||
# FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses )
|
||||
# include(FindPackageHandleStandardArgs)
|
||||
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY )
|
||||
#
|
||||
# I was advised to modify it such that it will find an ncurses library if
|
||||
# required, but not if one was explicitly given, that is, it allows the
|
||||
# default to be overridden. PH
|
||||
|
||||
MARK_AS_ADVANCED(READLINE_INCLUDE_DIR READLINE_LIBRARY)
|
||||
endif(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
|
||||
find_library(READLINE_LIBRARY NAMES readline)
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY)
|
||||
|
||||
mark_as_advanced(READLINE_INCLUDE_DIR READLINE_LIBRARY)
|
||||
endif()
|
||||
|
||||
@@ -4,8 +4,7 @@ set(PACKAGE_VERSION_PATCH 0)
|
||||
set(PACKAGE_VERSION @PCRE2_MAJOR@.@PCRE2_MINOR@.0)
|
||||
|
||||
# Check whether the requested PACKAGE_FIND_VERSION is compatible
|
||||
if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION OR
|
||||
PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR)
|
||||
if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION OR PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR)
|
||||
set(PACKAGE_VERSION_COMPATIBLE FALSE)
|
||||
else()
|
||||
set(PACKAGE_VERSION_COMPATIBLE TRUE)
|
||||
|
||||
@@ -30,33 +30,49 @@ set(PCRE2_16BIT_NAME pcre2-16)
|
||||
set(PCRE2_32BIT_NAME pcre2-32)
|
||||
set(PCRE2_POSIX_NAME pcre2-posix)
|
||||
find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h DOC "PCRE2 include directory")
|
||||
if (PCRE2_USE_STATIC_LIBS)
|
||||
if (MSVC)
|
||||
if(PCRE2_USE_STATIC_LIBS)
|
||||
if(MSVC)
|
||||
set(PCRE2_8BIT_NAME pcre2-8-static)
|
||||
set(PCRE2_16BIT_NAME pcre2-16-static)
|
||||
set(PCRE2_32BIT_NAME pcre2-32-static)
|
||||
set(PCRE2_POSIX_NAME pcre2-posix-static)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
set(PCRE2_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})
|
||||
set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
|
||||
else ()
|
||||
else()
|
||||
set(PCRE2_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX})
|
||||
if (MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX)
|
||||
if(MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX)
|
||||
set(PCRE2_PREFIX "")
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
set(PCRE2_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
|
||||
if (MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX)
|
||||
if(MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX)
|
||||
set(PCRE2_SUFFIX "-0.dll")
|
||||
elseif(MSVC)
|
||||
set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
|
||||
endif ()
|
||||
endif ()
|
||||
find_library(PCRE2_8BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "8 bit PCRE2 library")
|
||||
find_library(PCRE2_16BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}d${PCRE2_SUFFIX} DOC "16 bit PCRE2 library")
|
||||
find_library(PCRE2_32BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}d${PCRE2_SUFFIX} DOC "32 bit PCRE2 library")
|
||||
find_library(PCRE2_POSIX_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}d${PCRE2_SUFFIX} DOC "8 bit POSIX PCRE2 library")
|
||||
endif()
|
||||
endif()
|
||||
find_library(
|
||||
PCRE2_8BIT_LIBRARY
|
||||
NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX}
|
||||
DOC "8 bit PCRE2 library"
|
||||
)
|
||||
find_library(
|
||||
PCRE2_16BIT_LIBRARY
|
||||
NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}d${PCRE2_SUFFIX}
|
||||
DOC "16 bit PCRE2 library"
|
||||
)
|
||||
find_library(
|
||||
PCRE2_32BIT_LIBRARY
|
||||
NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}d${PCRE2_SUFFIX}
|
||||
DOC "32 bit PCRE2 library"
|
||||
)
|
||||
find_library(
|
||||
PCRE2_POSIX_LIBRARY
|
||||
NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}d${PCRE2_SUFFIX}
|
||||
DOC "8 bit POSIX PCRE2 library"
|
||||
)
|
||||
unset(PCRE2_NON_STANDARD_LIB_PREFIX)
|
||||
unset(PCRE2_NON_STANDARD_LIB_SUFFIX)
|
||||
unset(PCRE2_8BIT_NAME)
|
||||
@@ -65,51 +81,55 @@ unset(PCRE2_32BIT_NAME)
|
||||
unset(PCRE2_POSIX_NAME)
|
||||
|
||||
# Set version
|
||||
if (PCRE2_INCLUDE_DIR)
|
||||
if(PCRE2_INCLUDE_DIR)
|
||||
set(PCRE2_VERSION "@PCRE2_MAJOR@.@PCRE2_MINOR@.0")
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
# Which components have been found.
|
||||
if (PCRE2_8BIT_LIBRARY)
|
||||
if(PCRE2_8BIT_LIBRARY)
|
||||
set(PCRE2_8BIT_FOUND TRUE)
|
||||
endif ()
|
||||
if (PCRE2_16BIT_LIBRARY)
|
||||
endif()
|
||||
if(PCRE2_16BIT_LIBRARY)
|
||||
set(PCRE2_16BIT_FOUND TRUE)
|
||||
endif ()
|
||||
if (PCRE2_32BIT_LIBRARY)
|
||||
endif()
|
||||
if(PCRE2_32BIT_LIBRARY)
|
||||
set(PCRE2_32BIT_FOUND TRUE)
|
||||
endif ()
|
||||
if (PCRE2_POSIX_LIBRARY)
|
||||
endif()
|
||||
if(PCRE2_POSIX_LIBRARY)
|
||||
set(PCRE2_POSIX_FOUND TRUE)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
# Check if at least one component has been specified.
|
||||
list(LENGTH PCRE2_FIND_COMPONENTS PCRE2_NCOMPONENTS)
|
||||
if (PCRE2_NCOMPONENTS LESS 1)
|
||||
if(PCRE2_NCOMPONENTS LESS 1)
|
||||
message(FATAL_ERROR "No components have been specified. This is not allowed. Please, specify at least one component.")
|
||||
endif ()
|
||||
endif()
|
||||
unset(PCRE2_NCOMPONENTS)
|
||||
|
||||
# When POSIX component has been specified make sure that also 8BIT component is specified.
|
||||
set(PCRE2_8BIT_COMPONENT FALSE)
|
||||
set(PCRE2_POSIX_COMPONENT FALSE)
|
||||
foreach(component ${PCRE2_FIND_COMPONENTS})
|
||||
if (component STREQUAL "8BIT")
|
||||
if(component STREQUAL "8BIT")
|
||||
set(PCRE2_8BIT_COMPONENT TRUE)
|
||||
elseif (component STREQUAL "POSIX")
|
||||
elseif(component STREQUAL "POSIX")
|
||||
set(PCRE2_POSIX_COMPONENT TRUE)
|
||||
endif ()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
if (PCRE2_POSIX_COMPONENT AND NOT PCRE2_8BIT_COMPONENT)
|
||||
message(FATAL_ERROR "The component POSIX is specified while the 8BIT one is not. This is not allowed. Please, also specify the 8BIT component.")
|
||||
if(PCRE2_POSIX_COMPONENT AND NOT PCRE2_8BIT_COMPONENT)
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"The component POSIX is specified while the 8BIT one is not. This is not allowed. Please, also specify the 8BIT component."
|
||||
)
|
||||
endif()
|
||||
unset(PCRE2_8BIT_COMPONENT)
|
||||
unset(PCRE2_POSIX_COMPONENT)
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}")
|
||||
find_package_handle_standard_args(PCRE2
|
||||
find_package_handle_standard_args(
|
||||
PCRE2
|
||||
FOUND_VAR PCRE2_FOUND
|
||||
REQUIRED_VARS PCRE2_INCLUDE_DIR
|
||||
HANDLE_COMPONENTS
|
||||
@@ -118,31 +138,31 @@ find_package_handle_standard_args(PCRE2
|
||||
)
|
||||
|
||||
set(PCRE2_LIBRARIES)
|
||||
if (PCRE2_FOUND)
|
||||
if(PCRE2_FOUND)
|
||||
foreach(component ${PCRE2_FIND_COMPONENTS})
|
||||
if (PCRE2_USE_STATIC_LIBS)
|
||||
if(PCRE2_USE_STATIC_LIBS)
|
||||
add_library(PCRE2::${component} STATIC IMPORTED)
|
||||
target_compile_definitions(PCRE2::${component} INTERFACE PCRE2_STATIC)
|
||||
else ()
|
||||
else()
|
||||
add_library(PCRE2::${component} SHARED IMPORTED)
|
||||
endif ()
|
||||
set_target_properties(PCRE2::${component} PROPERTIES
|
||||
IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}"
|
||||
IMPORTED_IMPLIB "${PCRE2_${component}_LIBRARY}"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}"
|
||||
endif()
|
||||
set_target_properties(
|
||||
PCRE2::${component}
|
||||
PROPERTIES
|
||||
IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}"
|
||||
IMPORTED_IMPLIB "${PCRE2_${component}_LIBRARY}"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}"
|
||||
)
|
||||
if (component STREQUAL "POSIX")
|
||||
set_target_properties(PCRE2::${component} PROPERTIES
|
||||
INTERFACE_LINK_LIBRARIES "PCRE2::8BIT"
|
||||
LINK_LIBRARIES "PCRE2::8BIT"
|
||||
if(component STREQUAL "POSIX")
|
||||
set_target_properties(
|
||||
PCRE2::${component}
|
||||
PROPERTIES INTERFACE_LINK_LIBRARIES "PCRE2::8BIT" LINK_LIBRARIES "PCRE2::8BIT"
|
||||
)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
set(PCRE2_LIBRARIES ${PCRE2_LIBRARIES} ${PCRE2_${component}_LIBRARY})
|
||||
mark_as_advanced(PCRE2_${component}_LIBRARY)
|
||||
endforeach()
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
mark_as_advanced(
|
||||
PCRE2_INCLUDE_DIR
|
||||
)
|
||||
mark_as_advanced(PCRE2_INCLUDE_DIR)
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
/* config.h for CMake builds */
|
||||
|
||||
#cmakedefine HAVE_ASSERT_H 1
|
||||
#cmakedefine HAVE_BUILTIN_ASSUME 1
|
||||
#cmakedefine HAVE_BUILTIN_MUL_OVERFLOW 1
|
||||
#cmakedefine HAVE_BUILTIN_UNREACHABLE 1
|
||||
#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
|
||||
#cmakedefine HAVE_DIRENT_H 1
|
||||
#cmakedefine HAVE_SYS_STAT_H 1
|
||||
@@ -17,7 +20,6 @@
|
||||
#cmakedefine SUPPORT_PCRE2_8 1
|
||||
#cmakedefine SUPPORT_PCRE2_16 1
|
||||
#cmakedefine SUPPORT_PCRE2_32 1
|
||||
#cmakedefine PCRE2_DEBUG 1
|
||||
#cmakedefine DISABLE_PERCENT_ZT 1
|
||||
|
||||
#cmakedefine SUPPORT_LIBBZ2 1
|
||||
@@ -39,11 +41,11 @@
|
||||
#cmakedefine HEAP_MATCH_RECURSE 1
|
||||
#cmakedefine NEVER_BACKSLASH_C 1
|
||||
|
||||
#define PCRE2_EXPORT @PCRE2_EXPORT@
|
||||
#define LINK_SIZE @PCRE2_LINK_SIZE@
|
||||
#define PCRE2_EXPORT @PCRE2_EXPORT@
|
||||
#define LINK_SIZE @PCRE2_LINK_SIZE@
|
||||
#define HEAP_LIMIT @PCRE2_HEAP_LIMIT@
|
||||
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
|
||||
#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@
|
||||
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
|
||||
#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@
|
||||
#define MAX_VARLOOKBEHIND @PCRE2_MAX_VARLOOKBEHIND@
|
||||
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
|
||||
#define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@
|
||||
|
||||
+63
-25
@@ -9,23 +9,32 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
|
||||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||
|
||||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [44])
|
||||
m4_define(pcre2_minor, [46])
|
||||
m4_define(pcre2_prerelease, [])
|
||||
m4_define(pcre2_date, [2024-06-07])
|
||||
m4_define(pcre2_date, [2025-08-27])
|
||||
|
||||
# Libtool shared library interface versions (current:revision:age)
|
||||
m4_define(libpcre2_8_version, [13:0:13])
|
||||
m4_define(libpcre2_16_version, [13:0:13])
|
||||
m4_define(libpcre2_32_version, [13:0:13])
|
||||
m4_define(libpcre2_posix_version, [3:5:0])
|
||||
m4_define(libpcre2_8_version, [14:0:14])
|
||||
m4_define(libpcre2_16_version, [14:0:14])
|
||||
m4_define(libpcre2_32_version, [14:0:14])
|
||||
m4_define(libpcre2_posix_version, [3:6:0])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
||||
AC_PREREQ([2.62])
|
||||
AC_PREREQ([2.60])
|
||||
AC_INIT([PCRE2],pcre2_major.pcre2_minor[]pcre2_prerelease,[],[pcre2])
|
||||
AC_CONFIG_SRCDIR([src/pcre2.h.in])
|
||||
AM_INIT_AUTOMAKE([dist-bzip2 dist-zip])
|
||||
AM_INIT_AUTOMAKE([dist-bzip2 dist-zip foreign])
|
||||
ifelse(pcre2_prerelease, [-DEV],
|
||||
[dnl For development builds, ./configure is not checked in to Git, so we are
|
||||
dnl happy to have it regenerated as needed.
|
||||
AM_MAINTAINER_MODE([enable])],
|
||||
[dnl For a release build (or RC), the ./configure script we ship in the
|
||||
dnl tarball (and check in to the Git tag) should not be regenerated
|
||||
dnl implicitly. This is important if users want to check out a release tag
|
||||
dnl using Git.
|
||||
AM_MAINTAINER_MODE])
|
||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||
AC_CONFIG_HEADERS(src/config.h)
|
||||
|
||||
@@ -73,6 +82,40 @@ AC_SYS_LARGEFILE
|
||||
|
||||
PCRE2_VISIBILITY
|
||||
|
||||
# Check for Clang __attribute__((uninitialized)) feature
|
||||
|
||||
AC_MSG_CHECKING([for __attribute__((uninitialized))])
|
||||
AC_LANG_PUSH([C])
|
||||
tmp_CFLAGS=$CFLAGS
|
||||
if test $WORKING_WERROR -eq 1; then
|
||||
CFLAGS="$CFLAGS -Werror"
|
||||
fi
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
|
||||
[[char buf[128] __attribute__((uninitialized));(void)buf]])],
|
||||
[pcre2_cc_cv_attribute_uninitialized=yes],
|
||||
[pcre2_cc_cv_attribute_uninitialized=no])
|
||||
AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized])
|
||||
if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then
|
||||
AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler
|
||||
supports __attribute__((uninitialized))])
|
||||
fi
|
||||
CFLAGS=$tmp_CFLAGS
|
||||
AC_LANG_POP([C])
|
||||
|
||||
# Check for the assume() builtin
|
||||
|
||||
AC_MSG_CHECKING([for __assume()])
|
||||
AC_LANG_PUSH([C])
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[]], [[__assume(1)]])],
|
||||
[pcre2_cc_cv_builtin_assume=yes],
|
||||
[pcre2_cc_cv_builtin_assume=no])
|
||||
AC_MSG_RESULT([$pcre2_cc_cv_builtin_assume])
|
||||
if test "$pcre2_cc_cv_builtin_assume" = yes; then
|
||||
AC_DEFINE([HAVE_BUILTIN_ASSUME], 1,
|
||||
[Define this if your compiler provides __assume()])
|
||||
fi
|
||||
AC_LANG_POP([C])
|
||||
|
||||
# Check for the mul_overflow() builtin
|
||||
|
||||
AC_MSG_CHECKING([for __builtin_mul_overflow()])
|
||||
@@ -95,22 +138,18 @@ if test "$pcre2_cc_cv_builtin_mul_overflow" = yes; then
|
||||
fi
|
||||
AC_LANG_POP([C])
|
||||
|
||||
# Check for Clang __attribute__((uninitialized)) feature
|
||||
# Check for the unreachable() builtin
|
||||
|
||||
AC_MSG_CHECKING([for __attribute__((uninitialized))])
|
||||
AC_MSG_CHECKING([for __builtin_unreachable()])
|
||||
AC_LANG_PUSH([C])
|
||||
tmp_CFLAGS=$CFLAGS
|
||||
CFLAGS="$CFLAGS -Werror"
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
|
||||
[[char buf[128] __attribute__((uninitialized));(void)buf]])],
|
||||
[pcre2_cc_cv_attribute_uninitialized=yes],
|
||||
[pcre2_cc_cv_attribute_uninitialized=no])
|
||||
AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized])
|
||||
if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then
|
||||
AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler
|
||||
supports __attribute__((uninitialized))])
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[int r;]], [[if (r) __builtin_unreachable()]])],
|
||||
[pcre2_cc_cv_builtin_unreachable=yes],
|
||||
[pcre2_cc_cv_builtin_unreachable=no])
|
||||
AC_MSG_RESULT([$pcre2_cc_cv_builtin_unreachable])
|
||||
if test "$pcre2_cc_cv_builtin_unreachable" = yes; then
|
||||
AC_DEFINE([HAVE_BUILTIN_UNREACHABLE], 1,
|
||||
[Define this if your compiler provides __builtin_unreachable()])
|
||||
fi
|
||||
CFLAGS=$tmp_CFLAGS
|
||||
AC_LANG_POP([C])
|
||||
|
||||
# Versioning
|
||||
@@ -191,7 +230,7 @@ if test "$enable_jit" = "auto"; then
|
||||
CPPFLAGS=-I$srcdir
|
||||
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
|
||||
#define SLJIT_CONFIG_AUTO 1
|
||||
#include "src/sljit/sljitConfigCPU.h"
|
||||
#include "deps/sljit/sljit_src/sljitConfigCPU.h"
|
||||
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
|
||||
#error unsupported
|
||||
#endif]])], enable_jit=yes, enable_jit=no)
|
||||
@@ -285,7 +324,7 @@ AC_ARG_ENABLE(never-backslash-C,
|
||||
# Handle --enable-ebcdic
|
||||
AC_ARG_ENABLE(ebcdic,
|
||||
AS_HELP_STRING([--enable-ebcdic],
|
||||
[assume EBCDIC coding rather than ASCII; incompatible with --enable-utf; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
|
||||
[assume EBCDIC coding rather than ASCII; incompatible with --enable-unicode; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
|
||||
, enable_ebcdic=no)
|
||||
|
||||
# Handle --enable-ebcdic-nl25
|
||||
@@ -521,7 +560,7 @@ HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */])
|
||||
|
||||
# Checks for header files.
|
||||
AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h)
|
||||
AC_CHECK_HEADERS(assert.h limits.h sys/types.h sys/stat.h dirent.h)
|
||||
AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
|
||||
AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1])
|
||||
|
||||
@@ -529,7 +568,6 @@ AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1])
|
||||
AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes")
|
||||
AM_CONDITIONAL(WITH_PCRE2_16, test "x$enable_pcre2_16" = "xyes")
|
||||
AM_CONDITIONAL(WITH_PCRE2_32, test "x$enable_pcre2_32" = "xyes")
|
||||
AM_CONDITIONAL(WITH_DEBUG, test "x$enable_debug" = "xyes")
|
||||
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
|
||||
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
|
||||
AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes")
|
||||
|
||||
@@ -105,6 +105,7 @@ example.
|
||||
pcre2_chkdint.c
|
||||
pcre2_chartables.c
|
||||
pcre2_compile.c
|
||||
pcre2_compile_class.c
|
||||
pcre2_config.c
|
||||
pcre2_context.c
|
||||
pcre2_convert.c
|
||||
@@ -138,7 +139,7 @@ example.
|
||||
Note that you must compile pcre2_jit_compile.c, even if you have not
|
||||
defined SUPPORT_JIT in src/config.h, because when JIT support is not
|
||||
configured, dummy functions are compiled. When JIT support IS configured,
|
||||
pcre2_jit_compile.c #includes other files from the sljit subdirectory,
|
||||
pcre2_jit_compile.c #includes other files from the sljit dependency,
|
||||
all of whose names begin with "sljit". It also #includes
|
||||
src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile
|
||||
those yourself.
|
||||
@@ -301,56 +302,66 @@ Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
|
||||
spaces in the names for your CMake installation and your PCRE2 source and build
|
||||
directories.
|
||||
|
||||
The following instructions were contributed by a PCRE1 user, but they should
|
||||
also work for PCRE2. If they are not followed exactly, errors may occur. In the
|
||||
event that errors do occur, it is recommended that you delete the CMake cache
|
||||
before attempting to repeat the CMake build process. In the CMake GUI, the
|
||||
cache can be deleted by selecting "File > Delete Cache".
|
||||
If you are using CMake and encounter errors, deleting the CMake cache and
|
||||
restarting from a fresh build may fix the error. In the CMake GUI, the cache can
|
||||
be deleted by selecting "File > Delete Cache"; or the folder "CMakeCache" can
|
||||
be deleted.
|
||||
|
||||
1. Install the latest CMake version available from http://www.cmake.org/, and
|
||||
ensure that cmake\bin is on your path.
|
||||
1. Install the latest CMake version available from http://www.cmake.org/, and
|
||||
ensure that cmake\bin is on your path.
|
||||
|
||||
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
|
||||
directory such as C:\pcre2. You should ensure your local date and time
|
||||
is not earlier than the file dates in your source dir if the release is
|
||||
very new.
|
||||
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
|
||||
directory such as C:\pcre2. You should ensure your local date and time
|
||||
is not earlier than the file dates in your source dir if the release is
|
||||
very new.
|
||||
|
||||
3. Create a new, empty build directory, preferably a subdirectory of the
|
||||
source dir. For example, C:\pcre2\pcre2-xx\build.
|
||||
3. Create a new, empty build directory, preferably a subdirectory of the
|
||||
source dir. For example, C:\pcre2\pcre2-xx\build.
|
||||
|
||||
4. Run cmake-gui from the Shell environment of your build tool, for example,
|
||||
Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
|
||||
to start Cmake from the Windows Start menu, as this can lead to errors.
|
||||
4. Run CMake.
|
||||
|
||||
5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
|
||||
build directories, respectively.
|
||||
- Using the CLI, simply run `cmake ..` inside the `build/` directory. You can
|
||||
use the `ccmake` ncurses GUI to select and configure PCRE2 features.
|
||||
|
||||
6. Hit the "Configure" button.
|
||||
- Using the CMake GUI:
|
||||
|
||||
7. Select the particular IDE / build tool that you are using (Visual
|
||||
Studio, MSYS makefiles, MinGW makefiles, etc.)
|
||||
a) Run cmake-gui from the Shell environment of your build tool, for
|
||||
example, Msys for Msys/MinGW or Visual Studio Command Prompt for
|
||||
VC/VC++.
|
||||
|
||||
8. The GUI will then list several configuration options. This is where
|
||||
you can disable Unicode support or select other PCRE2 optional features.
|
||||
b) Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
|
||||
build directories, respectively.
|
||||
|
||||
9. Hit "Configure" again. The adjacent "Generate" button should now be
|
||||
active.
|
||||
c) Press the "Configure" button.
|
||||
|
||||
10. Hit "Generate".
|
||||
d) Select the particular IDE / build tool that you are using (Visual
|
||||
Studio, MSYS makefiles, MinGW makefiles, etc.)
|
||||
|
||||
11. The build directory should now contain a usable build system, be it a
|
||||
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
|
||||
cmake-gui and use the generated build system with your compiler or IDE.
|
||||
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
|
||||
solution, select the desired configuration (Debug, or Release, etc.) and
|
||||
build the ALL_BUILD project.
|
||||
e) The GUI will then list several configuration options. This is where
|
||||
you can disable Unicode support or select other PCRE2 optional features.
|
||||
|
||||
12. If during configuration with cmake-gui you've elected to build the test
|
||||
programs, you can execute them by building the test project. E.g., for
|
||||
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
|
||||
most recent build configuration is targeted by the tests. A summary of
|
||||
test results is presented. Complete test output is subsequently
|
||||
available for review in Testing\Temporary under your build dir.
|
||||
f) Press "Configure" again. The adjacent "Generate" button should now be
|
||||
active.
|
||||
|
||||
g) Press "Generate".
|
||||
|
||||
5. The build directory should now contain a usable build system, be it a
|
||||
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
|
||||
cmake-gui and use the generated build system with your compiler or IDE.
|
||||
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
|
||||
solution, select the desired configuration (Debug, or Release, etc.) and
|
||||
build the ALL_BUILD project.
|
||||
|
||||
Regardless of build system used, `cmake --build .` will build it.
|
||||
|
||||
6. If during configuration with cmake-gui you've elected to build the test
|
||||
programs, you can execute them by building the test project. E.g., for
|
||||
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
|
||||
most recent build configuration is targeted by the tests. A summary of
|
||||
test results is presented. Complete test output is subsequently
|
||||
available for review in Testing\Temporary under your build dir.
|
||||
|
||||
Regardless of build system used, `ctest` will run the tests.
|
||||
|
||||
|
||||
BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO
|
||||
@@ -425,6 +436,7 @@ OpenVMS. They are in the "vms" directory in the distribution tarball. Please
|
||||
read the file called vms/openvms_readme.txt. The pcre2test and pcre2grep
|
||||
programs contain some VMS-specific code.
|
||||
|
||||
===========================
|
||||
Last Updated: 16 April 2024
|
||||
===========================
|
||||
==============================
|
||||
Last updated: 26 December 2024
|
||||
==============================
|
||||
|
||||
|
||||
+86
-72
@@ -385,7 +385,7 @@ library. They are also documented in the pcre2build man page.
|
||||
|
||||
If this is done, when pcre2test's input is from a terminal, it reads it using
|
||||
the readline() function. This provides line-editing and history facilities.
|
||||
Note that libreadline is GPL-licenced, so if you distribute a binary of
|
||||
Note that libreadline is GPL-licensed, so if you distribute a binary of
|
||||
pcre2test linked in this way, there may be licensing issues. These can be
|
||||
avoided by linking with libedit (which has a BSD licence) instead.
|
||||
|
||||
@@ -411,20 +411,19 @@ library. They are also documented in the pcre2build man page.
|
||||
Instead of %td or %zu, %lu is used, with a cast for size_t values.
|
||||
|
||||
. There is a special option called --enable-fuzz-support for use by people who
|
||||
want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
|
||||
library. If set, it causes an extra library called libpcre2-fuzzsupport.a to
|
||||
be built, but not installed. This contains a single function called
|
||||
LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the
|
||||
length of the string. When called, this function tries to compile the string
|
||||
as a pattern, and if that succeeds, to match it. This is done both with no
|
||||
options and with some random options bits that are generated from the string.
|
||||
Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
|
||||
be created. This is normally run under valgrind or used when PCRE2 is
|
||||
compiled with address sanitizing enabled. It calls the fuzzing function and
|
||||
outputs information about what it is doing. The input strings are specified
|
||||
by arguments: if an argument starts with "=" the rest of it is a literal
|
||||
input string. Otherwise, it is assumed to be a file name, and the contents
|
||||
of the file are the test string.
|
||||
want to run fuzzing tests on PCRE2. If set, it causes an extra library
|
||||
called libpcre2-fuzzsupport.a to be built, but not installed. This contains
|
||||
a single function called LLVMFuzzerTestOneInput() whose arguments are a
|
||||
pointer to a string and the length of the string. When called, this function
|
||||
tries to compile the string as a pattern, and if that succeeds, to match
|
||||
it. This is done both with no options and with some random options bits that
|
||||
are generated from the string. Setting --enable-fuzz-support also causes an
|
||||
executable called pcre2fuzzcheck-{8,16,32} to be created. This is normally
|
||||
run under valgrind or used when PCRE2 is compiled with address sanitizing
|
||||
enabled. It calls the fuzzing function and outputs information about what it
|
||||
is doing. The input strings are specified by arguments: if an argument
|
||||
starts with "=" the rest of it is a literal input string. Otherwise, it is
|
||||
assumed to be a file name, and the contents of the file are the test string.
|
||||
|
||||
. Releases before 10.30 could be compiled with --disable-stack-for-recursion,
|
||||
which caused pcre2_match() to use individual blocks on the heap for
|
||||
@@ -510,6 +509,7 @@ system. The following are installed (file names are all relative to the
|
||||
LICENCE
|
||||
NEWS
|
||||
README
|
||||
SECURITY
|
||||
pcre2.txt (a concatenation of the man(3) pages)
|
||||
pcre2test.txt the pcre2test man page
|
||||
pcre2grep.txt the pcre2grep man page
|
||||
@@ -607,8 +607,9 @@ zip formats. The command "make distcheck" does the same, but then does a trial
|
||||
build of the new distribution to ensure that it works.
|
||||
|
||||
If you have modified any of the man page sources in the doc directory, you
|
||||
should first run the PrepareRelease script before making a distribution. This
|
||||
script creates the .txt and HTML forms of the documentation from the man pages.
|
||||
should first run the maint/PrepareRelease script before making a distribution.
|
||||
This script creates the .txt and HTML forms of the documentation from the man
|
||||
pages.
|
||||
|
||||
|
||||
Testing PCRE2
|
||||
@@ -822,37 +823,38 @@ The distribution should contain the files listed below.
|
||||
ASCII coding; unless --enable-rebuild-chartables is
|
||||
specified, used by copying to pcre2_chartables.c
|
||||
|
||||
src/pcre2posix.c )
|
||||
src/pcre2_auto_possess.c )
|
||||
src/pcre2_chkdint.c )
|
||||
src/pcre2_compile.c )
|
||||
src/pcre2_config.c )
|
||||
src/pcre2_context.c )
|
||||
src/pcre2_convert.c )
|
||||
src/pcre2_dfa_match.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_extuni.c )
|
||||
src/pcre2_find_bracket.c )
|
||||
src/pcre2_jit_compile.c )
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||
src/pcre2_maketables.c )
|
||||
src/pcre2_match.c )
|
||||
src/pcre2_match_data.c )
|
||||
src/pcre2_newline.c )
|
||||
src/pcre2_ord2utf.c )
|
||||
src/pcre2_pattern_info.c )
|
||||
src/pcre2_script_run.c )
|
||||
src/pcre2_serialize.c )
|
||||
src/pcre2_string_utils.c )
|
||||
src/pcre2_study.c )
|
||||
src/pcre2_substitute.c )
|
||||
src/pcre2_substring.c )
|
||||
src/pcre2_tables.c )
|
||||
src/pcre2_ucd.c )
|
||||
src/pcre2_ucptables.c )
|
||||
src/pcre2_valid_utf.c )
|
||||
src/pcre2_xclass.c )
|
||||
src/pcre2posix.c )
|
||||
src/pcre2_auto_possess.c )
|
||||
src/pcre2_chkdint.c )
|
||||
src/pcre2_compile.c )
|
||||
src/pcre2_compile_class.c )
|
||||
src/pcre2_config.c )
|
||||
src/pcre2_context.c )
|
||||
src/pcre2_convert.c )
|
||||
src/pcre2_dfa_match.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_extuni.c )
|
||||
src/pcre2_find_bracket.c )
|
||||
src/pcre2_jit_compile.c )
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||
src/pcre2_maketables.c )
|
||||
src/pcre2_match.c )
|
||||
src/pcre2_match_data.c )
|
||||
src/pcre2_newline.c )
|
||||
src/pcre2_ord2utf.c )
|
||||
src/pcre2_pattern_info.c )
|
||||
src/pcre2_script_run.c )
|
||||
src/pcre2_serialize.c )
|
||||
src/pcre2_string_utils.c )
|
||||
src/pcre2_study.c )
|
||||
src/pcre2_substitute.c )
|
||||
src/pcre2_substring.c )
|
||||
src/pcre2_tables.c )
|
||||
src/pcre2_ucd.c )
|
||||
src/pcre2_ucptables.c )
|
||||
src/pcre2_valid_utf.c )
|
||||
src/pcre2_xclass.c )
|
||||
|
||||
src/pcre2_printint.c debugging function that is used by pcre2test,
|
||||
src/pcre2_fuzzsupport.c function for (optional) fuzzing support
|
||||
@@ -860,13 +862,16 @@ The distribution should contain the files listed below.
|
||||
src/config.h.in template for config.h, when built by "configure"
|
||||
src/pcre2.h.in template for pcre2.h when built by "configure"
|
||||
src/pcre2posix.h header for the external POSIX wrapper API
|
||||
src/pcre2_compile.h header for internal use
|
||||
src/pcre2_internal.h header for internal use
|
||||
src/pcre2_intmodedep.h a mode-specific internal header
|
||||
src/pcre2_jit_char_inc.h header used by JIT
|
||||
src/pcre2_jit_neon_inc.h header used by JIT
|
||||
src/pcre2_jit_simd_inc.h header used by JIT
|
||||
src/pcre2_ucp.h header for Unicode property handling
|
||||
src/pcre2_util.h header for internal utils
|
||||
|
||||
sljit/* source files for the JIT compiler
|
||||
deps/sljit/sljit_src/* source files for the JIT compiler
|
||||
|
||||
(B) Source files for programs that use PCRE2:
|
||||
|
||||
@@ -878,48 +883,49 @@ The distribution should contain the files listed below.
|
||||
|
||||
(C) Auxiliary files:
|
||||
|
||||
132html script to turn "man" pages into HTML
|
||||
AUTHORS information about the author of PCRE2
|
||||
AUTHORS.md information about the authors of PCRE2
|
||||
ChangeLog log of changes to the code
|
||||
CleanTxt script to clean nroff output for txt man pages
|
||||
Detrail script to remove trailing spaces
|
||||
HACKING some notes about the internals of PCRE2
|
||||
INSTALL generic installation instructions
|
||||
LICENCE conditions for the use of PCRE2
|
||||
LICENCE.md conditions for the use of PCRE2
|
||||
COPYING the same, using GNU's standard name
|
||||
SECURITY.md information on reporting vulnerabilities
|
||||
Makefile.in ) template for Unix Makefile, which is built by
|
||||
) "configure"
|
||||
Makefile.am ) the automake input that was used to create
|
||||
) Makefile.in
|
||||
NEWS important changes in this release
|
||||
NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools
|
||||
PrepareRelease script to make preparations for "make dist"
|
||||
README this file
|
||||
RunTest a Unix shell script for running tests
|
||||
RunGrepTest a Unix shell script for pcre2grep tests
|
||||
RunTest.bat a Windows batch file for running tests
|
||||
RunGrepTest.bat a Windows batch file for pcre2grep tests
|
||||
aclocal.m4 m4 macros (generated by "aclocal")
|
||||
config.guess ) files used by libtool,
|
||||
config.sub ) used only when building a shared library
|
||||
m4/* m4 macros (used by autoconf)
|
||||
configure a configuring shell script (built by autoconf)
|
||||
configure.ac ) the autoconf input that was used to build
|
||||
) "configure" and config.h
|
||||
depcomp ) script to find program dependencies, generated by
|
||||
) automake
|
||||
doc/*.3 man page sources for PCRE2
|
||||
doc/*.1 man page sources for pcre2grep and pcre2test
|
||||
doc/index.html.src the base HTML page
|
||||
doc/html/* HTML documentation
|
||||
doc/pcre2.txt plain text version of the man pages
|
||||
doc/pcre2-config.txt plain text documentation of pcre2-config script
|
||||
doc/pcre2grep.txt plain text documentation of grep utility program
|
||||
doc/pcre2test.txt plain text documentation of test program
|
||||
install-sh a shell script for installing files
|
||||
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
|
||||
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
|
||||
libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
|
||||
libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config
|
||||
ltmain.sh file used to build a libtool script
|
||||
missing ) common stub for a few missing GNU programs while
|
||||
) installing, generated by automake
|
||||
mkinstalldirs script for making install directories
|
||||
ar-lib )
|
||||
config.guess )
|
||||
config.sub )
|
||||
depcomp ) helper tools generated by libtool and
|
||||
compile ) automake, used internally by ./configure
|
||||
install-sh )
|
||||
ltmain.sh )
|
||||
missing )
|
||||
test-driver )
|
||||
perltest.sh Script for running a Perl test program
|
||||
pcre2-config.in source of script which retains PCRE2 information
|
||||
testdata/testinput* test data for main library tests
|
||||
@@ -927,12 +933,13 @@ The distribution should contain the files listed below.
|
||||
testdata/grep* input and output for pcre2grep tests
|
||||
testdata/* other supporting test files
|
||||
|
||||
(D) Auxiliary files for cmake support
|
||||
(D) Auxiliary files for CMake support
|
||||
|
||||
cmake/COPYING-CMAKE-SCRIPTS
|
||||
cmake/FindPackageHandleStandardArgs.cmake
|
||||
cmake/FindEditline.cmake
|
||||
cmake/FindReadline.cmake
|
||||
cmake/pcre2-config-version.cmake.in
|
||||
cmake/pcre2-config.cmake.in
|
||||
CMakeLists.txt
|
||||
config-cmake.h.in
|
||||
|
||||
@@ -943,14 +950,21 @@ The distribution should contain the files listed below.
|
||||
src/config.h.generic ) a version of config.h for use in non-"configure"
|
||||
) environments
|
||||
|
||||
(F) Auxiliary files for building PCRE2 under OpenVMS
|
||||
(F) Auxiliary files for building PCRE2 using other build systems
|
||||
|
||||
BUILD.bazel )
|
||||
MODULE.bazel ) files used by the Bazel build system
|
||||
WORKSPACE.bazel )
|
||||
build.zig file used by zig's build system
|
||||
|
||||
(G) Auxiliary files for building PCRE2 under OpenVMS
|
||||
|
||||
vms/configure.com )
|
||||
vms/openvms_readme.txt ) These files were contributed by a PCRE2 user.
|
||||
vms/pcre2.h_patch )
|
||||
vms/stdint.h )
|
||||
|
||||
Philip Hazel
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
Last updated: 15 April 2024
|
||||
==============================
|
||||
Last updated: 18 December 2024
|
||||
==============================
|
||||
|
||||
|
||||
@@ -267,6 +267,9 @@ in the library.
|
||||
<tr><td><a href="pcre2_set_offset_limit.html">pcre2_set_offset_limit</a></td>
|
||||
<td> Set the offset limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_optimize.html">pcre2_set_optimize</a></td>
|
||||
<td> Set an optimization directive</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_parens_nest_limit.html">pcre2_set_parens_nest_limit</a></td>
|
||||
<td> Set the parentheses nesting limit</td></tr>
|
||||
|
||||
@@ -276,6 +279,12 @@ in the library.
|
||||
<tr><td><a href="pcre2_set_recursion_memory_management.html">pcre2_set_recursion_memory_management</a></td>
|
||||
<td> Obsolete function that (from 10.30 onwards) does nothing</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_substitute_callout.html">pcre2_set_substitute_callout</a></td>
|
||||
<td> Set a substitution callout function</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_substitute_case_callout.html">pcre2_set_substitute_case_callout</a></td>
|
||||
<td> Set a substitution case callout function</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_substitute.html">pcre2_substitute</a></td>
|
||||
<td> Match a compiled pattern to a subject string and do
|
||||
substitutions</td></tr>
|
||||
|
||||
+11
-11
@@ -16,7 +16,7 @@ please consult the man page, in case the conversion went wrong.
|
||||
<li><a name="TOC1" href="#SEC1">INTRODUCTION</a>
|
||||
<li><a name="TOC2" href="#SEC2">SECURITY CONSIDERATIONS</a>
|
||||
<li><a name="TOC3" href="#SEC3">USER DOCUMENTATION</a>
|
||||
<li><a name="TOC4" href="#SEC4">AUTHOR</a>
|
||||
<li><a name="TOC4" href="#SEC4">AUTHORS</a>
|
||||
<li><a name="TOC5" href="#SEC5">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">INTRODUCTION</a><br>
|
||||
@@ -190,22 +190,22 @@ listing), and the short pages for individual functions, are concatenated in
|
||||
In the "man" and HTML formats, there is also a short page for each C library
|
||||
function, listing its arguments and results.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">AUTHOR</a><br>
|
||||
<br><a name="SEC4" href="#TOC1">AUTHORS</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
Retired from University Computing Service
|
||||
<br>
|
||||
Cambridge, England.
|
||||
<br>
|
||||
The current maintainers of PCRE2 are Nicholas Wilson and Zoltan Herczeg.
|
||||
</P>
|
||||
<P>
|
||||
Putting an actual email address here is a spam magnet. If you want to email me,
|
||||
use my two names separated by a dot at gmail.com.
|
||||
PCRE2 was written by Philip Hazel, of the University Computing Service,
|
||||
Cambridge, England. Many others have also contributed.
|
||||
</P>
|
||||
<P>
|
||||
To contact the maintainers, please use the GitHub issues tracker or PCRE2
|
||||
mailing list, as described at the project page:
|
||||
<a href="https://github.com/PCRE2Project/pcre2">https://github.com/PCRE2Project/pcre2</a>
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 27 August 2021
|
||||
Last updated: 18 December 2024
|
||||
<br>
|
||||
Copyright © 1997-2021 University of Cambridge.
|
||||
<br>
|
||||
|
||||
@@ -57,6 +57,7 @@ The primary option bits are:
|
||||
PCRE2_ALLOW_EMPTY_CLASS Allow empty classes
|
||||
PCRE2_ALT_BSUX Alternative handling of \u, \U, and \x
|
||||
PCRE2_ALT_CIRCUMFLEX Alternative handling of ^ in multiline mode
|
||||
PCRE2_ALT_EXTENDED_CLASS Alternative extended character class syntax
|
||||
PCRE2_ALT_VERBNAMES Process backslashes in verb names
|
||||
PCRE2_AUTO_CALLOUT Compile automatic callouts
|
||||
PCRE2_CASELESS Do caseless matching
|
||||
|
||||
@@ -33,9 +33,18 @@ details are given in the
|
||||
documentation.
|
||||
</P>
|
||||
<P>
|
||||
The first argument is a pointer that was returned by a successful call to
|
||||
<b>pcre2_compile()</b>, and the second must contain one or more of the following
|
||||
bits:
|
||||
The availability of JIT support can be tested by calling
|
||||
<b>pcre2_jit_compile()</b> with a single option PCRE2_JIT_TEST_ALLOC (the
|
||||
code argument is ignored, so a NULL value is accepted). Such a call
|
||||
returns zero if JIT is available and has a working allocator. Otherwise
|
||||
it returns PCRE2_ERROR_NOMEMORY if JIT is available but cannot allocate
|
||||
executable memory, or PCRE2_ERROR_JIT_UNSUPPORTED if JIT support is not
|
||||
compiled.
|
||||
</P>
|
||||
<P>
|
||||
Otherwise, the first argument must be a pointer that was returned by a
|
||||
successful call to <b>pcre2_compile()</b>, and the second must contain one or
|
||||
more of the following bits:
|
||||
<pre>
|
||||
PCRE2_JIT_COMPLETE compile code for full matching
|
||||
PCRE2_JIT_PARTIAL_SOFT compile code for soft partial matching
|
||||
@@ -46,11 +55,13 @@ superseded by the <b>pcre2_compile()</b> option PCRE2_MATCH_INVALID_UTF. The old
|
||||
option is deprecated and may be removed in the future.
|
||||
</P>
|
||||
<P>
|
||||
The yield of the function is 0 for success, or a negative error code otherwise.
|
||||
In particular, PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or
|
||||
if an unknown bit is set in <i>options</i>. The function can also return
|
||||
PCRE2_ERROR_NOMEMORY if JIT is unable to allocate executable memory for the
|
||||
compiler, even if it was because of a system security restriction.
|
||||
The yield of the function when called with any of the three options above is 0
|
||||
for success, or a negative error code otherwise. In particular,
|
||||
PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or if an unknown
|
||||
bit is set in <i>options</i>. The function can also return PCRE2_ERROR_NOMEMORY
|
||||
if JIT is unable to allocate executable memory for the compiler, even if it was
|
||||
because of a system security restriction. In a few cases, the function may
|
||||
return with PCRE2_ERROR_JIT_UNSUPPORTED for unsupported features.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
||||
@@ -43,6 +43,10 @@ options are:
|
||||
PCRE2_EXTRA_ESCAPED_CR_IS_LF Interpret \r as \n
|
||||
PCRE2_EXTRA_MATCH_LINE Pattern matches whole lines
|
||||
PCRE2_EXTRA_MATCH_WORD Pattern matches "words"
|
||||
PCRE2_EXTRA_NEVER_CALLOUT Disallow callouts in pattern
|
||||
PCRE2_EXTRA_NO_BS0 Disallow \0 (but not \00 or \000)
|
||||
PCRE2_EXTRA_PYTHON_OCTAL Use Python rules for octal
|
||||
PCRE2_EXTRA_TURKISH_CASING Use Turkish I case folding
|
||||
</pre>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
|
||||
@@ -27,9 +27,9 @@ DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function sets, in a compile context, the maximum size (in bytes) for the
|
||||
memory needed to hold the compiled version of a pattern that is compiled with
|
||||
this context. The result is always zero. If a pattern that is passed to
|
||||
<b>pcre2_compile()</b> with this context needs more memory, an error is
|
||||
memory needed to hold the compiled version of a pattern that is using this
|
||||
context. The result is always zero. If a pattern that is passed to
|
||||
<b>pcre2_compile()</b> referencing this context needs more memory, an error is
|
||||
generated. The default is the largest number that a PCRE2_SIZE variable can
|
||||
hold, which is effectively unlimited.
|
||||
</P>
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_set_optimize specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_set_optimize man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_optimize(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> uint32_t <i>directive</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function controls which performance optimizations will be applied
|
||||
by <b>pcre2_compile()</b>. It can be called multiple times with the same compile
|
||||
context; the effects are cumulative, with the effects of later calls taking
|
||||
precedence over earlier ones.
|
||||
</P>
|
||||
<P>
|
||||
The result is zero for success, PCRE2_ERROR_NULL if <i>ccontext</i> is NULL,
|
||||
or PCRE2_ERROR_BADOPTION if <i>directive</i> is unknown. The latter could be
|
||||
useful to detect if a certain optimization is available.
|
||||
</P>
|
||||
<P>
|
||||
The possible values for the <i>directive</i> parameter are:
|
||||
<pre>
|
||||
PCRE2_OPTIMIZATION_FULL Enable all optimizations (default)
|
||||
PCRE2_OPTIMIZATION_NONE Disable all optimizations
|
||||
PCRE2_AUTO_POSSESS Enable auto-possessification
|
||||
PCRE2_AUTO_POSSESS_OFF Disable auto-possessification
|
||||
PCRE2_DOTSTAR_ANCHOR Enable implicit dotstar anchoring
|
||||
PCRE2_DOTSTAR_ANCHOR_OFF Disable implicit dotstar anchoring
|
||||
PCRE2_START_OPTIMIZE Enable start-up optimizations at match time
|
||||
PCRE2_START_OPTIMIZE_OFF Disable start-up optimizations at match time
|
||||
</pre>
|
||||
There is a complete description of the PCRE2 native API, including detailed
|
||||
descriptions of the <i>directive</i> parameter values, in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
@@ -20,7 +20,7 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_substitute_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> int (*<i>callout_function</i>)(pcre2_substitute_callout_block *),</b>
|
||||
<b> int (*<i>callout_function</i>)(pcre2_substitute_callout_block *, void *),</b>
|
||||
<b> void *<i>callout_data</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>pcre2_set_substitute_case_callout specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_set_substitute_case_callout man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_substitute_case_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> PCRE2_SIZE (*<i>callout_function</i>)(PCRE2_SPTR, PCRE2_SIZE,</b>
|
||||
<b> PCRE2_UCHAR *, PCRE2_SIZE,</b>
|
||||
<b> int, void *),</b>
|
||||
<b> void *<i>callout_data</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function sets the substitute case callout fields in a match context (the
|
||||
first argument). The second argument specifies a callout function, and the third
|
||||
argument is an opaque data item that is passed to it. The result of this
|
||||
function is always zero.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
+411
-101
@@ -179,6 +179,10 @@ document for an overview of all the PCRE2 documentation.
|
||||
<br>
|
||||
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> int (*<i>guard_function</i>)(uint32_t, void *), void *<i>user_data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_optimize(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> uint32_t <i>directive</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS</a><br>
|
||||
<P>
|
||||
@@ -203,6 +207,13 @@ document for an overview of all the PCRE2 documentation.
|
||||
<b> void *<i>callout_data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_substitute_case_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> PCRE2_SIZE (*<i>callout_function</i>)(PCRE2_SPTR, PCRE2_SIZE,</b>
|
||||
<b> PCRE2_UCHAR *, PCRE2_SIZE,</b>
|
||||
<b> int, void *),</b>
|
||||
<b> void *<i>callout_data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_offset_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||
<br>
|
||||
@@ -808,6 +819,7 @@ following compile-time parameters:
|
||||
The compile time nested parentheses limit
|
||||
The maximum length of the pattern string
|
||||
The extra options bits (none set by default)
|
||||
Which performance optimizations the compiler should apply
|
||||
</pre>
|
||||
A compile context is also required if you are using custom memory management.
|
||||
If none of these apply, just pass NULL as the context argument of
|
||||
@@ -952,6 +964,110 @@ The first argument to the callout function gives the current depth of
|
||||
nesting, and the second is user data that is set up by the last argument of
|
||||
<b>pcre2_set_compile_recursion_guard()</b>. The callout function should return
|
||||
zero if all is well, or non-zero to force an error.
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_optimize(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> uint32_t <i>directive</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
PCRE2 can apply various performance optimizations during compilation, in order
|
||||
to make matching faster. For example, the compiler might convert some regex
|
||||
constructs into an equivalent construct which <b>pcre2_match()</b> can execute
|
||||
faster. By default, all available optimizations are enabled. However, in rare
|
||||
cases, one might wish to disable specific optimizations. For example, if it is
|
||||
known that some optimizations cannot benefit a certain regex, it might be
|
||||
desirable to disable them, in order to speed up compilation.
|
||||
</P>
|
||||
<P>
|
||||
The permitted values of <i>directive</i> are as follows:
|
||||
<pre>
|
||||
PCRE2_OPTIMIZATION_FULL
|
||||
</pre>
|
||||
Enable all optional performance optimizations. This is the default value.
|
||||
<pre>
|
||||
PCRE2_OPTIMIZATION_NONE
|
||||
</pre>
|
||||
Disable all optional performance optimizations.
|
||||
<pre>
|
||||
PCRE2_AUTO_POSSESS
|
||||
PCRE2_AUTO_POSSESS_OFF
|
||||
</pre>
|
||||
Enable/disable "auto-possessification" of variable quantifiers such as * and +.
|
||||
This optimization, for example, turns a+b into a++b in order to avoid
|
||||
backtracks into a+ that can never be successful. However, if callouts are in
|
||||
use, auto-possessification means that some callouts are never taken. You can
|
||||
disable this optimization if you want the matching functions to do a full,
|
||||
unoptimized search and run all the callouts.
|
||||
<pre>
|
||||
PCRE2_DOTSTAR_ANCHOR
|
||||
PCRE2_DOTSTAR_ANCHOR_OFF
|
||||
</pre>
|
||||
Enable/disable an optimization that is applied when .* is the first significant
|
||||
item in a top-level branch of a pattern, and all the other branches also start
|
||||
with .* or with \A or \G or ^. Such a pattern is automatically anchored if
|
||||
PCRE2_DOTALL is set for all the .* items and PCRE2_MULTILINE is not set for any
|
||||
^ items. Otherwise, the fact that any match must start either at the start of
|
||||
the subject or following a newline is remembered. Like other optimizations,
|
||||
this can cause callouts to be skipped.
|
||||
</P>
|
||||
<P>
|
||||
Dotstar anchor optimization is automatically disabled for .* if it is inside an
|
||||
atomic group or a capture group that is the subject of a backreference, or if
|
||||
the pattern contains (*PRUNE) or (*SKIP).
|
||||
<pre>
|
||||
PCRE2_START_OPTIMIZE
|
||||
PCRE2_START_OPTIMIZE_OFF
|
||||
</pre>
|
||||
Enable/disable optimizations which cause matching functions to scan the subject
|
||||
string for specific code unit values before attempting a match. For example, if
|
||||
it is known that an unanchored match must start with a specific value, the
|
||||
matching code searches the subject for that value, and fails immediately if it
|
||||
cannot find it, without actually running the main matching function. This means
|
||||
that a special item such as (*COMMIT) at the start of a pattern is not
|
||||
considered until after a suitable starting point for the match has been found.
|
||||
Also, when callouts or (*MARK) items are in use, these "start-up" optimizations
|
||||
can cause them to be skipped if the pattern is never actually used. The start-up
|
||||
optimizations are in effect a pre-scan of the subject that takes place before
|
||||
the pattern is run.
|
||||
</P>
|
||||
<P>
|
||||
Disabling start-up optimizations ensures that in cases where the result is "no
|
||||
match", the callouts do occur, and that items such as (*COMMIT) and (*MARK) are
|
||||
considered at every possible starting position in the subject string.
|
||||
</P>
|
||||
<P>
|
||||
Disabling start-up optimizations may change the outcome of a matching operation.
|
||||
Consider the pattern
|
||||
<pre>
|
||||
(*COMMIT)ABC
|
||||
</pre>
|
||||
When this is compiled, PCRE2 records the fact that a match must start with the
|
||||
character "A". Suppose the subject string is "DEFABC". The start-up
|
||||
optimization scans along the subject, finds "A" and runs the first match
|
||||
attempt from there. The (*COMMIT) item means that the pattern must match the
|
||||
current starting position, which in this case, it does. However, if the same
|
||||
match is run without start-up optimizations, the initial scan along the subject
|
||||
string does not happen. The first match attempt is run starting from "D" and
|
||||
when this fails, (*COMMIT) prevents any further matches being tried, so the
|
||||
overall result is "no match".
|
||||
</P>
|
||||
<P>
|
||||
Another start-up optimization makes use of a minimum length for a matching
|
||||
subject, which is recorded when possible. Consider the pattern
|
||||
<pre>
|
||||
(*MARK:1)B(*MARK:2)(X|Y)
|
||||
</pre>
|
||||
The minimum length for a match is two characters. If the subject is "XXBB", the
|
||||
"starting character" optimization skips "XX", then tries to match "BB", which
|
||||
is long enough. In the process, (*MARK:2) is encountered and remembered. When
|
||||
the match attempt fails, the next "B" is found, but there is only one character
|
||||
left, so there are no more attempts, and "no match" is returned with the "last
|
||||
mark seen" set to "2". Without start-up optimizations, however, matches are
|
||||
tried at every possible starting position, including at the end of the subject,
|
||||
where (*MARK:1) is encountered, but there is no "B", so the "last mark seen"
|
||||
that is returned is "1". In this case, the optimizations do not affect the
|
||||
overall match result, which is still "no match", but they do affect the
|
||||
auxiliary information that is returned.
|
||||
<a name="matchcontext"></a></P>
|
||||
<br><b>
|
||||
The match context
|
||||
@@ -1011,6 +1127,19 @@ made by <b>pcre2_substitute()</b>. Details are given in the section entitled
|
||||
<a href="#substitutions">below.</a>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_substitute_case_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> PCRE2_SIZE (*<i>callout_function</i>)(PCRE2_SPTR, PCRE2_SIZE,</b>
|
||||
<b> PCRE2_UCHAR *, PCRE2_SIZE,</b>
|
||||
<b> int, void *),</b>
|
||||
<b> void *<i>callout_data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
This sets up a callout function for PCRE2 to call when performing case
|
||||
transformations inside <b>pcre2_substitute()</b>. Details are given in the
|
||||
section entitled "Creating a new string with substitutions"
|
||||
<a href="#substitutions">below.</a>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_offset_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||
<br>
|
||||
@@ -1228,7 +1357,10 @@ for the amount of heap memory used by <b>pcre2_match()</b> or
|
||||
The output is a uint32_t integer that is set to one if support for just-in-time
|
||||
compiling is included in the library; otherwise it is set to zero. Note that
|
||||
having the support in the library does not guarantee that JIT will be used for
|
||||
any given match. See the
|
||||
any given match, and neither does it guarantee that JIT will actually be able
|
||||
to function, because it may not be able to allocate executable memory in some
|
||||
environments. There is a special call to <b>pcre2_jit_compile()</b> that can be
|
||||
used to check this. See the
|
||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||
documentation for more details.
|
||||
<pre>
|
||||
@@ -1431,7 +1563,7 @@ respectively, when <b>pcre2_compile()</b> returns NULL because a compilation
|
||||
error has occurred.
|
||||
</P>
|
||||
<P>
|
||||
There are nearly 100 positive error codes that <b>pcre2_compile()</b> may return
|
||||
There are over 100 positive error codes that <b>pcre2_compile()</b> may return
|
||||
if it finds an error in the pattern. There are also some negative error codes
|
||||
that are used for invalid UTF strings when validity checking is in force. These
|
||||
are the same as given by <b>pcre2_match()</b> and <b>pcre2_dfa_match()</b>, and
|
||||
@@ -1539,6 +1671,16 @@ after any internal newline. However, it does not match after a newline at the
|
||||
end of the subject, for compatibility with Perl. If you want a multiline
|
||||
circumflex also to match after a terminating newline, you must set
|
||||
PCRE2_ALT_CIRCUMFLEX.
|
||||
<pre>
|
||||
PCRE2_ALT_EXTENDED_CLASS
|
||||
</pre>
|
||||
Alters the parsing of character classes to follow the extended syntax
|
||||
described by Unicode UTS#18. The PCRE2_ALT_EXTENDED_CLASS option has no impact
|
||||
on the behaviour of the Perl-specific "(?[...])" syntax for extended classes,
|
||||
but instead enables the alternative syntax of extended class behaviour inside
|
||||
ordinary "[...]" character classes. See the
|
||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||
documentation for details of the character classes supported.
|
||||
<pre>
|
||||
PCRE2_ALT_VERBNAMES
|
||||
</pre>
|
||||
@@ -1569,16 +1711,31 @@ letters in the subject. It is equivalent to Perl's /i option, and it can be
|
||||
changed within a pattern by a (?i) option setting. If either PCRE2_UTF or
|
||||
PCRE2_UCP is set, Unicode properties are used for all characters with more than
|
||||
one other case, and for all characters whose code points are greater than
|
||||
U+007F. Note that there are two ASCII characters, K and S, that, in addition to
|
||||
U+007F.
|
||||
</P>
|
||||
<P>
|
||||
Note that there are two ASCII characters, K and S, that, in addition to
|
||||
their lower case ASCII equivalents, are case-equivalent with U+212A (Kelvin
|
||||
sign) and U+017F (long S) respectively. If you do not want this case
|
||||
equivalence, you can suppress it by setting PCRE2_EXTRA_CASELESS_RESTRICT.
|
||||
</P>
|
||||
<P>
|
||||
One language family, Turkish and Azeri, has its own case-insensitivity rules,
|
||||
which can be selected by setting PCRE2_EXTRA_TURKISH_CASING. This alters the
|
||||
behaviour of the 'i', 'I', U+0130 (capital I with dot above), and U+0131
|
||||
(small dotless i) characters.
|
||||
</P>
|
||||
<P>
|
||||
For lower valued characters with only one other case, a lookup table is used
|
||||
for speed. When neither PCRE2_UTF nor PCRE2_UCP is set, a lookup table is used
|
||||
for all code points less than 256, and higher code points (available only in
|
||||
16-bit or 32-bit mode) are treated as not having another case.
|
||||
</P>
|
||||
<P>
|
||||
From release 10.45 PCRE2_CASELESS also affects what some of the letter-related
|
||||
Unicode property escapes (\p and \P) match. The properties Lu (upper case
|
||||
letter), Ll (lower case letter), and Lt (title case letter) are all treated as
|
||||
LC (cased letter) when PCRE2_CASELESS is set.
|
||||
<pre>
|
||||
PCRE2_DOLLAR_ENDONLY
|
||||
</pre>
|
||||
@@ -1775,7 +1932,7 @@ This option locks out the use of Unicode properties for handling \B, \b, \D,
|
||||
for the PCRE2_UCP option below. In particular, it prevents the creator of the
|
||||
pattern from enabling this facility by starting the pattern with (*UCP). This
|
||||
option may be useful in applications that process patterns from external
|
||||
sources. The option combination PCRE_UCP and PCRE_NEVER_UCP causes an error.
|
||||
sources. The option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error.
|
||||
<pre>
|
||||
PCRE2_NEVER_UTF
|
||||
</pre>
|
||||
@@ -1798,85 +1955,57 @@ though the reference can be by name or by number.
|
||||
<pre>
|
||||
PCRE2_NO_AUTO_POSSESS
|
||||
</pre>
|
||||
If this option is set, it disables "auto-possessification", which is an
|
||||
optimization that, for example, turns a+b into a++b in order to avoid
|
||||
If this (deprecated) option is set, it disables "auto-possessification", which
|
||||
is an optimization that, for example, turns a+b into a++b in order to avoid
|
||||
backtracks into a+ that can never be successful. However, if callouts are in
|
||||
use, auto-possessification means that some callouts are never taken. You can
|
||||
set this option if you want the matching functions to do a full unoptimized
|
||||
search and run all the callouts, but it is mainly provided for testing
|
||||
purposes.
|
||||
</P>
|
||||
<P>
|
||||
If a compile context is available, it is recommended to use
|
||||
<b>pcre2_set_optimize()</b> with the <i>directive</i> PCRE2_AUTO_POSSESS_OFF rather
|
||||
than the compile option PCRE2_NO_AUTO_POSSESS. Note that PCRE2_NO_AUTO_POSSESS
|
||||
takes precedence over the <b>pcre2_set_optimize()</b> optimization directives
|
||||
PCRE2_AUTO_POSSESS and PCRE2_AUTO_POSSESS_OFF.
|
||||
<pre>
|
||||
PCRE2_NO_DOTSTAR_ANCHOR
|
||||
</pre>
|
||||
If this option is set, it disables an optimization that is applied when .* is
|
||||
the first significant item in a top-level branch of a pattern, and all the
|
||||
other branches also start with .* or with \A or \G or ^. The optimization is
|
||||
automatically disabled for .* if it is inside an atomic group or a capture
|
||||
group that is the subject of a backreference, or if the pattern contains
|
||||
(*PRUNE) or (*SKIP). When the optimization is not disabled, such a pattern is
|
||||
automatically anchored if PCRE2_DOTALL is set for all the .* items and
|
||||
PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that any match
|
||||
must start either at the start of the subject or following a newline is
|
||||
If this (deprecated) option is set, it disables an optimization that is applied
|
||||
when .* is the first significant item in a top-level branch of a pattern, and
|
||||
all the other branches also start with .* or with \A or \G or ^. The
|
||||
optimization is automatically disabled for .* if it is inside an atomic group
|
||||
or a capture group that is the subject of a backreference, or if the pattern
|
||||
contains (*PRUNE) or (*SKIP). When the optimization is not disabled, such a
|
||||
pattern is automatically anchored if PCRE2_DOTALL is set for all the .* items
|
||||
and PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that any
|
||||
match must start either at the start of the subject or following a newline is
|
||||
remembered. Like other optimizations, this can cause callouts to be skipped.
|
||||
(If a compile context is available, it is recommended to use
|
||||
<b>pcre2_set_optimize()</b> with the <i>directive</i> PCRE2_DOTSTAR_ANCHOR_OFF
|
||||
instead.)
|
||||
<pre>
|
||||
PCRE2_NO_START_OPTIMIZE
|
||||
</pre>
|
||||
This is an option whose main effect is at matching time. It does not change
|
||||
what <b>pcre2_compile()</b> generates, but it does affect the output of the JIT
|
||||
compiler.
|
||||
compiler. Setting this option is equivalent to calling <b>pcre2_set_optimize()</b>
|
||||
with the <i>directive</i> parameter set to PCRE2_START_OPTIMIZE_OFF.
|
||||
</P>
|
||||
<P>
|
||||
There are a number of optimizations that may occur at the start of a match, in
|
||||
order to speed up the process. For example, if it is known that an unanchored
|
||||
match must start with a specific code unit value, the matching code searches
|
||||
the subject for that value, and fails immediately if it cannot find it, without
|
||||
actually running the main matching function. This means that a special item
|
||||
such as (*COMMIT) at the start of a pattern is not considered until after a
|
||||
suitable starting point for the match has been found. Also, when callouts or
|
||||
(*MARK) items are in use, these "start-up" optimizations can cause them to be
|
||||
skipped if the pattern is never actually used. The start-up optimizations are
|
||||
actually running the main matching function. The start-up optimizations are
|
||||
in effect a pre-scan of the subject that takes place before the pattern is run.
|
||||
</P>
|
||||
<P>
|
||||
The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizations,
|
||||
possibly causing performance to suffer, but ensuring that in cases where the
|
||||
result is "no match", the callouts do occur, and that items such as (*COMMIT)
|
||||
and (*MARK) are considered at every possible starting position in the subject
|
||||
string.
|
||||
</P>
|
||||
<P>
|
||||
Setting PCRE2_NO_START_OPTIMIZE may change the outcome of a matching operation.
|
||||
Consider the pattern
|
||||
<pre>
|
||||
(*COMMIT)ABC
|
||||
</pre>
|
||||
When this is compiled, PCRE2 records the fact that a match must start with the
|
||||
character "A". Suppose the subject string is "DEFABC". The start-up
|
||||
optimization scans along the subject, finds "A" and runs the first match
|
||||
attempt from there. The (*COMMIT) item means that the pattern must match the
|
||||
current starting position, which in this case, it does. However, if the same
|
||||
match is run with PCRE2_NO_START_OPTIMIZE set, the initial scan along the
|
||||
subject string does not happen. The first match attempt is run starting from
|
||||
"D" and when this fails, (*COMMIT) prevents any further matches being tried, so
|
||||
the overall result is "no match".
|
||||
</P>
|
||||
<P>
|
||||
Another start-up optimization makes use of a minimum length for a matching
|
||||
subject, which is recorded when possible. Consider the pattern
|
||||
<pre>
|
||||
(*MARK:1)B(*MARK:2)(X|Y)
|
||||
</pre>
|
||||
The minimum length for a match is two characters. If the subject is "XXBB", the
|
||||
"starting character" optimization skips "XX", then tries to match "BB", which
|
||||
is long enough. In the process, (*MARK:2) is encountered and remembered. When
|
||||
the match attempt fails, the next "B" is found, but there is only one character
|
||||
left, so there are no more attempts, and "no match" is returned with the "last
|
||||
mark seen" set to "2". If NO_START_OPTIMIZE is set, however, matches are tried
|
||||
at every possible starting position, including at the end of the subject, where
|
||||
(*MARK:1) is encountered, but there is no "B", so the "last mark seen" that is
|
||||
returned is "1". In this case, the optimizations do not affect the overall
|
||||
match result, which is still "no match", but they do affect the auxiliary
|
||||
information that is returned.
|
||||
Disabling the start-up optimizations may cause performance to suffer. However,
|
||||
this may be desirable for patterns which contain callouts or items such as
|
||||
(*COMMIT) and (*MARK). See the above description of PCRE2_START_OPTIMIZE_OFF
|
||||
for further details.
|
||||
<pre>
|
||||
PCRE2_NO_UTF_CHECK
|
||||
</pre>
|
||||
@@ -1931,9 +2060,16 @@ The second effect of PCRE2_UCP is to force the use of Unicode properties for
|
||||
upper/lower casing operations, even when PCRE2_UTF is not set. This makes it
|
||||
possible to process strings in the 16-bit UCS-2 code. This option is available
|
||||
only if PCRE2 has been compiled with Unicode support (which is the default).
|
||||
The PCRE2_EXTRA_CASELESS_RESTRICT option (see below) restricts caseless
|
||||
</P>
|
||||
<P>
|
||||
The PCRE2_EXTRA_CASELESS_RESTRICT option (see above) restricts caseless
|
||||
matching such that ASCII characters match only ASCII characters and non-ASCII
|
||||
characters match only non-ASCII characters.
|
||||
characters match only non-ASCII characters. The PCRE2_EXTRA_TURKISH_CASING option
|
||||
(see above) alters the matching of the 'i' characters to follow their behaviour
|
||||
in Turkish and Azeri languages. For further details on
|
||||
PCRE2_EXTRA_CASELESS_RESTRICT and PCRE2_EXTRA_TURKISH_CASING, see the
|
||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||
page.
|
||||
<pre>
|
||||
PCRE2_UNGREEDY
|
||||
</pre>
|
||||
@@ -2070,7 +2206,8 @@ characters. The ASCII letter S is case-equivalent to U+017f (long S) and the
|
||||
ASCII letter K is case-equivalent to U+212a (Kelvin sign). This option disables
|
||||
recognition of case-equivalences that cross the ASCII/non-ASCII boundary. In a
|
||||
caseless match, both characters must either be ASCII or non-ASCII. The option
|
||||
can be changed with a pattern by the (?r) option setting.
|
||||
can be changed within a pattern by the (*CASELESS_RESTRICT) or (?r) option
|
||||
settings.
|
||||
<pre>
|
||||
PCRE2_EXTRA_ESCAPED_CR_IS_LF
|
||||
</pre>
|
||||
@@ -2097,6 +2234,34 @@ and the end. This is achieved by automatically inserting the code for "\b(?:"
|
||||
at the start of the compiled pattern and ")\b" at the end. The option may be
|
||||
used with PCRE2_LITERAL. However, it is ignored if PCRE2_EXTRA_MATCH_LINE is
|
||||
also set.
|
||||
<pre>
|
||||
PCRE2_EXTRA_NO_BS0
|
||||
</pre>
|
||||
If this option is set (note that its final character is the digit 0) it locks
|
||||
out the use of the sequence \0 unless at least one more octal digit follows.
|
||||
<pre>
|
||||
PCRE2_EXTRA_PYTHON_OCTAL
|
||||
</pre>
|
||||
If this option is set, PCRE2 follows Python's rules for interpreting octal
|
||||
escape sequences. The rules for handling sequences such as \14, which could
|
||||
be an octal number or a back reference, are different. Details are given in the
|
||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||
documentation.
|
||||
<pre>
|
||||
PCRE2_EXTRA_NEVER_CALLOUT
|
||||
</pre>
|
||||
If this option is set, PCRE2 treats callouts in the pattern as a syntax error,
|
||||
returning PCRE2_ERROR_CALLOUT_CALLER_DISABLED. This is useful if the application
|
||||
knows that a callout will not be provided to <b>pcre2_match()</b>, so that
|
||||
callouts in the pattern are not silently ignored.
|
||||
<pre>
|
||||
PCRE2_EXTRA_TURKISH_CASING
|
||||
</pre>
|
||||
This option alters case-equivalence of the 'i' letters to follow the
|
||||
alphabet used by Turkish and Azeri languages. The option can be changed within
|
||||
a pattern by the (*TURKISH_CASING) start-of-pattern setting. Either the UTF or
|
||||
UCP options must be set. In the 8-bit library, UTF must be set. This option
|
||||
cannot be combined with PCRE2_EXTRA_CASELESS_RESTRICT.
|
||||
<a name="jitcompiling"></a></P>
|
||||
<br><a name="SEC21" href="#TOC1">JUST-IN-TIME (JIT) COMPILATION</a><br>
|
||||
<P>
|
||||
@@ -2303,6 +2468,7 @@ following are true:
|
||||
PCRE2_DOTALL is in force for .*
|
||||
Neither (*PRUNE) nor (*SKIP) appears in the pattern
|
||||
PCRE2_NO_DOTSTAR_ANCHOR is not set
|
||||
Dotstar anchoring has not been disabled with PCRE2_DOTSTAR_ANCHOR_OFF
|
||||
</pre>
|
||||
For patterns that are auto-anchored, the PCRE2_ANCHORED bit is set in the
|
||||
options returned for PCRE2_INFO_ALLOPTIONS.
|
||||
@@ -3646,9 +3812,10 @@ PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is
|
||||
too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If
|
||||
this option is set, however, <b>pcre2_substitute()</b> continues to go through
|
||||
the motions of matching and substituting (without, of course, writing anything)
|
||||
in order to compute the size of buffer that is needed. This value is passed
|
||||
back via the <i>outlengthptr</i> variable, with the result of the function still
|
||||
being PCRE2_ERROR_NOMEMORY.
|
||||
in order to compute the size of buffer that is needed, which will include the
|
||||
extra space for the terminating NUL. This value is passed back via the
|
||||
<i>outlengthptr</i> variable, with the result of the function still being
|
||||
PCRE2_ERROR_NOMEMORY.
|
||||
</P>
|
||||
<P>
|
||||
Passing a buffer size of zero is a permitted way of finding out how much memory
|
||||
@@ -3667,18 +3834,26 @@ If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not interpreted
|
||||
in any way. By default, however, a dollar character is an escape character that
|
||||
can specify the insertion of characters from capture groups and names from
|
||||
(*MARK) or other control verbs in the pattern. Dollar is the only escape
|
||||
character (backslash is treated as literal). The following forms are always
|
||||
character (backslash is treated as literal). The following forms are
|
||||
recognized:
|
||||
<pre>
|
||||
$$ insert a dollar character
|
||||
$<n> or ${<n>} insert the contents of group <n>
|
||||
$n or ${n} insert the contents of group <i>n</i>
|
||||
$0 or $& insert the entire matched substring
|
||||
$` insert the substring that precedes the match
|
||||
$' insert the substring that follows the match
|
||||
$_ insert the entire input string
|
||||
$*MARK or ${*MARK} insert a control verb name
|
||||
</pre>
|
||||
Either a group number or a group name can be given for <n>. Curly brackets are
|
||||
required only if the following character would be interpreted as part of the
|
||||
number or name. The number may be zero to include the entire matched string.
|
||||
For example, if the pattern a(b)c is matched with "=abc=" and the replacement
|
||||
string "+$1$0$1+", the result is "=+babcb+=".
|
||||
Either a group number or a group name can be given for <i>n</i>, for example $2 or
|
||||
$NAME. Curly brackets are required only if the following character would be
|
||||
interpreted as part of the number or name. The number may be zero to include
|
||||
the entire matched string. For example, if the pattern a(b)c is matched with
|
||||
"=abc=" and the replacement string "+$1$0$1+", the result is "=+babcb+=".
|
||||
</P>
|
||||
<P>
|
||||
The JavaScript form $<name>, where the angle brackets are part of the syntax,
|
||||
is also recognized for group names, but not for group numbers or *MARK.
|
||||
</P>
|
||||
<P>
|
||||
$*MARK inserts the name from the last encountered backtracking control verb on
|
||||
@@ -3732,28 +3907,53 @@ not influence the extended substitution syntax described below.
|
||||
PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the
|
||||
replacement string. Without this option, only the dollar character is special,
|
||||
and only the group insertion forms listed above are valid. When
|
||||
PCRE2_SUBSTITUTE_EXTENDED is set, two things change:
|
||||
PCRE2_SUBSTITUTE_EXTENDED is set, several things change:
|
||||
</P>
|
||||
<P>
|
||||
Firstly, backslash in a replacement string is interpreted as an escape
|
||||
character. The usual forms such as \n or \x{ddd} can be used to specify
|
||||
particular character codes, and backslash followed by any non-alphanumeric
|
||||
character quotes that character. Extended quoting can be coded using \Q...\E,
|
||||
exactly as in pattern strings.
|
||||
character. The usual forms such as \x{ddd} can be used to specify particular
|
||||
character codes, and backslash followed by any non-alphanumeric character
|
||||
quotes that character. Extended quoting can be coded using \Q...\E, exactly
|
||||
as in pattern strings. The escapes \b and \v are interpreted as the
|
||||
characters backspace and vertical tab, respectively.
|
||||
</P>
|
||||
<P>
|
||||
The interpretation of backslash followed by one or more digits is the same as
|
||||
in a pattern, which in Perl has some ambiguities. Details are given in the
|
||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||
page.
|
||||
</P>
|
||||
<P>
|
||||
The Python form \g<n>, where the angle brackets are part of the syntax and <i>n</i>
|
||||
is either a group name or number, is recognized as an alternative way of
|
||||
inserting the contents of a group, for example \g<3>.
|
||||
</P>
|
||||
<P>
|
||||
There are also four escape sequences for forcing the case of inserted letters.
|
||||
The insertion mechanism has three states: no case forcing, force upper case,
|
||||
and force lower case. The escape sequences change the current state: \U and
|
||||
\L change to upper or lower case forcing, respectively, and \E (when not
|
||||
terminating a \Q quoted sequence) reverts to no case forcing. The sequences
|
||||
\u and \l force the next character (if it is a letter) to upper or lower
|
||||
case, respectively, and then the state automatically reverts to no case
|
||||
forcing. Case forcing applies to all inserted characters, including those from
|
||||
capture groups and letters within \Q...\E quoted sequences. If either
|
||||
PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode
|
||||
Case forcing applies to all inserted characters, including those from capture
|
||||
groups and letters within \Q...\E quoted sequences. The insertion mechanism
|
||||
has three states: no case forcing, force upper case, and force lower case. The
|
||||
escape sequences change the current state: \U and \L change to upper or lower
|
||||
case forcing, respectively, and \E (when not terminating a \Q quoted
|
||||
sequence) reverts to no case forcing. The sequences \u and \l force the next
|
||||
character (if it is a letter) to upper or lower case, respectively, and then
|
||||
the state automatically reverts to no case forcing.
|
||||
</P>
|
||||
<P>
|
||||
However, if \u is immediately followed by \L or \l is immediately followed
|
||||
by \U, the next character's case is forced by the first escape sequence, and
|
||||
subsequent characters by the second. This provides a "title casing" facility
|
||||
that can be applied to group captures. For example, if group 1 has captured
|
||||
"heLLo", the replacement string "\u\L$1" becomes "Hello".
|
||||
</P>
|
||||
<P>
|
||||
If either PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode
|
||||
properties are used for case forcing characters whose code points are greater
|
||||
than 127.
|
||||
than 127. However, only simple case folding, as determined by the Unicode file
|
||||
<b>CaseFolding.txt</b>, is supported. PCRE2 does not support language-specific
|
||||
special casing rules such as using different lower case Greek sigmas in the
|
||||
middle and ends of words (as defined in the Unicode file
|
||||
<b>SpecialCasing.txt</b>).
|
||||
</P>
|
||||
<P>
|
||||
Note that case forcing sequences such as \U...\E do not nest. For example,
|
||||
@@ -3762,20 +3962,20 @@ effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do
|
||||
not apply to replacement strings.
|
||||
</P>
|
||||
<P>
|
||||
The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
|
||||
The final effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
|
||||
flexibility to capture group substitution. The syntax is similar to that used
|
||||
by Bash:
|
||||
<pre>
|
||||
${<n>:-<string>}
|
||||
${<n>:+<string1>:<string2>}
|
||||
${n:-string}
|
||||
${n:+string1:string2}
|
||||
</pre>
|
||||
As before, <n> may be a group number or a name. The first form specifies a
|
||||
default value. If group <n> is set, its value is inserted; if not, <string> is
|
||||
expanded and the result inserted. The second form specifies strings that are
|
||||
expanded and inserted when group <n> is set or unset, respectively. The first
|
||||
form is just a convenient shorthand for
|
||||
As in the simple case, <i>n</i> may be a group number or a name. The first form
|
||||
specifies a default value. If group <i>n</i> is set, its value is inserted; if
|
||||
not, the string is expanded and the result inserted. The second form specifies
|
||||
strings that are expanded and inserted when group <i>n</i> is set or unset,
|
||||
respectively. The first form is just a convenient shorthand for
|
||||
<pre>
|
||||
${<n>:+${<n>}:<string>}
|
||||
${n:+${n}:string}
|
||||
</pre>
|
||||
Backslash can be used to escape colons and closing curly brackets in the
|
||||
replacement strings. A change of the case forcing state within a replacement
|
||||
@@ -3852,9 +4052,18 @@ Substitution callouts
|
||||
The <b>pcre2_set_substitution_callout()</b> function can be used to specify a
|
||||
callout function for <b>pcre2_substitute()</b>. This information is passed in
|
||||
a match context. The callout function is called after each substitution has
|
||||
been processed, but it can cause the replacement not to happen. The callout
|
||||
function is not called for simulated substitutions that happen as a result of
|
||||
the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option.
|
||||
been processed, but it can cause the replacement not to happen.
|
||||
</P>
|
||||
<P>
|
||||
The callout function is not called for simulated substitutions that happen as a
|
||||
result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. In this mode, when
|
||||
substitution processing exceeds the buffer space provided by the caller,
|
||||
processing continues by counting code units. The simulation is unable to
|
||||
populate the callout block, and so the simulation is pessimistic about the
|
||||
required buffer size. Whichever is larger of accepted or rejected substitution
|
||||
is reported as the required size. Therefore, the returned buffer length may be
|
||||
an overestimate (without a substitution callout, it is normally an exact
|
||||
measurement).
|
||||
</P>
|
||||
<P>
|
||||
The first argument of the callout function is a pointer to a substitute callout
|
||||
@@ -3903,6 +4112,107 @@ PCRE2_SUBSTITUTE_GLOBAL is not set), the rest of the input is copied to the
|
||||
output and the call to <b>pcre2_substitute()</b> exits, returning the number of
|
||||
matches so far.
|
||||
</P>
|
||||
<br><b>
|
||||
Substitution case callouts
|
||||
</b><br>
|
||||
<P>
|
||||
<b>int pcre2_set_substitute_case_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> PCRE2_SIZE (*<i>callout_function</i>)(PCRE2_SPTR, PCRE2_SIZE,</b>
|
||||
<b> PCRE2_UCHAR *, PCRE2_SIZE,</b>
|
||||
<b> int, void *),</b>
|
||||
<b> void *<i>callout_data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
The <b>pcre2_set_substitute_case_callout()</b> function can be used to specify
|
||||
a callout function for <b>pcre2_substitute()</b> to use when performing case
|
||||
transformations. This does not affect any case insensitivity behaviour when
|
||||
performing a match, but only the user-visible transformations performed when
|
||||
processing a substitution such as:
|
||||
<pre>
|
||||
pcre2_substitute(..., "\\U$1", ...)
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
The default case transformations applied by PCRE2 are reasonably complete, and,
|
||||
in UTF or UCP mode, perform the simple locale-invariant case transformations as
|
||||
specified by Unicode. This is suitable for the internal (invisible)
|
||||
case-equivalence procedures used during pattern matching, but an application
|
||||
may wish to use more sophisticated locale-aware processing for the user-visible
|
||||
substitution transformations.
|
||||
</P>
|
||||
<P>
|
||||
One example implementation of the <i>callout_function</i> using the ICU
|
||||
library would be:
|
||||
<br>
|
||||
<br>
|
||||
<pre>
|
||||
PCRE2_SIZE
|
||||
icu_case_callout(
|
||||
PCRE2_SPTR input, PCRE2_SIZE input_len,
|
||||
PCRE2_UCHAR *output, PCRE2_SIZE output_cap,
|
||||
int to_case, void *data_ptr)
|
||||
{
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
int32_t r = to_case == PCRE2_SUBSTITUTE_CASE_LOWER
|
||||
? u_strToLower(output, output_cap, input, input_len, NULL, &err)
|
||||
: to_case == PCRE2_SUBSTITUTE_CASE_UPPER
|
||||
? u_strToUpper(output, output_cap, input, input_len, NULL, &err)
|
||||
: u_strToTitle(output, output_cap, input, input_len, &first_char_only,
|
||||
NULL, &err);
|
||||
if (U_FAILURE(err)) return (~(PCRE2_SIZE)0);
|
||||
return r;
|
||||
}
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
The first and second arguments of the case callout function are the Unicode
|
||||
string to transform.
|
||||
</P>
|
||||
<P>
|
||||
The third and fourth arguments are the output buffer and its capacity.
|
||||
</P>
|
||||
<P>
|
||||
The fifth is one of the constants PCRE2_SUBSTITUTE_CASE_LOWER,
|
||||
PCRE2_SUBSTITUTE_CASE_UPPER, or PCRE2_SUBSTITUTE_CASE_TITLE_FIRST.
|
||||
PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the
|
||||
callout to indicate that the case of the entire callout input should be
|
||||
case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that
|
||||
only the first character or glyph should be transformed to Unicode titlecase
|
||||
and the rest to Unicode lowercase (note that titlecasing sometimes uses Unicode
|
||||
properties to titlecase each word in a string; but PCRE2 is requesting that only
|
||||
the single leading character is to be titlecased).
|
||||
</P>
|
||||
<P>
|
||||
The sixth argument is the <i>callout_data</i> supplied to
|
||||
<b>pcre2_set_substitute_case_callout()</b>.
|
||||
</P>
|
||||
<P>
|
||||
The resulting string in the destination buffer may be larger or smaller than the
|
||||
input, if the casing rules merge or split characters. The return value is the
|
||||
length required for the output string. If a buffer of sufficient size was
|
||||
provided to the callout, then the result must be written to the buffer and the
|
||||
number of code units returned. If the result does not fit in the provided
|
||||
buffer, then the required capacity must be returned and PCRE2 will not make use
|
||||
of the output buffer. PCRE2 provides input and output buffers which overlap, so
|
||||
the callout must support this by suitable internal buffering.
|
||||
</P>
|
||||
<P>
|
||||
Alternatively, if the callout wishes to indicate an error, then it may return
|
||||
(~(PCRE2_SIZE)0). In this case pcre2_substitute() will immediately fail with
|
||||
error PCRE2_ERROR_REPLACECASE.
|
||||
</P>
|
||||
<P>
|
||||
When a case callout is combined with the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
option, there are situations when pcre2_substitute() will return an
|
||||
underestimate of the required buffer size. If you call pcre2_substitute() once
|
||||
with PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, and the input buffer is too small for
|
||||
the replacement string to be constructed, then instead of calling the case
|
||||
callout, pcre2_substitute() will make an estimate of the required buffer size.
|
||||
The second call should also pass PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, because that
|
||||
second call is not guaranteed to succeed either, if the case callout requires
|
||||
more buffer space than expected. The caller must make repeated attempts in a
|
||||
loop.
|
||||
</P>
|
||||
<br><a name="SEC38" href="#TOC1">DUPLICATE CAPTURE GROUP NAMES</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
|
||||
@@ -4177,7 +4487,7 @@ Cambridge, England.
|
||||
</P>
|
||||
<br><a name="SEC43" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 24 April 2024
|
||||
Last updated: 26 December 2024
|
||||
<br>
|
||||
Copyright © 1997-2024 University of Cambridge.
|
||||
<br>
|
||||
|
||||
@@ -643,7 +643,7 @@ Cambridge, England.
|
||||
</P>
|
||||
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 15 April 2024
|
||||
Last updated: 16 April 2024
|
||||
<br>
|
||||
Copyright © 1997-2024 University of Cambridge.
|
||||
<br>
|
||||
|
||||
@@ -71,7 +71,7 @@ interprets them.
|
||||
7. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is
|
||||
built with Unicode support (the default). The properties that can be tested
|
||||
with \p and \P are limited to the general category properties such as Lu and
|
||||
Nd, the derived properties Any and LC (synonym L&), script names such as Greek
|
||||
Nd, the derived properties Any and Lc (synonym L&), script names such as Greek
|
||||
or Han, Bidi_Class, Bidi_Control, and a few binary properties. Both PCRE2 and
|
||||
Perl support the Cs (surrogate) property, but in PCRE2 its use is limited. See
|
||||
the
|
||||
@@ -99,7 +99,12 @@ following examples:
|
||||
\Q\\E \ \\E
|
||||
</pre>
|
||||
The \Q...\E sequence is recognized both inside and outside character classes
|
||||
by both PCRE2 and Perl.
|
||||
by both PCRE2 and Perl. Another difference from Perl is that any appearance of
|
||||
\Q or \E inside what might otherwise be a quantifier causes PCRE2 not to
|
||||
recognize the sequence as a quantifier. Perl recognizes a quantifier if
|
||||
(redundantly) either of the numbers is inside \Q...\E, but not if the
|
||||
separating comma is. When not recognized as a quantifier a sequence such as
|
||||
{\Q1\E,2} is treated as the literal string "{1,2}".
|
||||
</P>
|
||||
<P>
|
||||
9. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
|
||||
@@ -120,7 +125,9 @@ confined to that group; it does not extend to the surrounding pattern. This is
|
||||
not always the case in Perl. In particular, if (*THEN) is present in a group
|
||||
that is called as a subroutine, its action is limited to that group, even if
|
||||
the group does not contain any | characters. Note that such groups are
|
||||
processed as anchored at the point where they are tested.
|
||||
processed as anchored at the point where they are tested. PCRE2 also confines
|
||||
all control verbs within atomic assertions, again including (*THEN) in
|
||||
assertions with only one branch.
|
||||
</P>
|
||||
<P>
|
||||
12. If a pattern contains more than one backtracking control verb, the first
|
||||
@@ -159,11 +166,11 @@ warning features, so it gives an error in these cases because they are almost
|
||||
certainly user mistakes.
|
||||
</P>
|
||||
<P>
|
||||
17. In PCRE2, the upper/lower case character properties Lu and Ll are not
|
||||
affected when case-independent matching is specified. For example, \p{Lu}
|
||||
always matches an upper case letter. I think Perl has changed in this respect;
|
||||
in the release at the time of writing (5.38), \p{Lu} and \p{Ll} match all
|
||||
letters, regardless of case, when case independence is specified.
|
||||
17. In PCRE2, until release 10.45, the upper/lower case character properties Lu
|
||||
and Ll were not affected when case-independent matching was specified. Perl has
|
||||
changed in this respect, and PCRE2 has now changed to match. When caseless
|
||||
matching is in force, Lu, Ll, and Lt (title case) are all treated as Lc (cased
|
||||
letter).
|
||||
</P>
|
||||
<P>
|
||||
18. From release 5.32.0, Perl locks out the use of \K in lookaround
|
||||
@@ -231,6 +238,10 @@ and condition references such as (?(4)...). PCRE2 supports relative group
|
||||
numbers such as +2 and -4 in all three cases. Perl supports both plus and minus
|
||||
for subroutine calls, but only minus for back references, and no relative
|
||||
numbering at all for conditions.
|
||||
<br>
|
||||
<br>
|
||||
(m) The scan substring assertion (syntax (*scs:(n)...)) is a PCRE2 extension
|
||||
that is not available in Perl.
|
||||
</P>
|
||||
<P>
|
||||
20. Perl has different limits than PCRE2. See the
|
||||
@@ -252,6 +263,18 @@ handled by PCRE2, either by the interpreter or the JIT. An example is
|
||||
/(?:|(?0)abcd)(?(R)|\z)/, which matches a sequence of any number of repeated
|
||||
"abcd" substrings at the end of the subject.
|
||||
</P>
|
||||
<P>
|
||||
23. Both PCRE2 and Perl error when \x{ escapes are invalid, but Perl tries to
|
||||
recover and prints a warning if the problem was that an invalid hexadecimal
|
||||
digit was found; since PCRE2 doesn't have warnings, it returns an error instead.
|
||||
Additionally, Perl accepts \x{} and generates NUL unlike PCRE2.
|
||||
</P>
|
||||
<P>
|
||||
24. From release 10.45, PCRE2 gives an error if \x is not followed by a
|
||||
hexadecimal digit or a curly bracket. It used to interpret this as the NUL
|
||||
character. Perl still generates NUL, but warns when in warning mode in most
|
||||
cases.
|
||||
</P>
|
||||
<br><b>
|
||||
AUTHOR
|
||||
</b><br>
|
||||
@@ -267,9 +290,9 @@ Cambridge, England.
|
||||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 30 November 2023
|
||||
Last updated: 02 October 2024
|
||||
<br>
|
||||
Copyright © 1997-2023 University of Cambridge.
|
||||
Copyright © 1997-2024 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
|
||||
@@ -182,7 +182,7 @@ Cambridge, England.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 28 June 2018
|
||||
Last updated: 14 November 2023
|
||||
<br>
|
||||
Copyright © 1997-2018 University of Cambridge.
|
||||
<br>
|
||||
|
||||
@@ -391,9 +391,10 @@ Read patterns from the file, one per line. As is the case with patterns on the
|
||||
command line, no delimiters should be used. What constitutes a newline when
|
||||
reading the file is the operating system's default interpretation of \n. The
|
||||
<b>--newline</b> option has no effect on this option. Trailing white space is
|
||||
removed from each line, and blank lines are ignored. An empty file contains no
|
||||
removed from each line, and blank lines are ignored unless the
|
||||
<b>--posix-pattern-file</b> option is also provided. An empty file contains no
|
||||
patterns and therefore matches nothing. Patterns read from a file in this way
|
||||
may contain binary zeros, which are treated as ordinary data characters.
|
||||
may contain binary zeros, which are treated as ordinary character literals.
|
||||
<br>
|
||||
<br>
|
||||
If this option is given more than once, all the specified files are read. A
|
||||
@@ -723,9 +724,9 @@ text.
|
||||
<br>
|
||||
<br>
|
||||
$<digits> or ${<digits>} is replaced by the captured substring of the given
|
||||
decimal number; zero substitutes the whole match. If the number is greater than
|
||||
the number of capturing substrings, or if the capture is unset, the replacement
|
||||
is empty.
|
||||
decimal number; $& (or the legacy $0) substitutes the whole match. If the
|
||||
number is greater than the number of capturing substrings, or if the capture
|
||||
is unset, the replacement is empty.
|
||||
<br>
|
||||
<br>
|
||||
$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
|
||||
@@ -808,6 +809,15 @@ when in UCP mode, the sequence (?aP) restricts [:word:] to ASCII letters, while
|
||||
allowing \w to match Unicode letters and digits.
|
||||
</P>
|
||||
<P>
|
||||
<b>--posix-pattern-file</b>
|
||||
When patterns are provided with the <b>-f</b> option, do not trim trailing
|
||||
spaces or ignore empty lines in a similar way to other grep tools. To keep
|
||||
the behaviour consistent with older versions, if the pattern read was
|
||||
terminated with CRLF (as character literals) then both characters won't be
|
||||
included as part of it, so if you really need to have a pattern ending in '\r',
|
||||
use an escape sequence or provide it by a different method.
|
||||
</P>
|
||||
<P>
|
||||
<b>-q</b>, <b>--quiet</b>
|
||||
Work quietly, that is, display nothing except error messages. The exit
|
||||
status indicates whether or not any matches were found.
|
||||
@@ -993,7 +1003,7 @@ scripts or echoing specific strings during matching by making use of PCRE2's
|
||||
callout facility. However, this support can be completely or partially disabled
|
||||
when <b>pcre2grep</b> is built. You can find out whether your binary has support
|
||||
for callouts by running it with the <b>--help</b> option. If callout support is
|
||||
completely disabled, all callouts in patterns are ignored by <b>pcre2grep</b>.
|
||||
completely disabled, callouts in patterns are forbidden by <b>pcre2grep</b>.
|
||||
If the facility is partially disabled, calling external programs is not
|
||||
supported, and callouts that request it are ignored.
|
||||
</P>
|
||||
@@ -1015,9 +1025,9 @@ available, provided that callouts were not completely disabled when
|
||||
zero-terminated string, which means it should not contain any internal binary
|
||||
zeros. It is written to the output, having first been passed through the same
|
||||
escape processing as text from the <b>--output</b> (<b>-O</b>) option (see
|
||||
above). However, $0 cannot be used to insert a matched substring because the
|
||||
match is still in progress. Instead, the single character '0' is inserted. Any
|
||||
syntax errors in the string (for example, a dollar not followed by another
|
||||
above). However, $0 or $& cannot be used to insert a matched substring because
|
||||
the match is still in progress. Instead, the single character '0' is inserted.
|
||||
Any syntax errors in the string (for example, a dollar not followed by another
|
||||
character) cause the callout to be ignored. No terminator is added to the
|
||||
output string, so if you want a newline, you must include it explicitly using
|
||||
the escape $n. For example:
|
||||
@@ -1047,9 +1057,9 @@ arguments:
|
||||
</pre>
|
||||
Any substring (including the executable name) may contain escape sequences
|
||||
started by a dollar character. These are the same as for the <b>--output</b>
|
||||
(<b>-O</b>) option documented above, except that $0 cannot insert the matched
|
||||
string because the match is still in progress. Instead, the character '0'
|
||||
is inserted. If you need a literal dollar or pipe character in any
|
||||
(<b>-O</b>) option documented above, except that $0 or $& cannot insert the
|
||||
matched string because the match is still in progress. Instead, the character
|
||||
'0' is inserted. If you need a literal dollar or pipe character in any
|
||||
substring, use $$ or $| respectively. Here is an example:
|
||||
<pre>
|
||||
echo -e "abcde\n12345" | pcre2grep \
|
||||
@@ -1116,7 +1126,7 @@ Cambridge, England.
|
||||
</P>
|
||||
<br><a name="SEC16" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 22 December 2023
|
||||
Last updated: 04 February 2025
|
||||
<br>
|
||||
Copyright © 1997-2023 University of Cambridge.
|
||||
<br>
|
||||
|
||||
@@ -64,7 +64,7 @@ platforms:
|
||||
If --enable-jit is set on an unsupported platform, compilation fails.
|
||||
</P>
|
||||
<P>
|
||||
A client program can tell if JIT support is available by calling
|
||||
A client program can tell if JIT support has been compiled by calling
|
||||
<b>pcre2_config()</b> with the PCRE2_CONFIG_JIT option. The result is one if
|
||||
PCRE2 was built with JIT support, and zero otherwise. However, having the JIT
|
||||
code available does not guarantee that it will be used for any particular
|
||||
@@ -72,11 +72,19 @@ match. One reason for this is that there are a number of options and pattern
|
||||
items that are
|
||||
<a href="#unsupported">not supported by JIT</a>
|
||||
(see below). Another reason is that in some environments JIT is unable to get
|
||||
memory in which to build its compiled code. The only guarantee from
|
||||
executable memory in which to build its compiled code. The only guarantee from
|
||||
<b>pcre2_config()</b> is that if it returns zero, JIT will definitely <i>not</i>
|
||||
be used.
|
||||
</P>
|
||||
<P>
|
||||
As of release 10.45 there is a more informative way to test for JIT support. If
|
||||
<b>pcre2_jit_compile()</b> is called with the single option PCRE2_JIT_TEST_ALLOC
|
||||
it returns zero if JIT is available and has a working allocator. Otherwise it
|
||||
returns PCRE2_ERROR_NOMEMORY if JIT is available but cannot allocate executable
|
||||
memory, or PCRE2_ERROR_JIT_UNSUPPORTED if JIT support is not compiled. The
|
||||
code argument is ignored, so it can be a NULL value.
|
||||
</P>
|
||||
<P>
|
||||
A simple program does not need to check availability in order to use JIT when
|
||||
possible. The API is implemented in a way that falls back to the interpretive
|
||||
code if JIT is not available or cannot be used for a given match. For programs
|
||||
@@ -126,7 +134,8 @@ option bits. For example, you can call it once with PCRE2_JIT_COMPLETE and
|
||||
PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_HARD. This time it will ignore
|
||||
PCRE2_JIT_COMPLETE and just compile code for partial matching. If
|
||||
<b>pcre2_jit_compile()</b> is called with no option bits set, it immediately
|
||||
returns zero. This is an alternative way of testing whether JIT is available.
|
||||
returns zero. This is an alternative way of testing whether JIT support has
|
||||
been compiled.
|
||||
</P>
|
||||
<P>
|
||||
At present, it is not possible to free JIT compiled code except when the entire
|
||||
@@ -487,7 +496,7 @@ Cambridge, England.
|
||||
</P>
|
||||
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 21 February 2024
|
||||
Last updated: 22 August 2024
|
||||
<br>
|
||||
Copyright © 1997-2024 University of Cambridge.
|
||||
<br>
|
||||
|
||||
@@ -96,7 +96,7 @@ Cambridge, England.
|
||||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: August 2023
|
||||
Last updated: 16 August 2023
|
||||
<br>
|
||||
Copyright © 1997-2023 University of Cambridge.
|
||||
<br>
|
||||
|
||||
@@ -27,7 +27,7 @@ please consult the man page, in case the conversion went wrong.
|
||||
This document describes the two different algorithms that are available in
|
||||
PCRE2 for matching a compiled regular expression against a given subject
|
||||
string. The "standard" algorithm is the one provided by the <b>pcre2_match()</b>
|
||||
function. This works in the same as Perl's matching function, and provide a
|
||||
function. This works in the same way as Perl's matching function, and provides a
|
||||
Perl-compatible matching operation. The just-in-time (JIT) optimization that is
|
||||
described in the
|
||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||
@@ -42,7 +42,7 @@ these are described below.
|
||||
<P>
|
||||
When there is only one possible way in which a given subject string can match a
|
||||
pattern, the two algorithms give the same answer. A difference arises, however,
|
||||
when there are multiple possibilities. For example, if the pattern
|
||||
when there are multiple possibilities. For example, if the anchored pattern
|
||||
<pre>
|
||||
^<.*>
|
||||
</pre>
|
||||
@@ -115,9 +115,9 @@ algorithm after the first match (which is necessarily the shortest) is found.
|
||||
</P>
|
||||
<P>
|
||||
Note that the size of vector needed to contain all the results depends on the
|
||||
number of simultaneous matches, not on the number of parentheses in the
|
||||
pattern. Using <b>pcre2_match_data_create_from_pattern()</b> to create the match
|
||||
data block is therefore not advisable when doing DFA matching.
|
||||
number of simultaneous matches, not on the number of capturing parentheses in
|
||||
the pattern. Using <b>pcre2_match_data_create_from_pattern()</b> to create the
|
||||
match data block is therefore not advisable when doing DFA matching.
|
||||
</P>
|
||||
<P>
|
||||
Note also that all the matches that are found start at the same point in the
|
||||
@@ -166,37 +166,43 @@ possibilities, and PCRE2's implementation of this algorithm does not attempt to
|
||||
do this. This means that no captured substrings are available.
|
||||
</P>
|
||||
<P>
|
||||
3. Because no substrings are captured, backreferences within the pattern are
|
||||
not supported.
|
||||
3. Because no substrings are captured, a number of related features are not
|
||||
available:
|
||||
<br>
|
||||
<br>
|
||||
(a) Backreferences;
|
||||
<br>
|
||||
<br>
|
||||
(b) Conditional expressions that use a backreference as the condition or test
|
||||
for a specific group recursion;
|
||||
<br>
|
||||
<br>
|
||||
(c) Script runs;
|
||||
<br>
|
||||
<br>
|
||||
(d) Scan substring assertions.
|
||||
</P>
|
||||
<P>
|
||||
4. For the same reason, conditional expressions that use a backreference as the
|
||||
condition or test for a specific group recursion are not supported.
|
||||
</P>
|
||||
<P>
|
||||
5. Again for the same reason, script runs are not supported.
|
||||
</P>
|
||||
<P>
|
||||
6. Because many paths through the tree may be active, the \K escape sequence,
|
||||
4. Because many paths through the tree may be active, the \K escape sequence,
|
||||
which resets the start of the match when encountered (but may be on some paths
|
||||
and not on others), is not supported.
|
||||
</P>
|
||||
<P>
|
||||
7. Callouts are supported, but the value of the <i>capture_top</i> field is
|
||||
5. Callouts are supported, but the value of the <i>capture_top</i> field is
|
||||
always 1, and the value of the <i>capture_last</i> field is always 0.
|
||||
</P>
|
||||
<P>
|
||||
8. The \C escape sequence, which (in the standard algorithm) always matches a
|
||||
single code unit, even in a UTF mode, is not supported in these modes, because
|
||||
6. The \C escape sequence, which (in the standard algorithm) always matches a
|
||||
single code unit, even in a UTF mode, is not supported in UTF modes because
|
||||
the alternative algorithm moves through the subject string one character (not
|
||||
code unit) at a time, for all active paths through the tree.
|
||||
</P>
|
||||
<P>
|
||||
9. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
|
||||
7. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
|
||||
supported. (*FAIL) is supported, and behaves like a failing negative assertion.
|
||||
</P>
|
||||
<P>
|
||||
10. The PCRE2_MATCH_INVALID_UTF option for <b>pcre2_compile()</b> is not
|
||||
8. The PCRE2_MATCH_INVALID_UTF option for <b>pcre2_compile()</b> is not
|
||||
supported by <b>pcre2_dfa_match()</b>.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">ADVANTAGES OF THE ALTERNATIVE ALGORITHM</a><br>
|
||||
@@ -223,15 +229,18 @@ because it has to search for all possible matches, but is also because it is
|
||||
less susceptible to optimization.
|
||||
</P>
|
||||
<P>
|
||||
2. Capturing parentheses, backreferences, script runs, and matching within
|
||||
invalid UTF string are not supported.
|
||||
2. Capturing parentheses and other features such as backreferences that rely on
|
||||
them are not supported.
|
||||
</P>
|
||||
<P>
|
||||
3. Although atomic groups are supported, their use does not provide the
|
||||
3. Matching within invalid UTF strings is not supported.
|
||||
</P>
|
||||
<P>
|
||||
4. Although atomic groups are supported, their use does not provide the
|
||||
performance advantage that it does for the standard algorithm.
|
||||
</P>
|
||||
<P>
|
||||
4. JIT optimization is not supported.
|
||||
5. JIT optimization is not supported.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
@@ -244,7 +253,7 @@ Cambridge, England.
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 19 January 2024
|
||||
Last updated: 30 August 2024
|
||||
<br>
|
||||
Copyright © 1997-2024 University of Cambridge.
|
||||
<br>
|
||||
|
||||
@@ -399,7 +399,7 @@ Cambridge, England.
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 04 September 2019
|
||||
Last updated: 27 November 2024
|
||||
<br>
|
||||
Copyright © 1997-2019 University of Cambridge.
|
||||
<br>
|
||||
|
||||
+503
-218
File diff suppressed because it is too large
Load Diff
@@ -271,7 +271,7 @@ Cambridge, England.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 27 July 2022
|
||||
Last updated: 06 December 2022
|
||||
<br>
|
||||
Copyright © 1997-2022 University of Cambridge.
|
||||
<br>
|
||||
|
||||
@@ -171,7 +171,7 @@ REG_UTF. Note that REG_NOSPEC is not part of the POSIX standard.
|
||||
</pre>
|
||||
When a pattern that is compiled with this flag is passed to
|
||||
<b>pcre2_regexec()</b> for matching, the <i>nmatch</i> and <i>pmatch</i> arguments
|
||||
are ignored, and no captured strings are returned. Versions of the PCRE library
|
||||
are ignored, and no captured strings are returned. Versions of the PCRE2 library
|
||||
prior to 10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile option, but this
|
||||
no longer happens because it disables the use of backreferences.
|
||||
<pre>
|
||||
@@ -370,7 +370,7 @@ Cambridge, England.
|
||||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 19 January 2024
|
||||
Last updated: 27 November 2024
|
||||
<br>
|
||||
Copyright © 1997-2024 University of Cambridge.
|
||||
<br>
|
||||
|
||||
@@ -101,7 +101,7 @@ Cambridge, England.
|
||||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 02 February 2016
|
||||
Last updated: 14 November 2023
|
||||
<br>
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
|
||||
@@ -203,7 +203,7 @@ Cambridge, England.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 27 June 2018
|
||||
Last updated: 19 January 2024
|
||||
<br>
|
||||
Copyright © 1997-2018 University of Cambridge.
|
||||
<br>
|
||||
|
||||
+190
-71
@@ -24,34 +24,41 @@ please consult the man page, in case the conversion went wrong.
|
||||
<li><a name="TOC9" href="#SEC9">SCRIPT MATCHING WITH \p AND \P</a>
|
||||
<li><a name="TOC10" href="#SEC10">THE BIDI_CLASS PROPERTY FOR \p AND \P</a>
|
||||
<li><a name="TOC11" href="#SEC11">CHARACTER CLASSES</a>
|
||||
<li><a name="TOC12" href="#SEC12">QUANTIFIERS</a>
|
||||
<li><a name="TOC13" href="#SEC13">ANCHORS AND SIMPLE ASSERTIONS</a>
|
||||
<li><a name="TOC14" href="#SEC14">REPORTED MATCH POINT SETTING</a>
|
||||
<li><a name="TOC15" href="#SEC15">ALTERNATION</a>
|
||||
<li><a name="TOC16" href="#SEC16">CAPTURING</a>
|
||||
<li><a name="TOC17" href="#SEC17">ATOMIC GROUPS</a>
|
||||
<li><a name="TOC18" href="#SEC18">COMMENT</a>
|
||||
<li><a name="TOC19" href="#SEC19">OPTION SETTING</a>
|
||||
<li><a name="TOC20" href="#SEC20">NEWLINE CONVENTION</a>
|
||||
<li><a name="TOC21" href="#SEC21">WHAT \R MATCHES</a>
|
||||
<li><a name="TOC22" href="#SEC22">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
|
||||
<li><a name="TOC23" href="#SEC23">NON-ATOMIC LOOKAROUND ASSERTIONS</a>
|
||||
<li><a name="TOC24" href="#SEC24">SCRIPT RUNS</a>
|
||||
<li><a name="TOC25" href="#SEC25">BACKREFERENCES</a>
|
||||
<li><a name="TOC26" href="#SEC26">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
|
||||
<li><a name="TOC27" href="#SEC27">CONDITIONAL PATTERNS</a>
|
||||
<li><a name="TOC28" href="#SEC28">BACKTRACKING CONTROL</a>
|
||||
<li><a name="TOC29" href="#SEC29">CALLOUTS</a>
|
||||
<li><a name="TOC30" href="#SEC30">SEE ALSO</a>
|
||||
<li><a name="TOC31" href="#SEC31">AUTHOR</a>
|
||||
<li><a name="TOC32" href="#SEC32">REVISION</a>
|
||||
<li><a name="TOC12" href="#SEC12">PERL EXTENDED CHARACTER CLASSES</a>
|
||||
<li><a name="TOC13" href="#SEC13">QUANTIFIERS</a>
|
||||
<li><a name="TOC14" href="#SEC14">ANCHORS AND SIMPLE ASSERTIONS</a>
|
||||
<li><a name="TOC15" href="#SEC15">REPORTED MATCH POINT SETTING</a>
|
||||
<li><a name="TOC16" href="#SEC16">ALTERNATION</a>
|
||||
<li><a name="TOC17" href="#SEC17">CAPTURING</a>
|
||||
<li><a name="TOC18" href="#SEC18">ATOMIC GROUPS</a>
|
||||
<li><a name="TOC19" href="#SEC19">COMMENT</a>
|
||||
<li><a name="TOC20" href="#SEC20">OPTION SETTING</a>
|
||||
<li><a name="TOC21" href="#SEC21">NEWLINE CONVENTION</a>
|
||||
<li><a name="TOC22" href="#SEC22">WHAT \R MATCHES</a>
|
||||
<li><a name="TOC23" href="#SEC23">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
|
||||
<li><a name="TOC24" href="#SEC24">NON-ATOMIC LOOKAROUND ASSERTIONS</a>
|
||||
<li><a name="TOC25" href="#SEC25">SUBSTRING SCAN ASSERTION</a>
|
||||
<li><a name="TOC26" href="#SEC26">SCRIPT RUNS</a>
|
||||
<li><a name="TOC27" href="#SEC27">BACKREFERENCES</a>
|
||||
<li><a name="TOC28" href="#SEC28">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
|
||||
<li><a name="TOC29" href="#SEC29">CONDITIONAL PATTERNS</a>
|
||||
<li><a name="TOC30" href="#SEC30">BACKTRACKING CONTROL</a>
|
||||
<li><a name="TOC31" href="#SEC31">CALLOUTS</a>
|
||||
<li><a name="TOC32" href="#SEC32">REPLACEMENT STRINGS</a>
|
||||
<li><a name="TOC33" href="#SEC33">SEE ALSO</a>
|
||||
<li><a name="TOC34" href="#SEC34">AUTHOR</a>
|
||||
<li><a name="TOC35" href="#SEC35">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY</a><br>
|
||||
<P>
|
||||
The full syntax and semantics of the regular expressions that are supported by
|
||||
PCRE2 are described in the
|
||||
The full syntax and semantics of the regular expression patterns that are
|
||||
supported by PCRE2 are described in the
|
||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||
documentation. This document contains a quick-reference summary of the syntax.
|
||||
documentation. This document contains a quick-reference summary of the pattern
|
||||
syntax followed by the syntax of replacement strings in the substitution function.
|
||||
The full description of the latter is in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">QUOTING</a><br>
|
||||
<P>
|
||||
@@ -60,7 +67,10 @@ documentation. This document contains a quick-reference summary of the syntax.
|
||||
\Q...\E treat enclosed characters as literal
|
||||
</pre>
|
||||
Note that white space inside \Q...\E is always treated as literal, even if
|
||||
PCRE2_EXTENDED is set, causing most other white space to be ignored.
|
||||
PCRE2_EXTENDED is set, causing most other white space to be ignored. Note also
|
||||
that PCRE2's handling of \Q...\E has some differences from Perl's. See the
|
||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||
documentation for details.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">BRACED ITEMS</a><br>
|
||||
<P>
|
||||
@@ -91,6 +101,11 @@ sequence causes an error.
|
||||
\xhh character with hex code hh
|
||||
\x{hh..} character with hex code hh..
|
||||
</pre>
|
||||
\N{U+hh..} is synonymous with \x{hh..} but is not supported in environments
|
||||
that use EBCDIC code (mainly IBM mainframes). Note that \N not followed by an
|
||||
opening curly bracket has a different meaning (see below).
|
||||
</P>
|
||||
<P>
|
||||
If PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set ("ALT_BSUX mode"), the
|
||||
following are also recognized:
|
||||
<pre>
|
||||
@@ -98,7 +113,7 @@ following are also recognized:
|
||||
\uhhhh character with hex code hhhh
|
||||
\u{hh..} character with hex code hh.. but only for EXTRA_ALT_BSUX
|
||||
</pre>
|
||||
When \x is not followed by {, from zero to two hexadecimal digits are read,
|
||||
When \x is not followed by {, one or two hexadecimal digits are read,
|
||||
but in ALT_BSUX mode \x must be followed by two hexadecimal digits to be
|
||||
recognized as a hexadecimal escape; otherwise it matches a literal "x".
|
||||
Likewise, if \u (in ALT_BSUX mode) is not followed by four hexadecimal digits
|
||||
@@ -112,9 +127,7 @@ a non-zero digit is complicated; for details see the section
|
||||
in the
|
||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||
documentation, where details of escape processing in EBCDIC environments are
|
||||
also given. \N{U+hh..} is synonymous with \x{hh..} in PCRE2 but is not
|
||||
supported in EBCDIC environments. Note that \N not followed by an opening
|
||||
curly bracket has a different meaning (see below).
|
||||
also given.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">CHARACTER TYPES</a><br>
|
||||
<P>
|
||||
@@ -154,8 +167,9 @@ sequences to matching only ASCII characters.
|
||||
</P>
|
||||
<P>
|
||||
Property descriptions in \p and \P are matched caselessly; hyphens,
|
||||
underscores, and white space are ignored, in accordance with Unicode's "loose
|
||||
matching" rules.
|
||||
underscores, and ASCII white space characters are ignored, in accordance with
|
||||
Unicode's "loose matching" rules. For example, \p{Bidi_Class=al} is the same
|
||||
as \p{ bidi class = AL }.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
|
||||
<P>
|
||||
@@ -168,13 +182,13 @@ matching" rules.
|
||||
Cs Surrogate
|
||||
|
||||
L Letter
|
||||
Lc Cased letter, the union of Ll, Lu, and Lt
|
||||
L& Synonym of Lc
|
||||
Ll Lower case letter
|
||||
Lm Modifier letter
|
||||
Lo Other letter
|
||||
Lt Title case letter
|
||||
Lu Upper case letter
|
||||
Lc Ll, Lu, or Lt
|
||||
L& Ll, Lu, or Lt
|
||||
|
||||
M Mark
|
||||
Mc Spacing mark
|
||||
@@ -205,7 +219,9 @@ matching" rules.
|
||||
Zl Line separator
|
||||
Zp Paragraph separator
|
||||
Zs Space separator
|
||||
</PRE>
|
||||
</pre>
|
||||
From release 10.45, when caseless matching is set, Ll, Lu, and Lt are all
|
||||
equivalent to Lc.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P</a><br>
|
||||
<P>
|
||||
@@ -268,7 +284,7 @@ The recognized classes are:
|
||||
RLI right-to-left isolate
|
||||
RLO right-to-left override
|
||||
S segment separator
|
||||
WS which space
|
||||
WS white space
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">CHARACTER CLASSES</a><br>
|
||||
@@ -299,7 +315,45 @@ In PCRE2, POSIX character set names recognize only ASCII characters by default,
|
||||
but some of them use Unicode properties if PCRE2_UCP is set. You can use
|
||||
\Q...\E inside a character class.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">QUANTIFIERS</a><br>
|
||||
<P>
|
||||
When PCRE2_ALT_EXTENDED_CLASS is set, UTS#18 extended character classes may be
|
||||
used, allowing nested character classes, combined using set operators.
|
||||
<pre>
|
||||
[x&&[^y]] UTS#18 extended character class
|
||||
|
||||
x||y set union (OR)
|
||||
x&&y set intersection (AND)
|
||||
x--y set difference (AND NOT)
|
||||
x~~y set symmetric difference (XOR)
|
||||
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">PERL EXTENDED CHARACTER CLASSES</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?[...]) Perl extended character class
|
||||
(?[\p{Thai} & \p{Nd}]) operators; whitespace ignored
|
||||
(?[(x - y) & z]) parentheses for grouping
|
||||
|
||||
(?[ [^3] & \p{Nd} ]) [...] is a nested ordinary class
|
||||
(?[ [:alpha:] - [z] ]) POSIX set is allowed outside [...]
|
||||
(?[ \d - [3] ]) backslash-escaped set is allowed outside [...]
|
||||
(?[ !\n & [:ascii:] ]) backslash-escaped character is allowed outside [...]
|
||||
all other characters or ranges must be enclosed in [...]
|
||||
|
||||
x|y, x+y set union (OR)
|
||||
x&y set intersection (AND)
|
||||
x-y set difference (AND NOT)
|
||||
x^y set symmetric difference (XOR)
|
||||
!x set complement (NOT)
|
||||
</pre>
|
||||
Inside a Perl extended character class, [...] switches mode to be interpreted
|
||||
as an ordinary character class. Outside of a nested [...], the only items
|
||||
permitted are backslash-escapes, POSIX sets, operators, and parentheses. Inside
|
||||
a nested ordinary class, ^ has its usual meaning (inverts the class when used
|
||||
as the first character); outside of a nested class, ^ is the XOR operator.
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">QUANTIFIERS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
? 0 or 1, greedy
|
||||
@@ -323,7 +377,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
|
||||
{,m}? zero up to m, lazy
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
|
||||
<br><a name="SEC14" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
\b word boundary
|
||||
@@ -341,7 +395,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
|
||||
\G first matching position in subject
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC14" href="#TOC1">REPORTED MATCH POINT SETTING</a><br>
|
||||
<br><a name="SEC15" href="#TOC1">REPORTED MATCH POINT SETTING</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
\K set reported start of match
|
||||
@@ -351,13 +405,13 @@ for compatibility with Perl. However, if the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
|
||||
option is set, the previous behaviour is re-enabled. When this option is set,
|
||||
\K is honoured in positive assertions, but ignored in negative ones.
|
||||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">ALTERNATION</a><br>
|
||||
<br><a name="SEC16" href="#TOC1">ALTERNATION</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
expr|expr|expr...
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC16" href="#TOC1">CAPTURING</a><br>
|
||||
<br><a name="SEC17" href="#TOC1">CAPTURING</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(...) capture group
|
||||
@@ -372,20 +426,20 @@ In non-UTF modes, names may contain underscores and ASCII letters and digits;
|
||||
in UTF modes, any Unicode letters and Unicode decimal digits are permitted. In
|
||||
both cases, a name must not start with a digit.
|
||||
</P>
|
||||
<br><a name="SEC17" href="#TOC1">ATOMIC GROUPS</a><br>
|
||||
<br><a name="SEC18" href="#TOC1">ATOMIC GROUPS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?>...) atomic non-capture group
|
||||
(*atomic:...) atomic non-capture group
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC18" href="#TOC1">COMMENT</a><br>
|
||||
<br><a name="SEC19" href="#TOC1">COMMENT</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?#....) comment (not nestable)
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC19" href="#TOC1">OPTION SETTING</a><br>
|
||||
<br><a name="SEC20" href="#TOC1">OPTION SETTING</a><br>
|
||||
<P>
|
||||
Changes of these options within a group are automatically cancelled at the end
|
||||
of the group.
|
||||
@@ -409,7 +463,7 @@ of the group.
|
||||
(?^) unset imnrsx options
|
||||
</pre>
|
||||
(?aP) implies (?aT) as well, though this has no additional effect. However, it
|
||||
means that (?-aP) is really (?-PT) which disables all ASCII restrictions for
|
||||
means that (?-aP) also implies (?-aT) and disables all ASCII restrictions for
|
||||
POSIX classes.
|
||||
</P>
|
||||
<P>
|
||||
@@ -421,20 +475,22 @@ example (?i:...).
|
||||
</P>
|
||||
<P>
|
||||
The following are recognized only at the very start of a pattern or after one
|
||||
of the newline or \R options with similar syntax. More than one of them may
|
||||
appear. For the first three, d is a decimal number.
|
||||
of the newline or \R sequences or options with similar syntax. More than one
|
||||
of them may appear. For the first three, d is a decimal number.
|
||||
<pre>
|
||||
(*LIMIT_DEPTH=d) set the backtracking limit to d
|
||||
(*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes
|
||||
(*LIMIT_MATCH=d) set the match limit to d
|
||||
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching
|
||||
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
|
||||
(*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS)
|
||||
(*LIMIT_DEPTH=d) set the backtracking limit to d
|
||||
(*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes
|
||||
(*LIMIT_MATCH=d) set the match limit to d
|
||||
(*CASELESS_RESTRICT) set PCRE2_EXTRA_CASELESS_RESTRICT when matching
|
||||
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching
|
||||
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
|
||||
(*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS)
|
||||
(*NO_DOTSTAR_ANCHOR) no .* anchoring (PCRE2_NO_DOTSTAR_ANCHOR)
|
||||
(*NO_JIT) disable JIT optimization
|
||||
(*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE)
|
||||
(*UTF) set appropriate UTF mode for the library in use
|
||||
(*UCP) set PCRE2_UCP (use Unicode properties for \d etc)
|
||||
(*NO_JIT) disable JIT optimization
|
||||
(*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE)
|
||||
(*TURKISH_CASING) set PCRE2_EXTRA_TURKISH_CASING when matching
|
||||
(*UTF) set appropriate UTF mode for the library in use
|
||||
(*UCP) set PCRE2_UCP (use Unicode properties for \d etc)
|
||||
</pre>
|
||||
Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the value of
|
||||
the limits set by the caller of <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>,
|
||||
@@ -442,7 +498,7 @@ not increase them. LIMIT_RECURSION is an obsolete synonym for LIMIT_DEPTH. The
|
||||
application can lock out the use of (*UTF) and (*UCP) by setting the
|
||||
PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at compile time.
|
||||
</P>
|
||||
<br><a name="SEC20" href="#TOC1">NEWLINE CONVENTION</a><br>
|
||||
<br><a name="SEC21" href="#TOC1">NEWLINE CONVENTION</a><br>
|
||||
<P>
|
||||
These are recognized only at the very start of the pattern or after option
|
||||
settings with a similar syntax.
|
||||
@@ -455,7 +511,7 @@ settings with a similar syntax.
|
||||
(*NUL) the NUL character (binary zero)
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">WHAT \R MATCHES</a><br>
|
||||
<br><a name="SEC22" href="#TOC1">WHAT \R MATCHES</a><br>
|
||||
<P>
|
||||
These are recognized only at the very start of the pattern or after option
|
||||
setting with a similar syntax.
|
||||
@@ -464,7 +520,7 @@ setting with a similar syntax.
|
||||
(*BSR_UNICODE) any Unicode newline sequence
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC22" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
|
||||
<br><a name="SEC23" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?=...) )
|
||||
@@ -490,7 +546,7 @@ the maximum for each branch is limited to a value set by the caller of
|
||||
(ultimate default 255). If every branch matches a fixed number of characters,
|
||||
the limit for each branch is 65535 characters.
|
||||
</P>
|
||||
<br><a name="SEC23" href="#TOC1">NON-ATOMIC LOOKAROUND ASSERTIONS</a><br>
|
||||
<br><a name="SEC24" href="#TOC1">NON-ATOMIC LOOKAROUND ASSERTIONS</a><br>
|
||||
<P>
|
||||
These assertions are specific to PCRE2 and are not Perl-compatible.
|
||||
<pre>
|
||||
@@ -503,7 +559,24 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
|
||||
(*non_atomic_positive_lookbehind:...) )
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC24" href="#TOC1">SCRIPT RUNS</a><br>
|
||||
<br><a name="SEC25" href="#TOC1">SUBSTRING SCAN ASSERTION</a><br>
|
||||
<P>
|
||||
This feature is not Perl-compatible.
|
||||
<pre>
|
||||
(*scan_substring:(grouplist)...) scan captured substring
|
||||
(*scs:(grouplist)...) scan captured substring
|
||||
</pre>
|
||||
The comma-separated list may identify groups in any of the following ways:
|
||||
<pre>
|
||||
n absolute reference
|
||||
+n relative reference
|
||||
-n relative reference
|
||||
<name> name
|
||||
'name' name
|
||||
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC26" href="#TOC1">SCRIPT RUNS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(*script_run:...) ) script run, can be backtracked into
|
||||
@@ -513,7 +586,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
|
||||
(*asr:...) )
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC25" href="#TOC1">BACKREFERENCES</a><br>
|
||||
<br><a name="SEC27" href="#TOC1">BACKREFERENCES</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
\n reference by number (can be ambiguous)
|
||||
@@ -530,7 +603,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
|
||||
(?P=name) reference by name (Python)
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC26" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
|
||||
<br><a name="SEC28" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?R) recurse whole pattern
|
||||
@@ -549,7 +622,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
|
||||
\g'-n' call subroutine by relative number (PCRE2 extension)
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC27" href="#TOC1">CONDITIONAL PATTERNS</a><br>
|
||||
<br><a name="SEC29" href="#TOC1">CONDITIONAL PATTERNS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?(condition)yes-pattern)
|
||||
@@ -572,7 +645,7 @@ Note the ambiguity of (?(R) and (?(Rn) which might be named reference
|
||||
conditions or recursion tests. Such a condition is interpreted as a reference
|
||||
condition if the relevant named group exists.
|
||||
</P>
|
||||
<br><a name="SEC28" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
||||
<br><a name="SEC30" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
||||
<P>
|
||||
All backtracking control verbs may be in the form (*VERB:NAME). For (*MARK) the
|
||||
name is mandatory, for the others it is optional. (*SKIP) changes its behaviour
|
||||
@@ -599,7 +672,7 @@ pattern is not anchored.
|
||||
The effect of one of these verbs in a group called as a subroutine is confined
|
||||
to the subroutine call.
|
||||
</P>
|
||||
<br><a name="SEC29" href="#TOC1">CALLOUTS</a><br>
|
||||
<br><a name="SEC31" href="#TOC1">CALLOUTS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?C) callout (assumed number 0)
|
||||
@@ -610,12 +683,58 @@ The allowed string delimiters are ` ' " ^ % # $ (which are the same for the
|
||||
start and the end), and the starting delimiter { matched with the ending
|
||||
delimiter }. To encode the ending delimiter within the string, double it.
|
||||
</P>
|
||||
<br><a name="SEC30" href="#TOC1">SEE ALSO</a><br>
|
||||
<br><a name="SEC32" href="#TOC1">REPLACEMENT STRINGS</a><br>
|
||||
<P>
|
||||
If the PCRE2_SUBSTITUTE_LITERAL option is set, a replacement string for
|
||||
<b>pcre2_substitute()</b> is not interpreted. Otherwise, by default, the only
|
||||
special character is the dollar character in one of the following forms:
|
||||
<pre>
|
||||
$$ insert a dollar character
|
||||
$n or ${n} insert the contents of group <i>n</i>
|
||||
$<name> insert the contents of named group
|
||||
$0 or $& insert the entire matched substring
|
||||
$` insert the substring that precedes the match
|
||||
$' insert the substring that follows the match
|
||||
$_ insert the entire input string
|
||||
$*MARK or ${*MARK} insert a control verb name
|
||||
</pre>
|
||||
For ${n}, n can be a name or a number. If PCRE2_SUBSTITUTE_EXTENDED is set,
|
||||
there is additional interpretation:
|
||||
</P>
|
||||
<P>
|
||||
1. Backslash is an escape character, and the forms described in "ESCAPED
|
||||
CHARACTERS" above are recognized. Also:
|
||||
<pre>
|
||||
\Q...\E can be used to suppress interpretation
|
||||
\l force the next character to lower case
|
||||
\u force the next character to upper case
|
||||
\L force subsequent characters to lower case
|
||||
\U force subsequent characters to upper case
|
||||
\u\L force next character to upper case, then all lower
|
||||
\l\U force next character to lower case, then all upper
|
||||
\E end \L or \U case forcing
|
||||
\b backspace character (note: as in character class in pattern)
|
||||
\v vertical tab character (note: not the same as in a pattern)
|
||||
</pre>
|
||||
2. The Python form \g<n>, where the angle brackets are part of the syntax and
|
||||
<i>n</i> is either a group name or a number, is recognized as an alternative way
|
||||
of inserting the contents of a group, for example \g<3>.
|
||||
</P>
|
||||
<P>
|
||||
3. Capture substitution supports the following additional forms:
|
||||
<pre>
|
||||
${n:-string} default for unset group
|
||||
${n:+string1:string2} values for set/unset group
|
||||
</pre>
|
||||
The substitution strings themselves are expanded. Backslash can be used to
|
||||
escape colons and closing curly brackets.
|
||||
</P>
|
||||
<br><a name="SEC33" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcre2pattern</b>(3), <b>pcre2api</b>(3), <b>pcre2callout</b>(3),
|
||||
<b>pcre2matching</b>(3), <b>pcre2</b>(3).
|
||||
</P>
|
||||
<br><a name="SEC31" href="#TOC1">AUTHOR</a><br>
|
||||
<br><a name="SEC34" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
@@ -624,11 +743,11 @@ Retired from University Computing Service
|
||||
Cambridge, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC32" href="#TOC1">REVISION</a><br>
|
||||
<br><a name="SEC35" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 12 October 2023
|
||||
Last updated: 27 November 2024
|
||||
<br>
|
||||
Copyright © 1997-2023 University of Cambridge.
|
||||
Copyright © 1997-2024 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
|
||||
@@ -105,8 +105,8 @@ Input for the 16-bit and 32-bit libraries
|
||||
<P>
|
||||
When testing the 16-bit or 32-bit libraries, there is a need to be able to
|
||||
generate character code points greater than 255 in the strings that are passed
|
||||
to the library. For subject lines, backslash escapes can be used. In addition,
|
||||
when the <b>utf</b> modifier (see
|
||||
to the library. For subject lines and some patterns, backslash escapes can be
|
||||
used. In addition, when the <b>utf</b> modifier (see
|
||||
<a href="#optionmodifiers">"Setting compilation options"</a>
|
||||
below) is set, the pattern and any following subject lines are interpreted as
|
||||
UTF-8 strings and translated to UTF-16 or UTF-32 as appropriate.
|
||||
@@ -125,9 +125,8 @@ UTF-8 (in its original definition) is not capable of encoding values greater
|
||||
than 0x7fffffff, but such values can be handled by the 32-bit library. When
|
||||
testing this library in non-UTF mode with <b>utf8_input</b> set, if any
|
||||
character is preceded by the byte 0xff (which is an invalid byte in UTF-8)
|
||||
0x80000000 is added to the character's value. This is the only way of passing
|
||||
such code points in a pattern string. For subject strings, using an escape
|
||||
sequence is preferable.
|
||||
0x80000000 is added to the character's value. For subject strings, using an
|
||||
escape sequence is preferable.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">COMMAND LINE OPTIONS</a><br>
|
||||
<P>
|
||||
@@ -178,8 +177,8 @@ functionality is intended for use in scripts such as <b>RunTest</b>. The
|
||||
following options output the value and set the exit code as indicated:
|
||||
<pre>
|
||||
ebcdic-nl the code for LF (= NL) in an EBCDIC environment:
|
||||
0x15 or 0x25
|
||||
0 if used in an ASCII environment
|
||||
either 0x15 or 0x25
|
||||
0 if used in an ASCII/Unicode environment
|
||||
exit code is always 0
|
||||
linksize the configured internal link size (2, 3, or 4)
|
||||
exit code is set to the link size
|
||||
@@ -201,6 +200,16 @@ to the same value:
|
||||
pcre2-8 the 8-bit library was built
|
||||
unicode Unicode support is available
|
||||
</pre>
|
||||
Note that the availability of JIT support in the library does not guarantee
|
||||
that it can actually be used because in some environments it is unable to
|
||||
allocate executable memory. The option "jitusable" gives more detailed
|
||||
information. It returns one of the following values:
|
||||
<pre>
|
||||
0 JIT is available and usable
|
||||
1 JIT is available but cannot allocate executable memory
|
||||
2 JIT is not available
|
||||
3 Unexpected return from test call to <b>pcre2_jit_compile()</b>
|
||||
</pre>
|
||||
If an unknown option is given, an error message is output; the exit code is 0.
|
||||
</P>
|
||||
<P>
|
||||
@@ -527,39 +536,48 @@ space is removed, and the line is scanned for backslash escapes, unless the
|
||||
<b>subject_literal</b> modifier was set for the pattern. The following provide a
|
||||
means of encoding non-printing characters in a visible way:
|
||||
<pre>
|
||||
\a alarm (BEL, \x07)
|
||||
\b backspace (\x08)
|
||||
\e escape (\x27)
|
||||
\f form feed (\x0c)
|
||||
\n newline (\x0a)
|
||||
\r carriage return (\x0d)
|
||||
\t tab (\x09)
|
||||
\v vertical tab (\x0b)
|
||||
\nnn octal character (up to 3 octal digits); always
|
||||
a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
|
||||
\o{dd...} octal character (any number of octal digits}
|
||||
\xhh hexadecimal byte (up to 2 hex digits)
|
||||
\x{hh...} hexadecimal character (any number of hex digits)
|
||||
\a alarm (BEL, \x07)
|
||||
\b backspace (\x08)
|
||||
\e escape (\x1b)
|
||||
\f form feed (\x0c)
|
||||
\n newline (\x0a)
|
||||
\N{U+hh...} Unicode character (any number of hex digits)
|
||||
\r carriage return (\x0d)
|
||||
\t tab (\x09)
|
||||
\v vertical tab (\x0b)
|
||||
\ddd octal number (up to 3 octal digits); represents a single
|
||||
code point unless larger than 255 with the 8-bit library
|
||||
\o{dd...} octal number (any number of octal digits) representing a
|
||||
character in UTF mode or a code point
|
||||
\xhh hexadecimal byte (up to 2 hex digits)
|
||||
\x{hh...} hexadecimal number (up to 8 hex digits) representing a
|
||||
character in UTF mode or a code point
|
||||
</pre>
|
||||
The use of \x{hh...} is not dependent on the use of the <b>utf</b> modifier on
|
||||
the pattern. It is recognized always. There may be any number of hexadecimal
|
||||
digits inside the braces; invalid values provoke error messages.
|
||||
Invoking \N{U+hh...} or \x{hh...} doesn't require the use of the <b>utf</b>
|
||||
modifier on the pattern. It is always recognized. There may be any number of
|
||||
hexadecimal digits inside the braces; invalid values provoke error messages
|
||||
but, when using \N{U+hh...} with some invalid Unicode characters, they will
|
||||
be accepted with a warning instead.
|
||||
</P>
|
||||
<P>
|
||||
Note that \xhh specifies one byte rather than one character in UTF-8 mode;
|
||||
this makes it possible to construct invalid UTF-8 sequences for testing
|
||||
purposes. On the other hand, \x{hh} is interpreted as a UTF-8 character in
|
||||
UTF-8 mode, generating more than one byte if the value is greater than 127.
|
||||
When testing the 8-bit library not in UTF-8 mode, \x{hh} generates one byte
|
||||
for values less than 256, and causes an error for greater values.
|
||||
Note that even in UTF-8 mode, \xhh (and depending on how large, \ddd)
|
||||
describe one byte rather than one character; this makes it possible to
|
||||
construct invalid UTF-8 sequences for testing purposes. On the other hand,
|
||||
\x{hh...} is interpreted as a UTF-8 character in UTF-8 mode, only generating
|
||||
more than one byte if the value is greater than 127. To avoid the ambiguity
|
||||
it is preferred to use \N{U+hh...} when describing characters. When testing
|
||||
the 8-bit library not in UTF-8 mode, \x{hh} generates one byte for values
|
||||
that fit in a single byte, and causes an error for greater values.
|
||||
</P>
|
||||
<P>
|
||||
In UTF-16 mode, all 4-digit \x{hhhh} values are accepted. This makes it
|
||||
possible to construct invalid UTF-16 sequences for testing purposes.
|
||||
When testing the 16-bit library, not in UTF-16 mode, all 4-digit \x{hhhh}
|
||||
values are accepted. This makes it possible to construct invalid UTF-16
|
||||
sequences for testing purposes.
|
||||
</P>
|
||||
<P>
|
||||
In UTF-32 mode, all 4- to 8-digit \x{...} values are accepted. This makes it
|
||||
possible to construct invalid UTF-32 sequences for testing purposes.
|
||||
When testing the 32-bit library, not in UTF-32 mode, all 4- to 8-digit \x{...}
|
||||
values are accepted. This makes it possible to construct invalid UTF-32
|
||||
sequences for testing purposes.
|
||||
</P>
|
||||
<P>
|
||||
There is a special backslash sequence that specifies replication of one or more
|
||||
@@ -625,6 +643,7 @@ for a description of the effects of these options.
|
||||
allow_surrogate_escapes set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
|
||||
alt_bsux set PCRE2_ALT_BSUX
|
||||
alt_circumflex set PCRE2_ALT_CIRCUMFLEX
|
||||
alt_extended_class set PCRE2_ALT_EXTENDED_CLASS
|
||||
alt_verbnames set PCRE2_ALT_VERBNAMES
|
||||
anchored set PCRE2_ANCHORED
|
||||
/a ascii_all set all ASCII options
|
||||
@@ -653,13 +672,17 @@ for a description of the effects of these options.
|
||||
match_word set PCRE2_EXTRA_MATCH_WORD
|
||||
/m multiline set PCRE2_MULTILINE
|
||||
never_backslash_c set PCRE2_NEVER_BACKSLASH_C
|
||||
never_callout set PCRE2_EXTRA_NEVER_CALLOUT
|
||||
never_ucp set PCRE2_NEVER_UCP
|
||||
never_utf set PCRE2_NEVER_UTF
|
||||
/n no_auto_capture set PCRE2_NO_AUTO_CAPTURE
|
||||
no_auto_possess set PCRE2_NO_AUTO_POSSESS
|
||||
no_bs0 set PCRE2_EXTRA_NO_BS0
|
||||
no_dotstar_anchor set PCRE2_NO_DOTSTAR_ANCHOR
|
||||
no_start_optimize set PCRE2_NO_START_OPTIMIZE
|
||||
no_utf_check set PCRE2_NO_UTF_CHECK
|
||||
python_octal set PCRE2_EXTRA_PYTHON_OCTAL
|
||||
turkish_casing set PCRE2_EXTRA_TURKISH_CASING
|
||||
ucp set PCRE2_UCP
|
||||
ungreedy set PCRE2_UNGREEDY
|
||||
use_offset_limit set PCRE2_USE_OFFSET_LIMIT
|
||||
@@ -671,6 +694,23 @@ notation. Otherwise, those less than 0x100 are output in hex without the curly
|
||||
brackets. Setting <b>utf</b> in 16-bit or 32-bit mode also causes pattern and
|
||||
subject strings to be translated to UTF-16 or UTF-32, respectively, before
|
||||
being passed to library functions.
|
||||
<br>
|
||||
<br>
|
||||
The following modifiers enable or disable performance optimizations by
|
||||
calling <b>pcre2_set_optimize()</b> before invoking the regex compiler.
|
||||
<pre>
|
||||
optimization_full enable all optional optimizations
|
||||
optimization_none disable all optional optimizations
|
||||
auto_possess auto-possessify variable quantifiers
|
||||
auto_possess_off don't auto-possessify variable quantifiers
|
||||
dotstar_anchor anchor patterns starting with .*
|
||||
dotstar_anchor_off don't anchor patterns starting with .*
|
||||
start_optimize enable pre-scan of subject string
|
||||
start_optimize_off disable pre-scan of subject string
|
||||
</pre>
|
||||
See the
|
||||
<a href="pcre2_set_optimize.html"><b>pcre2_set_optimize</b></a>
|
||||
documentation for details on these optimizations.
|
||||
<a name="controlmodifiers"></a></P>
|
||||
<br><b>
|
||||
Setting compilation controls
|
||||
@@ -680,14 +720,15 @@ The following modifiers affect the compilation process or request information
|
||||
about the pattern. There are single-letter abbreviations for some that are
|
||||
heavily used in the test files.
|
||||
<pre>
|
||||
bsr=[anycrlf|unicode] specify \R handling
|
||||
/B bincode show binary code without lengths
|
||||
bsr=[anycrlf|unicode] specify \R handling
|
||||
callout_info show callout information
|
||||
convert=<options> request foreign pattern conversion
|
||||
convert_glob_escape=c set glob escape character
|
||||
convert_glob_separator=c set glob separator character
|
||||
convert_length set convert buffer length
|
||||
debug same as info,fullbincode
|
||||
expand expand repetition syntax in pattern
|
||||
framesize show matching frame size
|
||||
fullbincode show binary code with lengths
|
||||
/I info show info about compiled pattern
|
||||
@@ -709,6 +750,7 @@ heavily used in the test files.
|
||||
posix_nosub use the POSIX API with REG_NOSUB
|
||||
push push compiled pattern onto the stack
|
||||
pushcopy push a copy onto the stack
|
||||
pushtablescopy push a copy with tables onto the stack
|
||||
stackguard=<number> test the stackguard feature
|
||||
subject_literal treat all subject lines as literal
|
||||
tables=[0|1|2|3] select internal tables
|
||||
@@ -1128,6 +1170,7 @@ process.
|
||||
replace=<string> specify a replacement string
|
||||
startchar show starting character when relevant
|
||||
substitute_callout use substitution callouts
|
||||
substitute_case_callout use substitution case callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_literal use PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_matched use PCRE2_SUBSTITUTE_MATCHED
|
||||
@@ -1217,10 +1260,11 @@ Setting match options
|
||||
<P>
|
||||
The following modifiers set options for <b>pcre2_match()</b> or
|
||||
<b>pcre2_dfa_match()</b>. See
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
for a description of their effects.
|
||||
<pre>
|
||||
anchored set PCRE2_ANCHORED
|
||||
copy_matched_subject set PCRE2_COPY_MATCHED_SUBJECT
|
||||
endanchored set PCRE2_ENDANCHORED
|
||||
dfa_restart set PCRE2_DFA_RESTART
|
||||
dfa_shortest set PCRE2_DFA_SHORTEST
|
||||
@@ -1271,8 +1315,8 @@ pattern, but can be overridden by modifiers on the subject.
|
||||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
allcaptures show all captures
|
||||
allvector show the entire ovector
|
||||
allusedtext show all consulted text (non-JIT only)
|
||||
allvector show the entire ovector
|
||||
altglobal alternative global matching
|
||||
callout_capture show captures at callout time
|
||||
callout_data=<n> set a value to pass via callouts
|
||||
@@ -1306,7 +1350,8 @@ pattern, but can be overridden by modifiers on the subject.
|
||||
startchar show startchar when relevant
|
||||
startoffset=<n> same as offset=<n>
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extedded use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_case_callout use substitution case callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_literal use PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_matched use PCRE2_SUBSTITUTE_MATCHED
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
@@ -1592,6 +1637,21 @@ If both are set for the same number, stop takes precedence. Only a single skip
|
||||
or stop is supported, which is sufficient for testing that the feature works.
|
||||
</P>
|
||||
<br><b>
|
||||
Testing substitute case callouts
|
||||
</b><br>
|
||||
<P>
|
||||
If the <b>substitute_case_callout</b> modifier is set, a substitution
|
||||
case callout function is set up. The callout function is called for each
|
||||
substituted chunk which is to be case-transformed.
|
||||
</P>
|
||||
<P>
|
||||
The callout function passed is a fixed function with implementation for certain
|
||||
behaviours: inputs which shrink when case-transformed; inputs which grow; inputs
|
||||
with distinct upper/lower/titlecase forms. The characters which are not
|
||||
special-cased for testing purposes are left unmodified, as if they are caseless
|
||||
characters.
|
||||
</P>
|
||||
<br><b>
|
||||
Setting the JIT stack size
|
||||
</b><br>
|
||||
<P>
|
||||
@@ -2204,7 +2264,7 @@ Cambridge, England.
|
||||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 24 April 2024
|
||||
Last updated: 26 December 2024
|
||||
<br>
|
||||
Copyright © 1997-2024 University of Cambridge.
|
||||
<br>
|
||||
|
||||
@@ -53,7 +53,7 @@ When PCRE2 is built with Unicode support, the escape sequences \p{..},
|
||||
The Unicode properties that can be tested are a subset of those that Perl
|
||||
supports. Currently they are limited to the general category properties such as
|
||||
Lu for an upper case letter or Nd for a decimal number, the derived properties
|
||||
Any and LC (synonym L&), the Unicode script names such as Arabic or Han,
|
||||
Any and Lc (synonym L&), the Unicode script names such as Arabic or Han,
|
||||
Bidi_Class, Bidi_Control, and a few binary properties.
|
||||
</P>
|
||||
<P>
|
||||
@@ -157,6 +157,40 @@ Recognition of these non-ASCII characters as case-equivalent to their ASCII
|
||||
counterparts can be disabled by setting the PCRE2_EXTRA_CASELESS_RESTRICT
|
||||
option. When this is set, all characters in a case equivalence must either be
|
||||
ASCII or non-ASCII; there can be no mixing.
|
||||
<pre>
|
||||
Without PCRE2_EXTRA_CASELESS_RESTRICT:
|
||||
'k' = 'K' = U+212A (Kelvin sign)
|
||||
's' = 'S' = U+017F (long S)
|
||||
With PCRE2_EXTRA_CASELESS_RESTRICT:
|
||||
'k' = 'K'
|
||||
U+212A (Kelvin sign) only case-equivalent to itself
|
||||
's' = 'S'
|
||||
U+017F (long S) only case-equivalent to itself
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
One language family, Turkish and Azeri, has its own case-insensitivity rules,
|
||||
which can be selected by setting PCRE2_EXTRA_TURKISH_CASING. This alters the
|
||||
behaviour of the 'i', 'I', U+0130 (capital I with dot above), and U+0131
|
||||
(small dotless i) characters.
|
||||
<pre>
|
||||
Without PCRE2_EXTRA_TURKISH_CASING:
|
||||
'i' = 'I'
|
||||
U+0130 (capital I with dot above) only case-equivalent to itself
|
||||
U+0131 (small dotless i) only case-equivalent to itself
|
||||
With PCRE2_EXTRA_TURKISH_CASING:
|
||||
'i' = U+0130 (capital I with dot above)
|
||||
U+0131 (small dotless i) = 'I'
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
It is not allowed to specify both PCRE2_EXTRA_CASELESS_RESTRICT and
|
||||
PCRE2_EXTRA_TURKISH_CASING together.
|
||||
</P>
|
||||
<P>
|
||||
From release 10.45 the Unicode letter properties Lu (upper case), Ll (lower
|
||||
case), and Lt (title case) are all treated as Lc (cased letter) when caseless
|
||||
matching is set by the PCRE2_CASELESS option or (?i) within the pattern.
|
||||
<a name="scriptruns"></a></P>
|
||||
<br><b>
|
||||
SCRIPT RUNS
|
||||
@@ -513,9 +547,9 @@ Cambridge, England.
|
||||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 12 October 2023
|
||||
Last updated: 27 November 2024
|
||||
<br>
|
||||
Copyright © 1997-2023 University of Cambridge.
|
||||
Copyright © 1997-2024 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
|
||||
@@ -267,6 +267,9 @@ in the library.
|
||||
<tr><td><a href="pcre2_set_offset_limit.html">pcre2_set_offset_limit</a></td>
|
||||
<td> Set the offset limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_optimize.html">pcre2_set_optimize</a></td>
|
||||
<td> Set an optimization directive</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_parens_nest_limit.html">pcre2_set_parens_nest_limit</a></td>
|
||||
<td> Set the parentheses nesting limit</td></tr>
|
||||
|
||||
@@ -276,6 +279,12 @@ in the library.
|
||||
<tr><td><a href="pcre2_set_recursion_memory_management.html">pcre2_set_recursion_memory_management</a></td>
|
||||
<td> Obsolete function that (from 10.30 onwards) does nothing</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_substitute_callout.html">pcre2_set_substitute_callout</a></td>
|
||||
<td> Set a substitution callout function</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_substitute_case_callout.html">pcre2_set_substitute_case_callout</a></td>
|
||||
<td> Set a substitution case callout function</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_substitute.html">pcre2_substitute</a></td>
|
||||
<td> Match a compiled pattern to a subject string and do
|
||||
substitutions</td></tr>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2-CONFIG 1 "28 September 2014" "PCRE2 10.00"
|
||||
.TH PCRE2-CONFIG 1 "28 September 2014" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
pcre2-config - program to return PCRE2 configuration
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
|
||||
PCRE2-CONFIG(1) General Commands Manual PCRE2-CONFIG(1)
|
||||
|
||||
|
||||
@@ -82,4 +81,4 @@ REVISION
|
||||
Last updated: 28 September 2014
|
||||
|
||||
|
||||
PCRE2 10.00 28 September 2014 PCRE2-CONFIG(1)
|
||||
PCRE2 10.46 28 September 2014 PCRE2-CONFIG(1)
|
||||
|
||||
+13
-10
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2 3 "27 August 2021" "PCRE2 10.38"
|
||||
.TH PCRE2 3 "18 December 2024" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH INTRODUCTION
|
||||
@@ -186,23 +186,26 @@ In the "man" and HTML formats, there is also a short page for each C library
|
||||
function, listing its arguments and results.
|
||||
.
|
||||
.
|
||||
.SH AUTHOR
|
||||
.SH AUTHORS
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Philip Hazel
|
||||
Retired from University Computing Service
|
||||
Cambridge, England.
|
||||
.fi
|
||||
The current maintainers of PCRE2 are Nicholas Wilson and Zoltan Herczeg.
|
||||
.P
|
||||
Putting an actual email address here is a spam magnet. If you want to email me,
|
||||
use my two names separated by a dot at gmail.com.
|
||||
PCRE2 was written by Philip Hazel, of the University Computing Service,
|
||||
Cambridge, England. Many others have also contributed.
|
||||
.P
|
||||
To contact the maintainers, please use the GitHub issues tracker or PCRE2
|
||||
mailing list, as described at the project page:
|
||||
.\" HTML <a href="https://github.com/PCRE2Project/pcre2">
|
||||
.\" </a>
|
||||
https://github.com/PCRE2Project/pcre2
|
||||
.\"
|
||||
.
|
||||
.
|
||||
.SH REVISION
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 27 August 2021
|
||||
Last updated: 18 December 2024
|
||||
Copyright (c) 1997-2021 University of Cambridge.
|
||||
.fi
|
||||
|
||||
+2168
-1452
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_COMPILE 3 "23 March 2017" "PCRE2 10.30"
|
||||
.TH PCRE2_COMPILE 3 "23 March 2017" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_CODE_COPY 3 "22 November 2016" "PCRE2 10.23"
|
||||
.TH PCRE2_CODE_COPY 3 "22 November 2016" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_CODE_COPY 3 "22 November 2016" "PCRE2 10.23"
|
||||
.TH PCRE2_CODE_COPY 3 "16 January 2017" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_CODE_FREE 3 "28 June 2018" "PCRE2 10.32"
|
||||
.TH PCRE2_CODE_FREE 3 "28 June 2018" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_COMPILE 3 "19 January 2024" "PCRE2 10.43"
|
||||
.TH PCRE2_COMPILE 3 "30 October 2024" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
@@ -45,6 +45,7 @@ The primary option bits are:
|
||||
PCRE2_ALLOW_EMPTY_CLASS Allow empty classes
|
||||
PCRE2_ALT_BSUX Alternative handling of \eu, \eU, and \ex
|
||||
PCRE2_ALT_CIRCUMFLEX Alternative handling of ^ in multiline mode
|
||||
PCRE2_ALT_EXTENDED_CLASS Alternative extended character class syntax
|
||||
PCRE2_ALT_VERBNAMES Process backslashes in verb names
|
||||
PCRE2_AUTO_CALLOUT Compile automatic callouts
|
||||
PCRE2_CASELESS Do caseless matching
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_COMPILE_CONTEXT_COPY 3 "22 October 2014" "PCRE2 10.00"
|
||||
.TH PCRE2_COMPILE_CONTEXT_COPY 3 "25 October 2014" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_COMPILE_CONTEXT_CREATE 3 "22 October 2014" "PCRE2 10.00"
|
||||
.TH PCRE2_COMPILE_CONTEXT_CREATE 3 "25 October 2014" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_COMPILE_CONTEXT_FREE 3 "29 June 2018" "PCRE2 10.32"
|
||||
.TH PCRE2_COMPILE_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_CONFIG 3 "16 September 2017" "PCRE2 10.31"
|
||||
.TH PCRE2_CONFIG 3 "16 September 2017" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_CONVERT_CONTEXT_COPY 3 "10 July 2017" "PCRE2 10.30"
|
||||
.TH PCRE2_CONVERT_CONTEXT_COPY 3 "12 July 2017" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_CONVERT_CONTEXT_CREATE 3 "10 July 2017" "PCRE2 10.30"
|
||||
.TH PCRE2_CONVERT_CONTEXT_CREATE 3 "12 July 2017" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_CONVERT_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.32"
|
||||
.TH PCRE2_CONVERT_CONTEXT_FREE 3 "13 August 2018" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_CONVERTED_PATTERN_FREE 3 "28 June 2018" "PCRE2 10.32"
|
||||
.TH PCRE2_CONVERTED_PATTERN_FREE 3 "13 August 2018" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_DFA_MATCH 3 "28 August 2021" "PCRE2 10.38"
|
||||
.TH PCRE2_DFA_MATCH 3 "31 August 2021" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_GENERAL_CONTEXT_COPY 3 "22 October 2014" "PCRE2 10.00"
|
||||
.TH PCRE2_GENERAL_CONTEXT_COPY 3 "25 October 2014" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_GENERAL_CONTEXT_CREATE 3 "22 October 2014" "PCRE2 10.00"
|
||||
.TH PCRE2_GENERAL_CONTEXT_CREATE 3 "23 January 2023" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_GENERAL_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.32"
|
||||
.TH PCRE2_GENERAL_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_GET_ERROR_MESSAGE 3 "24 March 2017" "PCRE2 10.30"
|
||||
.TH PCRE2_GET_ERROR_MESSAGE 3 "24 March 2017" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_GET_MARK 3 "13 October 2017" "PCRE2 10.31"
|
||||
.TH PCRE2_GET_MARK 3 "13 January 2018" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE 3 "13 January 2023" "PCRE2 10.43"
|
||||
.TH PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE 3 "18 January 2023" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_GET_MATCH_DATA_SIZE 3 "16 July 2019" "PCRE2 10.34"
|
||||
.TH PCRE2_GET_MATCH_DATA_SIZE 3 "17 October 2019" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_GET_OVECTOR_COUNT 3 "24 October 2014" "PCRE2 10.00"
|
||||
.TH PCRE2_GET_OVECTOR_COUNT 3 "25 October 2014" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_GET_OVECTOR_POINTER 3 "24 October 2014" "PCRE2 10.00"
|
||||
.TH PCRE2_GET_OVECTOR_POINTER 3 "25 October 2014" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_GET_STARTCHAR 3 "24 October 2014" "PCRE2 10.00"
|
||||
.TH PCRE2_GET_STARTCHAR 3 "25 October 2014" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.TH PCRE2_JIT_COMPILE 3 "29 July 2019" "PCRE2 10.34"
|
||||
.TH PCRE2_JIT_COMPILE 3 "22 August 2024" "PCRE2 10.46"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
@@ -22,9 +22,17 @@ details are given in the
|
||||
.\"
|
||||
documentation.
|
||||
.P
|
||||
The first argument is a pointer that was returned by a successful call to
|
||||
\fBpcre2_compile()\fP, and the second must contain one or more of the following
|
||||
bits:
|
||||
The availability of JIT support can be tested by calling
|
||||
\fBpcre2_jit_compile()\fP with a single option PCRE2_JIT_TEST_ALLOC (the
|
||||
code argument is ignored, so a NULL value is accepted). Such a call
|
||||
returns zero if JIT is available and has a working allocator. Otherwise
|
||||
it returns PCRE2_ERROR_NOMEMORY if JIT is available but cannot allocate
|
||||
executable memory, or PCRE2_ERROR_JIT_UNSUPPORTED if JIT support is not
|
||||
compiled.
|
||||
.P
|
||||
Otherwise, the first argument must be a pointer that was returned by a
|
||||
successful call to \fBpcre2_compile()\fP, and the second must contain one or
|
||||
more of the following bits:
|
||||
.sp
|
||||
PCRE2_JIT_COMPLETE compile code for full matching
|
||||
PCRE2_JIT_PARTIAL_SOFT compile code for soft partial matching
|
||||
@@ -34,11 +42,13 @@ There is also an obsolete option called PCRE2_JIT_INVALID_UTF, which has been
|
||||
superseded by the \fBpcre2_compile()\fP option PCRE2_MATCH_INVALID_UTF. The old
|
||||
option is deprecated and may be removed in the future.
|
||||
.P
|
||||
The yield of the function is 0 for success, or a negative error code otherwise.
|
||||
In particular, PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or
|
||||
if an unknown bit is set in \fIoptions\fP. The function can also return
|
||||
PCRE2_ERROR_NOMEMORY if JIT is unable to allocate executable memory for the
|
||||
compiler, even if it was because of a system security restriction.
|
||||
The yield of the function when called with any of the three options above is 0
|
||||
for success, or a negative error code otherwise. In particular,
|
||||
PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or if an unknown
|
||||
bit is set in \fIoptions\fP. The function can also return PCRE2_ERROR_NOMEMORY
|
||||
if JIT is unable to allocate executable memory for the compiler, even if it was
|
||||
because of a system security restriction. In a few cases, the function may
|
||||
return with PCRE2_ERROR_JIT_UNSUPPORTED for unsupported features.
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
.\" HREF
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user