feat: update pcre2 to version 10.46

Signed-off-by: Haryslee <lihao189@huawei.com>
This commit is contained in:
Haryslee
2025-12-01 16:21:58 +08:00
parent 97d5d9b4ae
commit 2941ab2b9d
366 changed files with 61406 additions and 72212 deletions
+5 -4
View File
@@ -13,7 +13,7 @@ action("copy_generic_files") {
inputs = [
"$PCRE2_LIB_DIR/src/config.h.generic",
"$PCRE2_LIB_DIR/src/pcre2.h.generic",
"$PCRE2_LIB_DIR/src/pcre2_chartables.c",
"$PCRE2_LIB_DIR/src/pcre2_chartables.c.dist",
]
outputs = [
"${target_gen_dir}/src/pcre2_chartables.c",
@@ -51,6 +51,7 @@ pcre2_sources = [
"$PCRE2_LIB_DIR/src/pcre2_auto_possess.c",
"$PCRE2_LIB_DIR/src/pcre2_chkdint.c",
"$PCRE2_LIB_DIR/src/pcre2_compile.c",
"$PCRE2_LIB_DIR/src/pcre2_compile_class.c",
"$PCRE2_LIB_DIR/src/pcre2_config.c",
"$PCRE2_LIB_DIR/src/pcre2_context.c",
"$PCRE2_LIB_DIR/src/pcre2_convert.c",
@@ -97,7 +98,7 @@ ohos_shared_library("libpcre2") {
"ramdisk",
"updater",
]
license_file = "$PCRE2_LIB_DIR/LICENCE"
license_file = "$PCRE2_LIB_DIR/LICENCE.md"
innerapi_tags = [
"platformsdk_indirect",
"chipsetsdk_sp_indirect",
@@ -119,7 +120,7 @@ ohos_static_library("libpcre2_static") {
"-DPCRE2_CODE_UNIT_WIDTH=8",
"-w",
]
license_file = "$PCRE2_LIB_DIR/LICENCE"
license_file = "$PCRE2_LIB_DIR/LICENCE.md"
part_name = "pcre2"
subsystem_name = "thirdparty"
}
@@ -139,7 +140,7 @@ ohos_static_library("libpcre2_static_16") {
"-DPCRE2_CODE_UNIT_WIDTH=16",
"-w",
]
license_file = "$PCRE2_LIB_DIR/LICENCE"
license_file = "$PCRE2_LIB_DIR/LICENCE.md"
part_name = "pcre2"
subsystem_name = "thirdparty"
}
+23
View File
@@ -112,6 +112,8 @@
<filteritem type="filepath" name="pcre2/maint/pcre2_chartables.c.non-standard" desc="InvalidCopyright"/>
<filteritem type="filepath" name="pcre2/autogen.sh" desc="InvalidCopyright"/>
<filteritem type="filepath" name="pcre2/maint/utf8.c" desc="InvalidCopyright"/>
<filteritem type="filepath" name="pcre2/maint/CleanTxt" desc="InvalidCopyright"/>
<filteritem type="filepath" name="pcre2/maint/132html" desc="InvalidCopyright"/>
<filteritem type="filepath" name="pcre2/pcre2_fuzzer.dict" desc="InvalidCopyright"/>
<filteritem type="filepath" name="pcre2/pcre2_fuzzer.options" desc="InvalidCopyright"/>
<filteritem type="filepath" name="pcre2/maint/ucptestdata/testinput1" desc="InvalidCopyright"/>
@@ -123,9 +125,27 @@
<filteritem type="filepath" name="pcre2/WORKSPACE.bazel" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/BUILD.bazel" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/.bazelrc" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/.gitmodules" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="BUILD.gn" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/pcre2_fuzzer_32.dict" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/GenerateUcpTables.py" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/UpdateDates.py" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/GenerateTest.py" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/CheckMan" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/RunManifestTest" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/RunPerlTest" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/CheckTxt" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/PrepareRelease" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/manifest-cmakeinstall-windows" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/manifest-cmakeinstall-linux" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/UpdateRelease.py" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/Detrail" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/manifest-cmakeinstall-macos" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/manifest-makeinstall-freebsd" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/UpdateCommon.py" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/manifest-makeinstall-linux" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/RunManifestTest.ps1" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/maint/manifest-tarball" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/pcre2_fuzzer_32.options" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="copy_generic_files.sh" desc="Copyright Header Invalid: NULL"/>
<filteritem type="filepath" name="pcre2/build.zig" desc="Copyright Header Invalid: NULL"/>
@@ -172,6 +192,9 @@
<filteritem type="filepath" name="pcre2/testdata/testoutput10" desc="already checked"/>
<filteritem type="filepath" name="pcre2/testdata/testbtables" desc="already checked"/>
<filteritem type="filepath" name="pcre2/testdata/testinput10" desc="already checked"/>
<filteritem type="filepath" name="pcre2/testdata/grepoutputUN" desc="already checked"/>
<filteritem type="filepath" name="pcre2/testdata/grepinputBad8" desc="already checked"/>
<filteritem type="filepath" name="pcre2/testdata/grepinputBad8_Trail" desc="already checked"/>
</filefilter>
</filefilterlist>
+3 -3
View File
@@ -1,9 +1,9 @@
[
{
"Name": "PCRE2",
"License": "BSD 3-Clause License WITH pcre2 exception",
"License File": "pcre2/LICENSE",
"Version Number": "pcre2-10.44",
"License": "BSD-3-Clause WITH PCRE2-exception",
"License File": "pcre2/LICENCE.md",
"Version Number": "pcre2-10.46",
"Owner": "maliang34@huawei.com",
"Upstream URL": "https://github.com/PhilipHazel/pcre2.git",
"Description": "pcre2 is a re_working of the original PCRE1 library to provide an entirely new API."
+1 -1
View File
@@ -12,7 +12,7 @@ function check_md5_and_copy() {
fi
}
# Make sure both directories exist before copying. The flag must be the
# lowercase `-p` (create missing parents, no error if the directory already
# exists); uppercase `-P` is not a mkdir option and makes the command fail.
mkdir -p $pcre2_lib_dir/src
mkdir -p $pcre2_gen_dir/src
# Install each upstream template under the name the build expects, using the
# check_md5_and_copy helper defined earlier in this script.
check_md5_and_copy $pcre2_lib_dir/src/config.h.generic $pcre2_gen_dir/src/config.h
check_md5_and_copy $pcre2_lib_dir/src/pcre2.h.generic $pcre2_gen_dir/src/pcre2.h
check_md5_and_copy $pcre2_lib_dir/src/pcre2_chartables.c.dist $pcre2_gen_dir/src/pcre2_chartables.c
-3
View File
@@ -1,3 +0,0 @@
common --experimental_enable_bzlmod
build --incompatible_enable_cc_toolchain_resolution
build --incompatible_strict_action_env
+2
View File
@@ -0,0 +1,2 @@
testdata/* -text
maint/manifest-* -text
+49
View File
@@ -0,0 +1,49 @@
codecov:
strict_yaml_branch: default
require_ci_to_pass: false
notify:
wait_for_ci: false
notify_error: true
coverage:
range: 75..90
round: nearest
precision: 2
status:
project: false
patch:
default:
target: 100%
threshold: 5%
github_checks:
annotations: false
comment: false
# layout: "condensed_header, condensed_files, condensed_footer"
# hide_project_coverage: true
# require_head: true
# require_base: true
# require_changes: "coverage_drop OR uncovered_patch"
component_management:
individual_components:
- component_id: library
name: "Core library"
paths:
- '!src/(pcre2test|pcre2grep|pcre2_jit_test|pcre2posix_test|pcre2_printint)\.c'
statuses:
- type: project
target: auto
threshold: 0.5%
- component_id: test_binaries
name: "Test binaries"
paths:
- 'src/(pcre2test|pcre2grep|pcre2_jit_test|pcre2posix_test|pcre2_printint)\.c'
statuses:
- type: project
target: auto
threshold: 2%
+6
View File
@@ -0,0 +1,6 @@
version: 2
updates:
- package-ecosystem: github-actions
directory: /
schedule:
interval: monthly
+281 -50
View File
@@ -1,23 +1,37 @@
name: Build
on: [push, pull_request]
on:
workflow_dispatch:
push:
branches: [ master, "release/**" ]
pull_request:
branches: [ master ]
permissions:
contents: read
jobs:
linux:
name: Linux
runs-on: ubuntu-latest
steps:
- name: Setup
run: |
sudo apt-get -qq update
sudo apt-get -qq install zlib1g-dev libbz2-dev
- name: Checkout
uses: actions/checkout@v4
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Prepare
run: ./autogen.sh
- name: Configure
run: ./configure CPPFLAGS='-Wall -Wextra' --enable-jit --enable-pcre2-16 --enable-pcre2-32
run: ./configure --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-pcre2grep-libz --enable-pcre2grep-libbz2
- name: Build
run: make -j2
run: make -j3 CPPFLAGS='-Wall -Wextra -Werror'
- name: Test (main test script)
run: ./RunTest
@@ -31,25 +45,32 @@ jobs:
- name: Test (pcre2posix program)
run: ./pcre2posix_test -v
- name: Install
run: |
make install "DESTDIR=`pwd`/install-dir"
maint/RunManifestTest install-dir maint/manifest-makeinstall-linux
alpine:
name: alpine
runs-on: ubuntu-latest
container: alpine
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup
run: apk add --no-cache automake autoconf gcc libtool make musl-dev #musl-locales
run: apk add --no-cache automake autoconf gcc libtool make musl-dev git zlib zlib-dev bzip2 bzip2-dev #musl-locales
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Prepare
run: ./autogen.sh
- name: Configure
run: ./configure CPPFLAGS='-Wall -Wextra' --enable-jit --enable-pcre2-16 --enable-pcre2-32
run: ./configure --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-pcre2grep-libz --enable-pcre2grep-libbz2
- name: Build
run: make -j2
run: make -j3 CPPFLAGS='-Wall -Wextra -Werror'
- name: Test (main test script)
run: ./RunTest
@@ -63,54 +84,264 @@ jobs:
- name: Test (pcre2posix program)
run: ./pcre2posix_test -v
- name: Install
run: |
make install "DESTDIR=`pwd`/install-dir"
maint/RunManifestTest install-dir maint/manifest-makeinstall-linux
macos:
name: macOS universal
runs-on: macos-latest
steps:
- name: Checkout
uses: actions/checkout@v4
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Configure
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DCMAKE_OSX_ARCHITECTURES='arm64;x86_64' -DCMAKE_C_FLAGS='-Wall -Wextra' -B build
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2_SUPPORT_LIBZ=ON -DPCRE2_SUPPORT_LIBBZ2=ON -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON -DCMAKE_OSX_ARCHITECTURES='arm64;x86_64' -DCMAKE_C_FLAGS='-Wall -Wextra' -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_BUILD_TYPE=Release -B build
- name: Build
run: cmake --build build
- name: Test (main test script)
run: |
cd build
../RunTest
- name: Test (JIT test program)
run: |
cd build
./pcre2_jit_test
- name: Test (pcre2grep test script)
run: |
cd build
../RunGrepTest
- name: Test (pcre2posix program)
run: |
cd build
./pcre2posix_test -v
windows:
name: 32bit Windows
runs-on: windows-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Configure
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DCMAKE_IGNORE_PREFIX_PATH=C:/Strawberry/c -B build -A Win32
- name: Build
run: cmake --build build
run: cd build && make -j3
- name: Test
run: cd build && ctest -j3 --output-on-failure
- name: Install
run: |
cd build\Debug
..\..\RunTest.bat
./pcre2posix_test -v
cd build
cmake --install . --prefix install-dir
../maint/RunManifestTest install-dir ../maint/manifest-cmakeinstall-macos
windows:
name: Windows
runs-on: windows-latest
strategy:
fail-fast: false
matrix:
arch: ["Win32", "x64"]
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Configure
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -B build -A ${{ matrix.arch }}
- name: Build
run: cmake --build build --config Release
- name: Test
run: cd build && ctest -C Release -j3 --output-on-failure
- name: Install
run: |
cd build
cmake --install . --config Release --prefix install-dir
../maint/RunManifestTest.ps1 install-dir ../maint/manifest-cmakeinstall-windows
freebsd:
name: FreeBSD
runs-on: ubuntu-latest
if: github.event_name != 'pull_request'
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Prepare
run: ./autogen.sh
- name: Build & test
uses: vmactions/freebsd-vm@debf37ca7b7fa40e19c542ef7ba30d6054a706a4 # v1.1.5
with:
usesh: true
run: |
set -e
./configure --enable-jit --enable-pcre2-16 --enable-pcre2-32
make -j3 CPPFLAGS='-Wall -Wextra -Werror'
make check
make install "DESTDIR=`pwd`/install-dir"
maint/RunManifestTest install-dir maint/manifest-makeinstall-freebsd
solaris:
name: Solaris
runs-on: ubuntu-latest
if: github.event_name != 'pull_request'
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Transfer Oracle Studio certificates
env:
PKG_ORACLE_COM_CERTIFICATE_PEM: ${{ secrets.PKG_ORACLE_COM_CERTIFICATE_PEM }}
PKG_ORACLE_COM_KEY_PEM: ${{ secrets.PKG_ORACLE_COM_KEY_PEM }}
run: |
printenv PKG_ORACLE_COM_CERTIFICATE_PEM > pkg.oracle.com.certificate.pem
printenv PKG_ORACLE_COM_KEY_PEM > pkg.oracle.com.key.pem
- name: Prepare
run: ./autogen.sh
- name: Build & test
uses: vmactions/solaris-vm@a89b9438868c70db27e41625f0a5de6ff5e90809 # v1.1.0
with:
usesh: true
# Seriously! Solaris is the only OS to actually ship without a C
# compiler, and not even to provide a simple download to get one!
# You have to actually register with Oracle to get an X.509
# certificate before you can even download their compiler. Whatever.
prepare: |
cp "$GITHUB_WORKSPACE/pkg.oracle.com.key.pem" /root/pkg.oracle.com.key.pem
cp "$GITHUB_WORKSPACE/pkg.oracle.com.certificate.pem" /root/pkg.oracle.com.certificate.pem
sudo pkg set-publisher \
-k /root/pkg.oracle.com.key.pem \
-c /root/pkg.oracle.com.certificate.pem \
-G "*" -g https://pkg.oracle.com/solarisstudio/release solarisstudio
pkg install developer/build/make system/header
pkg install --accept developerstudio-126/cc
run: |
set -e
PATH=/opt/developerstudio12.6/bin:"$PATH"
export PATH
CC=cc
export CC
./configure --enable-jit --enable-pcre2-16 --enable-pcre2-32
make CPPFLAGS='-Wall -Wextra -Werror'
make check
make install "DESTDIR=`pwd`/install-dir"
maint/RunManifestTest install-dir maint/manifest-makeinstall-linux
distcheck:
name: Build & verify distribution
runs-on: ubuntu-24.04 # TODO: Update to ubuntu-latest when that switches to 24.04
permissions:
id-token: write # Needed to make calls to the Sigstore service
attestations: write # Needed to write the attestation to GitHub's database
contents: read
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Prepare
run: |
./autogen.sh
# Workaround for incorrect filesystem permissions on /usr/share/aclocal, which
# causes the m4 macros to be copied with incorrect permissions.
chmod u=rw,go=r m4/*.m4
- name: Configure
run: ./configure
- name: Distcheck
run: make distcheck -j3
- name: Manifest
run: |
mkdir tarball-dir
tar -C tarball-dir -xzf pcre2-*.tar.gz
# Budge the directory, so we don't bake the version number into the
# `manifest-tarball` file:
mv tarball-dir/pcre2-* tarball-dir/pcre2-SNAPSHOT
maint/RunManifestTest tarball-dir maint/manifest-tarball
- name: Upload to GitHub artifacts
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
with:
name: "Distribution release"
path: |
pcre2-*.tar.bz2
pcre2-*.tar.gz
pcre2-*.zip
if-no-files-found: error
- name: Attest
uses: actions/attest-build-provenance@7668571508540a607bdfd90a87a560489fe372eb # v2.1.0
if: |
github.event_name != 'pull_request' &&
(startsWith(github.ref, 'refs/heads/release/') ||
startsWith(github.ref, 'refs/tags/pcre2-'))
with:
subject-path: 'pcre2-*.tar.bz2, pcre2-*.tar.gz, pcre2-*.zip'
coverage:
name: Code coverage
runs-on: ubuntu-latest
steps:
- name: Setup
run: |
sudo apt-get -qq update
sudo apt-get -qq install zlib1g-dev libbz2-dev libedit-dev
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Configure
run: CC="clang -fprofile-instr-generate -fcoverage-mapping" cmake -DCMAKE_BUILD_TYPE=Debug -DPCRE2_DEBUG=OFF -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2_SUPPORT_LIBZ=ON -DPCRE2_SUPPORT_LIBBZ2=ON -DPCRE2_SUPPORT_LIBEDIT=ON -DPCRE2_SUPPORT_LIBREADLINE=OFF -B build
- name: Build
run: cd build && make -j3
- name: Test
run: cd build && LLVM_PROFILE_FILE="coverage-%m.profraw" ctest -j1 --output-on-failure
- name: Report
run: |
LLVM_VER=`clang --version | head -n1 | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+' | cut -d. -f1`
echo "Using LLVM version $LLVM_VER"
# Merge the profiles gathered
cd build
llvm-profdata-$LLVM_VER merge -sparse coverage-*.profraw -o coverage.profdata
# Output HTML, for archiving and browsing later
llvm-cov-$LLVM_VER show \
-format=html -output-dir=coverage-report -show-line-counts-or-regions -show-branches=percent \
-instr-profile=coverage.profdata \
./pcre2test -object ./pcre2grep -object ./pcre2posix_test -object ./pcre2_jit_test \
../src/ ./
# Output LCOV-compatible output, for downstream tools
llvm-cov-$LLVM_VER export \
-format=lcov \
-instr-profile=coverage.profdata \
./pcre2test -object ./pcre2grep -object ./pcre2posix_test -object ./pcre2_jit_test \
../src/ ./ \
> ./coverage-lcov.info
# Output text summary to build log
echo '```' > "$GITHUB_STEP_SUMMARY"
llvm-cov-$LLVM_VER report \
-instr-profile=coverage.profdata \
./pcre2test -object ./pcre2grep -object ./pcre2posix_test -object ./pcre2_jit_test \
../src/ ./ \
>> "$GITHUB_STEP_SUMMARY"
echo '```' >> "$GITHUB_STEP_SUMMARY"
- name: Upload report to GitHub artifacts
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
with:
name: "Coverage report"
path: './build/coverage-report'
if-no-files-found: error
- name: Upload report to Codecov
uses: codecov/codecov-action@7f8b4b4bde536c465e797be725718b88c5d95e0e # v5.1.1
with:
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: true
disable_search: true
files: ./build/coverage-lcov.info
+11 -4
View File
@@ -1,23 +1,30 @@
name: CIFuzz
on: [pull_request]
on:
workflow_dispatch:
pull_request:
branches: [ master ]
permissions:
contents: read
jobs:
Fuzzing:
runs-on: ubuntu-latest
steps:
- name: Build Fuzzers
id: build
uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@3d38acd485bc848e33396e7523b9a4f2aff9027e # master
with:
oss-fuzz-project-name: 'pcre2'
dry-run: false
- name: Run Fuzzers
uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@3d38acd485bc848e33396e7523b9a4f2aff9027e # master
with:
oss-fuzz-project-name: 'pcre2'
fuzz-seconds: 300
dry-run: false
- name: Upload Crash
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
if: failure() && steps.build.outcome == 'success'
with:
name: artifacts
+71
View File
@@ -0,0 +1,71 @@
name: Clang Static Analyzer
on:
workflow_dispatch:
push:
branches: [ master, "release/**" ]
pull_request:
branches: [ master ]
permissions:
contents: read
jobs:
Analyze:
runs-on: ubuntu-latest
permissions:
# Needed to upload the results to code-scanning dashboard.
security-events: write
contents: read
env:
# The @microsoft/sarif-multitool tool actually uses DotnetCore, which in
# turn aborts when it finds that GitHub's CI machine doesn't have ICU.
# Just turn off localisation. A future version of the ubuntu-24.04 or
# ubuntu-latest runners might not need this workaround.
DOTNET_SYSTEM_GLOBALIZATION_INVARIANT: 1
steps:
- name: Setup
run: |
sudo apt-get -qq update
sudo apt-get -qq install ninja-build clang-tools
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Configure
run: |
mkdir build
cd build
scan-build cmake -G Ninja -DPCRE2_SUPPORT_JIT=ON -DCMAKE_BUILD_TYPE=Debug ..
- name: Build
run: |
# Inefficiently run clang scan twice; once to generate HTML, and secondly
# to generate SARIF files. Ideally we would have some way to scan once and
# generate one of those outputs from the other, but I don't know a good way
# to do that.
cd build
scan-build -o clang-report/ ninja
ninja clean
scan-build -o clang-sarif -sarif ninja
# Work around issue in GitHub's SARIF ingestion - merge all SARIF files into one
npx -y @microsoft/sarif-multitool merge clang-sarif/*/*.sarif --output-file=clang.sarif
# Upload the browsable HTML report as an artifact.
- name: Upload report
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
with:
name: "Clang Static Analyzer report"
path: './build/clang-report'
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6
with:
sarif_file: build/clang.sarif
category: clang-analyzer
+13 -9
View File
@@ -13,39 +13,43 @@ name: "CodeQL"
on:
push:
branches: [ master ]
branches: [ master, "release/**" ]
pull_request:
# The branches below must be a subset of the branches above
branches: [ master ]
schedule:
- cron: '27 6 * * 4'
# Declare default permissions as read only.
permissions: read-all
permissions:
contents: read
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
# Needed to upload the results to code-scanning dashboard.
security-events: write
actions: read
contents: read
security-events: write
strategy:
fail-fast: false
matrix:
language: [ 'cpp', 'python' ]
language: [ 'cpp' ]
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
# Learn more about CodeQL language support at https://git.io/codeql-language-support
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
uses: github/codeql-action/init@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -56,7 +60,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v2
uses: github/codeql-action/autobuild@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6
# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -70,4 +74,4 @@ jobs:
# make release
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
uses: github/codeql-action/analyze@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6
+312 -15
View File
@@ -1,24 +1,36 @@
name: Dev
on:
workflow_dispatch:
push:
branches: [ master, "release/**" ]
pull_request:
branches:
- master
branches: [ master ]
permissions:
contents: read
jobs:
canary:
name: gcc
# Tests with: Debug & assertions; link-size=4; libedit
name: GCC -O0
runs-on: ubuntu-latest
steps:
- name: Setup
run: |
sudo apt-get -qq update
sudo apt-get -qq install libedit-dev
- name: Checkout
uses: actions/checkout@v4
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Prepare
run: ./autogen.sh
- name: Configure
run: ./configure CC='gcc -O0 -fsanitize=undefined,address -fsanitize-undefined-trap-on-error' CPPFLAGS='-Wall -Wextra -Werror -Wno-error=unused-but-set-parameter' --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-debug --with-link-size=4
run: ./configure CC='gcc -fsanitize=undefined,address -fsanitize-undefined-trap-on-error' CFLAGS='-O0 -Wall -Wextra -Werror -Wno-error=unused-but-set-parameter' --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-debug --enable-pcre2test-libedit --with-link-size=4
- name: Build
run: make -j3
@@ -36,23 +48,32 @@ jobs:
run: ./pcre2posix_test -v
dragon:
name: clang
# Tests with: clang AB/UB; link-size=3
name: Clang
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
opt: ["-O0", "-O2"]
steps:
- name: Checkout
uses: actions/checkout@v4
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Prepare
run: ./autogen.sh
- name: Configure
run: ./configure CC='clang -fsanitize=undefined,address,integer -fno-sanitize=unsigned-integer-overflow' CPPFLAGS='-Wall -Wextra -Werror -Wno-error=unused-but-set-parameter -Wno-error=deprecated-declarations -Wno-error=incompatible-library-redeclaration' --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-debug --with-link-size=3
run: ./configure CC='clang -fsanitize=undefined,address,integer -fno-sanitize-recover=undefined,integer -fno-sanitize=unsigned-integer-overflow,unsigned-shift-base,function' CFLAGS='${{ matrix.opt }} -Wall -Wextra -Werror -Wno-error=unused-but-set-parameter -Wno-error=deprecated-declarations -Wno-error=incompatible-library-redeclaration -Wno-error=incompatible-pointer-types-discards-qualifiers' --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-debug --with-link-size=3
- name: Build
run: make -j3
- name: Test (main test script)
run: ./RunTest
run: |
ulimit -S -s 49152 # Raise stack limit; ASAN with -O0 is very stack-hungry
./RunTest
- name: Test (JIT test program)
run: ./pcre2_jit_test
@@ -63,19 +84,295 @@ jobs:
- name: Test (pcre2posix program)
run: ./pcre2posix_test -v
greatawk:
# Tests with: GCC, -O3, oldest supported Ubuntu (in non-extended support)
name: GCC -O3
runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Configure
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON -DPCRE2_DEBUG=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_BUILD_TYPE=Release -B build
- name: Build
run: cd build && make -j3
- name: Test
run: cd build && ctest -j3 --output-on-failure
- name: Install
run: |
cd build
cmake --install . --prefix install-dir
../maint/RunManifestTest install-dir ../maint/manifest-cmakeinstall-linux
dodo:
# Tests with: Autoconf on oldest supported Ubuntu (in non-extended support)
name: GCC -Os, old Autotools
runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Prepare
run: ./autogen.sh
- name: Configure
run: ./configure CFLAGS='-Os -Wall -Wextra -Werror -Wno-error=unused-but-set-parameter' --enable-jit --enable-pcre2-16 --enable-pcre2-32 --enable-debug
- name: Build
run: make -j3
- name: Test
run: make check
- name: Install
run: |
make install "DESTDIR=`pwd`/install-dir"
maint/RunManifestTest install-dir maint/manifest-makeinstall-linux
wasp:
# Tests with: French locale; oldest supported CMake; no JIT; -Os; libreadline
name: GCC -Os, CMake+ninja, no JIT
runs-on: ubuntu-latest
env:
CMAKE_VER: "3.15.7"
steps:
- name: Setup
run: |
sudo apt-get -qq update
sudo apt-get -qq install language-pack-fr ninja-build libreadline-dev
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Cache CMake
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
key: cmake-${{ env.CMAKE_VER }}-Linux-x86_64
path: cmake-${{ env.CMAKE_VER }}-Linux-x86_64.tar.gz
- name: Install CMake
run: |
[ -f cmake-${CMAKE_VER}-Linux-x86_64.tar.gz ] || curl -L -S -O "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VER}/cmake-${CMAKE_VER}-Linux-x86_64.tar.gz"
tar -xz -f cmake-${CMAKE_VER}-Linux-x86_64.tar.gz
realpath "cmake-${CMAKE_VER}-Linux-x86_64/bin" >> "$GITHUB_PATH"
- name: Configure
run: |
cmake --version | grep "version ${CMAKE_VER}" || (echo "CMake version mismatch" && exit 1)
CC='clang' CFLAGS='-fsanitize=undefined,address,integer -fno-sanitize-recover=undefined,integer -fno-sanitize=unsigned-shift-base,function -pedantic -Wall -Wextra -Wpedantic -Wdeclaration-after-statement -Wshadow -Wno-overlength-strings -Werror -Wno-error=incompatible-pointer-types-discards-qualifiers' cmake -G Ninja -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON -DPCRE2_DEBUG=ON -DPCRE2_SUPPORT_LIBREADLINE=ON -DCMAKE_BUILD_TYPE=MinSizeRel -B build
- name: Build
run: ninja -C build
- name: Test
run: cd build && ctest -j3 --output-on-failure
- name: Install
run: |
cd build
cmake --install . --prefix install-dir
../maint/RunManifestTest install-dir ../maint/manifest-cmakeinstall-linux
bat:
# Tests with: MSVC 32-bit, and a variety of CMake options
name: Windows (Win32)
runs-on: windows-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Configure
run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2GREP_SUPPORT_CALLOUT_FORK=OFF -DPCRE2_DEBUG=ON -DPCRE2_NEWLINE=ANYCRLF -DPCRE2_STATIC_PIC=ON -DPCRE2_STATIC_RUNTIME=ON -DPCRE2_SUPPORT_BSR_ANYCRLF=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -B build -A Win32
- name: Build
run: cmake --build build --config RelWithDebInfo
- name: Test
run: cd build && ctest -C RelWithDebInfo -j3 --output-on-failure
pterodactyl:
# Tests with: MSVC 64-bit, Debug, shared libraries
name: Windows (x64)
runs-on: windows-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Configure
run: cmake -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2_DEBUG=ON -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=OFF -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -B build -A x64
- name: Build
run: cmake --build build --config Debug
- name: Test
run: cd build && ctest -C Debug -j3 --output-on-failure
bigbird:
# Job to execute ManyConfigTests
name: manyconfig
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Prepare
- name: Setup
run: |
sudo apt-get update
sudo apt-get install -y valgrind
sudo apt-get -qq update
sudo apt-get -qq install -y valgrind
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Run
run: |
./autogen.sh
./maint/ManyConfigTests
camel:
# Job to execute RunPerlTest
name: perl
runs-on: ubuntu-latest
container: perl:devel
steps:
- name: Setup
run: |
apt-get -qq update
apt-get -qq install cmake ninja-build
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: yes
- name: Configure
run: cmake -G Ninja -B build -DPCRE2_BUILD_PCRE2_8=OFF -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2_NEVER_BACKSLASH_C=ON -DPCRE2_DEBUG=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo
- name: Build
run: ninja -C build
- name: Test
run: |
cd build
ctest -j3 --output-on-failure
cd ..
perl -v
maint/RunPerlTest
chaffinch:
# Job to verify that the CMake "unity" build (single-file / jumbo build) passes.
# If this fails, it's usually because two different files define some file-static
# functions or macros which collide.
name: CMake unity build
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Configure
run: cmake -DCMAKE_UNITY_BUILD=ON -DCMAKE_UNITY_BUILD_BATCH_SIZE=0 -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -DPCRE2_DEBUG=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_BUILD_TYPE=Release -B build
- name: Build
run: cd build && make -j3
- name: Test
run: cd build && ctest -j3 --output-on-failure
zebrilus:
# Tests with: Zig compiler
name: Zig
runs-on: ubuntu-latest
if: github.event_name != 'pull_request'
steps:
- name: Setup
run: |
sudo snap install zig --classic --beta
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Build
run: zig build
- name: Test
run: |
# Zig does something weird with the stack - it uses more space than the
# equivalent plain C program.
ulimit -S -s 16384
srcdir=`pwd` pcre2test=`pwd`/zig-out/bin/pcre2test ./RunTest
bazel:
# Tests with: Bazel build system
name: Bazel
strategy:
fail-fast: false
matrix:
os: ["ubuntu-latest", "windows-latest"]
runs-on: ${{ matrix.os }}
if: github.event_name != 'pull_request'
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
- name: Build
run: bazelisk build //... --enable_runfiles --incompatible_strict_action_env
- name: Test
run: bazelisk test //... --enable_runfiles --incompatible_strict_action_env --test_output=all
heron:
# Job to verify that the tasks performed by PrepareRelease have been done. It is
# the committer's responsibility (currently) to run PrepareRelease themselves when
# making a PR, so that everything is kept in-sync.
name: Check autogenerated file freshness
runs-on: ubuntu-24.04 # TODO: Update to ubuntu-latest when that switches to 24.04
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: false
fetch-depth: 0
fetch-tags: false
- name: PrepareRelease
run: maint/PrepareRelease
- name: 'Rebuild *.h.generic'
run: |
./autogen.sh && ./configure
rm -f src/*.generic
make src/config.h.generic src/pcre2.h.generic
# Workaround for incorrect filesystem permissions on /usr/share/aclocal, which
# causes the m4 macros to be copied with incorrect permissions.
chmod u=rw,go=r m4/*.m4
- name: Working directory clean
run: |
if [ -n "`git status --porcelain`" ] ; then
(
echo "Dirty working tree! Affected files:"
git status --porcelain || true
echo ""
echo "Diff:"
git diff || true
) >&2
exit 1
fi
+11 -8
View File
@@ -1,5 +1,6 @@
name: Scorecards supply-chain security
on:
workflow_dispatch:
# Only the default branch is supported.
branch_protection_rule:
schedule:
@@ -7,33 +8,34 @@ on:
push:
branches: [ master ]
# Declare default permissions as read only.
permissions: read-all
jobs:
analysis:
name: Scorecards analysis
runs-on: ubuntu-latest
permissions:
# Needed to upload the results to code-scanning dashboard.
security-events: write
# Needed to publish the results to Scorecard's service.
id-token: write
actions: read
contents: read
steps:
- name: "Checkout code"
uses: actions/checkout@v4
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true
persist-credentials: false
- name: "Run analysis"
uses: ossf/scorecard-action@3e15ea8318eee9b333819ec77a36aca8d39df13e # tag=v1.1.1
uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # tag=v2.4.0
with:
results_file: results.sarif
results_format: sarif
# Read-only PAT token. To create it,
# follow the steps in https://github.com/ossf/scorecard-action#pat-token-creation.
repo_token: ${{ secrets.SCORECARD_READ_TOKEN }}
# repo_token: ${{ secrets.GITHUB_TOKEN }}
# Publish the results to enable scorecard badges. For more details, see
# https://github.com/ossf/scorecard-action#publishing-results.
# For private repositories, `publish_results` will automatically be set to `false`,
@@ -42,7 +44,7 @@ jobs:
# Upload the results as artifacts (optional).
- name: "Upload artifact"
uses: actions/upload-artifact@82c141cc518b40d92cc801eee768e7aafc9c2fa2 # v2.3.1
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
with:
name: SARIF file
path: results.sarif
@@ -50,6 +52,7 @@ jobs:
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@5f532563584d71fdef14ee64d17bafb34f751ce5 # v1.0.26
uses: github/codeql-action/upload-sarif@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6
with:
sarif_file: results.sarif
category: ossf-scorecard
+6 -5
View File
@@ -1,6 +1,7 @@
# Public .gitignore file for PCRE2
build/
build-*/
*.a
*.gcda
@@ -17,7 +18,6 @@ __pycache__
.deps
.libs
INSTALL
Makefile
Makefile.in
RunGrepTest.log
@@ -74,6 +74,7 @@ testtemp1grep
testtemp2
testtemp2grep
testtry
testtry2
testtrygrep
testSinput
testbtables
@@ -86,19 +87,19 @@ m4/ltsugar.m4
m4/ltversion.m4
m4/lt~obsolete.m4
maint/ucptest
maint/utf8
src/.deps
src/.dirstamp
src/config.h
src/config.h.in
src/pcre2.h
src/pcre2_chartables.c
src/stamp-h1
/bazel-*
*.bazel.lock
zig-out/
zig-cache/
.zig-cache/
# End
# End
+3
View File
@@ -0,0 +1,3 @@
[submodule "deps/sljit"]
path = deps/sljit
url = https://github.com/zherczeg/sljit.git
-36
View File
@@ -1,36 +0,0 @@
THE MAIN PCRE2 LIBRARY CODE
---------------------------
Written by: Philip Hazel
Email local part: Philip.Hazel
Email domain: gmail.com
Retired from University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2024 University of Cambridge
All rights reserved
PCRE2 JUST-IN-TIME COMPILATION SUPPORT
--------------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2010-2024 Zoltan Herczeg
All rights reserved.
STACK-LESS JUST-IN-TIME COMPILER
--------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2009-2024 Zoltan Herczeg
All rights reserved.
####
+200
View File
@@ -0,0 +1,200 @@
PCRE2 Authorship and Contributors
=================================
COPYRIGHT
---------
Please see the file [LICENCE](./LICENCE.md) in the PCRE2 distribution for
copyright details.
MAINTAINERS
-----------
The PCRE and PCRE2 libraries were authored and maintained by Philip Hazel.
Since 2024, the contributors with administrator access to the project are now
Nicholas Wilson and Zoltán Herczeg. See the file [SECURITY](./SECURITY.md) for
GPG keys.
Both administrators are volunteers acting in a personal capacity.
<table>
<thead>
<tr>
<th>Name</th>
<th>Role</th>
</tr>
</thead>
<tbody>
<tr>
<td>
Nicholas Wilson<br/>
`nicholas@nicholaswilson.me.uk`<br/>
Currently of Microsoft Research Cambridge, UK
</td>
<td>
* General project administration & maintenance
* Release management
* Code maintenance
</td>
</tr>
<tr>
<td>
Zoltán Herczeg<br/>
`hzmester@freemail.hu`<br/>
Currently of the University of Szeged, Hungary
</td>
<td>
* Code maintenance
* Ownership of `sljit` and PCRE2's JIT
</td>
</tr>
</tbody>
</table>
CONTRIBUTORS
------------
Many others have participated and contributed to PCRE2 over its history.
The maintainers are grateful for all contributions and participation over the
years. We apologise for any names we have forgotten.
We are especially grateful to Philip Hazel, creator of PCRE and PCRE2, and
maintainer from 1997 to 2024.
All names listed alphabetically.
### Contributors to PCRE2
This list includes names up until the PCRE2 10.44 release. New names will be
added from the Git history on each release.
Scott Bell
Carlo Marcelo Arenas Belón
Edward Betts
Jan-Willem Blokland
Ross Burton
Dmitry Cherniachenko
Alexey Chupahin
Jessica Clarke
Alejandro Colomar
Jeremie Courreges-Anglas
Addison Crump
Alex Dowad
Daniel Engberg
Daniel Richard G
David Gaussmann
Andrey Gorbachev
Jordan Griege
Jason Hood
Bumsu Hyeon
Roy Ivy
Martin Joerg
Guillem Jover
Ralf Junker
Ayesh Karunaratne
Michael Kaufmann
Yunho Kim
Joshua Kinard
David Korczynski
Uwe Korn
Jonas Kvinge
Kristian Larsson
Kai Lu
Behzod Mansurov
B. Scott Michel
Nathan Moinvaziri
Mike Munday
Marc Mutz
Fabio Pagani
Christian Persch
Tristan Ross
William A Rowe Jr
David Seifert
Yaakov Selkowitz
Rich Siegel
Karl Skomski
Maciej Sroczyński
Wolfgang Stöggl
Thomas Tempelmann
Greg Thain
Lucas Trzesniewski
Theodore Tsirpanis
Matthew Vernon
Rémi Verschelde
Thomas Voss
Ezekiel Warren
Carl Weaver
Chris Wilson
Amin Yahyaabadi
Joe Zhang
### Contributors to PCRE1
These people contributed either by sending patches or reporting serious issues.
Irfan Adilovic
Alexander Barkov
Daniel Bergström
David Burgess
Ross Burton
David Byron
Fred Cox
Christian Ehrlicher
Tom Fortmann
Lionel Fourquaux
Mike Frysinger
Daniel Richard G
Dair Gran
"Graycode" (Red Hat Product Security)
Viktor Griph
Wen Guanxing
Robin Houston
Martin Jerabek
Peter Kankowski
Stephen Kelly
Yunho Kim
Joshua Kinard
Carsten Klein
Evgeny Kotkov
Ronald Landheer-Cieslak
Alan Lehotsky
Dmitry V. Levin
Nuno Lopes
Kai Lu
Giuseppe Maxia
Dan Mooney
Marc Mutz
Markus Oberhumer
Sheri Pierce
Petr Pisar
Ari Pollak
Bob Rossi
Ruiger Rill
Michael Shigorin
Rich Siegel
Craig Silverstein (C++ wrapper)
Karl Skomski
Paul Sokolovsky
Stan Switzer
Ian Taylor
Mark Tetrode
Jeff Trawick
Steven Van Ingelgem
Lawrence Velazquez
Jiong Wang
Stefan Weber
Chris Wilson
Thanks go to Jeffrey Friedl for testing and debugging assistance.
+105 -8
View File
@@ -1,5 +1,6 @@
load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
load("@bazel_skylib//rules:native_binary.bzl", "native_test")
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
copy_file(
name = "config_h_generic",
@@ -19,7 +20,7 @@ copy_file(
out = "src/pcre2_chartables.c",
)
# Removed src/pcre2_ucptables.c below because it is #included in
# Removed src/pcre2_ucptables.c below because it is #included in
# src/pcre2_tables.c. Also fixed typo: ckdint should be chkdint.
# PH, 22-March-2023.
cc_library(
@@ -28,6 +29,7 @@ cc_library(
"src/pcre2_auto_possess.c",
"src/pcre2_chkdint.c",
"src/pcre2_compile.c",
"src/pcre2_compile_class.c",
"src/pcre2_config.c",
"src/pcre2_context.c",
"src/pcre2_convert.c",
@@ -35,6 +37,7 @@ cc_library(
"src/pcre2_error.c",
"src/pcre2_extuni.c",
"src/pcre2_find_bracket.c",
"src/pcre2_jit_compile.c",
"src/pcre2_maketables.c",
"src/pcre2_match.c",
"src/pcre2_match_data.c",
@@ -52,24 +55,118 @@ cc_library(
"src/pcre2_valid_utf.c",
"src/pcre2_xclass.c",
":pcre2_chartables_c",
],
hdrs = glob(["src/*.h"]) + [
"src/pcre2_compile.h",
"src/pcre2_internal.h",
"src/pcre2_intmodedep.h",
"src/pcre2_ucp.h",
"src/pcre2_util.h",
":config_h_generic",
],
textual_hdrs = [
"src/pcre2_jit_match.c",
"src/pcre2_jit_misc.c",
"src/pcre2_ucptables.c",
],
hdrs = [
":pcre2_h_generic",
],
defines = [
local_defines = [
"HAVE_CONFIG_H",
"HAVE_MEMMOVE",
"PCRE2_CODE_UNIT_WIDTH=8",
"PCRE2_STATIC",
"SUPPORT_UNICODE",
],
includes = ["src"],
strip_include_prefix = "src",
visibility = ["//visibility:public"],
)
cc_binary(
name = "pcre2demo",
srcs = ["src/pcre2demo.c"],
# Library target compiled from src/pcre2posix.c (plus the generated config
# header) that exposes src/pcre2posix.h and links against :pcre2.
cc_library(
name = "pcre2-posix",
srcs = [
"src/pcre2posix.c",
":config_h_generic",
],
hdrs = [
"src/pcre2posix.h",
],
# local_defines are applied only when compiling this target; they are not
# propagated to targets that depend on it.
local_defines = [
"HAVE_CONFIG_H",
"HAVE_MEMMOVE",
"PCRE2_CODE_UNIT_WIDTH=8",
"PCRE2_STATIC",
"SUPPORT_UNICODE",
],
includes = ["src"],
# Lets dependents include the header without the leading "src/" component.
strip_include_prefix = "src",
visibility = ["//visibility:public"],
deps = [":pcre2"],
)
# Totally weird issue in Bazel. It won't let you #include any files unless they
# are declared to the build system. OK, fair enough. But - for a cc_binary it
# uses the file extension to determine whether it's a header or a compilation
# unit. But... we have several .c files which are #included, rather than treated
# as a compilation unit.
#
# For cc_library() above, we can overcome this with textual_hdrs. But that
# doesn't work for cc_binary(). Here's our workaround.
#
# https://github.com/bazelbuild/bazel/issues/680
# Header-only helper target: lists .c files that are #included as text (not
# compiled on their own) so that the pcre2test binary may depend on them.
cc_library(
name = "pcre2test_dotc_headers",
# Declared as hdrs, not srcs, so Bazel does not treat them as compilation units.
hdrs = [
"src/pcre2_chkdint.c",
"src/pcre2_printint.c",
"src/pcre2_tables.c",
"src/pcre2_ucd.c",
"src/pcre2_valid_utf.c",
],
strip_include_prefix = "src",
# Private: only targets in this package may depend on this workaround target.
visibility = ["//visibility:private"],
)
# The pcre2test executable, built from src/pcre2test.c and linked against the
# :pcre2 and :pcre2-posix libraries.
cc_binary(
name = "pcre2test",
srcs = [
"src/pcre2test.c",
":config_h_generic",
],
# HAVE_UNISTD_H is added on every platform except Windows.
local_defines = [
"HAVE_CONFIG_H",
"HAVE_MEMMOVE",
"HAVE_STRERROR",
"PCRE2_STATIC",
"SUPPORT_UNICODE",
"SUPPORT_PCRE2_8",
] + select({
"@platforms//os:windows": [],
"//conditions:default": ["HAVE_UNISTD_H"],
}),
# Windows-only linker flag; presumably raises the stack size for the test
# program (NOTE(review): confirm against the linker documentation).
linkopts = select({
"@platforms//os:windows": ["-STACK:2500000"],
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
# :pcre2test_dotc_headers supplies the .c files that are #included as text.
deps = [":pcre2test_dotc_headers", ":pcre2", ":pcre2-posix"],
)
# Groups the files matched by testdata/* so they can be passed as runtime data
# to the test target.
filegroup(
name = "testdata",
srcs = glob(["testdata/*"]),
)
# Wraps the existing test driver script as a Bazel test: RunTest.bat on
# Windows, RunTest on every other platform.
native_test(
name = "pcre2_test",
src = select({
"@platforms//os:windows": "RunTest.bat",
"//conditions:default": "RunTest",
}),
# The output keeps the same file name as the selected source script.
out = select({
"@platforms//os:windows": "RunTest.bat",
"//conditions:default": "RunTest",
}),
# The script needs the pcre2test binary and the test data files at run time.
data = [":pcre2test", ":testdata"],
size = "small",
)
+950 -780
View File
File diff suppressed because it is too large Load Diff
+204
View File
@@ -4,6 +4,210 @@ Change Log for PCRE2
Before the move to GitHub, this was the only record of changes to PCRE2. Now
there is also the log of commit messages.
Internal changes which are not visible to clients of the library are mostly not
listed here.
Version 10.46 27-August-2025
----------------------------
1. (#771) (CVE-2025-58050) Security fix to prevent a read-past-the-end memory
error, of arbitrary length. An attacker-controlled regex pattern is required,
and it cannot be triggered by providing crafted subject (match) text. The
(*ACCEPT) and (*scs:) pattern features must be used together.
Release 10.44 and earlier are not affected.
This could have implications of denial-of-service or information disclosure,
and could potentially be used to escalate other vulnerabilities in a system
(such as information disclosure being used to escalate the severity of an
unrelated bug in another system).
Version 10.45 05-February-2025
------------------------------
1. (#418) Change 6 of 10.44 broke 32-bit tests because pcre2test's reporting of
memory size was changed to the entire compiled data block, instead of just the
pattern and tables data, so as to align with the new length restriction.
Because the block's header contains pointers, this meant the pcre2test output
was different in 32-bit mode. A patch by Carlo reverts to the previous state
and makes sure that any limit set by pcre2_set_max_pattern_compiled_length()
also avoids the internal struct overhead.
2. (#416, #622) Updates to build.zig.
3. (#427, et al.) Various fixes to pacify static analyzers.
4. (#428) Add --posix-pattern-file to pcre2grep to allow processing of empty
patterns through the -f option, as well as patterns that end in space
characters, for compatibility with other grep tools.
5. (4fa5b8bd) Fix a bug in the fuzz support quantifier-limiting code. It ignores
strings of more than 5 digits because they are necessarily numbers greater than
65535, the largest legal quantifier. However, it wasn't ignoring non-significant
leading zeros.
6. (6d82f0cd) The case-independent processing of the letter-matching Unicode
properties Ll, Lt, and Lu have been changed to match Perl (which changed a while
ago). When caseless matching is in force, all three of these properties are now
treated as Lc (cased letter).
7. (#433) The pcre2_jit_compile() function was updated by the addition of a new
option PCRE2_JIT_TEST_ALLOC which, if called with a NULL first argument, tests
not only the availability of JIT, but also its ability to allocate executable
memory. Update pcre2test to use this support to extend the -C option.
8. (75b1025a) The code for parsing Unicode property descriptions for \p and \P
has been changed as follows:
. White space etc. before ^ in a negated value such as \p{ ^L } was not being
ignored.
. The code wouldn't have worked if PCRE2 was compiled for UTF-8 support
within an EBCDIC environment. Possibly nobody does this any more, but it
should now work.
. The documentation of the syntax of what can follow \p and \P has been
updated.
9. (1c24ba01) There was an error in the table of lengths for parsed items for
the OPTIONS item, but fortuitously it could never have actually bitten. While
fixing this, some other code that could never be obeyed was discovered and
removed.
10. (674b6640) Removed some incorrect optimization code from DFA matching that
has been there since PCRE1, but has just been found to cause a no match return
instead of a partial match in some cases. It involves partial matching when (*F)
is present so is unlikely to have actually affected anyone.
11. (b0f4ac17) Tidy the wording and formatting of some pcre2test error messages
concerned with bad modifiers. Also restrict single-letter modifier sequences to
the first item in a modifier list, as documented and always intended.
12. (1415565c) An iterator at the end of many assertions can always be
auto-possessified, but not at the end of variable-length lookbehinds. There was
a bug in the code that checks for such a lookbehind; it was looking only at the
first branch, which is wrong because some branches can be fixed length when
others are not, for example (?<=AB|CD?). Now all branches are checked for
variability.
13. (ead08288) Matching with pcre2_match() could give an incorrect result if a
variable-length lookbehind was used as the condition in a conditional group.
The condition could erroneously be treated as true if a branch matched but
overran the current position. This bug was in the interpreter only; matching
with JIT was correct.
14. (#443) Split out the sljit sub-project into a "Git submodule". Git users
must now run `git submodule init; git submodule update` after a Git checkout, or
the build will fail due to missing files in deps/sljit.
15. (#441) Add a new error code (PCRE2_ERROR_JIT_UNSUPPORTED) which is yielded
for unsupported jit features.
16. (#444) Fix bug in 'first code unit' and 'last code unit' optimization
combined with lookahead assertions.
17. (#445, #447, #449, #451, #452, #459, #563) Add a new feature called scan
substring. This feature is a new type of assertion which matches the content of
a capturing block to a sub-pattern.
18. (#450) Improvements to 'first code unit' / 'starting code units'
optimisation.
19. (#455) Many, many improvements to the JIT compiler.
20. Item 43 of 10.43 was incomplete because it addressed only \z and not \Z,
which was still misbehaving when matching fragments inside invalid UTF strings.
21. (d29e7290) Octal escapes of the form \045 or \111 were not being recognized
in substitution strings, and if encountered gave an error, though the \o{...}
form was recognized. This bug is now fixed.
22. (#463, #487) Fix 1 byte out-of-bounds read when parsing malformed limits
(e.g. LIMIT_HEAP)
23. Many improvements to test infrastructure. Many more platforms and
configurations are now run in Continuous Integration, and all the platforms now
run the full test suite, rather than a partial subset.
24. (#475) Implement title casing in substitution strings using Perl syntax.
25. (#478, #504) Disallow \x if not followed by { or a hex digit.
26. (#473) Implements Python-style backrefs in substitutions.
27. (#472) Fix error reporting for certain over-large octal escapes.
28. (#482) Fix parsing of named captures in replacement strings, allowing
non-ASCII capture names to be used.
29. (#477, #474, #488, #494, #496, #506, #508, #511, #518, #524, #540) Many
improvements to parsing and optimising of character classes.
30. (#483, #498) Add support for \g<n> and $<name> to replacement strings.
31. (#470) Add option flags PCRE2_EXTRA_NO_BS0 and PCRE2_EXTRA_PYTHON_OCTAL.
32. (#471) Add new API function pcre2_set_optimize() for controlling which
optimizations are enabled.
33. (#491) Adds $& $` $' and $_ to substitution replacements, as well as
interpreting \b and \v as characters.
34. (#499) Add option PCRE2_EXTRA_NEVER_CALLOUT to disable callouts.
35. (#503, #513) Update Unicode support to UCD 16.
36. (#512, #618, #638) Add new function pcre2_set_substitute_case_callout() to
allow clients to provide a custom callback with locale-aware case
transformation.
37. (#516) Fix case-insensitive matching of backreferences when using the
PCRE2_EXTRA_CASELESS_RESTRICT option.
38. (#519) In pcre2grep, add $& as an alias for $0
39. (c9bf8339, #534) Updated perltest.sh to enable locale setting.
40. (#521) Add support for Turkish I casefolding, using new options
PCRE2_EXTRA_TURKISH_CASING, and added pre-pattern flags (*TURKISH_CASING) and
(*CASELESS_RESTRICT).
41. (#523, #546, #547) Add support for UTS#18 compatible character classes,
using the new option PCRE2_ALT_EXTENDED_CLASS. This adds '[' as a metacharacter
within character classes and the operators '&&', '--' and '~~', allowing
subtractions and intersections of character classes to be easily expressed.
42. (#553, #586, #596, #597) Add support for Perl-style extended character
classes, using the syntax (?[...]). This also allows expressing subtractions and
intersections of character classes, but using a different syntax to UTS#18.
43. (#554) Fixed a bug in JIT affecting greedy bounded repeats. The upper limit
of repeats inside a repeated bracket might be incorrectly checked.
44. (#556) Fixed a bug in JIT affecting caseful matching of backreferences. When
utf is disabled, and dupnames is enabled, caseless matching was used even
if caseful matching was needed.
45. (f34fc0a3) Fixed a bug in pcre2grep reported by Alejandro Colomar
<alx@kernel.org> (GitHub issue #577). In certain cases, when lines of above and
below context were contiguous, a separator line was incorrectly being inserted.
46. (#594) Fix a small (one/two byte) out-of-bounds read on invalid UTF-8 input
in pcre2grep.
47. (#370) Fix the INSTALL_MSVC_PDB CMake flag.
48. (#366) Install cmake files in prefix/lib/cmake/pcre2 rather than
prefix/cmake. The new CMake flag PCRE2_INSTALL_CMAKEDIR allows customising this
location.
49. (#624, #626, #628, #632, #639, #641) Reduce code size of generated JIT code
for repeated character classes.
50. (#623) Update the Bazel build files.
Version 10.44 07-June-2024
--------------------------
+135 -64
View File
@@ -21,41 +21,27 @@ form, and were quite restricted in what they could do by comparison with Perl.
The interesting part about the algorithm was that the amount of space required
to hold the compiled form of an expression was known in advance. The code to
apply an expression did not operate by backtracking, as the original Henry
Spencer code and current PCRE2 and Perl code does, but instead checked all
possibilities simultaneously by keeping a list of current states and checking
all of them as it advanced through the subject string. In the terminology of
Jeffrey Friedl's book, it was a "DFA algorithm", though it was not a
traditional Finite State Machine (FSM). When the pattern was all used up, all
remaining states were possible matches, and the one matching the longest subset
of the subject string was chosen. This did not necessarily maximize the
individual wild portions of the pattern, as is expected in Unix and Perl-style
regular expressions.
Spencer code and the current PCRE2 pcre2_match() function and Perl code do, but
instead checked all possibilities simultaneously by keeping a list of current
states and checking all of them as it advanced through the subject string. In
the terminology of Jeffrey Friedl's book, it was a "DFA algorithm", though it
was not a traditional Finite State Machine (FSM). When the pattern was all used
up, all remaining states were possible matches, and the one matching the
longest subset of the subject string was chosen. This did not necessarily
maximize the individual wild portions of the pattern, as is expected in Unix
and Perl-style regular expressions.
Historical note 2
-----------------
By contrast, the code originally written by Henry Spencer (which was
subsequently heavily modified for Perl) compiles the expression twice: once in
a dummy mode in order to find out how much store will be needed, and then for
real. (The Perl version may or may not still do this; I'm talking about the
original library.) The execution function operates by backtracking and
maximizing (or, optionally, minimizing, in Perl) the amount of the subject that
matches individual wild portions of the pattern. This is an "NFA algorithm" in
Friedl's terminology.
OK, here's the real stuff
-------------------------
For the set of functions that formed the original PCRE1 library in 1997 (which
are unrelated to those mentioned above), I tried at first to invent an
algorithm that used an amount of store bounded by a multiple of the number of
characters in the pattern, to save on compiling time. However, because of the
greater complexity in Perl regular expressions, I couldn't do this, even though
the then current Perl 5.004 patterns were much simpler than those supported
nowadays. In any case, a first pass through the pattern is helpful for other
reasons.
The code originally written by Henry Spencer (which was subsequently heavily
modified for Perl) compiles the expression twice: once in a dummy mode in order
to find out how much store will be needed, and then for real. (The Perl version
may or may not still do this; I'm talking about the original library.) The
execution function operates by backtracking and maximizing (or, optionally,
minimizing, in Perl) the amount of the subject that matches individual wild
portions of the pattern. This is an "NFA algorithm" in Friedl's terminology.
Support for 16-bit and 32-bit data strings
@@ -98,8 +84,8 @@ were also present in the 7.0 release).
A side effect of this work was that the previous limit of 200 on the nesting
depth of parentheses was removed. However, there was a downside: compiling ran
more slowly than before (30% or more, depending on the pattern) because it now
did a full analysis of the pattern. My hope was that this would not be a big
issue, and in the event, nobody has commented on it.
did a full analysis of the pattern twice. My hope was that this would not be a
big issue, and in the event, nobody has commented on it.
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
(default 250, settable at build time) so as to put a limit on the amount of
@@ -119,7 +105,7 @@ memory.) The use of duplicate group numbers (the (?| facility) also caused
issues.
To get around these problems I adopted a new approach by adding a third pass
over the pattern (really a "pre-pass"), which did nothing other than identify
over the pattern (really a "pre-pass"), which does nothing other than identify
all the named subpatterns and their corresponding group numbers. This means
that the actual compile (both the memory-computing dummy run and the real
compile) has full knowledge of group names and numbers throughout. Several
@@ -154,17 +140,21 @@ assumption is made that there will be a callout for each pattern code unit
at the end. A default parsed pattern vector is defined on the system stack, to
minimize memory handling, but if this is not big enough, heap memory is used.
As before, the actual compiling function is run twice, the first time to
determine the amount of memory needed for the final compiled pattern. It
now processes the parsed pattern vector, not the pattern itself, although some
of the parsed items refer to strings in the pattern - for example, group
names. As escapes and comments have already been processed, the code is a bit
simpler than before.
If there are any lookbehinds in the pattern, the parsed pattern is scanned in
order to work out their lengths. Then the actual compiling function is run
twice, the first time to determine the amount of memory needed for the final
compiled pattern. The compiling function processes the parsed pattern vector,
not the pattern itself, although some of the parsed items refer to strings in
the pattern - for example, group names.
Most errors can be diagnosed during the parsing scan. For those that cannot
(for example, "lookbehind assertion is not fixed length"), the parsed code
contains offsets into the pattern so that the actual compiling code can
report where errors are.
Some post-processing of the compiled pattern takes place. If there are any
recursion or subroutine calls, there is a scan to convert them into offsets.
Then there are other scans to apply certain optimizations, some of which can be
disabled by setting appropriate options.
Most errors can be diagnosed during the parsing scan. For those that cannot,
the parsed code contains offsets into the pattern so that the actual compiling
code can report where the errors are.
The elements of the parsed pattern vector
@@ -209,6 +199,11 @@ META_RANGE_ESCAPED hyphen in class range with at least one escape
META_RANGE_LITERAL hyphen in class range defined literally
META_SKIP (*SKIP) - no argument (see below for with argument)
META_THEN (*THEN) - no argument (see below for with argument)
META_ECLASS_AND && (or &) in an extended character class
META_ECLASS_OR || (or |, +) in an extended character class
META_ECLASS_SUB -- (or -) in an extended character class
META_ECLASS_XOR ~~ (or ^) in an extended character class
META_ECLASS_NOT ! in an extended character class
The two RANGE values occur only in character classes. They are positioned
between two literals that define the start and end of the range. In an EBCDIC
@@ -240,11 +235,11 @@ occurrence is useful). On 64-bit systems this avoids using more than two parsed
pattern elements for items such as \3. The offset is used when an error occurs
because the reference is to a non-existent group.
META_ESCAPE has an ESC_xxx value as its data. For ESC_P and ESC_p, the next
element contains the 16-bit type and data property values, packed together.
ESC_g and ESC_k are used only for named references - numerical ones are turned
into META_RECURSE or META_BACKREF as appropriate. ESC_g and ESC_k are followed
by a length and an offset into the pattern to specify the name.
META_ESCAPE is used for escapes such as \d that match a character. It has an
ESC_xxx value as its data. For ESC_P and ESC_p, the next element contains the
16-bit type and data property values, packed together. Escape sequences such as
\g and \k are turned into other items like META_RECURSE or META_BACKREF and
their ESC_xxx values never occur with META_ESCAPE.
The following have one data item that follows in the next vector element:
@@ -268,15 +263,17 @@ META_COND_NAME (?(<name>) or (?('name') or (?(name)
META_COND_RNAME (?(R&name)
META_COND_RNUMBER (?(Rdigits)
META_RECURSE_BYNAME (?&name)
META_BACKREF_BYNAME \k'name'
META_BACKREF_BYNAME \k'name' or \k<name> or \k{name} or \g{name}
META_SCS_NAME (*scs:(<name>)...)
META_COND_RNUMBER is used for names that start with R and continue with digits,
because this is an ambiguous case. It could be a back reference to a group with
that name, or it could be a recursion test on a numbered group.
This one is followed by an offset, for use in error messages, then a number:
These are followed by an offset, for use in error messages, then a number:
META_COND_NUMBER (?([+-]digits)
META_SCS_NUMBER (*scs:(digits)...)
The following is followed just by an offset, for use in error messages:
@@ -286,7 +283,7 @@ The following are at first also followed just by an offset for use in error
messages. After the lengths of the branches of a lookbehind group have been
checked the error offset is no longer needed. The lower 16 bits of the main
word are now set to the maximum length of the first branch of the lookbehind
group, and the second word is set to the mimimum matching length for a
group, and the second word is set to the minimum matching length for a
variable-length lookbehind group, or to LOOKBEHIND_MAX for a group whose
branches are all of fixed length. These values are used when generating
OP_REVERSE or OP_VREVERSE for the first branch. The minimum value is also used
@@ -336,16 +333,28 @@ will use most of the time. If PCRE2 is compiled with just-in-time (JIT)
support, and studying a compiled pattern with JIT is successful, the JIT code
is run instead of the normal pcre2_match() code, but the result is the same.
The interpreter used to implement backtracking by means of recursive function
calls, but this gave rise to regular complaints when patterns with large search
trees ran out of stack. There was for a while a fudge that used the heap
instead, but this was inefficient and slow. In 2017 I re-wrote pcre2_match() as
a single, non-recursive function that implements backtracking via a vector of
"frames" on the heap, each frame representing a backtracking point. As well as
standard information such as the position in the pattern and position in the
subject, each frame has a number of unassigned variables that can be used
locally to preserve values at a backtracking point. C macros are used
extensively to implement all of this.
Supplementary matching function
-------------------------------
There is also a supplementary matching function called pcre2_dfa_match(). This
There is a supplementary matching function called pcre2_dfa_match() that
implements a DFA matching algorithm that searches simultaneously for all
possible matches that start at one point in the subject string. (Going back to
my roots: see Historical Note 1 above.) This function interprets the same
compiled pattern data as pcre2_match(); however, not all the facilities are
available, and those that are do not always work in quite the same way. See the
available, and those that are do not always work in quite the same way. In
particular, capturing parentheses and backreferences are not supported. See the
user documentation for details.
The algorithm that is used for pcre2_dfa_match() is not a traditional FSM,
@@ -361,8 +370,10 @@ Changeable options
The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL) and
some others may be changed in the middle of patterns by items such as (?i).
Their processing is handled entirely at compile time by generating different
opcodes for the different settings. The runtime functions do not need to keep
track of an option's state.
opcodes for the different settings. Some options are copied into the opcode's
data, for opcodes such as OP_REFI which depends on the (?r)
(PCRE2_EXTRA_CASELESS_RESTRICT) option. The runtime functions do not need to
keep track of an option's state.
PCRE2_DUPNAMES, PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE
are tracked and processed during the parsing pre-pass. The others are handled
@@ -383,10 +394,10 @@ within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
default value for LINK_SIZE is 2, except for the 32-bit library, where it can
only be 4. The 8-bit library can be compiled to use 3-byte or 4-byte values,
and the 16-bit library can be compiled to use 4-byte values, though this
impairs performance. Specifying a LINK_SIZE larger than 2 for these libraries is
necessary only when patterns whose compiled length is greater than 65535 code
units are going to be processed. When a LINK_SIZE value uses more than one code
unit, the most significant unit is first.
impairs performance. Specifying a LINK_SIZE larger than 2 for these libraries
is necessary only when patterns whose compiled length is greater than 65535
code units are going to be processed. When a LINK_SIZE value uses more than one
code unit, the most significant unit is first.
In this description, we assume the "normal" compilation options. Data values
that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode
@@ -396,7 +407,7 @@ that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode
Opcodes with no following data
------------------------------
These items are all just one unit long:
These items are all just one code unit long:
OP_END end of pattern
OP_ANY match any one character other than newline
@@ -594,9 +605,13 @@ do.
For classes containing characters with values greater than 255 or that contain
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any acceptable
code points are less than 256, followed by a list of pairs (for a range) and/or
single characters and/or properties. In caseless mode, all equivalent
characters are explicitly listed.
code points are less than 256. After the bit map, the properties of the
character class are listed, if they are present. The items in the list
follow the declaration order of the pattern string. The property list
is followed by single characters and/or character ranges, if they are
present. The characters/ranges are sorted in ascending order, and at
least one non-matching character must be present between any two of
them. In caseless mode, all equivalent characters are explicitly listed.
OP_XCLASS is followed by a LINK_SIZE value containing the total length of the
opcode and its data. This is followed by a code unit containing flag bits:
@@ -618,6 +633,42 @@ When XCL_NOT is set, the bit map, if present, contains bits for characters that
are allowed (exactly as for OP_NCLASS), but the list of items that follow it
specifies characters and properties that are not allowed.
The meaning of the bitmap indicated by XCL_MAP is that, if one is present, then
it fully describes which code points < 256 match the class (without needing to
invert the check according to XCL_NOT); the other items in the OP_XCLASS need
not be consulted. However, if a bitmap is not present, then code points < 256
may still match, so the other items in the OP_XCLASS must be consulted.
For classes containing logical expressions, such as "[\p{Greek} && \p{Lu}]" for
"uppercase Greek letters", OP_ECLASS is used. The expression is encoded as a
stack-based series of operands and operators, in Reverse Polish Notation. Like
an OP_XCLASS, the OP_ECLASS is first followed by a LINK_SIZE value containing
the total length of the opcode and its data. That is followed by a code unit
containing flags: currently just ECL_MAP indicating that a bit map is present.
There follows the bit map, if ECL_MAP is set. Finally, there is a sequence of
items that are either an operand or operator. Each item starts with a single
code unit containing its type:
ECL_AND AND; no additional data
ECL_OR OR; no additional data
ECL_XOR XOR; no additional data
ECL_NOT NOT; no additional data
ECL_XCLASS The additional data which follows ECL_XCLASS is the same as for
an OP_XCLASS, except that this data is preceded by ECL_XCLASS
rather than OP_XCLASS.
Because the OP_ECLASS has its own bitmap (if required), an
ECL_XCLASS should not contain a bitmap.
Additionally, there are two intermediate values used during compilation, but
these are folded away during generation of the opcode, and so never appear
inside an OP_ECLASS at match time. They are:
ECL_ANY match all characters; no additional data
ECL_NONE match no characters; no additional data
The meaning of the bitmap indicated by ECL_MAP is the same as XCL_MAP.
If the bitmap is present, all code points < 256 are checked against the bitmap.
Back references
---------------
@@ -631,6 +682,9 @@ generates OP_DNREF or OP_DNREFI. These are followed by two counts: the index
required name, followed by the number of groups with the same name. The
matching code can then search for the first one that is set.
OP_REFI and OP_DNREFI are further followed by an item containing any
case-insensitivity flags.
Repeating character classes and back references
-----------------------------------------------
@@ -750,6 +804,16 @@ In ASCII or UTF-32 mode, the character counts in OP_REVERSE and OP_VREVERSE are
also the number of code units, but in UTF-8/16 mode each character may occupy
more than one code unit.
The "scan substring" assertion compiles as OP_ASSERT_SCS. This opcode is
followed by a list of arguments. Each argument is either an OP_CREF or
OP_DNCREF byte code sequence. The details of these sequences are described
in the next section.
For example (*scs:(1,'NAME')...PATTERN...) is translated to:
[OP_ASSERT_SCS] [OP_CREF] [OP_CREF] ...PATTERN... [OP_KET]
If 'NAME' is a duplicated name, the second [OP_CREF] is [OP_DNCREF] instead.
Conditional subpatterns
-----------------------
@@ -849,5 +913,12 @@ The last opcode that is defined in pcre2_internal.h is OP_TABLE_LENGTH. This is
not a real opcode, but is used to check at compile time that tables indexed by
opcode are the correct length, in order to catch updating errors.
See also
--------
The file maint/README contains additional information.
Philip Hazel
November 2023
August 2024
+42 -33
View File
@@ -1,5 +1,8 @@
PCRE2 LICENCE
-------------
PCRE2 License
=============
| SPDX-License-Identifier: | BSD-3-Clause WITH PCRE2-exception |
|---------|-------|
PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
@@ -16,40 +19,46 @@ optimize pattern matching. This is an optional feature that can be omitted when
the library is built.
THE BASIC LIBRARY FUNCTIONS
---------------------------
COPYRIGHT
---------
Written by: Philip Hazel
Email local part: Philip.Hazel
Email domain: gmail.com
### The basic library functions
Retired from University of Cambridge Computing Service,
Cambridge, England.
Written by: Philip Hazel
Email local part: Philip.Hazel
Email domain: gmail.com
Copyright (c) 1997-2024 University of Cambridge
All rights reserved.
Retired from University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2007 University of Cambridge
Copyright (c) 2007-2024 Philip Hazel
All rights reserved.
PCRE2 JUST-IN-TIME COMPILATION SUPPORT
--------------------------------------
### PCRE2 Just-In-Time compilation support
Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Copyright(c) 2010-2024 Zoltan Herczeg
All rights reserved.
Copyright (c) 2010-2024 Zoltan Herczeg
All rights reserved.
### Stack-less Just-In-Time compiler
STACK-LESS JUST-IN-TIME COMPILER
--------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Copyright (c) 2009-2024 Zoltan Herczeg
All rights reserved.
Copyright(c) 2009-2024 Zoltan Herczeg
All rights reserved.
### All other contributions
Many other contributors have participated in the authorship of PCRE2. As PCRE2
has never required a Contributor Licensing Agreement, or other copyright
assignment agreement, all contributions have copyright retained by each
original contributor or their employer.
THE "BSD" LICENCE
@@ -58,16 +67,16 @@ THE "BSD" LICENCE
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notices,
this list of conditions and the following disclaimer.
* Redistributions of source code must retain the above copyright notices,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notices, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Redistributions in binary form must reproduce the above copyright
notices, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of any
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
* Neither the name of the University of Cambridge nor the names of any
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+2 -1
View File
@@ -1,8 +1,9 @@
module(
name = "pcre2",
version = "10.40",
version = "10.46",
compatibility_level = 1,
)
bazel_dep(name = "rules_cc", version = "0.0.1")
bazel_dep(name = "bazel_skylib", version = "1.2.1")
bazel_dep(name = "platforms", version = "0.0.4")
+58 -47
View File
@@ -10,12 +10,13 @@ AM_CPPFLAGS="-I$(srcdir)/src"
## Specify the documentation files that are distributed.
dist_doc_DATA = \
AUTHORS \
AUTHORS.md \
COPYING \
ChangeLog \
LICENCE \
LICENCE.md \
NEWS \
README \
SECURITY.md \
doc/pcre2.txt \
doc/pcre2-config.txt \
doc/pcre2grep.txt \
@@ -86,11 +87,13 @@ dist_html_DATA = \
doc/html/pcre2_set_max_pattern_length.html \
doc/html/pcre2_set_max_varlookbehind.html \
doc/html/pcre2_set_offset_limit.html \
doc/html/pcre2_set_optimize.html \
doc/html/pcre2_set_newline.html \
doc/html/pcre2_set_parens_nest_limit.html \
doc/html/pcre2_set_recursion_limit.html \
doc/html/pcre2_set_recursion_memory_management.html \
doc/html/pcre2_set_substitute_callout.html \
doc/html/pcre2_set_substitute_case_callout.html \
doc/html/pcre2_substitute.html \
doc/html/pcre2_substring_copy_byname.html \
doc/html/pcre2_substring_copy_bynumber.html \
@@ -185,11 +188,13 @@ dist_man_MANS = \
doc/pcre2_set_max_pattern_length.3 \
doc/pcre2_set_max_varlookbehind.3 \
doc/pcre2_set_offset_limit.3 \
doc/pcre2_set_optimize.3 \
doc/pcre2_set_newline.3 \
doc/pcre2_set_parens_nest_limit.3 \
doc/pcre2_set_recursion_limit.3 \
doc/pcre2_set_recursion_memory_management.3 \
doc/pcre2_set_substitute_callout.3 \
doc/pcre2_set_substitute_case_callout.3 \
doc/pcre2_substitute.3 \
doc/pcre2_substring_copy_byname.3 \
doc/pcre2_substring_copy_bynumber.3 \
@@ -272,6 +277,14 @@ EXTRA_DIST += \
NON-AUTOTOOLS-BUILD \
HACKING
# These are support files for building with Bazel or Zig
EXTRA_DIST += \
BUILD.bazel \
MODULE.bazel \
WORKSPACE.bazel \
build.zig
# These are support files for building under VMS
EXTRA_DIST += \
@@ -280,16 +293,6 @@ EXTRA_DIST += \
vms/pcre2.h_patch \
vms/stdint.h
# These files are used in the preparation of a release
EXTRA_DIST += \
PrepareRelease \
CheckMan \
CleanTxt \
Detrail \
132html \
doc/index.html.src
# These files are usable versions of pcre2.h and config.h that are distributed
# for the benefit of people who are building PCRE2 manually, without the
# Autotools support.
@@ -374,6 +377,8 @@ COMMON_SOURCES = \
src/pcre2_auto_possess.c \
src/pcre2_chkdint.c \
src/pcre2_compile.c \
src/pcre2_compile.h \
src/pcre2_compile_class.c \
src/pcre2_config.c \
src/pcre2_context.c \
src/pcre2_convert.c \
@@ -383,6 +388,7 @@ COMMON_SOURCES = \
src/pcre2_find_bracket.c \
src/pcre2_internal.h \
src/pcre2_intmodedep.h \
src/pcre2_jit_char_inc.h \
src/pcre2_jit_compile.c \
src/pcre2_jit_neon_inc.h \
src/pcre2_jit_simd_inc.h \
@@ -401,6 +407,7 @@ COMMON_SOURCES = \
src/pcre2_tables.c \
src/pcre2_ucd.c \
src/pcre2_ucp.h \
src/pcre2_util.h \
src/pcre2_valid_utf.c \
src/pcre2_xclass.c
@@ -460,39 +467,39 @@ CLEANFILES += src/pcre2_chartables.c
# when pcre2_jit_compile.c is processed, so they must be distributed.
EXTRA_DIST += \
src/sljit/sljitConfig.h \
src/sljit/sljitConfigCPU.h \
src/sljit/sljitConfigInternal.h \
src/sljit/sljitLir.c \
src/sljit/sljitLir.h \
src/sljit/sljitNativeARM_32.c \
src/sljit/sljitNativeARM_64.c \
src/sljit/sljitNativeARM_T2_32.c \
src/sljit/sljitNativeLOONGARCH_64.c \
src/sljit/sljitNativeMIPS_32.c \
src/sljit/sljitNativeMIPS_64.c \
src/sljit/sljitNativeMIPS_common.c \
src/sljit/sljitNativePPC_32.c \
src/sljit/sljitNativePPC_64.c \
src/sljit/sljitNativePPC_common.c \
src/sljit/sljitNativeRISCV_32.c \
src/sljit/sljitNativeRISCV_64.c \
src/sljit/sljitNativeRISCV_common.c \
src/sljit/sljitNativeS390X.c \
src/sljit/sljitNativeX86_32.c \
src/sljit/sljitNativeX86_64.c \
src/sljit/sljitNativeX86_common.c \
src/sljit/sljitSerialize.c \
src/sljit/sljitUtils.c \
src/sljit/allocator_src/sljitExecAllocatorApple.c \
src/sljit/allocator_src/sljitExecAllocatorCore.c \
src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c \
src/sljit/allocator_src/sljitExecAllocatorPosix.c \
src/sljit/allocator_src/sljitExecAllocatorWindows.c \
src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c \
src/sljit/allocator_src/sljitProtExecAllocatorPosix.c \
src/sljit/allocator_src/sljitWXExecAllocatorPosix.c \
src/sljit/allocator_src/sljitWXExecAllocatorWindows.c
deps/sljit/sljit_src/sljitConfig.h \
deps/sljit/sljit_src/sljitConfigCPU.h \
deps/sljit/sljit_src/sljitConfigInternal.h \
deps/sljit/sljit_src/sljitLir.c \
deps/sljit/sljit_src/sljitLir.h \
deps/sljit/sljit_src/sljitNativeARM_32.c \
deps/sljit/sljit_src/sljitNativeARM_64.c \
deps/sljit/sljit_src/sljitNativeARM_T2_32.c \
deps/sljit/sljit_src/sljitNativeLOONGARCH_64.c \
deps/sljit/sljit_src/sljitNativeMIPS_32.c \
deps/sljit/sljit_src/sljitNativeMIPS_64.c \
deps/sljit/sljit_src/sljitNativeMIPS_common.c \
deps/sljit/sljit_src/sljitNativePPC_32.c \
deps/sljit/sljit_src/sljitNativePPC_64.c \
deps/sljit/sljit_src/sljitNativePPC_common.c \
deps/sljit/sljit_src/sljitNativeRISCV_32.c \
deps/sljit/sljit_src/sljitNativeRISCV_64.c \
deps/sljit/sljit_src/sljitNativeRISCV_common.c \
deps/sljit/sljit_src/sljitNativeS390X.c \
deps/sljit/sljit_src/sljitNativeX86_32.c \
deps/sljit/sljit_src/sljitNativeX86_64.c \
deps/sljit/sljit_src/sljitNativeX86_common.c \
deps/sljit/sljit_src/sljitSerialize.c \
deps/sljit/sljit_src/sljitUtils.c \
deps/sljit/sljit_src/allocator_src/sljitExecAllocatorApple.c \
deps/sljit/sljit_src/allocator_src/sljitExecAllocatorCore.c \
deps/sljit/sljit_src/allocator_src/sljitExecAllocatorFreeBSD.c \
deps/sljit/sljit_src/allocator_src/sljitExecAllocatorPosix.c \
deps/sljit/sljit_src/allocator_src/sljitExecAllocatorWindows.c \
deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorNetBSD.c \
deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorPosix.c \
deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorPosix.c \
deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorWindows.c
# Some of the JIT sources are also in separate files that are #included.
@@ -710,9 +717,12 @@ EXTRA_DIST += \
testdata/grepinput \
testdata/grepinput3 \
testdata/grepinput8 \
testdata/grepinputBad8 \
testdata/grepinputBad8_Trail \
testdata/grepinputC.bz2 \
testdata/grepinputC.gz \
testdata/grepinputM \
testdata/grepinputUN \
testdata/grepinputv \
testdata/grepinputx \
testdata/greplist \
@@ -755,6 +765,7 @@ EXTRA_DIST += \
testdata/testinput24 \
testdata/testinput25 \
testdata/testinput26 \
testdata/testinput27 \
testdata/testinputEBC \
testdata/testinputheap \
testdata/testoutput1 \
@@ -799,6 +810,7 @@ EXTRA_DIST += \
testdata/testoutput24 \
testdata/testoutput25 \
testdata/testoutput26 \
testdata/testoutput27 \
testdata/testoutputEBC \
testdata/testoutputheap-16 \
testdata/testoutputheap-32 \
@@ -819,7 +831,7 @@ CLEANFILES += \
test3outputB \
testtry \
teststdout \
teststderr \
teststderr \
teststderrgrep \
testtemp1grep \
testtemp2grep \
@@ -957,7 +969,6 @@ endif # WITH_GCOV
EXTRA_DIST += \
cmake/COPYING-CMAKE-SCRIPTS \
cmake/FindEditline.cmake \
cmake/FindPackageHandleStandardArgs.cmake \
cmake/FindReadline.cmake \
cmake/pcre2-config-version.cmake.in \
cmake/pcre2-config.cmake.in \
+107 -2
View File
@@ -1,6 +1,111 @@
News about PCRE2 releases
-------------------------
Version 10.46 27-August-2025
----------------------------
This is a security-only release, to address CVE-2025-58050.
Compared to 10.45, this release has only a minimal code change to prevent a
read-past-the-end memory error, of arbitrary length. An attacker-controlled
regex pattern is required, and it cannot be triggered by providing crafted
subject (match) text. The (*ACCEPT) and (*scs:) pattern features must be used
together.
Release 10.44 and earlier are not affected.
This could have implications of denial-of-service or information disclosure,
and could potentially be used to escalate other vulnerabilities in a system
(such as information disclosure being used to escalate the severity of an
unrelated bug in another system).
Version 10.45 05-February-2025
------------------------------
This is a comparatively large release, incorporating new features, some
bugfixes, and a few changes with slight backwards compatibility implications.
Please see the ChangeLog and Git log for further details.
Only changes to behaviour, changes to the API, and major changes to the pattern
syntax are described here.
This release is the first to be available as a (signed) Git tag, or
alternatively as a (signed) tarball of the Git tag.
This is also the first release to be made by the new maintainers of PCRE2, and
we would like to thank Philip Hazel, creator and maintainer of PCRE and PCRE2.
* (Git change) The sljit project has been split out into a separate Git
repository. Git users must now run `git submodule init; git submodule update`
after a Git checkout.
* (Behaviour change) Update Unicode support to UCD 16.
* (Match behaviour change) Case-insensitive matching of Unicode properties
Ll, Lt, and Lu has been changed to match Perl. Previously, /\p{Ll}/i would
match only lower-case characters (even though case-insensitive matching was
specified). This also affects case-insensitive matching of POSIX classes such
as [:lower:].
* (Minor match behaviour change) Case-insensitive matching of backreferences now
respects the PCRE2_EXTRA_CASELESS_RESTRICT option.
* (Minor pattern syntax change) Parsing of the \x escape is stricter, and is
no longer parsed as an escape for the NUL character if not followed by '{' or
a hexadecimal digit. Use \x00 instead.
* (Major new feature) Add a new feature called scan substring. This is a new
type of assertion which matches the content of a capturing block to a
sub-pattern.
Example: to find a word that contains the rare (in English) sequence of
letters "rh" not at the start:
\b(\w++)(*scan_substring:(1).+rh)
The first group captures a word which is then scanned by the
(*scan_substring:(1) ... ) assertion, which tests whether the pattern ".+rh"
matches the capture group "(1)".
* (Major new feature) Add support for UTS#18 compatible character classes,
using the new option PCRE2_ALT_EXTENDED_CLASS. This adds '[' as a
metacharacter within character classes and the operators '&&', '--' and '~~',
allowing subtractions and intersections of character classes to be easily
expressed.
Example: to match Thai or Greek letters (but not letters or other characters
in those scripts), use [\p{L}&&[\p{Thai}||\p{Greek}]].
* (Major new feature) Add support for Perl-style extended character classes,
using the syntax (?[...]). This also allows expressing subtractions and
intersections of character classes, but using a different syntax to UTS#18.
Example: to match Thai or Greek letters (but not letters or other characters
in those scripts), use (?[\p{L} & (\p{Thai} + \p{Greek})]).
* (Minor feature) Significant improvements to the character class match engine.
Compiled character classes are now more compact, and have faster matching
for large or complex character sets, using binary search through the set.
* JIT compilation now fails with the new error code PCRE2_ERROR_JIT_UNSUPPORTED
for patterns which use features not supported by the JIT compiler.
* (Minor feature) New options PCRE2_EXTRA_NO_BS0 (disallow \0 as an escape for
the NUL character); PCRE2_EXTRA_PYTHON_OCTAL (use Python disambiguation rules
for deciding whether \12 is a backreference or an octal escape);
PCRE2_EXTRA_NEVER_CALLOUT (disable callout syntax entirely);
PCRE2_EXTRA_TURKISH_CASING (use Turkish rules for case-insensitive matching).
* (Minor feature) Add new API function pcre2_set_optimize() for controlling
which optimizations are enabled.
* (Minor new features) A variety of extensions have been made to
pcre2_substitute() and its syntax for replacement strings. These now support:
\123 octal escapes; titlecasing \u\L; \1 backreferences; \g<1> and $<NAME>
backreferences; $& $` $' and $_; new function
pcre2_set_substitute_case_callout() to allow locale-aware case transformation.
Version 10.44 07-June-2024
--------------------------
@@ -13,7 +118,7 @@ increased to 128. Some auxiliary files for building under VMS are added.
Version 10.43 16-February-2024
------------------------------
There are quite a lot of changes in this release (see ChangeLog and git log for
There are quite a lot of changes in this release (see ChangeLog and Git log for
a list). Those that are not bugfixes or code tidies are:
* The JIT code no longer supports ARMv5 architecture.
@@ -52,7 +157,7 @@ a list). Those that are not bugfixes or code tidies are:
matches the "fullwidth" versions of hex digits. PCRE2_EXTRA_ASCII_DIGIT can
be used to keep it ASCII only.
* Make PCRE2_UCP the default in UTF mode in pcre2grep and add -no_ucp,
* Make PCRE2_UCP the default in UTF mode in pcre2grep and add --no-ucp,
--case-restrict and --posix-digit.
* Add --group-separator and --no-group-separator to pcre2grep.
+54 -42
View File
@@ -105,6 +105,7 @@ example.
pcre2_chkdint.c
pcre2_chartables.c
pcre2_compile.c
pcre2_compile_class.c
pcre2_config.c
pcre2_context.c
pcre2_convert.c
@@ -138,7 +139,7 @@ example.
Note that you must compile pcre2_jit_compile.c, even if you have not
defined SUPPORT_JIT in src/config.h, because when JIT support is not
configured, dummy functions are compiled. When JIT support IS configured,
pcre2_jit_compile.c #includes other files from the sljit subdirectory,
pcre2_jit_compile.c #includes other files from the sljit dependency,
all of whose names begin with "sljit". It also #includes
src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile
those yourself.
@@ -301,56 +302,66 @@ Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
spaces in the names for your CMake installation and your PCRE2 source and build
directories.
The following instructions were contributed by a PCRE1 user, but they should
also work for PCRE2. If they are not followed exactly, errors may occur. In the
event that errors do occur, it is recommended that you delete the CMake cache
before attempting to repeat the CMake build process. In the CMake GUI, the
cache can be deleted by selecting "File > Delete Cache".
If you are using CMake and encounter errors, deleting the CMake cache and
restarting from a fresh build may fix the error. In the CMake GUI, the cache can
be deleted by selecting "File > Delete Cache"; or the folder "CMakeCache" can
be deleted.
1. Install the latest CMake version available from http://www.cmake.org/, and
ensure that cmake\bin is on your path.
1. Install the latest CMake version available from http://www.cmake.org/, and
ensure that cmake\bin is on your path.
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
directory such as C:\pcre2. You should ensure your local date and time
is not earlier than the file dates in your source dir if the release is
very new.
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
directory such as C:\pcre2. You should ensure your local date and time
is not earlier than the file dates in your source dir if the release is
very new.
3. Create a new, empty build directory, preferably a subdirectory of the
source dir. For example, C:\pcre2\pcre2-xx\build.
3. Create a new, empty build directory, preferably a subdirectory of the
source dir. For example, C:\pcre2\pcre2-xx\build.
4. Run cmake-gui from the Shell environment of your build tool, for example,
Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
to start Cmake from the Windows Start menu, as this can lead to errors.
4. Run CMake.
5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
build directories, respectively.
- Using the CLI, simply run `cmake ..` inside the `build/` directory. You can
use the `ccmake` ncurses GUI to select and configure PCRE2 features.
6. Hit the "Configure" button.
- Using the CMake GUI:
7. Select the particular IDE / build tool that you are using (Visual
Studio, MSYS makefiles, MinGW makefiles, etc.)
a) Run cmake-gui from the Shell environment of your build tool, for
example, Msys for Msys/MinGW or Visual Studio Command Prompt for
VC/VC++.
8. The GUI will then list several configuration options. This is where
you can disable Unicode support or select other PCRE2 optional features.
b) Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
build directories, respectively.
9. Hit "Configure" again. The adjacent "Generate" button should now be
active.
c) Press the "Configure" button.
10. Hit "Generate".
d) Select the particular IDE / build tool that you are using (Visual
Studio, MSYS makefiles, MinGW makefiles, etc.)
11. The build directory should now contain a usable build system, be it a
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
cmake-gui and use the generated build system with your compiler or IDE.
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
solution, select the desired configuration (Debug, or Release, etc.) and
build the ALL_BUILD project.
e) The GUI will then list several configuration options. This is where
you can disable Unicode support or select other PCRE2 optional features.
12. If during configuration with cmake-gui you've elected to build the test
programs, you can execute them by building the test project. E.g., for
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
most recent build configuration is targeted by the tests. A summary of
test results is presented. Complete test output is subsequently
available for review in Testing\Temporary under your build dir.
f) Press "Configure" again. The adjacent "Generate" button should now be
active.
g) Press "Generate".
5. The build directory should now contain a usable build system, be it a
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
cmake-gui and use the generated build system with your compiler or IDE.
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
solution, select the desired configuration (Debug, or Release, etc.) and
build the ALL_BUILD project.
Regardless of build system used, `cmake --build .` will build it.
6. If during configuration with cmake-gui you've elected to build the test
programs, you can execute them by building the test project. E.g., for
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
most recent build configuration is targeted by the tests. A summary of
test results is presented. Complete test output is subsequently
available for review in Testing\Temporary under your build dir.
Regardless of build system used, `ctest` will run the tests.
BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO
@@ -425,6 +436,7 @@ OpenVMS. They are in the "vms" directory in the distribution tarball. Please
read the file called vms/openvms_readme.txt. The pcre2test and pcre2grep
programs contain some VMS-specific code.
===========================
Last Updated: 16 April 2024
===========================
==============================
Last updated: 26 December 2024
==============================
+86 -72
View File
@@ -385,7 +385,7 @@ library. They are also documented in the pcre2build man page.
If this is done, when pcre2test's input is from a terminal, it reads it using
the readline() function. This provides line-editing and history facilities.
Note that libreadline is GPL-licenced, so if you distribute a binary of
Note that libreadline is GPL-licensed, so if you distribute a binary of
pcre2test linked in this way, there may be licensing issues. These can be
avoided by linking with libedit (which has a BSD licence) instead.
@@ -411,20 +411,19 @@ library. They are also documented in the pcre2build man page.
Instead of %td or %zu, %lu is used, with a cast for size_t values.
. There is a special option called --enable-fuzz-support for use by people who
want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
library. If set, it causes an extra library called libpcre2-fuzzsupport.a to
be built, but not installed. This contains a single function called
LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the
length of the string. When called, this function tries to compile the string
as a pattern, and if that succeeds, to match it. This is done both with no
options and with some random options bits that are generated from the string.
Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
be created. This is normally run under valgrind or used when PCRE2 is
compiled with address sanitizing enabled. It calls the fuzzing function and
outputs information about what it is doing. The input strings are specified
by arguments: if an argument starts with "=" the rest of it is a literal
input string. Otherwise, it is assumed to be a file name, and the contents
of the file are the test string.
want to run fuzzing tests on PCRE2. If set, it causes an extra library
called libpcre2-fuzzsupport.a to be built, but not installed. This contains
a single function called LLVMFuzzerTestOneInput() whose arguments are a
pointer to a string and the length of the string. When called, this function
tries to compile the string as a pattern, and if that succeeds, to match
it. This is done both with no options and with some random option bits that
are generated from the string. Setting --enable-fuzz-support also causes an
executable called pcre2fuzzcheck-{8,16,32} to be created. This is normally
run under valgrind or used when PCRE2 is compiled with address sanitizing
enabled. It calls the fuzzing function and outputs information about what it
is doing. The input strings are specified by arguments: if an argument
starts with "=" the rest of it is a literal input string. Otherwise, it is
assumed to be a file name, and the contents of the file are the test string.
. Releases before 10.30 could be compiled with --disable-stack-for-recursion,
which caused pcre2_match() to use individual blocks on the heap for
@@ -510,6 +509,7 @@ system. The following are installed (file names are all relative to the
LICENCE
NEWS
README
SECURITY
pcre2.txt (a concatenation of the man(3) pages)
pcre2test.txt the pcre2test man page
pcre2grep.txt the pcre2grep man page
@@ -607,8 +607,9 @@ zip formats. The command "make distcheck" does the same, but then does a trial
build of the new distribution to ensure that it works.
If you have modified any of the man page sources in the doc directory, you
should first run the PrepareRelease script before making a distribution. This
script creates the .txt and HTML forms of the documentation from the man pages.
should first run the maint/PrepareRelease script before making a distribution.
This script creates the .txt and HTML forms of the documentation from the man
pages.
Testing PCRE2
@@ -822,37 +823,38 @@ The distribution should contain the files listed below.
ASCII coding; unless --enable-rebuild-chartables is
specified, used by copying to pcre2_chartables.c
src/pcre2posix.c )
src/pcre2_auto_possess.c )
src/pcre2_chkdint.c )
src/pcre2_compile.c )
src/pcre2_config.c )
src/pcre2_context.c )
src/pcre2_convert.c )
src/pcre2_dfa_match.c )
src/pcre2_error.c )
src/pcre2_extuni.c )
src/pcre2_find_bracket.c )
src/pcre2_jit_compile.c )
src/pcre2_jit_match.c ) sources for the functions in the library,
src/pcre2_jit_misc.c ) and some internal functions that they use
src/pcre2_maketables.c )
src/pcre2_match.c )
src/pcre2_match_data.c )
src/pcre2_newline.c )
src/pcre2_ord2utf.c )
src/pcre2_pattern_info.c )
src/pcre2_script_run.c )
src/pcre2_serialize.c )
src/pcre2_string_utils.c )
src/pcre2_study.c )
src/pcre2_substitute.c )
src/pcre2_substring.c )
src/pcre2_tables.c )
src/pcre2_ucd.c )
src/pcre2_ucptables.c )
src/pcre2_valid_utf.c )
src/pcre2_xclass.c )
src/pcre2posix.c )
src/pcre2_auto_possess.c )
src/pcre2_chkdint.c )
src/pcre2_compile.c )
src/pcre2_compile_class.c )
src/pcre2_config.c )
src/pcre2_context.c )
src/pcre2_convert.c )
src/pcre2_dfa_match.c )
src/pcre2_error.c )
src/pcre2_extuni.c )
src/pcre2_find_bracket.c )
src/pcre2_jit_compile.c )
src/pcre2_jit_match.c ) sources for the functions in the library,
src/pcre2_jit_misc.c ) and some internal functions that they use
src/pcre2_maketables.c )
src/pcre2_match.c )
src/pcre2_match_data.c )
src/pcre2_newline.c )
src/pcre2_ord2utf.c )
src/pcre2_pattern_info.c )
src/pcre2_script_run.c )
src/pcre2_serialize.c )
src/pcre2_string_utils.c )
src/pcre2_study.c )
src/pcre2_substitute.c )
src/pcre2_substring.c )
src/pcre2_tables.c )
src/pcre2_ucd.c )
src/pcre2_ucptables.c )
src/pcre2_valid_utf.c )
src/pcre2_xclass.c )
src/pcre2_printint.c debugging function that is used by pcre2test
src/pcre2_fuzzsupport.c function for (optional) fuzzing support
@@ -860,13 +862,16 @@ The distribution should contain the files listed below.
src/config.h.in template for config.h, when built by "configure"
src/pcre2.h.in template for pcre2.h when built by "configure"
src/pcre2posix.h header for the external POSIX wrapper API
src/pcre2_compile.h header for internal use
src/pcre2_internal.h header for internal use
src/pcre2_intmodedep.h a mode-specific internal header
src/pcre2_jit_char_inc.h header used by JIT
src/pcre2_jit_neon_inc.h header used by JIT
src/pcre2_jit_simd_inc.h header used by JIT
src/pcre2_ucp.h header for Unicode property handling
src/pcre2_util.h header for internal utils
sljit/* source files for the JIT compiler
deps/sljit/sljit_src/* source files for the JIT compiler
(B) Source files for programs that use PCRE2:
@@ -878,48 +883,49 @@ The distribution should contain the files listed below.
(C) Auxiliary files:
132html script to turn "man" pages into HTML
AUTHORS information about the author of PCRE2
AUTHORS.md information about the authors of PCRE2
ChangeLog log of changes to the code
CleanTxt script to clean nroff output for txt man pages
Detrail script to remove trailing spaces
HACKING some notes about the internals of PCRE2
INSTALL generic installation instructions
LICENCE conditions for the use of PCRE2
LICENCE.md conditions for the use of PCRE2
COPYING the same, using GNU's standard name
SECURITY.md information on reporting vulnerabilities
Makefile.in ) template for Unix Makefile, which is built by
) "configure"
Makefile.am ) the automake input that was used to create
) Makefile.in
NEWS important changes in this release
NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools
PrepareRelease script to make preparations for "make dist"
README this file
RunTest a Unix shell script for running tests
RunGrepTest a Unix shell script for pcre2grep tests
RunTest.bat a Windows batch file for running tests
RunGrepTest.bat a Windows batch file for pcre2grep tests
aclocal.m4 m4 macros (generated by "aclocal")
config.guess ) files used by libtool,
config.sub ) used only when building a shared library
m4/* m4 macros (used by autoconf)
configure a configuring shell script (built by autoconf)
configure.ac ) the autoconf input that was used to build
) "configure" and config.h
depcomp ) script to find program dependencies, generated by
) automake
doc/*.3 man page sources for PCRE2
doc/*.1 man page sources for pcre2grep and pcre2test
doc/index.html.src the base HTML page
doc/html/* HTML documentation
doc/pcre2.txt plain text version of the man pages
doc/pcre2-config.txt plain text documentation of pcre2-config script
doc/pcre2grep.txt plain text documentation of grep utility program
doc/pcre2test.txt plain text documentation of test program
install-sh a shell script for installing files
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config
ltmain.sh file used to build a libtool script
missing ) common stub for a few missing GNU programs while
) installing, generated by automake
mkinstalldirs script for making install directories
ar-lib )
config.guess )
config.sub )
depcomp ) helper tools generated by libtool and
compile ) automake, used internally by ./configure
install-sh )
ltmain.sh )
missing )
test-driver )
perltest.sh script for running a Perl test program
pcre2-config.in source of script which retains PCRE2 information
testdata/testinput* test data for main library tests
@@ -927,12 +933,13 @@ The distribution should contain the files listed below.
testdata/grep* input and output for pcre2grep tests
testdata/* other supporting test files
(D) Auxiliary files for cmake support
(D) Auxiliary files for CMake support
cmake/COPYING-CMAKE-SCRIPTS
cmake/FindPackageHandleStandardArgs.cmake
cmake/FindEditline.cmake
cmake/FindReadline.cmake
cmake/pcre2-config-version.cmake.in
cmake/pcre2-config.cmake.in
CMakeLists.txt
config-cmake.h.in
@@ -943,14 +950,21 @@ The distribution should contain the files listed below.
src/config.h.generic ) a version of config.h for use in non-"configure"
) environments
(F) Auxiliary files for building PCRE2 under OpenVMS
(F) Auxiliary files for building PCRE2 using other build systems
BUILD.bazel )
MODULE.bazel ) files used by the Bazel build system
WORKSPACE.bazel )
build.zig file used by zig's build system
(G) Auxiliary files for building PCRE2 under OpenVMS
vms/configure.com )
vms/openvms_readme.txt ) These files were contributed by a PCRE2 user.
vms/pcre2.h_patch )
vms/stdint.h )
Philip Hazel
Email local part: Philip.Hazel
Email domain: gmail.com
Last updated: 15 April 2024
==============================
Last updated: 18 December 2024
==============================
+115 -25
View File
@@ -25,8 +25,8 @@ unset cp ls mv rm
# valgrind settings when requested.
builddir=`pwd`
pcre2grep=$builddir/pcre2grep
pcre2test=$builddir/pcre2test
: ${pcre2grep:=$builddir/pcre2grep}
: ${pcre2test:=$builddir/pcre2test}
if [ ! -x $pcre2grep ] ; then
echo "** $pcre2grep does not exist or is not executable."
@@ -41,22 +41,17 @@ fi
valgrind=
while [ $# -gt 0 ] ; do
case $1 in
valgrind) valgrind="valgrind -q --leak-check=no --smc-check=all-non-file";;
valgrind|-valgrind) valgrind="valgrind -q --leak-check=no --smc-check=all-non-file --error-exitcode=70";;
*) echo "RunGrepTest: Unknown argument $1"; exit 1;;
esac
shift
done
vjs=
pcre2grep_version=`$pcre2grep -V`
if [ "$valgrind" = "" ] ; then
echo "Testing $pcre2grep_version"
else
echo "Testing $pcre2grep_version using valgrind"
$pcre2test -C jit >/dev/null
if [ $? -ne 0 ]; then
vjs="--suppressions=./testdata/valgrind-jit.supp"
fi
fi
# Set up a suitable "diff" command for comparison. Some systems have a diff
@@ -105,6 +100,16 @@ if [ -z "$srcdir" -o ! -d "$srcdir/testdata" ] ; then
fi
fi
# Set up the path to the valgrind JIT suppressions
vjs=
if [ "$valgrind" != "" ] ; then
$pcre2test -C jit >/dev/null
if [ $? -ne 0 ]; then
vjs="--suppressions=`realpath "$srcdir"`/testdata/valgrind-jit.supp"
fi
fi
# Check for the availability of UTF-8 support
$pcre2test -C unicode >/dev/null
@@ -275,7 +280,7 @@ echo "---------------------------- Test 35 -----------------------------" >>test
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 36 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include='grepinput[^C]' --exclude 'grepinput$' --exclude=grepinput8 --exclude=grepinputM --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include='grepinput[^C]' --exclude 'grepinput$' --exclude='grepinput(Bad)?8' --exclude=grepinputM --exclude=grepinputUN --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 37 -----------------------------" >>testtrygrep
@@ -318,8 +323,11 @@ echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 46 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -e 'unopened)' -e abc ./testdata/grepinput) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e '(unclosed' ./testdata/grepinput) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e xyz -e '[unclosed' ./testdata/grepinput) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep --regex=123 -eabc -e xyz -e '[unclosed' ./testdata/grepinput) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
@@ -530,25 +538,28 @@ echo "---------------------------- Test 95 -----------------------------" >>test
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 96 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' --exclude=grepinput[MC] 'fox' ./test* | sort) >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' --exclude=grepinput[MCU] 'fox' ./test* | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 97 -----------------------------" >>testtrygrep
echo "grepinput$" >testtemp1grep
echo "grepinput8" >>testtemp1grep
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude=grepinput[MC] --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "grepinputBad8" >>testtemp1grep
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude=grepinput[MCU] --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 98 -----------------------------" >>testtrygrep
echo "grepinput$" >testtemp1grep
echo "grepinput8" >>testtemp1grep
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --exclude=grepinput[MC] --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "grepinputBad8" >>testtemp1grep
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --exclude=grepinput[MCU] --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 99 -----------------------------" >>testtrygrep
echo "grepinput$" >testtemp1grep
echo "grepinput8" >testtemp2grep
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude=grepinput[MC] --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "grepinputBad8" >>testtemp1grep
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude=grepinput[MCU] --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 100 ------------------------------" >>testtrygrep
@@ -618,7 +629,7 @@ echo "---------------------------- Test 115 -----------------------------" >>tes
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 116 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep --exclude=grepinput[MC] -th 'the' testdata/grepinput*) >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep --exclude=grepinput[MCU] -th 'the' testdata/grepinput*) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 117 -----------------------------" >>testtrygrep
@@ -637,6 +648,8 @@ echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 120 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -HO '$0:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -HO '$&:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -m 1 -O '$0:$a$b$e$f$r$t$v' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -HO '${X}' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1
@@ -761,7 +774,7 @@ echo "---------------------------- Test 140 -----------------------------" >>tes
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 141 -----------------------------" >>testtrygrep
printf "$srcdir/testdata/grepinputv\n-\n" >testtemp1grep
printf "%s/testdata/grepinputv\n-\n" "$srcdir" >testtemp1grep
printf 'This is a line from stdin.' >testtemp2grep
$valgrind $vjs $pcre2grep --file-list testtemp1grep "line from stdin" <testtemp2grep >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
@@ -842,16 +855,17 @@ echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 150 -----------------------------" >>testtrygrep
which locale >/dev/null 2>&1
if [ $? -ne 0 ]; then
echo "pcre2grep: Failed to set locale badlocale (obtained from LC_CTYPE)" >>testtrygrep
echo "pcre2grep: Failed to set locale locale.bad (obtained from LC_CTYPE)" >>testtrygrep
echo "RC=2" >>testtrygrep
else
(cd $srcdir; unset LC_ALL; env LC_CTYPE=badlocale $valgrind $vjs $pcre2grep abc /dev/null) >>testtrygrep 2>&1
(cd $srcdir; unset LC_ALL; LC_CTYPE=locale.bad $valgrind $vjs $pcre2grep abc /dev/null) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
fi
echo "---------------------------- Test 151 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep --colour=always -e this -e The -e 'The wo' testdata/grepinputv) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 152 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -nA3 --group-separator='++' 'four' ./testdata/grepinputx) >>testtrygrep
@@ -861,6 +875,42 @@ echo "---------------------------- Test 153 -----------------------------" >>tes
(cd $srcdir; $valgrind $vjs $pcre2grep -nA3 --no-group-separator 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 154 -----------------------------" >>testtrygrep
>testtemp1grep
(cd $srcdir; $valgrind $vjs $pcre2grep -f $builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 155 -----------------------------" >>testtrygrep
echo "" >testtemp1grep
(cd $srcdir; $valgrind $vjs $pcre2grep -f $builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 156 -----------------------------" >>testtrygrep
echo "" >testtemp1grep
(cd $srcdir; $valgrind $vjs $pcre2grep --posix-pattern-file --file $builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 157 -----------------------------" >>testtrygrep
echo "spaces " >testtemp1grep
(cd $srcdir; $valgrind $vjs $pcre2grep -o --posix-pattern-file --file=$builddir/testtemp1grep ./testdata/grepinputv >$builddir/testtemp2grep && $valgrind $vjs $pcre2grep -q "s " $builddir/testtemp2grep) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 158 -----------------------------" >>testtrygrep
echo "spaces." >testtemp1grep
(cd $srcdir; $valgrind $vjs $pcre2grep -f $builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 159 -----------------------------" >>testtrygrep
printf "spaces.\r\n" >testtemp1grep
(cd $srcdir; $valgrind $vjs $pcre2grep --posix-pattern-file -f$builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 160 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -nC3 '^(ert|jkl)' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -n -B4 -A2 '^(ert|dfg)' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
# Now compare the results.
@@ -886,13 +936,11 @@ if [ $utf8 -ne 0 ] ; then
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test U4 ------------------------------" >>testtrygrep
printf 'A\341\200\200\200CD\342\200\200Z\n' >testtemp1grep
(cd $srcdir; $valgrind $vjs $pcre2grep -u -o '....' $builddir/testtemp1grep) >>testtrygrep 2>&1
(cd $srcdir; $valgrind $vjs $pcre2grep -u -o '....' ./testdata/grepinputBad8) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test U5 ------------------------------" >>testtrygrep
printf 'A\341\200\200\200CD\342\200\200Z\n' >testtemp1grep
(cd $srcdir; $valgrind $vjs $pcre2grep -U -o '....' $builddir/testtemp1grep) >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -U -o '....' ./testdata/grepinputBad8) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test U6 -----------------------------" >>testtrygrep
@@ -935,31 +983,48 @@ printf 'abc\rdef\r\nghi\njkl' >testNinputgrep
printf '%c--------------------------- Test N1 ------------------------------\r\n' - >testtrygrep
$valgrind $vjs $pcre2grep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
$valgrind $vjs $pcre2grep -B1 -n -N CR "^def" testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
printf '%c--------------------------- Test N2 ------------------------------\r\n' - >>testtrygrep
$valgrind $vjs $pcre2grep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
$valgrind $vjs $pcre2grep -B1 -n -N CRLF "^ghi" testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
printf '%c--------------------------- Test N3 ------------------------------\r\n' - >>testtrygrep
pattern=`printf 'def\rjkl'`
$valgrind $vjs $pcre2grep -n --newline=cr -F "$pattern" testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
printf '%c--------------------------- Test N4 ------------------------------\r\n' - >>testtrygrep
$valgrind $vjs $pcre2grep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
printf '%c--------------------------- Test N5 ------------------------------\r\n' - >>testtrygrep
$valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
$valgrind $vjs $pcre2grep -B1 -n --newline=any "^def" testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep
$valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
$valgrind $vjs $pcre2grep -B1 -n --newline=anycrlf "^jkl" testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
printf 'xyz\0abc\0def' >testNinputgrep
$valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep
echo "RC=$?" >>testtrygrep
$valgrind $vjs $pcre2grep -B1 -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep
echo "RC=$?" >>testtrygrep
printf '%c--------------------------- Test N8 ------------------------------\r\n' - >>testtrygrep
$valgrind $vjs $pcre2grep -na --newline=anycrlf "^a" $srcdir/testdata/grepinputBad8_Trail >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "" >>testtrygrep
$cf $srcdir/testdata/grepoutputN testtrygrep
@@ -972,8 +1037,13 @@ if [ $utf8 -ne 0 ] ; then
echo "Testing pcre2grep newline settings with UTF-8 features"
printf '%c--------------------------- Test UN1 ------------------------------\r\n' - >testtrygrep
printf 'abc\341\210\264def\nxyz' >testNinputgrep
$valgrind $vjs $pcre2grep -nau --newline=anycrlf "^(abc|def)" testNinputgrep >>testtrygrep
$valgrind $vjs $pcre2grep -nau --newline=anycrlf "^(abc|def)" $srcdir/testdata/grepinputUN >>testtrygrep
echo "RC=$?" >>testtrygrep
# Test UN2: append (>>) to testtrygrep so the UN1 results written above are
# kept; the whole file is compared against testdata/grepoutputUN afterwards.
printf '%c--------------------------- Test UN2 ------------------------------\r\n' - >>testtrygrep
$valgrind $vjs $pcre2grep -nauU --newline=anycrlf "^a" $srcdir/testdata/grepinputBad8_Trail >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "" >>testtrygrep
$cf $srcdir/testdata/grepoutputUN testtrygrep
@@ -990,12 +1060,24 @@ fi
if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scripts in patterns are supported'; then
echo "Testing pcre2grep script callouts"
$valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep
echo "--- Test 1 ---" >testtrygrep
$valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "--- Test 2 ---" >>testtrygrep
$valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "--- Test 3 ---" >>testtrygrep
$valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "--- Test 4 ---" >>testtrygrep
$valgrind $vjs $pcre2grep '(T)(?C"/bin/echo|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "--- Test 5 ---" >>testtrygrep
$valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "--- Test 6 ---" >>testtrygrep
$valgrind $vjs $pcre2grep -m1 '(T)(?C"|$0:$1:$x{41}$o{101}$n")' $srcdir/testdata/grepinputv >>testtrygrep
echo "RC=$?" >>testtrygrep
if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Non-fork callout scripts in patterns are supported'; then
nonfork=1
@@ -1010,8 +1092,12 @@ if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scri
if [ $utf8 -ne 0 ] ; then
echo "Testing pcre2grep script callout with UTF-8 features"
$valgrind $vjs $pcre2grep -u '(T)(?C"|$0:$x{a6}$n")' $srcdir/testdata/grepinputv >testtrygrep
echo "--- Test 1 ---" >testtrygrep
$valgrind $vjs $pcre2grep -u '(T)(?C"|$0:$x{a6}$n")' $srcdir/testdata/grepinputv >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "--- Test 2 ---" >>testtrygrep
$valgrind $vjs $pcre2grep -u '(T)(?C"/bin/echo|$0:$x{a6}$n")' $srcdir/testdata/grepinputv >>testtrygrep
echo "RC=$?" >>testtrygrep
if [ $nonfork = 1 ] ; then
$cf $srcdir/testdata/grepoutputCNU testtrygrep
@@ -1019,7 +1105,11 @@ if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scri
$cf $srcdir/testdata/grepoutputCU testtrygrep
fi
if [ $? != 0 ] ; then exit 1; fi
else
echo "Skipping pcre2grep script callout UTF-8 tests: no UTF-8 support in PCRE2 library"
fi
unset nonfork
else
echo "Script callouts are not supported"
fi
+443 -33
View File
@@ -19,8 +19,9 @@ set GREP_COLOR=
:: Remember the current (build) directory and set the program to be tested.
set builddir="%CD%"
set pcre2grep=%builddir%\pcre2grep.exe
set pcre2test=%builddir%\pcre2test.exe
if [%pcre2grep%]==[] set pcre2grep=%builddir%\pcre2grep.exe
if [%pcre2test%]==[] set pcre2test=%builddir%\pcre2test.exe
if NOT exist %pcre2grep% (
echo ** %pcre2grep% does not exist.
@@ -81,11 +82,16 @@ if NOT "%nl%" == "LF" if NOT "%nl%" == "ANY" if NOT "%nl%" == "ANYCRLF" (
)
:: Create a simple printf via cscript/JScript (an actual printf may translate
:: LF to CRLF, which this one does not).
:: LF to CRLF, which this one does not). We only support the barebones we need:
:: \r, \n, \0, and %s (but only once).
echo WScript.StdOut.Write(WScript.Arguments(0).replace(/\\r/g, "\r").replace(/\\n/g, "\n")) >printf.js
echo WScript.StdOut.Write(WScript.Arguments(0).replace(/\\r/g, "\r").replace(/\\n/g, "\n").replace(/\\0/g, "\x00").replace(/%%s/g, function() { return WScript.Arguments(1) })) >printf.js
set printf=cscript //nologo printf.js
:: Create a simple 'tr' via cscript/JScript.
echo WScript.StdOut.Write(WScript.StdIn.ReadAll().replace(/\x00/g, "@")) >trnull.js
set trnull=cscript //nologo trnull.js
:: ------ Normal tests ------
echo Testing pcre2grep main features
@@ -232,7 +238,7 @@ echo ---------------------------- Test 35 ----------------------------->>testtry
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 36 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude "grepinput$" --exclude=grepinput8 --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
(pushd %srcdir% & %pcre2grep% -L -r --include="grepinput[^C]" --exclude "grepinput$" --exclude="grepinput(Bad)?8" --exclude=grepinputM --exclude=grepinputUN --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 37 ----------------------------->>testtrygrep
@@ -274,8 +280,14 @@ echo ---------------------------- Test 45 ------------------------------>>testtr
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 46 ------------------------------>>testtrygrep
(pushd %srcdir% & %pcre2grep% -e "unopened)" -e abc ./testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -eabc -e "(unclosed" ./testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -eabc -e xyz -e "[unclosed" ./testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% --regex=123 -eabc -e xyz -e "[unclosed" ./testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 47 ------------------------------>>testtrygrep
(pushd %srcdir% & %pcre2grep% -Fx AB.VE^
@@ -320,11 +332,11 @@ echo ---------------------------- Test 55 ----------------------------->>testtry
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 56 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -c lazy ./testdata/grepinput* & popd) >>testtrygrep
(pushd %srcdir% & %pcre2grep% -c --exclude=grepinputC lazy ./testdata/grepinput* & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 57 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -c -l lazy ./testdata/grepinput* & popd) >>testtrygrep
(pushd %srcdir% & %pcre2grep% -c -l --exclude=grepinputC lazy ./testdata/grepinput* & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 58 ----------------------------->>testtrygrep
@@ -378,6 +390,12 @@ echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 70 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% --color=always -M -n "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -M "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -M -n "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 71 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -o "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep
@@ -481,25 +499,28 @@ echo ---------------------------- Test 95 ----------------------------->>testtry
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 96 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -L -r --include-dir=testdata --exclude "^^(?^!grepinput)" "fox" ./test* | sort & popd) >>testtrygrep
(pushd %srcdir% & %pcre2grep% -L -r --include-dir=testdata --exclude "^^(?^!grepinput)" --exclude=grepinput[MCU] "fox" ./test* | sort & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 97 ----------------------------->>testtrygrep
echo grepinput$>testtemp1grep
echo grepinput8>>testtemp1grep
(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
echo grepinputBad8>>testtemp1grep
(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude=grepinput[MCU] --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 98 ----------------------------->>testtrygrep
echo grepinput$>testtemp1grep
echo grepinput8>>testtemp1grep
(pushd %srcdir% & %pcre2grep% -L -r --exclude=grepinput3 --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
echo grepinputBad8>>testtemp1grep
(pushd %srcdir% & %pcre2grep% -L -r --exclude=grepinput3 --exclude=grepinput[MCU] --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 99 ----------------------------->>testtrygrep
echo grepinput$>testtemp1grep
echo grepinput8>testtemp2grep
(pushd %srcdir% & %pcre2grep% -L -r --include grepinput --exclude-from %builddir%\testtemp1grep --exclude-from=%builddir%\testtemp2grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
echo grepinputBad8>>testtemp1grep
(pushd %srcdir% & %pcre2grep% -L -r --include grepinput --exclude=grepinput[MCU] --exclude-from %builddir%\testtemp1grep --exclude-from=%builddir%\testtemp2grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 100 ------------------------------>>testtrygrep
@@ -533,7 +554,7 @@ echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 107 ----------------------------->>testtrygrep
echo a>testtemp1grep
echo aaaaa>>testtemp1grep
(pushd %srcdir% & %pcre2grep% --line-offsets "(?<=\Ka)" %builddir%\testtemp1grep & popd) >>testtrygrep 2>&1
(pushd %srcdir% & %pcre2grep% --line-offsets --allow-lookaround-bsk "(?<=\Ka)" %builddir%\testtemp1grep & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 108 ------------------------------>>testtrygrep
@@ -541,7 +562,7 @@ echo ---------------------------- Test 108 ------------------------------>>testt
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 109 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -cq lazy ./testdata/grepinput* & popd) >>testtrygrep
(pushd %srcdir% & %pcre2grep% -cq --exclude=grepinputC lazy ./testdata/grepinput* & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 110 ----------------------------->>testtrygrep
@@ -557,27 +578,27 @@ echo ---------------------------- Test 112 ----------------------------->>testtr
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 113 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% --total-count "the" testdata/grepinput* & popd) >>testtrygrep
(pushd %srcdir% & %pcre2grep% --total-count --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 114 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -tc "the" testdata/grepinput* & popd) >>testtrygrep
(pushd %srcdir% & %pcre2grep% -tc --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 115 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -tlc "the" testdata/grepinput* & popd) >>testtrygrep
(pushd %srcdir% & %pcre2grep% -tlc --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 116 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -th "the" testdata/grepinput* & popd) >>testtrygrep
(pushd %srcdir% & %pcre2grep% --exclude=grepinput[MCU] -th "the" testdata/grepinput* & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 117 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -tch "the" testdata/grepinput* & popd) >>testtrygrep
(pushd %srcdir% & %pcre2grep% -tch --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 118 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -tL "the" testdata/grepinput* & popd) >>testtrygrep
(pushd %srcdir% & %pcre2grep% -tL --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 119 ----------------------------->>testtrygrep
@@ -588,6 +609,266 @@ echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 120 ------------------------------>>testtrygrep
(pushd %srcdir% & %pcre2grep% -HO "$0:$2$1$3" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -HO "$&:$2$1$3" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -m 1 -O "$0:$a$b$e$f$r$t$v" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -HO "${X}" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -HO "XX$" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -O "$x{12345678}" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -O "$x{123Z" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% --output "$x{1234}" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 121 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -F "\E and (regex)" testdata/grepinputv & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 122 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -w "cat|dog" testdata/grepinputv & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 123 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -w "dog|cat" testdata/grepinputv & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 124 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -Mn --colour=always "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -Mn --colour=always -A2 "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -Mn "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -Mn -A2 "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
:: Test 125: coloured output for patterns that use \K inside a lookaround
:: (--allow-lookaround-bsk is passed on every invocation). Each run appends
:: its output and then its return code to testtrygrep.
echo ---------------------------- Test 125 ----------------------------->>testtrygrep
%printf% "abcd\n" >testNinputgrep
%pcre2grep% --colour=always --allow-lookaround-bsk "(?<=\K.)" testNinputgrep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% --colour=always --allow-lookaround-bsk "(?=.\K)" testNinputgrep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% --colour=always --allow-lookaround-bsk "(?<=\K[ac])" testNinputgrep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% --colour=always --allow-lookaround-bsk "(?=[ac]\K)" testNinputgrep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
:: Repeat the last pattern with GREP_COLORS set for this one invocation only.
set GREP_COLORS=ms=1;20
%pcre2grep% --colour=always --allow-lookaround-bsk "(?=[ac]\K)" testNinputgrep >>testtrygrep
:: Record the return code straight after the run, before any other command
:: executes, so that it is guaranteed to be pcre2grep's own exit status.
echo RC=^%ERRORLEVEL%>>testtrygrep
set GREP_COLORS=
echo ---------------------------- Test 126 ----------------------------->>testtrygrep
%printf% "Next line pattern has binary zero\nABC\0XYZ\n" >testtemp1grep
%printf% "ABC\0XYZ\nABCDEF\nDEFABC\n" >testtemp2grep
%pcre2grep% -a -f testtemp1grep testtemp2grep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
%printf% "Next line pattern is erroneous.\n^abc)(xy" >testtemp1grep
%pcre2grep% -a -f testtemp1grep testtemp2grep >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 127 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -o --om-capture=0 "pattern()()()()" testdata/grepinput & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 128 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -m1M -o1 --om-capture=0 "pattern()()()()" testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 129 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -m 2 "fox" testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 130 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -o -m2 "fox" testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 131 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -oc -m2 "fox" testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 132 ----------------------------->>testtrygrep
:: The Unix tests use fd3 here, but Windows only has StdIn/StdOut/StdErr (which, at the kernel
:: level, are not even numbered). Use a subshell instead.
(pushd %srcdir% & (%pcre2grep% -m1 -A3 "^match" & echo ---& head -1) <testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 133 ----------------------------->>testtrygrep
:: The Unix tests use fd3 here, but Windows only has StdIn/StdOut/StdErr (which, at the kernel
:: level, are not even numbered). Use a subshell instead.
(pushd %srcdir% & (%pcre2grep% -m1 -A3 "^match" & echo ---& %pcre2grep% -m1 -A3 "^match") <testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 134 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% --max-count=1 -nH -O "=$x{41}$x423$o{103}$o1045=" "fox" - & popd) <%srcdir%\testdata\grepinputv >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 135 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -HZ "word" ./testdata/grepinputv & popd) | %trnull% >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -lZ "word" ./testdata/grepinputv ./testdata/grepinputv & popd) | %trnull% >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -A 1 -B 1 -HZ "word" ./testdata/grepinputv & popd) | %trnull% >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -MHZn "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 136 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -m1MK -o1 --om-capture=0 "pattern()()()()" testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% --max-count=1MK -o1 --om-capture=0 "pattern()()()()" testdata/grepinput & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 137 ----------------------------->>testtrygrep
%printf% "Last line\nhas no newline" >testtemp1grep
%pcre2grep% -A1 Last testtemp1grep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 138 ----------------------------->>testtrygrep
%printf% "AbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\n" >testtemp1grep
%pcre2grep% --no-jit --heap-limit=0 b testtemp1grep >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 139 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% --line-buffered "fox" testdata/grepinputv & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 140 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% --buffer-size=10 -A1 "brown" testdata/grepinputv & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 141 ----------------------------->>testtrygrep
%printf% "%%s\testdata\grepinputv\n-\n" "%srcdir%" >testtemp1grep
%printf% "This is a line from stdin." >testtemp2grep
%pcre2grep% --file-list testtemp1grep "line from stdin" <testtemp2grep >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 142 ----------------------------->>testtrygrep
%printf% "/does/not/exist\n" >testtemp1grep
%printf% "This is a line from stdin." >testtemp2grep
%pcre2grep% --file-list testtemp1grep "line from stdin" >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 143 ----------------------------->>testtrygrep
%printf% "fox|cat" >testtemp1grep
%pcre2grep% -f - %srcdir%\testdata\grepinputv <testtemp1grep >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 144 ----------------------------->>testtrygrep
%pcre2grep% -f /non/exist %srcdir%\testdata\grepinputv >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 145 ----------------------------->>testtrygrep
%printf% "*meta*\rdog." >testtemp1grep
%pcre2grep% -Ncr -F -f testtemp1grep %srcdir%\testdata\grepinputv >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 146 ----------------------------->>testtrygrep
%printf% "A123B" >testtemp1grep
%pcre2grep% -H -e "123|fox" - <testtemp1grep >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% -h -e "123|fox" - %srcdir%\testdata\grepinputv <testtemp1grep >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% - %srcdir%\testdata\grepinputv <testtemp1grep >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 147 ----------------------------->>testtrygrep
%pcre2grep% -e "123|fox" -- -nonfile >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 148 ----------------------------->>testtrygrep
%pcre2grep% --nonexist >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% -n-n-bad >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% --context >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% --only-matching --output=xx >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% --colour=badvalue >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% --newline=badvalue >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% -d badvalue >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% -D badvalue >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% --buffer-size=0 >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% --exclude "(badpat" abc /dev/null >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% --exclude-from /non/exist abc /dev/null >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% --include-from /non/exist abc /dev/null >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% --file-list=/non/exist abc /dev/null >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 149 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% --binary-files=binary "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% --binary-files=wrong "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 150 ----------------------------->>testtrygrep
:: The Unix version of this test checks whether locales are supported. On Windows,
:: we assume they always are.
set LC_ALL=
set LC_CTYPE=locale.bad
(pushd %srcdir% & %pcre2grep% abc /dev/null & popd) >>testtrygrep 2>&1
echo RC=^%ERRORLEVEL%>>testtrygrep
set LC_CTYPE=
echo ---------------------------- Test 151 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% --colour=always -e this -e The -e "The wo" testdata/grepinputv & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 152 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -nA3 --group-separator="++" "four" ./testdata/grepinputx & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 153 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -nA3 --no-group-separator "four" ./testdata/grepinputx & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 154 ----------------------------->>testtrygrep
echo. >nul 2>testtemp1grep
(pushd %srcdir% & %pcre2grep% -f %builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 155 ----------------------------->>testtrygrep
echo. >testtemp1grep
(pushd %srcdir% & %pcre2grep% -f %builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 156 ----------------------------->>testtrygrep
%printf% "\n" >testtemp1grep
(pushd %srcdir% & %pcre2grep% --posix-pattern-file --file %builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 157 ----------------------------->>testtrygrep
%printf% "spaces \n" >testtemp1grep
(pushd %srcdir% & %pcre2grep% -o --posix-pattern-file --file=%builddir%\testtemp1grep ./testdata/grepinputv >%builddir%\testtemp2grep && %pcre2grep% -q "s " %builddir%\testtemp2grep & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 158 ----------------------------->>testtrygrep
%printf% "spaces.\n" >testtemp1grep
(pushd %srcdir% & %pcre2grep% -f %builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 159 ----------------------------->>testtrygrep
%printf% "spaces.\r\n" >testtemp1grep
(pushd %srcdir% & %pcre2grep% --posix-pattern-file -f%builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 160 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -nC3 "^(ert|jkl)" ./testdata/grepinput & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% -n -B4 -A2 "^(ert|dfg)" ./testdata/grepinput & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
:: Now compare the results.
@@ -602,15 +883,43 @@ if %utf8% neq 0 (
echo ---------------------------- Test U1 ------------------------------>testtrygrep
(pushd %srcdir% & %pcre2grep% -n -u --newline=any "^X" ./testdata/grepinput8 & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
echo ---------------------------- Test U2 ------------------------------>>testtrygrep
(pushd %srcdir% & %pcre2grep% -n -u -C 3 --newline=any "Match" ./testdata/grepinput8 & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
echo ---------------------------- Test U3 ------------------------------>>testtrygrep
(pushd %srcdir% & %pcre2grep% --line-offsets -u --newline=any "(?<=\K\x{17f})" ./testdata/grepinput8 & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
(pushd %srcdir% & %pcre2grep% --line-offsets -u --newline=any --allow-lookaround-bsk "(?<=\K\x{17f})" ./testdata/grepinput8 & popd) >>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
echo ---------------------------- Test U4 ------------------------------>>testtrygrep
(pushd %srcdir% & %pcre2grep% -u -o "...." ./testdata/grepinputBad8 & popd) >>testtrygrep 2>&1
echo RC=^!ERRORLEVEL!>>testtrygrep
echo ---------------------------- Test U5 ------------------------------>>testtrygrep
(pushd %srcdir% & %pcre2grep% -U -o "...." ./testdata/grepinputBad8 & popd) >>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
echo ---------------------------- Test U6 ----------------------------->>testtrygrep
(pushd %srcdir% & %pcre2grep% -u -m1 -O "=$x{1d3}$o{744}=" "fox" & popd) <%srcdir%\testdata\grepinputv >>testtrygrep 2>&1
echo RC=^!ERRORLEVEL!>>testtrygrep
echo ---------------------------- Test U7 ------------------------------>>testtrygrep
(pushd %srcdir% & %pcre2grep% -ui --colour=always "k+|\babc\b" ./testdata/grepinput8 & popd) >>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
echo ---------------------------- Test U8 ------------------------------>>testtrygrep
(pushd %srcdir% & %pcre2grep% -UiEP --colour=always "k+|\babc\b" ./testdata/grepinput8 & popd) >>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
echo ---------------------------- Test U9 ------------------------------>>testtrygrep
(pushd %srcdir% & %pcre2grep% -u --colour=always "A\d" ./testdata/grepinput8 & popd) >>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
echo ---------------------------- Test U10 ------------------------------>>testtrygrep
(pushd %srcdir% & %pcre2grep% -u --posix-digit --colour=always "A\d" ./testdata/grepinput8 & popd) >>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
%cf% %srcdir%\testdata\grepoutput8 testtrygrep %cfout%
if ERRORLEVEL 1 exit /b 1
@@ -631,58 +940,159 @@ echo Testing pcre2grep newline settings
echo ---------------------------- Test N1 ------------------------------>testtrygrep
%pcre2grep% -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% -B1 -n -N CR "^def" testNinputgrep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test N2 ------------------------------>>testtrygrep
%pcre2grep% -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% -B1 -n -N CRLF "^ghi" testNinputgrep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test N3 ------------------------------>>testtrygrep
for /f %%a in ('%printf% "def\rjkl"') do set pattern=%%a
%pcre2grep% -n --newline=cr -F "!pattern!" testNinputgrep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test N4 ------------------------------>>testtrygrep
%pcre2grep% -n --newline=crlf -F -f %srcdir%/testdata/greppatN4 testNinputgrep >>testtrygrep
%pcre2grep% -n --newline=crlf -F -f %srcdir%\testdata\greppatN4 testNinputgrep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test N5 ------------------------------>>testtrygrep
%pcre2grep% -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% -B1 -n --newline=any "^def" testNinputgrep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test N6 ------------------------------>>testtrygrep
%pcre2grep% -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% -B1 -n --newline=anycrlf "^jkl" testNinputgrep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test N7 ------------------------------>>testtrygrep
%printf% "xyz\0abc\0def" >testNinputgrep
%pcre2grep% -na --newline=nul "^(abc|def)" testNinputgrep | %trnull% >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
%pcre2grep% -B1 -na --newline=nul "^(abc|def)" testNinputgrep | %trnull% >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test N8 ------------------------------>>testtrygrep
%pcre2grep% -na --newline=anycrlf "^a" %srcdir%\testdata\grepinputBad8_Trail >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
%printf% "\n" >>testtrygrep
%cf% %srcdir%\testdata\grepoutputN testtrygrep %cfout%
if ERRORLEVEL 1 exit /b 1
:: If pcre2grep supports script callouts, run some tests on them.
:: These newline tests need UTF support.
:: Newline tests that need UTF-8 support. The results of both tests are
:: collected in testtrygrep and then compared with testdata\grepoutputUN;
:: a mismatch aborts the script with exit code 1.
if %utf8% neq 0 (
echo Testing pcre2grep newline settings with UTF-8 features
@REM The first header starts a fresh testtrygrep; everything after it appends.
echo ---------------------------- Test UN1 ------------------------------>testtrygrep
%pcre2grep% -nau --newline=anycrlf "^(abc|def)" %srcdir%\testdata\grepinputUN >>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
@REM Append here: a plain ">" would truncate the file and drop the UN1 results.
echo ---------------------------- Test UN2 ------------------------------>>testtrygrep
%pcre2grep% -nauU --newline=anycrlf "^a" %srcdir%\testdata\grepinputBad8_Trail >>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
%printf% "\n" >>testtrygrep
%cf% %srcdir%\testdata\grepoutputUN testtrygrep %cfout%
if ERRORLEVEL 1 exit /b 1
) else (
echo Skipping pcre2grep newline UTF-8 tests: no UTF-8 support in PCRE2 library
)
:: If pcre2grep supports script callouts, run some tests on them. It is possible
:: to restrict these callouts to the non-fork case, either for security, or for
:: environments that do not support fork(). This is handled by comparing to a
:: different output.
%pcre2grep% --help | %pcre2grep% -q "callout scripts in patterns are supported"
if %ERRORLEVEL% equ 0 (
echo Testing pcre2grep script callouts
%pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep
%pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep
%pcre2grep% "(T)(?C'|$0:$1$n')" %srcdir%/testdata/grepinputv >>testtrygrep
%pcre2grep% "(T)(?C'|$1$n')(*F)" %srcdir%/testdata/grepinputv >>testtrygrep
%pcre2grep% --help | %pcre2grep% -q "Non-script callout scripts in patterns are supported"
if %ERRORLEVEL% equ 0 (
echo --- Test 1 --->testtrygrep
%pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%\testdata\grepinputv >>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
echo --- Test 2 --->>testtrygrep
%pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%\testdata\grepinputv >>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
echo --- Test 3 --->>testtrygrep
%pcre2grep% "(T)(?C'|$0:$1$n')" %srcdir%\testdata\grepinputv >>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
echo --- Test 4 --->>testtrygrep
%pcre2grep% "(T)(?C'cscript|//nologo|printf.js|%%s\r\n|$0:$1$n')" %srcdir%\testdata\grepinputv >>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
echo --- Test 5 --->>testtrygrep
%pcre2grep% "(T)(?C'|$1$n')(*F)" %srcdir%\testdata\grepinputv >>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
echo --- Test 6 --->>testtrygrep
%pcre2grep% -m1 "(T)(?C'|$0:$1:$x{41}$o{101}$n')" %srcdir%\testdata\grepinputv >>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
%pcre2grep% --help | %pcre2grep% -q "Non-fork callout scripts in patterns are supported"
if ^!ERRORLEVEL! equ 0 (
set nonfork=1
%cf% %srcdir%\testdata\grepoutputCN testtrygrep %cfout%
) else (
set nonfork=0
%cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout%
)
if ERRORLEVEL 1 exit /b 1
@REM These callout tests need UTF support.
if %utf8% neq 0 (
echo Testing pcre2grep script callout with UTF-8 features
echo --- Test 1 --->testtrygrep
%pcre2grep% -u "(T)(?C'|$0:$x{a6}$n')" %srcdir%\testdata\grepinputv >>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
echo --- Test 2 --->>testtrygrep
%pcre2grep% -u "(T)(?C'cscript|//nologo|printf.js|%%s\r\n|$0:$x{a6}$n')" %srcdir%\testdata\grepinputv >>testtrygrep
echo RC=^!ERRORLEVEL!>>testtrygrep
if ^!nonfork! equ 1 (
%cf% %srcdir%\testdata\grepoutputCNU testtrygrep %cfout%
) else (
%cf% %srcdir%\testdata\grepoutputCU testtrygrep %cfout%
)
if ERRORLEVEL 1 exit /b 1
) else (
echo Skipping pcre2grep script callout UTF-8 tests: no UTF-8 support in PCRE2 library
)
) else (
echo Script callouts are not supported
)
:: Finally, some tests to exercise code that is not tested above, just to be
:: sure that it runs OK. Doing this improves the coverage statistics. The output
:: is not checked.
echo Testing miscellaneous pcre2grep arguments (unchecked)
%printf% "" >testtrygrep
echo. >nul 2>testtrygrep
call :checkspecial "-xxxxx" 2 || exit /b 1
call :checkspecial "--help" 0 || exit /b 1
call :checkspecial "--line-buffered --colour=auto abc nul" 1 || exit /b 1
call :checkspecial "--line-buffered --color abc nul" 1 || exit /b 1
call :checkspecial "-dskip abc ." 1 || exit /b 1
call :checkspecial "-Dread -Dskip abc nul" 1 || exit /b 1
:: Clean up local working files
del testcf printf.js testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep
del testcf printf.js trnull.js testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep
exit /b 0
+96 -65
View File
@@ -88,8 +88,9 @@ title22="Test 22: \C tests with UTF (not supported for DFA matching)"
title23="Test 23: \C disabled test"
title24="Test 24: Non-UTF pattern conversion tests"
title25="Test 25: UTF pattern conversion tests"
title26="Test 26: Auto-generated unicode property tests"
maxtest=26
title26="Test 26: Unicode property tests (compatible with Perl >= 5.38)"
title27="Test 27: Auto-generated unicode property tests"
maxtest=27
titleheap="Test 'heap': Environment-specific heap tests"
if [ $# -eq 1 -a "$1" = "list" ]; then
@@ -120,6 +121,7 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
echo $title24
echo $title25
echo $title26
echo $title27
echo ""
echo $titleheap
echo ""
@@ -183,7 +185,7 @@ checkresult()
checkspecial()
{
$valgrind $vjs ./pcre2test $1 >>testtry
$sim $valgrind $vjs $pcre2test $1 >>testtry
if [ $? -ne 0 ] ; then
echo "** pcre2test $1 failed - check testtry"
exit 1
@@ -191,24 +193,7 @@ checkspecial()
}
# ------ Special EBCDIC Test -------
if [ $# -eq 1 -a "$1" = "ebcdic" ]; then
$valgrind ./pcre2test -C ebcdic >/dev/null
ebcdic=$?
if [ $ebcdic -ne 1 ] ; then
echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
exit 1
fi
for opt in "" "-dfa"; do
./pcre2test -q $opt $testdata/testinputEBC >testtry
checkresult $? EBC "$opt"
done
exit 0
fi
# ------ Normal Tests ------
# ------ Test setup ------
# Default values
@@ -221,10 +206,16 @@ sim=
skip=
valgrind=
vjs=
# Use ./pcre2test unless the caller has already set a non-empty $pcre2test;
# the ":" no-op exists only so that the ${var:=default} expansion is evaluated.
: ${pcre2test:=./pcre2test}
# This is in case the caller has set aliases (as I do - PH)
unset cp ls mv rm
# Stop early with a clear message if the selected test program cannot be
# executed, before any of the tests below try to run it.
if [ ! -x $pcre2test ] ; then
echo "** $pcre2test does not exist or is not executable."
exit 1
fi
# Process options and select which tests to run; for those that are explicitly
# requested, check that the necessary optional facilities are available.
@@ -255,7 +246,9 @@ do23=no
do24=no
do25=no
do26=no
do27=no
doheap=no
doebcdic=no
while [ $# -gt 0 ] ; do
case $1 in
@@ -286,14 +279,16 @@ while [ $# -gt 0 ] ; do
24) do24=yes;;
25) do25=yes;;
26) do26=yes;;
heap) doheap=yes;;
-8) arg8=yes;;
27) do27=yes;;
heap) doheap=yes;;
ebcdic) doebcdic=yes;;
-8) arg8=yes;;
-16) arg16=yes;;
-32) arg32=yes;;
bigstack|-bigstack) bigstack=yes;;
nojit|-nojit) nojit=yes;;
sim|-sim) shift; sim=$1;;
valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all-non-file";;
valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all-non-file --error-exitcode=70";;
valgrind-log|-valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all-non-file --log-file=report.%p ";;
~*)
if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then
@@ -325,7 +320,7 @@ done
# Find which optional facilities are available.
$sim ./pcre2test -C linksize >/dev/null
$sim $pcre2test -C linksize >/dev/null
link_size=$?
if [ $link_size -lt 2 ] ; then
echo "RunTest: Failed to find internal link size"
@@ -339,10 +334,10 @@ fi
# If it is possible to set the system stack size and -bigstack was given,
# set up a large stack.
$sim ./pcre2test -S 64 /dev/null /dev/null
$sim $pcre2test -S 32 /dev/null /dev/null
support_setstack=$?
if [ $support_setstack -eq 0 -a "$bigstack" != "" ] ; then
setstack="-S 64"
setstack="-S 32"
else
setstack=""
fi
@@ -350,16 +345,16 @@ fi
# All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only
# one need be.
$sim ./pcre2test -C pcre2-8 >/dev/null
$sim $pcre2test -C pcre2-8 >/dev/null
support8=$?
$sim ./pcre2test -C pcre2-16 >/dev/null
$sim $pcre2test -C pcre2-16 >/dev/null
support16=$?
$sim ./pcre2test -C pcre2-32 >/dev/null
$sim $pcre2test -C pcre2-32 >/dev/null
support32=$?
# \C may be disabled
$sim ./pcre2test -C backslash-C >/dev/null
$sim $pcre2test -C backslash-C >/dev/null
supportBSC=$?
# Initialize all bitsizes skipped
@@ -411,7 +406,7 @@ fi
# sizes if both are supported; we can't have UTF-8 support without UTF-16 or
# UTF-32 support.
$sim ./pcre2test -C unicode >/dev/null
$sim $pcre2test -C unicode >/dev/null
utf=$?
# When JIT is used with valgrind, we need to set up valgrind suppressions as
@@ -419,7 +414,7 @@ utf=$?
# the hardware supports SSE2.
jitopt=
$sim ./pcre2test -C jit >/dev/null
$sim $pcre2test -C jit >/dev/null
jit=$?
if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then
jitopt=-jit
@@ -437,7 +432,8 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
$do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
$do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \
$do20 = no -a $do21 = no -a $do22 = no -a $do23 = no -a \
$do24 = no -a $do25 = no -a $do26 = no -a $doheap = no \
$do24 = no -a $do25 = no -a $do26 = no -a $do27 = no -a \
$doheap = no -a $doebcdic = no \
]; then
do0=yes
do1=yes
@@ -466,6 +462,7 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
do24=yes
do25=yes
do26=yes
do27=yes
fi
# Handle any explicit skips at this stage, so that an argument list may consist
@@ -477,9 +474,12 @@ for i in $skip; do eval do$i=no; done
echo ""
echo PCRE2 C library tests using test data from $testdata
$sim ./pcre2test /dev/null
$sim $pcre2test /dev/null
echo ""
# ------ Normal Tests ------
for bmode in "$test8" "$test16" "$test32"; do
case "$bmode" in
skip) continue;;
@@ -512,7 +512,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $do1 = yes ] ; then
echo $title1
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput1 testtry
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput1 testtry
checkresult $? 1 "$opt"
done
fi
@@ -524,10 +524,10 @@ for bmode in "$test8" "$test16" "$test32"; do
echo $title2 "(excluding UTF-$bits)"
cp $testdata/testbtables .
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
saverc=$?
if [ $saverc = 0 ] ; then
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -70,-62,-2,-1,0,100,101,191,300 >>testtry
$sim $valgrind ${opt:+$vjs} $pcre2test -q $bmode $opt -error -80,-62,-2,-1,0,100,101,191,300 >>testtry
checkresult $? 2 "$opt"
else
checkresult $saverc 2 "$opt"
@@ -553,7 +553,7 @@ for bmode in "$test8" "$test16" "$test32"; do
locale -a | grep "^$loc\$" >/dev/null
if [ $? -eq 0 ] ; then
echo "/a/locale=$loc" | \
$sim $valgrind ./pcre2test -q $bmode | \
$sim $valgrind $pcre2test -q $bmode | \
grep "Failed to set locale" >/dev/null
if [ $? -ne 0 ] ; then
locale=$loc
@@ -580,7 +580,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ "$locale" != "" ] ; then
echo $title3 "(using '$locale' locale)"
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $infile testtry
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $infile testtry
if [ $? = 0 ] ; then
case "$opt" in
-jit) with=" with JIT";;
@@ -617,7 +617,7 @@ for bmode in "$test8" "$test16" "$test32"; do
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput4 testtry
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput4 testtry
checkresult $? 4 "$opt"
done
fi
@@ -629,7 +629,7 @@ for bmode in "$test8" "$test16" "$test32"; do
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput5 testtry
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput5 testtry
checkresult $? 5 "$opt"
done
fi
@@ -639,7 +639,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $do6 = yes ] ; then
echo $title6
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput6 testtry
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput6 testtry
checkresult $? 6 ""
fi
@@ -648,7 +648,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
$sim $valgrind ./pcre2test -q $setstack $bmode $opt $testdata/testinput7 testtry
$sim $valgrind $pcre2test -q $setstack $bmode $opt $testdata/testinput7 testtry
checkresult $? 7 ""
fi
fi
@@ -666,7 +666,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput8 testtry
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput8 testtry
checkresult $? 8-$bits-$link_size ""
fi
fi
@@ -679,7 +679,7 @@ for bmode in "$test8" "$test16" "$test32"; do
echo " Skipped when running 16/32-bit tests"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput9 testtry
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput9 testtry
checkresult $? 9 "$opt"
done
fi
@@ -695,7 +695,7 @@ for bmode in "$test8" "$test16" "$test32"; do
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput10 testtry
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput10 testtry
checkresult $? 10 "$opt"
done
fi
@@ -709,7 +709,7 @@ for bmode in "$test8" "$test16" "$test32"; do
echo " Skipped when running 8-bit tests"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput11 testtry
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput11 testtry
checkresult $? 11-$bits "$opt"
done
fi
@@ -726,7 +726,7 @@ for bmode in "$test8" "$test16" "$test32"; do
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput12 testtry
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput12 testtry
checkresult $? 12-$bits "$opt"
done
fi
@@ -739,7 +739,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ "$bits" = "8" ] ; then
echo " Skipped when running 8-bit tests"
else
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput13 testtry
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput13 testtry
checkresult $? 13 ""
fi
fi
@@ -751,7 +751,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
$sim $valgrind ./pcre2test -q $setstack $bmode $opt $testdata/testinput14 testtry
$sim $valgrind $pcre2test -q $setstack $bmode $opt $testdata/testinput14 testtry
checkresult $? 14-$bits ""
fi
fi
@@ -760,7 +760,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $do15 = yes ] ; then
echo $title15
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput15 testtry
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput15 testtry
checkresult $? 15 ""
fi
@@ -771,7 +771,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $jit -ne 0 ] ; then
echo " Skipped because JIT is available"
else
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput16 testtry
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput16 testtry
checkresult $? 16 ""
fi
fi
@@ -783,7 +783,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
echo " Skipped because JIT is not available or nojit was specified"
else
$sim $valgrind $vjs ./pcre2test -q $setstack $bmode $testdata/testinput17 testtry
$sim $valgrind $vjs $pcre2test -q $setstack $bmode $testdata/testinput17 testtry
checkresult $? 17 ""
fi
fi
@@ -795,7 +795,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
echo " Skipped when running 16/32-bit tests"
else
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput18 testtry
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput18 testtry
checkresult $? 18 ""
fi
fi
@@ -809,7 +809,7 @@ for bmode in "$test8" "$test16" "$test32"; do
elif [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput19 testtry
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput19 testtry
checkresult $? 19 ""
fi
fi
@@ -818,7 +818,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $do20 = yes ] ; then
echo $title20
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput20 testtry
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput20 testtry
checkresult $? 20 ""
fi
@@ -830,7 +830,7 @@ for bmode in "$test8" "$test16" "$test32"; do
echo " Skipped because \C is disabled"
else
for opt in "" $jitopt -dfa; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput21 testtry
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput21 testtry
checkresult $? 21 "$opt"
done
fi
@@ -846,7 +846,7 @@ for bmode in "$test8" "$test16" "$test32"; do
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput22 testtry
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput22 testtry
checkresult $? 22-$bits "$opt"
done
fi
@@ -859,7 +859,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $supportBSC -ne 0 ] ; then
echo " Skipped because \C is not disabled"
else
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput23 testtry
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput23 testtry
checkresult $? 23 ""
fi
fi
@@ -868,7 +868,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ "$do24" = yes ] ; then
echo $title24
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput24 testtry
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput24 testtry
checkresult $? 24 ""
fi
@@ -879,12 +879,12 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput25 testtry
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput25 testtry
checkresult $? 25 ""
fi
fi
# Auto-generated unicode property tests
# Unicode property tests
if [ $do26 = yes ] ; then
echo $title26
@@ -892,24 +892,55 @@ for bmode in "$test8" "$test16" "$test32"; do
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput26 testtry
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput26 testtry
checkresult $? 26 "$opt"
done
fi
fi
# Auto-generated Unicode property tests
if [ $do27 = yes ] ; then
echo $title27
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput27 testtry
checkresult $? 27 "$opt"
done
fi
fi
# Manually selected heap tests - output may vary in different environments,
# which is why they are not automatically run.
if [ $doheap = yes ] ; then
echo $titleheap
$sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinputheap testtry
$sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinputheap testtry
checkresult $? heap-$bits ""
fi
# End of loop for 8/16/32-bit tests
done
# ------ Special EBCDIC Test -------
# Runs only when doebcdic is "yes"; it is a single pass that does not use $bmode.
if [ $doebcdic = yes ] ; then
# Probe the test binary: an exit status of 1 from "-C ebcdic" is the only
# value accepted below as meaning EBCDIC support is present.
$sim $valgrind $pcre2test -C ebcdic >/dev/null
ebcdic=$?
if [ $ebcdic -ne 1 ] ; then
echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
exit 1
fi
# Run the EBCDIC input twice: once with no extra option and once with -dfa.
for opt in "" "-dfa"; do
# Output is captured by redirecting stdout into testtry.
$sim $valgrind $pcre2test -q $opt $testdata/testinputEBC >testtry
# checkresult is defined elsewhere in this script; presumably it compares
# testtry with the expected output for test "EBC" - confirm against its definition.
checkresult $? EBC "$opt"
done
fi
# Clean up local working files
rm -f testbtables testSinput test3input testsaved1 testsaved2 test3output test3outputA test3outputB teststdout teststderr testtry
+68 -32
View File
@@ -13,7 +13,7 @@
@rem line. Added argument validation and added error reporting.
@rem
@rem Sheri Pierce added logic to skip feature dependent tests
@rem tests 4 5 7 10 12 14 19 and 22 require Unicode support
@rem tests 4 5 7 10 12 14 19 22 25 26 and 27 require Unicode support
@rem 8 requires Unicode and link size 2
@rem 16 requires absence of jit support
@rem 17 requires presence of jit support
@@ -27,8 +27,8 @@
@rem Tidied and updated for new tests 21, 22, 23 by PH, October 2015.
@rem PH added missing "set type" for test 22, April 2016.
@rem PH added copy command for new testbtables file, November 2020
@rem PH caused it to show comparison output when comparison faile, July 2023
@rem PH updated unknown error number in test
@rem PH caused it to show comparison output when comparison failed, July 2023
@rem PH updated unknown error number in test
setlocal enabledelayedexpansion
@@ -39,7 +39,7 @@ if exist ..\testdata\ set srcdir=..)
if [%srcdir%]==[] (
if exist ..\..\testdata\ set srcdir=..\..)
if NOT exist %srcdir%\testdata\ (
Error: echo distribution testdata folder not found!
echo Error: distribution testdata folder not found!
call :conferror
exit /b 1
goto :eof
@@ -82,7 +82,7 @@ if not exist testout16 md testout16
if not exist testoutjit16 md testoutjit16
)
if %support16% EQU 1 (
if %support32% EQU 1 (
if not exist testout32 md testout32
if not exist testoutjit32 md testoutjit32
)
@@ -110,20 +110,24 @@ set do20=no
set do21=no
set do22=no
set do23=no
set do24=no
set do25=no
set do26=no
set do27=no
set all=yes
for %%a in (%*) do (
set valid=no
for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) do if %%v == %%a set valid=yes
for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27) do if %%v == %%a set valid=yes
if "!valid!" == "yes" (
set do%%a=yes
set all=no
) else (
) else (
echo Invalid test number - %%a!
echo Usage %0 [ test_number ] ...
echo Where test_number is one or more optional test numbers 1 through 23, default is all tests.
exit /b 1
)
echo Usage %0 [ test_number ] ...
echo Where test_number is one or more optional test numbers 1 through 27, default is all tests.
exit /b 1
)
)
set failed="no"
@@ -137,9 +141,9 @@ if "%all%" == "yes" (
set do7=yes
set do8=yes
set do9=yes
set do10=no
set do10=yes
set do11=yes
set do12=no
set do12=yes
set do13=yes
set do14=yes
set do15=yes
@@ -151,6 +155,10 @@ if "%all%" == "yes" (
set do21=yes
set do22=yes
set do23=yes
set do24=yes
set do25=yes
set do26=yes
set do27=yes
)
@echo RunTest.bat's pcre2test output is written to newly created subfolders
@@ -202,6 +210,10 @@ if "%do20%" == "yes" call :do20
if "%do21%" == "yes" call :do21
if "%do22%" == "yes" call :do22
if "%do23%" == "yes" call :do23
if "%do24%" == "yes" call :do24
if "%do25%" == "yes" call :do25
if "%do26%" == "yes" call :do26
if "%do27%" == "yes" call :do27
:modeSkip
if "%mode%" == "" (
set mode=-16
@@ -247,7 +259,15 @@ if [%3] == [] (
)
if %1 == 8 (
set outnum=8-%bits%-%link_size%
set outnum=%1-%bits%-%link_size%
) else if %1 == 11 (
set outnum=%1-%bits%
) else if %1 == 12 (
set outnum=%1-%bits%
) else if %1 == 14 (
set outnum=%1-%bits%
) else if %1 == 22 (
set outnum=%1-%bits%
) else (
set outnum=%1
)
@@ -266,24 +286,10 @@ if errorlevel 1 (
set failed="yes"
goto :eof
) else if [%1]==[2] (
%pcre2test% %mode% %4 %5 %6 %7 %8 %9 -error -70,-62,-2,-1,0,100,101,191,300 >>%2%bits%\%testoutput%
%pcre2test% %mode% %4 %5 %6 %7 %8 %9 -error -80,-62,-2,-1,0,100,101,191,300 >>%2%bits%\%testoutput%
)
set type=
if [%1]==[11] (
set type=-%bits%
)
if [%1]==[12] (
set type=-%bits%
)
if [%1]==[14] (
set type=-%bits%
)
if [%1]==[22] (
set type=-%bits%
)
fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput% >NUL
fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput% >NUL
if errorlevel 1 (
echo. failed comparison: fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput%
@@ -294,7 +300,7 @@ if errorlevel 1 (
echo.
goto :eof
)
fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput%
fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput%
set failed="yes"
goto :eof
@@ -309,7 +315,7 @@ if %jit% EQU 1 call :runsub 1 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do2
copy /y %srcdir%\testdata\testbtables testbtables
copy /y %srcdir%\testdata\testbtables testbtables
call :runsub 2 testout "API, errors, internals, and non-Perl stuff" -q
if %jit% EQU 1 call :runsub 2 testoutjit "Test with JIT Override" -q -jit
goto :eof
@@ -504,6 +510,36 @@ if %supportBSC% EQU 1 (
call :runsub 23 testout "Backslash-C disabled test" -q
goto :eof
:do24
@rem Test 24: non-UTF pattern conversion tests. Has no feature guard, so it
@rem is always handed to :runsub (test number, output folder prefix, title, options).
call :runsub 24 testout "Non-UTF pattern conversion tests" -q
goto :eof
:do25
@rem Test 25: UTF pattern conversion tests. Skipped with a message when
@rem %unicode% is 0; otherwise handed to :runsub with the -q option.
if %unicode% EQU 0 (
echo Test 25 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 25 testout "UTF pattern conversion tests" -q
goto :eof
:do26
@rem Test 26: Unicode property tests. Skipped with a message when %unicode%
@rem is 0. Otherwise run once normally and, when %jit% is 1, a second time
@rem with -jit using the testoutjit output folder prefix.
if %unicode% EQU 0 (
echo Test 26 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 26 testout "Unicode property tests (Compatible with Perl >= 5.38)" -q
if %jit% EQU 1 call :runsub 26 testoutjit "Test with JIT Override" -q -jit
goto :eof
:do27
@rem Test 27: auto-generated Unicode property tests. Skipped with a message
@rem when %unicode% is 0. Otherwise run once normally and, when %jit% is 1,
@rem a second time with -jit using the testoutjit output folder prefix.
if %unicode% EQU 0 (
echo Test 27 Skipped due to absence of Unicode support.
goto :eof
)
call :runsub 27 testout "Auto-generated unicode property tests" -q
if %jit% EQU 1 call :runsub 27 testoutjit "Test with JIT Override" -q -jit
goto :eof
:conferror
@echo.
@echo Either your build is incomplete or you have a configuration error.
+55
View File
@@ -0,0 +1,55 @@
# Security policies
## Release security
The PCRE2 project provides source-only releases, with no binaries.
These source releases can be downloaded from the
[GitHub Releases](https://github.com/PCRE2Project/pcre2/releases) page. Each
release file is GPG-signed.
* Releases up to and including 10.44 are signed by Philip Hazel (GPG key:
<kbd>45F68D54BBE23FB3039B46E59766E084FB0F43D8</kbd>)
* Releases from 10.45 onwards will be signed by Nicholas Wilson (GPG key:
<kbd>A95536204A3BB489715231282A98E77EB6F24CA8</kbd>, cross-signed by Philip
Hazel's key for release continuity)
From release 10.45 onwards, the source code will additionally be provided via
Git checkout of the (GPG-signed) release tag.
Please contact the maintainers for any queries about release integrity or the
project's supply-chain.
## Previous vulnerabilities
* CVE-2025-58050 (August 2025). Affects 10.45 only (not earlier), and is fixed
in 10.46.
## Reporting vulnerabilities
The PCRE2 project prioritises security. We appreciate third-party testing and
security research, and would be grateful if you could responsibly disclose your
findings to us. We will make every effort to acknowledge your contributions.
To report a security issue, please use the GitHub Security Advisory
["Report a Vulnerability"](https://github.com/PCRE2Project/pcre2/security/advisories/new)
tab. (Alternatively, if you prefer, you may send a GPG-encrypted email to one of
the maintainers.)
### Timeline
As a very small volunteer team, we cannot guarantee a rapid response, but we
aim to respond within 1 week, or perhaps 2 during holidays.
### Response procedure
PCRE2 has in the past made at least one rapid release in response to
security incidents.
We have never produced an embargoed release, or provided preferential
access to security fixes to any clients.
We would aim to notify security managers from trusted downstream distributors,
such as major Linux distributions, via the `pcre2-dev` mailing list, by
publicly signalling an upcoming security release before disclosing the
vulnerability publicly, where advance notification is possible.
+107 -17
View File
@@ -12,9 +12,39 @@ pub fn build(b: *std.Build) !void {
const linkage = b.option(std.builtin.LinkMode, "linkage", "whether to statically or dynamically link the library") orelse @as(std.builtin.LinkMode, if (target.result.isGnuLibC()) .dynamic else .static);
const codeUnitWidth = b.option(CodeUnitWidth, "code-unit-width", "Sets the code unit width") orelse .@"8";
const copyFiles = b.addWriteFiles();
_ = copyFiles.addCopyFile(.{ .path = "src/config.h.generic" }, "config.h");
_ = copyFiles.addCopyFile(.{ .path = "src/pcre2.h.generic" }, "pcre2.h");
const pcre2_header_dir = b.addWriteFiles();
const pcre2_header = pcre2_header_dir.addCopyFile(b.path("src/pcre2.h.generic"), "pcre2.h");
const config_header = b.addConfigHeader(
.{
.style = .{ .cmake = b.path("config-cmake.h.in") },
.include_path = "config.h",
},
.{
.HAVE_ASSERT_H = true,
.HAVE_UNISTD_H = (target.result.os.tag != .windows),
.HAVE_WINDOWS_H = (target.result.os.tag == .windows),
.HAVE_MEMMOVE = true,
.HAVE_STRERROR = true,
.SUPPORT_PCRE2_8 = codeUnitWidth == CodeUnitWidth.@"8",
.SUPPORT_PCRE2_16 = codeUnitWidth == CodeUnitWidth.@"16",
.SUPPORT_PCRE2_32 = codeUnitWidth == CodeUnitWidth.@"32",
.SUPPORT_UNICODE = true,
.PCRE2_EXPORT = null,
.PCRE2_LINK_SIZE = 2,
.PCRE2_HEAP_LIMIT = 20000000,
.PCRE2_MATCH_LIMIT = 10000000,
.PCRE2_MATCH_LIMIT_DEPTH = "MATCH_LIMIT",
.PCRE2_MAX_VARLOOKBEHIND = 255,
.NEWLINE_DEFAULT = 2,
.PCRE2_PARENS_NEST_LIMIT = 250,
},
);
// pcre2-8/16/32.so
const lib = std.Build.Step.Compile.create(b, .{
.name = b.fmt("pcre2-{s}", .{@tagName(codeUnitWidth)}),
@@ -27,27 +57,26 @@ pub fn build(b: *std.Build) !void {
.linkage = linkage,
});
lib.defineCMacro("HAVE_CONFIG_H", null);
lib.defineCMacro("PCRE2_CODE_UNIT_WIDTH", @tagName(codeUnitWidth));
if (linkage == .static) {
try lib.root_module.c_macros.append(b.allocator, "-DPCRE2_STATIC");
lib.defineCMacro("PCRE2_STATIC", null);
}
lib.root_module.addCMacro("PCRE2_CODE_UNIT_WIDTH", @tagName(codeUnitWidth));
lib.addConfigHeader(config_header);
lib.addIncludePath(pcre2_header_dir.getDirectory());
lib.addIncludePath(b.path("src"));
lib.addCSourceFile(.{
.file = copyFiles.addCopyFile(.{ .path = "src/pcre2_chartables.c.dist" }, "pcre2_chartables.c"),
.flags = &.{
"-DHAVE_CONFIG_H",
},
.file = b.addWriteFiles().addCopyFile(b.path("src/pcre2_chartables.c.dist"), "pcre2_chartables.c"),
});
lib.addIncludePath(.{ .path = b.pathFromRoot("src") });
lib.addIncludePath(copyFiles.getDirectory());
lib.addCSourceFiles(.{
.files = &.{
"src/pcre2_auto_possess.c",
"src/pcre2_chkdint.c",
"src/pcre2_compile.c",
"src/pcre2_compile_class.c",
"src/pcre2_config.c",
"src/pcre2_context.c",
"src/pcre2_convert.c",
@@ -55,6 +84,7 @@ pub fn build(b: *std.Build) !void {
"src/pcre2_error.c",
"src/pcre2_extuni.c",
"src/pcre2_find_bracket.c",
"src/pcre2_jit_compile.c",
"src/pcre2_maketables.c",
"src/pcre2_match.c",
"src/pcre2_match_data.c",
@@ -72,12 +102,72 @@ pub fn build(b: *std.Build) !void {
"src/pcre2_valid_utf.c",
"src/pcre2_xclass.c",
},
.flags = &.{
"-DHAVE_CONFIG_H",
"-DPCRE2_STATIC",
},
});
lib.installHeader(.{ .path = "src/pcre2.h.generic" }, "pcre2.h");
lib.installHeader(pcre2_header, "pcre2.h");
b.installArtifact(lib);
// pcre2test
const pcre2test = b.addExecutable(.{
.name = "pcre2test",
.target = target,
.optimize = optimize,
});
// pcre2-posix.so
if (codeUnitWidth == CodeUnitWidth.@"8") {
const posixLib = std.Build.Step.Compile.create(b, .{
.name = "pcre2-posix",
.root_module = .{
.target = target,
.optimize = optimize,
.link_libc = true,
},
.kind = .lib,
.linkage = linkage,
});
posixLib.defineCMacro("HAVE_CONFIG_H", null);
posixLib.defineCMacro("PCRE2_CODE_UNIT_WIDTH", @tagName(codeUnitWidth));
if (linkage == .static) {
posixLib.defineCMacro("PCRE2_STATIC", null);
}
posixLib.addConfigHeader(config_header);
posixLib.addIncludePath(pcre2_header_dir.getDirectory());
posixLib.addIncludePath(b.path("src"));
posixLib.addCSourceFiles(.{
.files = &.{
"src/pcre2posix.c",
},
});
posixLib.installHeader(b.path("src/pcre2posix.h"), "pcre2posix.h");
b.installArtifact(posixLib);
pcre2test.linkLibrary(posixLib);
}
// pcre2test (again)
pcre2test.defineCMacro("HAVE_CONFIG_H", null);
pcre2test.addConfigHeader(config_header);
pcre2test.addIncludePath(pcre2_header_dir.getDirectory());
pcre2test.addIncludePath(b.path("src"));
pcre2test.addCSourceFile(.{
.file = b.path("src/pcre2test.c"),
});
pcre2test.linkLibC();
pcre2test.linkLibrary(lib);
b.installArtifact(pcre2test);
}
+1 -1
View File
@@ -7,7 +7,7 @@ are met:
2. Redistributions in binary form must reproduce the copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+8 -11
View File
@@ -2,15 +2,12 @@
if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
set(EDITLINE_FOUND TRUE)
else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
FIND_PATH(EDITLINE_INCLUDE_DIR readline.h PATH_SUFFIXES
editline
edit/readline
)
FIND_LIBRARY(EDITLINE_LIBRARY NAMES edit)
include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
else()
find_path(EDITLINE_INCLUDE_DIR readline.h PATH_SUFFIXES editline edit/readline)
MARK_AS_ADVANCED(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
find_library(EDITLINE_LIBRARY NAMES edit)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
mark_as_advanced(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
endif()
@@ -1,58 +0,0 @@
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME (DEFAULT_MSG|"Custom failure message") VAR1 ... )
# This macro is intended to be used in FindXXX.cmake modules files.
# It handles the REQUIRED and QUIET argument to FIND_PACKAGE() and
# it also sets the <UPPERCASED_NAME>_FOUND variable.
# The package is found if all variables listed are TRUE.
# Example:
#
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR)
#
# LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and
# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE.
# If it is not found and REQUIRED was used, it fails with FATAL_ERROR,
# independent whether QUIET was used or not.
# If it is found, the location is reported using the VAR1 argument, so
# here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out.
# If the second argument is DEFAULT_MSG, the message in the failure case will
# be "Could NOT find LibXml2", if you don't like this message you can specify
# your own custom failure message there.
MACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1 )
IF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
IF (${_NAME}_FIND_REQUIRED)
SET(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}")
ELSE (${_NAME}_FIND_REQUIRED)
SET(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}")
ENDIF (${_NAME}_FIND_REQUIRED)
ELSE("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
SET(_FAIL_MESSAGE "${_FAIL_MSG}")
ENDIF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
STRING(TOUPPER ${_NAME} _NAME_UPPER)
SET(${_NAME_UPPER}_FOUND TRUE)
IF(NOT ${_VAR1})
SET(${_NAME_UPPER}_FOUND FALSE)
ENDIF(NOT ${_VAR1})
FOREACH(_CURRENT_VAR ${ARGN})
IF(NOT ${_CURRENT_VAR})
SET(${_NAME_UPPER}_FOUND FALSE)
ENDIF(NOT ${_CURRENT_VAR})
ENDFOREACH(_CURRENT_VAR)
IF (${_NAME_UPPER}_FOUND)
IF (NOT ${_NAME}_FIND_QUIETLY)
MESSAGE(STATUS "Found ${_NAME}: ${${_VAR1}}")
ENDIF (NOT ${_NAME}_FIND_QUIETLY)
ELSE (${_NAME_UPPER}_FOUND)
IF (${_NAME}_FIND_REQUIRED)
MESSAGE(FATAL_ERROR "${_FAIL_MESSAGE}")
ELSE (${_NAME}_FIND_REQUIRED)
IF (NOT ${_NAME}_FIND_QUIETLY)
MESSAGE(STATUS "${_FAIL_MESSAGE}")
ENDIF (NOT ${_NAME}_FIND_QUIETLY)
ENDIF (${_NAME}_FIND_REQUIRED)
ENDIF (${_NAME_UPPER}_FOUND)
ENDMACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS)
+18 -20
View File
@@ -5,25 +5,23 @@
# GNU Readline library finder
if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
set(READLINE_FOUND TRUE)
else(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
FIND_PATH(READLINE_INCLUDE_DIR readline/readline.h
/usr/include/readline
)
# 2008-04-22 The next clause used to read like this:
#
# FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
# FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses )
# include(FindPackageHandleStandardArgs)
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY )
#
# I was advised to modify it such that it will find an ncurses library if
# required, but not if one was explicitly given, that is, it allows the
# default to be overridden. PH
else()
find_path(READLINE_INCLUDE_DIR readline/readline.h /usr/include/readline)
FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY )
# 2008-04-22 The next clause used to read like this:
#
# FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
# FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses )
# include(FindPackageHandleStandardArgs)
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY )
#
# I was advised to modify it such that it will find an ncurses library if
# required, but not if one was explicitly given, that is, it allows the
# default to be overridden. PH
MARK_AS_ADVANCED(READLINE_INCLUDE_DIR READLINE_LIBRARY)
endif(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
find_library(READLINE_LIBRARY NAMES readline)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY)
mark_as_advanced(READLINE_INCLUDE_DIR READLINE_LIBRARY)
endif()
+1 -2
View File
@@ -4,8 +4,7 @@ set(PACKAGE_VERSION_PATCH 0)
set(PACKAGE_VERSION @PCRE2_MAJOR@.@PCRE2_MINOR@.0)
# Check whether the requested PACKAGE_FIND_VERSION is compatible
if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION OR
PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR)
if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION OR PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR)
set(PACKAGE_VERSION_COMPATIBLE FALSE)
else()
set(PACKAGE_VERSION_COMPATIBLE TRUE)
+68 -48
View File
@@ -30,33 +30,49 @@ set(PCRE2_16BIT_NAME pcre2-16)
set(PCRE2_32BIT_NAME pcre2-32)
set(PCRE2_POSIX_NAME pcre2-posix)
find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h DOC "PCRE2 include directory")
if (PCRE2_USE_STATIC_LIBS)
if (MSVC)
if(PCRE2_USE_STATIC_LIBS)
if(MSVC)
set(PCRE2_8BIT_NAME pcre2-8-static)
set(PCRE2_16BIT_NAME pcre2-16-static)
set(PCRE2_32BIT_NAME pcre2-32-static)
set(PCRE2_POSIX_NAME pcre2-posix-static)
endif ()
endif()
set(PCRE2_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})
set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
else ()
else()
set(PCRE2_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX})
if (MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX)
if(MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX)
set(PCRE2_PREFIX "")
endif ()
endif()
set(PCRE2_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
if (MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX)
if(MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX)
set(PCRE2_SUFFIX "-0.dll")
elseif(MSVC)
set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
endif ()
endif ()
find_library(PCRE2_8BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "8 bit PCRE2 library")
find_library(PCRE2_16BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}d${PCRE2_SUFFIX} DOC "16 bit PCRE2 library")
find_library(PCRE2_32BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}d${PCRE2_SUFFIX} DOC "32 bit PCRE2 library")
find_library(PCRE2_POSIX_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}d${PCRE2_SUFFIX} DOC "8 bit POSIX PCRE2 library")
endif()
endif()
find_library(
PCRE2_8BIT_LIBRARY
NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX}
DOC "8 bit PCRE2 library"
)
find_library(
PCRE2_16BIT_LIBRARY
NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}d${PCRE2_SUFFIX}
DOC "16 bit PCRE2 library"
)
find_library(
PCRE2_32BIT_LIBRARY
NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}d${PCRE2_SUFFIX}
DOC "32 bit PCRE2 library"
)
find_library(
PCRE2_POSIX_LIBRARY
NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}d${PCRE2_SUFFIX}
DOC "8 bit POSIX PCRE2 library"
)
unset(PCRE2_NON_STANDARD_LIB_PREFIX)
unset(PCRE2_NON_STANDARD_LIB_SUFFIX)
unset(PCRE2_8BIT_NAME)
@@ -65,51 +81,55 @@ unset(PCRE2_32BIT_NAME)
unset(PCRE2_POSIX_NAME)
# Set version
if (PCRE2_INCLUDE_DIR)
if(PCRE2_INCLUDE_DIR)
set(PCRE2_VERSION "@PCRE2_MAJOR@.@PCRE2_MINOR@.0")
endif ()
endif()
# Which components have been found.
if (PCRE2_8BIT_LIBRARY)
if(PCRE2_8BIT_LIBRARY)
set(PCRE2_8BIT_FOUND TRUE)
endif ()
if (PCRE2_16BIT_LIBRARY)
endif()
if(PCRE2_16BIT_LIBRARY)
set(PCRE2_16BIT_FOUND TRUE)
endif ()
if (PCRE2_32BIT_LIBRARY)
endif()
if(PCRE2_32BIT_LIBRARY)
set(PCRE2_32BIT_FOUND TRUE)
endif ()
if (PCRE2_POSIX_LIBRARY)
endif()
if(PCRE2_POSIX_LIBRARY)
set(PCRE2_POSIX_FOUND TRUE)
endif ()
endif()
# Check if at least one component has been specified.
list(LENGTH PCRE2_FIND_COMPONENTS PCRE2_NCOMPONENTS)
if (PCRE2_NCOMPONENTS LESS 1)
if(PCRE2_NCOMPONENTS LESS 1)
message(FATAL_ERROR "No components have been specified. This is not allowed. Please, specify at least one component.")
endif ()
endif()
unset(PCRE2_NCOMPONENTS)
# When the POSIX component has been specified, make sure that the 8BIT component is also specified.
set(PCRE2_8BIT_COMPONENT FALSE)
set(PCRE2_POSIX_COMPONENT FALSE)
foreach(component ${PCRE2_FIND_COMPONENTS})
if (component STREQUAL "8BIT")
if(component STREQUAL "8BIT")
set(PCRE2_8BIT_COMPONENT TRUE)
elseif (component STREQUAL "POSIX")
elseif(component STREQUAL "POSIX")
set(PCRE2_POSIX_COMPONENT TRUE)
endif ()
endif()
endforeach()
if (PCRE2_POSIX_COMPONENT AND NOT PCRE2_8BIT_COMPONENT)
message(FATAL_ERROR "The component POSIX is specified while the 8BIT one is not. This is not allowed. Please, also specify the 8BIT component.")
if(PCRE2_POSIX_COMPONENT AND NOT PCRE2_8BIT_COMPONENT)
message(
FATAL_ERROR
"The component POSIX is specified while the 8BIT one is not. This is not allowed. Please, also specify the 8BIT component."
)
endif()
unset(PCRE2_8BIT_COMPONENT)
unset(PCRE2_POSIX_COMPONENT)
include(FindPackageHandleStandardArgs)
set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}")
find_package_handle_standard_args(PCRE2
find_package_handle_standard_args(
PCRE2
FOUND_VAR PCRE2_FOUND
REQUIRED_VARS PCRE2_INCLUDE_DIR
HANDLE_COMPONENTS
@@ -118,31 +138,31 @@ find_package_handle_standard_args(PCRE2
)
set(PCRE2_LIBRARIES)
if (PCRE2_FOUND)
if(PCRE2_FOUND)
foreach(component ${PCRE2_FIND_COMPONENTS})
if (PCRE2_USE_STATIC_LIBS)
if(PCRE2_USE_STATIC_LIBS)
add_library(PCRE2::${component} STATIC IMPORTED)
target_compile_definitions(PCRE2::${component} INTERFACE PCRE2_STATIC)
else ()
else()
add_library(PCRE2::${component} SHARED IMPORTED)
endif ()
set_target_properties(PCRE2::${component} PROPERTIES
IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}"
IMPORTED_IMPLIB "${PCRE2_${component}_LIBRARY}"
INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}"
endif()
set_target_properties(
PCRE2::${component}
PROPERTIES
IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}"
IMPORTED_IMPLIB "${PCRE2_${component}_LIBRARY}"
INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}"
)
if (component STREQUAL "POSIX")
set_target_properties(PCRE2::${component} PROPERTIES
INTERFACE_LINK_LIBRARIES "PCRE2::8BIT"
LINK_LIBRARIES "PCRE2::8BIT"
if(component STREQUAL "POSIX")
set_target_properties(
PCRE2::${component}
PROPERTIES INTERFACE_LINK_LIBRARIES "PCRE2::8BIT" LINK_LIBRARIES "PCRE2::8BIT"
)
endif ()
endif()
set(PCRE2_LIBRARIES ${PCRE2_LIBRARIES} ${PCRE2_${component}_LIBRARY})
mark_as_advanced(PCRE2_${component}_LIBRARY)
endforeach()
endif ()
endif()
mark_as_advanced(
PCRE2_INCLUDE_DIR
)
mark_as_advanced(PCRE2_INCLUDE_DIR)
+7 -5
View File
@@ -1,6 +1,9 @@
/* config.h for CMake builds */
#cmakedefine HAVE_ASSERT_H 1
#cmakedefine HAVE_BUILTIN_ASSUME 1
#cmakedefine HAVE_BUILTIN_MUL_OVERFLOW 1
#cmakedefine HAVE_BUILTIN_UNREACHABLE 1
#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
#cmakedefine HAVE_DIRENT_H 1
#cmakedefine HAVE_SYS_STAT_H 1
@@ -17,7 +20,6 @@
#cmakedefine SUPPORT_PCRE2_8 1
#cmakedefine SUPPORT_PCRE2_16 1
#cmakedefine SUPPORT_PCRE2_32 1
#cmakedefine PCRE2_DEBUG 1
#cmakedefine DISABLE_PERCENT_ZT 1
#cmakedefine SUPPORT_LIBBZ2 1
@@ -39,11 +41,11 @@
#cmakedefine HEAP_MATCH_RECURSE 1
#cmakedefine NEVER_BACKSLASH_C 1
#define PCRE2_EXPORT @PCRE2_EXPORT@
#define LINK_SIZE @PCRE2_LINK_SIZE@
#define PCRE2_EXPORT @PCRE2_EXPORT@
#define LINK_SIZE @PCRE2_LINK_SIZE@
#define HEAP_LIMIT @PCRE2_HEAP_LIMIT@
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@
#define MAX_VARLOOKBEHIND @PCRE2_MAX_VARLOOKBEHIND@
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
#define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@
+63 -25
View File
@@ -9,23 +9,32 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
dnl be defined as -RC2, for example. For real releases, it should be empty.
m4_define(pcre2_major, [10])
m4_define(pcre2_minor, [44])
m4_define(pcre2_minor, [46])
m4_define(pcre2_prerelease, [])
m4_define(pcre2_date, [2024-06-07])
m4_define(pcre2_date, [2025-08-27])
# Libtool shared library interface versions (current:revision:age)
m4_define(libpcre2_8_version, [13:0:13])
m4_define(libpcre2_16_version, [13:0:13])
m4_define(libpcre2_32_version, [13:0:13])
m4_define(libpcre2_posix_version, [3:5:0])
m4_define(libpcre2_8_version, [14:0:14])
m4_define(libpcre2_16_version, [14:0:14])
m4_define(libpcre2_32_version, [14:0:14])
m4_define(libpcre2_posix_version, [3:6:0])
# NOTE: The CMakeLists.txt file searches for the above variables in the first
# 50 lines of this file. Please update that if the variables above are moved.
AC_PREREQ([2.62])
AC_PREREQ([2.60])
AC_INIT([PCRE2],pcre2_major.pcre2_minor[]pcre2_prerelease,[],[pcre2])
AC_CONFIG_SRCDIR([src/pcre2.h.in])
AM_INIT_AUTOMAKE([dist-bzip2 dist-zip])
AM_INIT_AUTOMAKE([dist-bzip2 dist-zip foreign])
ifelse(pcre2_prerelease, [-DEV],
[dnl For development builds, ./configure is not checked in to Git, so we are
dnl happy to have it regenerated as needed.
AM_MAINTAINER_MODE([enable])],
[dnl For a release build (or RC), the ./configure script we ship in the
dnl tarball (and check in to the Git tag) should not be regenerated
dnl implicitly. This is important if users want to check out a release tag
dnl using Git.
AM_MAINTAINER_MODE])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
AC_CONFIG_HEADERS(src/config.h)
@@ -73,6 +82,40 @@ AC_SYS_LARGEFILE
PCRE2_VISIBILITY
# Check for Clang __attribute__((uninitialized)) feature
AC_MSG_CHECKING([for __attribute__((uninitialized))])
AC_LANG_PUSH([C])
tmp_CFLAGS=$CFLAGS
if test $WORKING_WERROR -eq 1; then
CFLAGS="$CFLAGS -Werror"
fi
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
[[char buf[128] __attribute__((uninitialized));(void)buf]])],
[pcre2_cc_cv_attribute_uninitialized=yes],
[pcre2_cc_cv_attribute_uninitialized=no])
AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized])
if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then
AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler
supports __attribute__((uninitialized))])
fi
CFLAGS=$tmp_CFLAGS
AC_LANG_POP([C])
# Check for the assume() builtin
AC_MSG_CHECKING([for __assume()])
AC_LANG_PUSH([C])
AC_LINK_IFELSE([AC_LANG_PROGRAM([[]], [[__assume(1)]])],
[pcre2_cc_cv_builtin_assume=yes],
[pcre2_cc_cv_builtin_assume=no])
AC_MSG_RESULT([$pcre2_cc_cv_builtin_assume])
if test "$pcre2_cc_cv_builtin_assume" = yes; then
AC_DEFINE([HAVE_BUILTIN_ASSUME], 1,
[Define this if your compiler provides __assume()])
fi
AC_LANG_POP([C])
# Check for the mul_overflow() builtin
AC_MSG_CHECKING([for __builtin_mul_overflow()])
@@ -95,22 +138,18 @@ if test "$pcre2_cc_cv_builtin_mul_overflow" = yes; then
fi
AC_LANG_POP([C])
# Check for Clang __attribute__((uninitialized)) feature
# Check for the unreachable() builtin
AC_MSG_CHECKING([for __attribute__((uninitialized))])
AC_MSG_CHECKING([for __builtin_unreachable()])
AC_LANG_PUSH([C])
tmp_CFLAGS=$CFLAGS
CFLAGS="$CFLAGS -Werror"
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
[[char buf[128] __attribute__((uninitialized));(void)buf]])],
[pcre2_cc_cv_attribute_uninitialized=yes],
[pcre2_cc_cv_attribute_uninitialized=no])
AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized])
if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then
AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler
supports __attribute__((uninitialized))])
AC_LINK_IFELSE([AC_LANG_PROGRAM([[int r;]], [[if (r) __builtin_unreachable()]])],
[pcre2_cc_cv_builtin_unreachable=yes],
[pcre2_cc_cv_builtin_unreachable=no])
AC_MSG_RESULT([$pcre2_cc_cv_builtin_unreachable])
if test "$pcre2_cc_cv_builtin_unreachable" = yes; then
AC_DEFINE([HAVE_BUILTIN_UNREACHABLE], 1,
[Define this if your compiler provides __builtin_unreachable()])
fi
CFLAGS=$tmp_CFLAGS
AC_LANG_POP([C])
# Versioning
@@ -191,7 +230,7 @@ if test "$enable_jit" = "auto"; then
CPPFLAGS=-I$srcdir
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
#define SLJIT_CONFIG_AUTO 1
#include "src/sljit/sljitConfigCPU.h"
#include "deps/sljit/sljit_src/sljitConfigCPU.h"
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
#error unsupported
#endif]])], enable_jit=yes, enable_jit=no)
@@ -285,7 +324,7 @@ AC_ARG_ENABLE(never-backslash-C,
# Handle --enable-ebcdic
AC_ARG_ENABLE(ebcdic,
AS_HELP_STRING([--enable-ebcdic],
[assume EBCDIC coding rather than ASCII; incompatible with --enable-utf; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
[assume EBCDIC coding rather than ASCII; incompatible with --enable-unicode; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
, enable_ebcdic=no)
# Handle --enable-ebcdic-nl25
@@ -521,7 +560,7 @@ HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
sure both macros are undefined; an emulation function will then be used. */])
# Checks for header files.
AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h)
AC_CHECK_HEADERS(assert.h limits.h sys/types.h sys/stat.h dirent.h)
AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1])
@@ -529,7 +568,6 @@ AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1])
AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes")
AM_CONDITIONAL(WITH_PCRE2_16, test "x$enable_pcre2_16" = "xyes")
AM_CONDITIONAL(WITH_PCRE2_32, test "x$enable_pcre2_32" = "xyes")
AM_CONDITIONAL(WITH_DEBUG, test "x$enable_debug" = "xyes")
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes")
+54 -42
View File
@@ -105,6 +105,7 @@ example.
pcre2_chkdint.c
pcre2_chartables.c
pcre2_compile.c
pcre2_compile_class.c
pcre2_config.c
pcre2_context.c
pcre2_convert.c
@@ -138,7 +139,7 @@ example.
Note that you must compile pcre2_jit_compile.c, even if you have not
defined SUPPORT_JIT in src/config.h, because when JIT support is not
configured, dummy functions are compiled. When JIT support IS configured,
pcre2_jit_compile.c #includes other files from the sljit subdirectory,
pcre2_jit_compile.c #includes other files from the sljit dependency,
all of whose names begin with "sljit". It also #includes
src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile
those yourself.
@@ -301,56 +302,66 @@ Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
spaces in the names for your CMake installation and your PCRE2 source and build
directories.
The following instructions were contributed by a PCRE1 user, but they should
also work for PCRE2. If they are not followed exactly, errors may occur. In the
event that errors do occur, it is recommended that you delete the CMake cache
before attempting to repeat the CMake build process. In the CMake GUI, the
cache can be deleted by selecting "File > Delete Cache".
If you are using CMake and encounter errors, deleting the CMake cache and
restarting from a fresh build may fix the error. In the CMake GUI, the cache can
be deleted by selecting "File > Delete Cache"; alternatively, the file
"CMakeCache.txt" in the build directory can be deleted.
1. Install the latest CMake version available from http://www.cmake.org/, and
ensure that cmake\bin is on your path.
1. Install the latest CMake version available from http://www.cmake.org/, and
ensure that cmake\bin is on your path.
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
directory such as C:\pcre2. You should ensure your local date and time
is not earlier than the file dates in your source dir if the release is
very new.
2. Unzip (retaining folder structure) the PCRE2 source tree into a source
directory such as C:\pcre2. You should ensure your local date and time
is not earlier than the file dates in your source dir if the release is
very new.
3. Create a new, empty build directory, preferably a subdirectory of the
source dir. For example, C:\pcre2\pcre2-xx\build.
3. Create a new, empty build directory, preferably a subdirectory of the
source dir. For example, C:\pcre2\pcre2-xx\build.
4. Run cmake-gui from the Shell environment of your build tool, for example,
Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
to start Cmake from the Windows Start menu, as this can lead to errors.
4. Run CMake.
5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
build directories, respectively.
- Using the CLI, simply run `cmake ..` inside the `build/` directory. You can
use the `ccmake` ncurses GUI to select and configure PCRE2 features.
6. Hit the "Configure" button.
- Using the CMake GUI:
7. Select the particular IDE / build tool that you are using (Visual
Studio, MSYS makefiles, MinGW makefiles, etc.)
a) Run cmake-gui from the Shell environment of your build tool, for
example, Msys for Msys/MinGW or Visual Studio Command Prompt for
VC/VC++.
8. The GUI will then list several configuration options. This is where
you can disable Unicode support or select other PCRE2 optional features.
b) Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and
build directories, respectively.
9. Hit "Configure" again. The adjacent "Generate" button should now be
active.
c) Press the "Configure" button.
10. Hit "Generate".
d) Select the particular IDE / build tool that you are using (Visual
Studio, MSYS makefiles, MinGW makefiles, etc.)
11. The build directory should now contain a usable build system, be it a
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
cmake-gui and use the generated build system with your compiler or IDE.
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
solution, select the desired configuration (Debug, or Release, etc.) and
build the ALL_BUILD project.
e) The GUI will then list several configuration options. This is where
you can disable Unicode support or select other PCRE2 optional features.
12. If during configuration with cmake-gui you've elected to build the test
programs, you can execute them by building the test project. E.g., for
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
most recent build configuration is targeted by the tests. A summary of
test results is presented. Complete test output is subsequently
available for review in Testing\Temporary under your build dir.
f) Press "Configure" again. The adjacent "Generate" button should now be
active.
g) Press "Generate".
5. The build directory should now contain a usable build system, be it a
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
cmake-gui and use the generated build system with your compiler or IDE.
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2
solution, select the desired configuration (Debug, or Release, etc.) and
build the ALL_BUILD project.
Regardless of build system used, `cmake --build .` will build it.
6. If during configuration with cmake-gui you've elected to build the test
programs, you can execute them by building the test project. E.g., for
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
most recent build configuration is targeted by the tests. A summary of
test results is presented. Complete test output is subsequently
available for review in Testing\Temporary under your build dir.
Regardless of build system used, `ctest` will run the tests.
BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO
@@ -425,6 +436,7 @@ OpenVMS. They are in the "vms" directory in the distribution tarball. Please
read the file called vms/openvms_readme.txt. The pcre2test and pcre2grep
programs contain some VMS-specific code.
===========================
Last Updated: 16 April 2024
===========================
==============================
Last updated: 26 December 2024
==============================
+86 -72
View File
@@ -385,7 +385,7 @@ library. They are also documented in the pcre2build man page.
If this is done, when pcre2test's input is from a terminal, it reads it using
the readline() function. This provides line-editing and history facilities.
Note that libreadline is GPL-licenced, so if you distribute a binary of
Note that libreadline is GPL-licensed, so if you distribute a binary of
pcre2test linked in this way, there may be licensing issues. These can be
avoided by linking with libedit (which has a BSD licence) instead.
@@ -411,20 +411,19 @@ library. They are also documented in the pcre2build man page.
Instead of %td or %zu, %lu is used, with a cast for size_t values.
. There is a special option called --enable-fuzz-support for use by people who
want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
library. If set, it causes an extra library called libpcre2-fuzzsupport.a to
be built, but not installed. This contains a single function called
LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the
length of the string. When called, this function tries to compile the string
as a pattern, and if that succeeds, to match it. This is done both with no
options and with some random options bits that are generated from the string.
Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
be created. This is normally run under valgrind or used when PCRE2 is
compiled with address sanitizing enabled. It calls the fuzzing function and
outputs information about what it is doing. The input strings are specified
by arguments: if an argument starts with "=" the rest of it is a literal
input string. Otherwise, it is assumed to be a file name, and the contents
of the file are the test string.
want to run fuzzing tests on PCRE2. If set, it causes an extra library
called libpcre2-fuzzsupport.a to be built, but not installed. This contains
a single function called LLVMFuzzerTestOneInput() whose arguments are a
pointer to a string and the length of the string. When called, this function
tries to compile the string as a pattern, and if that succeeds, to match
it. This is done both with no options and with some random option bits that
are generated from the string. Setting --enable-fuzz-support also causes an
executable called pcre2fuzzcheck-{8,16,32} to be created. This is normally
run under valgrind or used when PCRE2 is compiled with address sanitizing
enabled. It calls the fuzzing function and outputs information about what it
is doing. The input strings are specified by arguments: if an argument
starts with "=" the rest of it is a literal input string. Otherwise, it is
assumed to be a file name, and the contents of the file are the test string.
. Releases before 10.30 could be compiled with --disable-stack-for-recursion,
which caused pcre2_match() to use individual blocks on the heap for
@@ -510,6 +509,7 @@ system. The following are installed (file names are all relative to the
LICENCE
NEWS
README
SECURITY
pcre2.txt (a concatenation of the man(3) pages)
pcre2test.txt the pcre2test man page
pcre2grep.txt the pcre2grep man page
@@ -607,8 +607,9 @@ zip formats. The command "make distcheck" does the same, but then does a trial
build of the new distribution to ensure that it works.
If you have modified any of the man page sources in the doc directory, you
should first run the PrepareRelease script before making a distribution. This
script creates the .txt and HTML forms of the documentation from the man pages.
should first run the maint/PrepareRelease script before making a distribution.
This script creates the .txt and HTML forms of the documentation from the man
pages.
Testing PCRE2
@@ -822,37 +823,38 @@ The distribution should contain the files listed below.
ASCII coding; unless --enable-rebuild-chartables is
specified, used by copying to pcre2_chartables.c
src/pcre2posix.c )
src/pcre2_auto_possess.c )
src/pcre2_chkdint.c )
src/pcre2_compile.c )
src/pcre2_config.c )
src/pcre2_context.c )
src/pcre2_convert.c )
src/pcre2_dfa_match.c )
src/pcre2_error.c )
src/pcre2_extuni.c )
src/pcre2_find_bracket.c )
src/pcre2_jit_compile.c )
src/pcre2_jit_match.c ) sources for the functions in the library,
src/pcre2_jit_misc.c ) and some internal functions that they use
src/pcre2_maketables.c )
src/pcre2_match.c )
src/pcre2_match_data.c )
src/pcre2_newline.c )
src/pcre2_ord2utf.c )
src/pcre2_pattern_info.c )
src/pcre2_script_run.c )
src/pcre2_serialize.c )
src/pcre2_string_utils.c )
src/pcre2_study.c )
src/pcre2_substitute.c )
src/pcre2_substring.c )
src/pcre2_tables.c )
src/pcre2_ucd.c )
src/pcre2_ucptables.c )
src/pcre2_valid_utf.c )
src/pcre2_xclass.c )
src/pcre2posix.c )
src/pcre2_auto_possess.c )
src/pcre2_chkdint.c )
src/pcre2_compile.c )
src/pcre2_compile_class.c )
src/pcre2_config.c )
src/pcre2_context.c )
src/pcre2_convert.c )
src/pcre2_dfa_match.c )
src/pcre2_error.c )
src/pcre2_extuni.c )
src/pcre2_find_bracket.c )
src/pcre2_jit_compile.c )
src/pcre2_jit_match.c ) sources for the functions in the library,
src/pcre2_jit_misc.c ) and some internal functions that they use
src/pcre2_maketables.c )
src/pcre2_match.c )
src/pcre2_match_data.c )
src/pcre2_newline.c )
src/pcre2_ord2utf.c )
src/pcre2_pattern_info.c )
src/pcre2_script_run.c )
src/pcre2_serialize.c )
src/pcre2_string_utils.c )
src/pcre2_study.c )
src/pcre2_substitute.c )
src/pcre2_substring.c )
src/pcre2_tables.c )
src/pcre2_ucd.c )
src/pcre2_ucptables.c )
src/pcre2_valid_utf.c )
src/pcre2_xclass.c )
src/pcre2_printint.c debugging function that is used by pcre2test,
src/pcre2_fuzzsupport.c function for (optional) fuzzing support
@@ -860,13 +862,16 @@ The distribution should contain the files listed below.
src/config.h.in template for config.h, when built by "configure"
src/pcre2.h.in template for pcre2.h when built by "configure"
src/pcre2posix.h header for the external POSIX wrapper API
src/pcre2_compile.h header for internal use
src/pcre2_internal.h header for internal use
src/pcre2_intmodedep.h a mode-specific internal header
src/pcre2_jit_char_inc.h header used by JIT
src/pcre2_jit_neon_inc.h header used by JIT
src/pcre2_jit_simd_inc.h header used by JIT
src/pcre2_ucp.h header for Unicode property handling
src/pcre2_util.h header for internal utils
sljit/* source files for the JIT compiler
deps/sljit/sljit_src/* source files for the JIT compiler
(B) Source files for programs that use PCRE2:
@@ -878,48 +883,49 @@ The distribution should contain the files listed below.
(C) Auxiliary files:
132html script to turn "man" pages into HTML
AUTHORS information about the author of PCRE2
AUTHORS.md information about the authors of PCRE2
ChangeLog log of changes to the code
CleanTxt script to clean nroff output for txt man pages
Detrail script to remove trailing spaces
HACKING some notes about the internals of PCRE2
INSTALL generic installation instructions
LICENCE conditions for the use of PCRE2
LICENCE.md conditions for the use of PCRE2
COPYING the same, using GNU's standard name
SECURITY.md information on reporting vulnerabilities
Makefile.in ) template for Unix Makefile, which is built by
) "configure"
Makefile.am ) the automake input that was used to create
) Makefile.in
NEWS important changes in this release
NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools
PrepareRelease script to make preparations for "make dist"
README this file
RunTest a Unix shell script for running tests
RunGrepTest a Unix shell script for pcre2grep tests
RunTest.bat a Windows batch file for running tests
RunGrepTest.bat a Windows batch file for pcre2grep tests
aclocal.m4 m4 macros (generated by "aclocal")
config.guess ) files used by libtool,
config.sub ) used only when building a shared library
m4/* m4 macros (used by autoconf)
configure a configuring shell script (built by autoconf)
configure.ac ) the autoconf input that was used to build
) "configure" and config.h
depcomp ) script to find program dependencies, generated by
) automake
doc/*.3 man page sources for PCRE2
doc/*.1 man page sources for pcre2grep and pcre2test
doc/index.html.src the base HTML page
doc/html/* HTML documentation
doc/pcre2.txt plain text version of the man pages
doc/pcre2-config.txt plain text documentation of pcre2-config script
doc/pcre2grep.txt plain text documentation of grep utility program
doc/pcre2test.txt plain text documentation of test program
install-sh a shell script for installing files
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config
ltmain.sh file used to build a libtool script
missing ) common stub for a few missing GNU programs while
) installing, generated by automake
mkinstalldirs script for making install directories
ar-lib )
config.guess )
config.sub )
depcomp ) helper tools generated by libtool and
compile ) automake, used internally by ./configure
install-sh )
ltmain.sh )
missing )
test-driver )
perltest.sh Script for running a Perl test program
pcre2-config.in source of script which retains PCRE2 information
testdata/testinput* test data for main library tests
@@ -927,12 +933,13 @@ The distribution should contain the files listed below.
testdata/grep* input and output for pcre2grep tests
testdata/* other supporting test files
(D) Auxiliary files for cmake support
(D) Auxiliary files for CMake support
cmake/COPYING-CMAKE-SCRIPTS
cmake/FindPackageHandleStandardArgs.cmake
cmake/FindEditline.cmake
cmake/FindReadline.cmake
cmake/pcre2-config-version.cmake.in
cmake/pcre2-config.cmake.in
CMakeLists.txt
config-cmake.h.in
@@ -943,14 +950,21 @@ The distribution should contain the files listed below.
src/config.h.generic ) a version of config.h for use in non-"configure"
) environments
(F) Auxiliary files for building PCRE2 under OpenVMS
(F) Auxiliary files for building PCRE2 using other build systems
BUILD.bazel )
MODULE.bazel ) files used by the Bazel build system
WORKSPACE.bazel )
build.zig file used by zig's build system
(G) Auxiliary files for building PCRE2 under OpenVMS
vms/configure.com )
vms/openvms_readme.txt ) These files were contributed by a PCRE2 user.
vms/pcre2.h_patch )
vms/stdint.h )
Philip Hazel
Email local part: Philip.Hazel
Email domain: gmail.com
Last updated: 15 April 2024
==============================
Last updated: 18 December 2024
==============================
+9
View File
@@ -267,6 +267,9 @@ in the library.
<tr><td><a href="pcre2_set_offset_limit.html">pcre2_set_offset_limit</a></td>
<td>&nbsp;&nbsp;Set the offset limit</td></tr>
<tr><td><a href="pcre2_set_optimize.html">pcre2_set_optimize</a></td>
<td>&nbsp;&nbsp;Set an optimization directive</td></tr>
<tr><td><a href="pcre2_set_parens_nest_limit.html">pcre2_set_parens_nest_limit</a></td>
<td>&nbsp;&nbsp;Set the parentheses nesting limit</td></tr>
@@ -276,6 +279,12 @@ in the library.
<tr><td><a href="pcre2_set_recursion_memory_management.html">pcre2_set_recursion_memory_management</a></td>
<td>&nbsp;&nbsp;Obsolete function that (from 10.30 onwards) does nothing</td></tr>
<tr><td><a href="pcre2_set_substitute_callout.html">pcre2_set_substitute_callout</a></td>
<td>&nbsp;&nbsp;Set a substitution callout function</td></tr>
<tr><td><a href="pcre2_set_substitute_case_callout.html">pcre2_set_substitute_case_callout</a></td>
<td>&nbsp;&nbsp;Set a substitution case callout function</td></tr>
<tr><td><a href="pcre2_substitute.html">pcre2_substitute</a></td>
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string and do
substitutions</td></tr>
+11 -11
View File
@@ -16,7 +16,7 @@ please consult the man page, in case the conversion went wrong.
<li><a name="TOC1" href="#SEC1">INTRODUCTION</a>
<li><a name="TOC2" href="#SEC2">SECURITY CONSIDERATIONS</a>
<li><a name="TOC3" href="#SEC3">USER DOCUMENTATION</a>
<li><a name="TOC4" href="#SEC4">AUTHOR</a>
<li><a name="TOC4" href="#SEC4">AUTHORS</a>
<li><a name="TOC5" href="#SEC5">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">INTRODUCTION</a><br>
@@ -190,22 +190,22 @@ listing), and the short pages for individual functions, are concatenated in
In the "man" and HTML formats, there is also a short page for each C library
function, listing its arguments and results.
</P>
<br><a name="SEC4" href="#TOC1">AUTHOR</a><br>
<br><a name="SEC4" href="#TOC1">AUTHORS</a><br>
<P>
Philip Hazel
<br>
Retired from University Computing Service
<br>
Cambridge, England.
<br>
The current maintainers of PCRE2 are Nicholas Wilson and Zoltan Herczeg.
</P>
<P>
Putting an actual email address here is a spam magnet. If you want to email me,
use my two names separated by a dot at gmail.com.
PCRE2 was written by Philip Hazel, of the University Computing Service,
Cambridge, England. Many others have also contributed.
</P>
<P>
To contact the maintainers, please use the GitHub issues tracker or PCRE2
mailing list, as described at the project page:
<a href="https://github.com/PCRE2Project/pcre2">https://github.com/PCRE2Project/pcre2</a>
</P>
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
<P>
Last updated: 27 August 2021
Last updated: 18 December 2024
<br>
Copyright &copy; 1997-2021 University of Cambridge.
<br>
+1
View File
@@ -57,6 +57,7 @@ The primary option bits are:
PCRE2_ALLOW_EMPTY_CLASS Allow empty classes
PCRE2_ALT_BSUX Alternative handling of \u, \U, and \x
PCRE2_ALT_CIRCUMFLEX Alternative handling of ^ in multiline mode
PCRE2_ALT_EXTENDED_CLASS Alternative extended character class syntax
PCRE2_ALT_VERBNAMES Process backslashes in verb names
PCRE2_AUTO_CALLOUT Compile automatic callouts
PCRE2_CASELESS Do caseless matching
+19 -8
View File
@@ -33,9 +33,18 @@ details are given in the
documentation.
</P>
<P>
The first argument is a pointer that was returned by a successful call to
<b>pcre2_compile()</b>, and the second must contain one or more of the following
bits:
The availability of JIT support can be tested by calling
<b>pcre2_jit_compile()</b> with a single option PCRE2_JIT_TEST_ALLOC (the
code argument is ignored, so a NULL value is accepted). Such a call
returns zero if JIT is available and has a working allocator. Otherwise
it returns PCRE2_ERROR_NOMEMORY if JIT is available but cannot allocate
executable memory, or PCRE2_ERROR_JIT_UNSUPPORTED if JIT support is not
compiled.
</P>
<P>
Otherwise, the first argument must be a pointer that was returned by a
successful call to <b>pcre2_compile()</b>, and the second must contain one or
more of the following bits:
<pre>
PCRE2_JIT_COMPLETE compile code for full matching
PCRE2_JIT_PARTIAL_SOFT compile code for soft partial matching
@@ -46,11 +55,13 @@ superseded by the <b>pcre2_compile()</b> option PCRE2_MATCH_INVALID_UTF. The old
option is deprecated and may be removed in the future.
</P>
<P>
The yield of the function is 0 for success, or a negative error code otherwise.
In particular, PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or
if an unknown bit is set in <i>options</i>. The function can also return
PCRE2_ERROR_NOMEMORY if JIT is unable to allocate executable memory for the
compiler, even if it was because of a system security restriction.
The yield of the function when called with any of the three options above is 0
for success, or a negative error code otherwise. In particular,
PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or if an unknown
bit is set in <i>options</i>. The function can also return PCRE2_ERROR_NOMEMORY
if JIT is unable to allocate executable memory for the compiler, even if it was
because of a system security restriction. In a few cases, the function may
return with PCRE2_ERROR_JIT_UNSUPPORTED for unsupported features.
</P>
<P>
There is a complete description of the PCRE2 native API in the
@@ -43,6 +43,10 @@ options are:
PCRE2_EXTRA_ESCAPED_CR_IS_LF Interpret \r as \n
PCRE2_EXTRA_MATCH_LINE Pattern matches whole lines
PCRE2_EXTRA_MATCH_WORD Pattern matches "words"
PCRE2_EXTRA_NEVER_CALLOUT Disallow callouts in pattern
PCRE2_EXTRA_NO_BS0 Disallow \0 (but not \00 or \000)
PCRE2_EXTRA_PYTHON_OCTAL Use Python rules for octal
PCRE2_EXTRA_TURKISH_CASING Use Turkish I case folding
</pre>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
@@ -27,9 +27,9 @@ DESCRIPTION
</b><br>
<P>
This function sets, in a compile context, the maximum size (in bytes) for the
memory needed to hold the compiled version of a pattern that is compiled with
this context. The result is always zero. If a pattern that is passed to
<b>pcre2_compile()</b> with this context needs more memory, an error is
memory needed to hold the compiled version of a pattern that is using this
context. The result is always zero. If a pattern that is passed to
<b>pcre2_compile()</b> referencing this context needs more memory, an error is
generated. The default is the largest number that a PCRE2_SIZE variable can
hold, which is effectively unlimited.
</P>
+57
View File
@@ -0,0 +1,57 @@
<html>
<head>
<title>pcre2_set_optimize specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_optimize man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_optimize(pcre2_compile_context *<i>ccontext</i>,</b>
<b> uint32_t <i>directive</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function controls which performance optimizations will be applied
by <b>pcre2_compile()</b>. It can be called multiple times with the same compile
context; the effects are cumulative, with the effects of later calls taking
precedence over earlier ones.
</P>
<P>
The result is zero for success, PCRE2_ERROR_NULL if <i>ccontext</i> is NULL,
or PCRE2_ERROR_BADOPTION if <i>directive</i> is unknown. The latter could be
useful to detect if a certain optimization is available.
</P>
<P>
The possible values for the <i>directive</i> parameter are:
<pre>
PCRE2_OPTIMIZATION_FULL Enable all optimizations (default)
PCRE2_OPTIMIZATION_NONE Disable all optimizations
PCRE2_AUTO_POSSESS Enable auto-possessification
PCRE2_AUTO_POSSESS_OFF Disable auto-possessification
PCRE2_DOTSTAR_ANCHOR Enable implicit dotstar anchoring
PCRE2_DOTSTAR_ANCHOR_OFF Disable implicit dotstar anchoring
PCRE2_START_OPTIMIZE Enable start-up optimizations at match time
PCRE2_START_OPTIMIZE_OFF Disable start-up optimizations at match time
</pre>
There is a complete description of the PCRE2 native API, including detailed
descriptions of <i>directive</i> parameter values, in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
@@ -20,7 +20,7 @@ SYNOPSIS
</P>
<P>
<b>int pcre2_set_substitute_callout(pcre2_match_context *<i>mcontext</i>,</b>
<b> int (*<i>callout_function</i>)(pcre2_substitute_callout_block *),</b>
<b> int (*<i>callout_function</i>)(pcre2_substitute_callout_block *, void *),</b>
<b> void *<i>callout_data</i>);</b>
</P>
<br><b>
@@ -0,0 +1,45 @@
<html>
<head>
<title>pcre2_set_substitute_case_callout specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_substitute_case_callout man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_substitute_case_callout(pcre2_match_context *<i>mcontext</i>,</b>
<b> PCRE2_SIZE (*<i>callout_function</i>)(PCRE2_SPTR, PCRE2_SIZE,</b>
<b> PCRE2_UCHAR *, PCRE2_SIZE,</b>
<b> int, void *),</b>
<b> void *<i>callout_data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function sets the substitute case callout fields in a match context (the
first argument). The second argument specifies a callout function, and the third
argument is an opaque data item that is passed to it. The result of this
function is always zero.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page and a description of the POSIX API in the
<a href="pcre2posix.html"><b>pcre2posix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
+411 -101
View File
@@ -179,6 +179,10 @@ document for an overview of all the PCRE2 documentation.
<br>
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
<b> int (*<i>guard_function</i>)(uint32_t, void *), void *<i>user_data</i>);</b>
<br>
<br>
<b>int pcre2_set_optimize(pcre2_compile_context *<i>ccontext</i>,</b>
<b> uint32_t <i>directive</i>);</b>
</P>
<br><a name="SEC5" href="#TOC1">PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS</a><br>
<P>
@@ -203,6 +207,13 @@ document for an overview of all the PCRE2 documentation.
<b> void *<i>callout_data</i>);</b>
<br>
<br>
<b>int pcre2_set_substitute_case_callout(pcre2_match_context *<i>mcontext</i>,</b>
<b> PCRE2_SIZE (*<i>callout_function</i>)(PCRE2_SPTR, PCRE2_SIZE,</b>
<b> PCRE2_UCHAR *, PCRE2_SIZE,</b>
<b> int, void *),</b>
<b> void *<i>callout_data</i>);</b>
<br>
<br>
<b>int pcre2_set_offset_limit(pcre2_match_context *<i>mcontext</i>,</b>
<b> PCRE2_SIZE <i>value</i>);</b>
<br>
@@ -808,6 +819,7 @@ following compile-time parameters:
The compile time nested parentheses limit
The maximum length of the pattern string
The extra options bits (none set by default)
Which performance optimizations the compiler should apply
</pre>
A compile context is also required if you are using custom memory management.
If none of these apply, just pass NULL as the context argument of
@@ -952,6 +964,110 @@ The first argument to the callout function gives the current depth of
nesting, and the second is user data that is set up by the last argument of
<b>pcre2_set_compile_recursion_guard()</b>. The callout function should return
zero if all is well, or non-zero to force an error.
<br>
<br>
<b>int pcre2_set_optimize(pcre2_compile_context *<i>ccontext</i>,</b>
<b> uint32_t <i>directive</i>);</b>
<br>
<br>
PCRE2 can apply various performance optimizations during compilation, in order
to make matching faster. For example, the compiler might convert some regex
constructs into an equivalent construct which <b>pcre2_match()</b> can execute
faster. By default, all available optimizations are enabled. However, in rare
cases, one might wish to disable specific optimizations. For example, if it is
known that some optimizations cannot benefit a certain regex, it might be
desirable to disable them, in order to speed up compilation.
</P>
<P>
The permitted values of <i>directive</i> are as follows:
<pre>
PCRE2_OPTIMIZATION_FULL
</pre>
Enable all optional performance optimizations. This is the default value.
<pre>
PCRE2_OPTIMIZATION_NONE
</pre>
Disable all optional performance optimizations.
<pre>
PCRE2_AUTO_POSSESS
PCRE2_AUTO_POSSESS_OFF
</pre>
Enable/disable "auto-possessification" of variable quantifiers such as * and +.
This optimization, for example, turns a+b into a++b in order to avoid
backtracks into a+ that can never be successful. However, if callouts are in
use, auto-possessification means that some callouts are never taken. You can
disable this optimization if you want the matching functions to do a full,
unoptimized search and run all the callouts.
<pre>
PCRE2_DOTSTAR_ANCHOR
PCRE2_DOTSTAR_ANCHOR_OFF
</pre>
Enable/disable an optimization that is applied when .* is the first significant
item in a top-level branch of a pattern, and all the other branches also start
with .* or with \A or \G or ^. Such a pattern is automatically anchored if
PCRE2_DOTALL is set for all the .* items and PCRE2_MULTILINE is not set for any
^ items. Otherwise, the fact that any match must start either at the start of
the subject or following a newline is remembered. Like other optimizations,
this can cause callouts to be skipped.
</P>
<P>
Dotstar anchor optimization is automatically disabled for .* if it is inside an
atomic group or a capture group that is the subject of a backreference, or if
the pattern contains (*PRUNE) or (*SKIP).
<pre>
PCRE2_START_OPTIMIZE
PCRE2_START_OPTIMIZE_OFF
</pre>
Enable/disable optimizations which cause matching functions to scan the subject
string for specific code unit values before attempting a match. For example, if
it is known that an unanchored match must start with a specific value, the
matching code searches the subject for that value, and fails immediately if it
cannot find it, without actually running the main matching function. This means
that a special item such as (*COMMIT) at the start of a pattern is not
considered until after a suitable starting point for the match has been found.
Also, when callouts or (*MARK) items are in use, these "start-up" optimizations
can cause them to be skipped if the pattern is never actually used. The start-up
optimizations are in effect a pre-scan of the subject that takes place before
the pattern is run.
</P>
<P>
Disabling start-up optimizations ensures that in cases where the result is "no
match", the callouts do occur, and that items such as (*COMMIT) and (*MARK) are
considered at every possible starting position in the subject string.
</P>
<P>
Disabling start-up optimizations may change the outcome of a matching operation.
Consider the pattern
<pre>
(*COMMIT)ABC
</pre>
When this is compiled, PCRE2 records the fact that a match must start with the
character "A". Suppose the subject string is "DEFABC". The start-up
optimization scans along the subject, finds "A" and runs the first match
attempt from there. The (*COMMIT) item means that the pattern must match the
current starting position, which in this case, it does. However, if the same
match is run without start-up optimizations, the initial scan along the subject
string does not happen. The first match attempt is run starting from "D" and
when this fails, (*COMMIT) prevents any further matches being tried, so the
overall result is "no match".
</P>
<P>
Another start-up optimization makes use of a minimum length for a matching
subject, which is recorded when possible. Consider the pattern
<pre>
(*MARK:1)B(*MARK:2)(X|Y)
</pre>
The minimum length for a match is two characters. If the subject is "XXBB", the
"starting character" optimization skips "XX", then tries to match "BB", which
is long enough. In the process, (*MARK:2) is encountered and remembered. When
the match attempt fails, the next "B" is found, but there is only one character
left, so there are no more attempts, and "no match" is returned with the "last
mark seen" set to "2". Without start-up optimizations, however, matches are
tried at every possible starting position, including at the end of the subject,
where (*MARK:1) is encountered, but there is no "B", so the "last mark seen"
that is returned is "1". In this case, the optimizations do not affect the
overall match result, which is still "no match", but they do affect the
auxiliary information that is returned.
<a name="matchcontext"></a></P>
<br><b>
The match context
@@ -1011,6 +1127,19 @@ made by <b>pcre2_substitute()</b>. Details are given in the section entitled
<a href="#substitutions">below.</a>
<br>
<br>
<b>int pcre2_set_substitute_case_callout(pcre2_match_context *<i>mcontext</i>,</b>
<b> PCRE2_SIZE (*<i>callout_function</i>)(PCRE2_SPTR, PCRE2_SIZE,</b>
<b> PCRE2_UCHAR *, PCRE2_SIZE,</b>
<b> int, void *),</b>
<b> void *<i>callout_data</i>);</b>
<br>
<br>
This sets up a callout function for PCRE2 to call when performing case
transformations inside <b>pcre2_substitute()</b>. Details are given in the
section entitled "Creating a new string with substitutions"
<a href="#substitutions">below.</a>
<br>
<br>
<b>int pcre2_set_offset_limit(pcre2_match_context *<i>mcontext</i>,</b>
<b> PCRE2_SIZE <i>value</i>);</b>
<br>
@@ -1228,7 +1357,10 @@ for the amount of heap memory used by <b>pcre2_match()</b> or
The output is a uint32_t integer that is set to one if support for just-in-time
compiling is included in the library; otherwise it is set to zero. Note that
having the support in the library does not guarantee that JIT will be used for
any given match. See the
any given match, and neither does it guarantee that JIT will actually be able
to function, because it may not be able to allocate executable memory in some
environments. There is a special call to <b>pcre2_jit_compile()</b> that can be
used to check this. See the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
documentation for more details.
<pre>
@@ -1431,7 +1563,7 @@ respectively, when <b>pcre2_compile()</b> returns NULL because a compilation
error has occurred.
</P>
<P>
There are nearly 100 positive error codes that <b>pcre2_compile()</b> may return
There are over 100 positive error codes that <b>pcre2_compile()</b> may return
if it finds an error in the pattern. There are also some negative error codes
that are used for invalid UTF strings when validity checking is in force. These
are the same as given by <b>pcre2_match()</b> and <b>pcre2_dfa_match()</b>, and
@@ -1539,6 +1671,16 @@ after any internal newline. However, it does not match after a newline at the
end of the subject, for compatibility with Perl. If you want a multiline
circumflex also to match after a terminating newline, you must set
PCRE2_ALT_CIRCUMFLEX.
<pre>
PCRE2_ALT_EXTENDED_CLASS
</pre>
Alters the parsing of character classes to follow the extended syntax
described by Unicode UTS#18. The PCRE2_ALT_EXTENDED_CLASS option has no impact
on the behaviour of the Perl-specific "(?[...])" syntax for extended classes,
but instead enables the alternative syntax of extended class behaviour inside
ordinary "[...]" character classes. See the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
documentation for details of the character classes supported.
<pre>
PCRE2_ALT_VERBNAMES
</pre>
@@ -1569,16 +1711,31 @@ letters in the subject. It is equivalent to Perl's /i option, and it can be
changed within a pattern by a (?i) option setting. If either PCRE2_UTF or
PCRE2_UCP is set, Unicode properties are used for all characters with more than
one other case, and for all characters whose code points are greater than
U+007F. Note that there are two ASCII characters, K and S, that, in addition to
U+007F.
</P>
<P>
Note that there are two ASCII characters, K and S, that, in addition to
their lower case ASCII equivalents, are case-equivalent with U+212A (Kelvin
sign) and U+017F (long S) respectively. If you do not want this case
equivalence, you can suppress it by setting PCRE2_EXTRA_CASELESS_RESTRICT.
</P>
<P>
One language family, Turkish and Azeri, has its own case-insensitivity rules,
which can be selected by setting PCRE2_EXTRA_TURKISH_CASING. This alters the
behaviour of the 'i', 'I', U+0130 (capital I with dot above), and U+0131
(small dotless i) characters.
</P>
<P>
For lower valued characters with only one other case, a lookup table is used
for speed. When neither PCRE2_UTF nor PCRE2_UCP is set, a lookup table is used
for all code points less than 256, and higher code points (available only in
16-bit or 32-bit mode) are treated as not having another case.
</P>
<P>
From release 10.45 PCRE2_CASELESS also affects what some of the letter-related
Unicode property escapes (\p and \P) match. The properties Lu (upper case
letter), Ll (lower case letter), and Lt (title case letter) are all treated as
LC (cased letter) when PCRE2_CASELESS is set.
<pre>
PCRE2_DOLLAR_ENDONLY
</pre>
@@ -1775,7 +1932,7 @@ This option locks out the use of Unicode properties for handling \B, \b, \D,
for the PCRE2_UCP option below. In particular, it prevents the creator of the
pattern from enabling this facility by starting the pattern with (*UCP). This
option may be useful in applications that process patterns from external
sources. The option combination PCRE_UCP and PCRE_NEVER_UCP causes an error.
sources. The option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error.
<pre>
PCRE2_NEVER_UTF
</pre>
@@ -1798,85 +1955,57 @@ though the reference can be by name or by number.
<pre>
PCRE2_NO_AUTO_POSSESS
</pre>
If this option is set, it disables "auto-possessification", which is an
optimization that, for example, turns a+b into a++b in order to avoid
If this (deprecated) option is set, it disables "auto-possessification", which
is an optimization that, for example, turns a+b into a++b in order to avoid
backtracks into a+ that can never be successful. However, if callouts are in
use, auto-possessification means that some callouts are never taken. You can
set this option if you want the matching functions to do a full unoptimized
search and run all the callouts, but it is mainly provided for testing
purposes.
</P>
<P>
If a compile context is available, it is recommended to use
<b>pcre2_set_optimize()</b> with the <i>directive</i> PCRE2_AUTO_POSSESS_OFF rather
than the compile option PCRE2_NO_AUTO_POSSESS. Note that PCRE2_NO_AUTO_POSSESS
takes precedence over the <b>pcre2_set_optimize()</b> optimization directives
PCRE2_AUTO_POSSESS and PCRE2_AUTO_POSSESS_OFF.
<pre>
PCRE2_NO_DOTSTAR_ANCHOR
</pre>
If this option is set, it disables an optimization that is applied when .* is
the first significant item in a top-level branch of a pattern, and all the
other branches also start with .* or with \A or \G or ^. The optimization is
automatically disabled for .* if it is inside an atomic group or a capture
group that is the subject of a backreference, or if the pattern contains
(*PRUNE) or (*SKIP). When the optimization is not disabled, such a pattern is
automatically anchored if PCRE2_DOTALL is set for all the .* items and
PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that any match
must start either at the start of the subject or following a newline is
If this (deprecated) option is set, it disables an optimization that is applied
when .* is the first significant item in a top-level branch of a pattern, and
all the other branches also start with .* or with \A or \G or ^. The
optimization is automatically disabled for .* if it is inside an atomic group
or a capture group that is the subject of a backreference, or if the pattern
contains (*PRUNE) or (*SKIP). When the optimization is not disabled, such a
pattern is automatically anchored if PCRE2_DOTALL is set for all the .* items
and PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that any
match must start either at the start of the subject or following a newline is
remembered. Like other optimizations, this can cause callouts to be skipped.
(If a compile context is available, it is recommended to use
<b>pcre2_set_optimize()</b> with the <i>directive</i> PCRE2_DOTSTAR_ANCHOR_OFF
instead.)
<pre>
PCRE2_NO_START_OPTIMIZE
</pre>
This is an option whose main effect is at matching time. It does not change
what <b>pcre2_compile()</b> generates, but it does affect the output of the JIT
compiler.
compiler. Setting this option is equivalent to calling <b>pcre2_set_optimize()</b>
with the <i>directive</i> parameter set to PCRE2_START_OPTIMIZE_OFF.
</P>
<P>
There are a number of optimizations that may occur at the start of a match, in
order to speed up the process. For example, if it is known that an unanchored
match must start with a specific code unit value, the matching code searches
the subject for that value, and fails immediately if it cannot find it, without
actually running the main matching function. This means that a special item
such as (*COMMIT) at the start of a pattern is not considered until after a
suitable starting point for the match has been found. Also, when callouts or
(*MARK) items are in use, these "start-up" optimizations can cause them to be
skipped if the pattern is never actually used. The start-up optimizations are
actually running the main matching function. The start-up optimizations are
in effect a pre-scan of the subject that takes place before the pattern is run.
</P>
<P>
The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizations,
possibly causing performance to suffer, but ensuring that in cases where the
result is "no match", the callouts do occur, and that items such as (*COMMIT)
and (*MARK) are considered at every possible starting position in the subject
string.
</P>
<P>
Setting PCRE2_NO_START_OPTIMIZE may change the outcome of a matching operation.
Consider the pattern
<pre>
(*COMMIT)ABC
</pre>
When this is compiled, PCRE2 records the fact that a match must start with the
character "A". Suppose the subject string is "DEFABC". The start-up
optimization scans along the subject, finds "A" and runs the first match
attempt from there. The (*COMMIT) item means that the pattern must match the
current starting position, which in this case, it does. However, if the same
match is run with PCRE2_NO_START_OPTIMIZE set, the initial scan along the
subject string does not happen. The first match attempt is run starting from
"D" and when this fails, (*COMMIT) prevents any further matches being tried, so
the overall result is "no match".
</P>
<P>
As another start-up optimization makes use of a minimum length for a matching
subject, which is recorded when possible. Consider the pattern
<pre>
(*MARK:1)B(*MARK:2)(X|Y)
</pre>
The minimum length for a match is two characters. If the subject is "XXBB", the
"starting character" optimization skips "XX", then tries to match "BB", which
is long enough. In the process, (*MARK:2) is encountered and remembered. When
the match attempt fails, the next "B" is found, but there is only one character
left, so there are no more attempts, and "no match" is returned with the "last
mark seen" set to "2". If NO_START_OPTIMIZE is set, however, matches are tried
at every possible starting position, including at the end of the subject, where
(*MARK:1) is encountered, but there is no "B", so the "last mark seen" that is
returned is "1". In this case, the optimizations do not affect the overall
match result, which is still "no match", but they do affect the auxiliary
information that is returned.
Disabling the start-up optimizations may cause performance to suffer. However,
this may be desirable for patterns which contain callouts or items such as
(*COMMIT) and (*MARK). See the above description of PCRE2_START_OPTIMIZE_OFF
for further details.
<pre>
PCRE2_NO_UTF_CHECK
</pre>
@@ -1931,9 +2060,16 @@ The second effect of PCRE2_UCP is to force the use of Unicode properties for
upper/lower casing operations, even when PCRE2_UTF is not set. This makes it
possible to process strings in the 16-bit UCS-2 code. This option is available
only if PCRE2 has been compiled with Unicode support (which is the default).
The PCRE2_EXTRA_CASELESS_RESTRICT option (see below) restricts caseless
</P>
<P>
The PCRE2_EXTRA_CASELESS_RESTRICT option (see above) restricts caseless
matching such that ASCII characters match only ASCII characters and non-ASCII
characters match only non-ASCII characters.
characters match only non-ASCII characters. The PCRE2_EXTRA_TURKISH_CASING option
(see above) alters the matching of the 'i' characters to follow their behaviour
in Turkish and Azeri languages. For further details on
PCRE2_EXTRA_CASELESS_RESTRICT and PCRE2_EXTRA_TURKISH_CASING, see the
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
page.
<pre>
PCRE2_UNGREEDY
</pre>
@@ -2070,7 +2206,8 @@ characters. The ASCII letter S is case-equivalent to U+017f (long S) and the
ASCII letter K is case-equivalent to U+212a (Kelvin sign). This option disables
recognition of case-equivalences that cross the ASCII/non-ASCII boundary. In a
caseless match, both characters must either be ASCII or non-ASCII. The option
can be changed with a pattern by the (?r) option setting.
can be changed within a pattern by the (*CASELESS_RESTRICT) or (?r) option
settings.
<pre>
PCRE2_EXTRA_ESCAPED_CR_IS_LF
</pre>
@@ -2097,6 +2234,34 @@ and the end. This is achieved by automatically inserting the code for "\b(?:"
at the start of the compiled pattern and ")\b" at the end. The option may be
used with PCRE2_LITERAL. However, it is ignored if PCRE2_EXTRA_MATCH_LINE is
also set.
<pre>
PCRE2_EXTRA_NO_BS0
</pre>
If this option is set (note that its final character is the digit 0) it locks
out the use of the sequence \0 unless at least one more octal digit follows.
<pre>
PCRE2_EXTRA_PYTHON_OCTAL
</pre>
If this option is set, PCRE2 follows Python's rules for interpreting octal
escape sequences. The rules for handling sequences such as \14, which could
be an octal number or a back reference, are different. Details are given in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
documentation.
<pre>
PCRE2_EXTRA_NEVER_CALLOUT
</pre>
If this option is set, PCRE2 treats callouts in the pattern as a syntax error,
returning PCRE2_ERROR_CALLOUT_CALLER_DISABLED. This is useful if the application
knows that a callout will not be provided to <b>pcre2_match()</b>, so that
callouts in the pattern are not silently ignored.
<pre>
PCRE2_EXTRA_TURKISH_CASING
</pre>
This option alters case-equivalence of the 'i' letters to follow the
alphabet used by Turkish and Azeri languages. The option can be changed within
a pattern by the (*TURKISH_CASING) start-of-pattern setting. Either the UTF or
UCP options must be set. In the 8-bit library, UTF must be set. This option
cannot be combined with PCRE2_EXTRA_CASELESS_RESTRICT.
<a name="jitcompiling"></a></P>
<br><a name="SEC21" href="#TOC1">JUST-IN-TIME (JIT) COMPILATION</a><br>
<P>
@@ -2303,6 +2468,7 @@ following are true:
PCRE2_DOTALL is in force for .*
Neither (*PRUNE) nor (*SKIP) appears in the pattern
PCRE2_NO_DOTSTAR_ANCHOR is not set
Dotstar anchoring has not been disabled with PCRE2_DOTSTAR_ANCHOR_OFF
</pre>
For patterns that are auto-anchored, the PCRE2_ANCHORED bit is set in the
options returned for PCRE2_INFO_ALLOPTIONS.
@@ -3646,9 +3812,10 @@ PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is
too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If
this option is set, however, <b>pcre2_substitute()</b> continues to go through
the motions of matching and substituting (without, of course, writing anything)
in order to compute the size of buffer that is needed. This value is passed
back via the <i>outlengthptr</i> variable, with the result of the function still
being PCRE2_ERROR_NOMEMORY.
in order to compute the size of buffer that is needed, which will include the
extra space for the terminating NUL. This value is passed back via the
<i>outlengthptr</i> variable, with the result of the function still being
PCRE2_ERROR_NOMEMORY.
</P>
<P>
Passing a buffer size of zero is a permitted way of finding out how much memory
@@ -3667,18 +3834,26 @@ If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not interpreted
in any way. By default, however, a dollar character is an escape character that
can specify the insertion of characters from capture groups and names from
(*MARK) or other control verbs in the pattern. Dollar is the only escape
character (backslash is treated as literal). The following forms are always
character (backslash is treated as literal). The following forms are
recognized:
<pre>
$$ insert a dollar character
$&#60;n&#62; or ${&#60;n&#62;} insert the contents of group &#60;n&#62;
$n or ${n} insert the contents of group <i>n</i>
$0 or $& insert the entire matched substring
$` insert the substring that precedes the match
$' insert the substring that follows the match
$_ insert the entire input string
$*MARK or ${*MARK} insert a control verb name
</pre>
Either a group number or a group name can be given for &#60;n&#62;. Curly brackets are
required only if the following character would be interpreted as part of the
number or name. The number may be zero to include the entire matched string.
For example, if the pattern a(b)c is matched with "=abc=" and the replacement
string "+$1$0$1+", the result is "=+babcb+=".
Either a group number or a group name can be given for <i>n</i>, for example $2 or
$NAME. Curly brackets are required only if the following character would be
interpreted as part of the number or name. The number may be zero to include
the entire matched string. For example, if the pattern a(b)c is matched with
"=abc=" and the replacement string "+$1$0$1+", the result is "=+babcb+=".
</P>
<P>
The JavaScript form $&#60;name&#62;, where the angle brackets are part of the syntax,
is also recognized for group names, but not for group numbers or *MARK.
</P>
<P>
$*MARK inserts the name from the last encountered backtracking control verb on
@@ -3732,28 +3907,53 @@ not influence the extended substitution syntax described below.
PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the
replacement string. Without this option, only the dollar character is special,
and only the group insertion forms listed above are valid. When
PCRE2_SUBSTITUTE_EXTENDED is set, two things change:
PCRE2_SUBSTITUTE_EXTENDED is set, several things change:
</P>
<P>
Firstly, backslash in a replacement string is interpreted as an escape
character. The usual forms such as \n or \x{ddd} can be used to specify
particular character codes, and backslash followed by any non-alphanumeric
character quotes that character. Extended quoting can be coded using \Q...\E,
exactly as in pattern strings.
character. The usual forms such as \x{ddd} can be used to specify particular
character codes, and backslash followed by any non-alphanumeric character
quotes that character. Extended quoting can be coded using \Q...\E, exactly
as in pattern strings. The escapes \b and \v are interpreted as the
characters backspace and vertical tab, respectively.
</P>
<P>
The interpretation of backslash followed by one or more digits is the same as
in a pattern, which in Perl has some ambiguities. Details are given in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
page.
</P>
<P>
The Python form \g&#60;n&#62;, where the angle brackets are part of the syntax and <i>n</i>
is either a group name or number, is recognized as an alternative way of
inserting the contents of a group, for example \g&#60;3&#62;.
</P>
<P>
There are also four escape sequences for forcing the case of inserted letters.
The insertion mechanism has three states: no case forcing, force upper case,
and force lower case. The escape sequences change the current state: \U and
\L change to upper or lower case forcing, respectively, and \E (when not
terminating a \Q quoted sequence) reverts to no case forcing. The sequences
\u and \l force the next character (if it is a letter) to upper or lower
case, respectively, and then the state automatically reverts to no case
forcing. Case forcing applies to all inserted characters, including those from
capture groups and letters within \Q...\E quoted sequences. If either
PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode
Case forcing applies to all inserted characters, including those from capture
groups and letters within \Q...\E quoted sequences. The insertion mechanism
has three states: no case forcing, force upper case, and force lower case. The
escape sequences change the current state: \U and \L change to upper or lower
case forcing, respectively, and \E (when not terminating a \Q quoted
sequence) reverts to no case forcing. The sequences \u and \l force the next
character (if it is a letter) to upper or lower case, respectively, and then
the state automatically reverts to no case forcing.
</P>
<P>
However, if \u is immediately followed by \L or \l is immediately followed
by \U, the next character's case is forced by the first escape sequence, and
subsequent characters by the second. This provides a "title casing" facility
that can be applied to group captures. For example, if group 1 has captured
"heLLo", the replacement string "\u\L$1" becomes "Hello".
</P>
<P>
If either PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode
properties are used for case forcing characters whose code points are greater
than 127.
than 127. However, only simple case folding, as determined by the Unicode file
<b>CaseFolding.txt</b>, is supported. PCRE2 does not support language-specific
special casing rules such as using different lower case Greek sigmas in the
middle and ends of words (as defined in the Unicode file
<b>SpecialCasing.txt</b>).
</P>
<P>
Note that case forcing sequences such as \U...\E do not nest. For example,
@@ -3762,20 +3962,20 @@ effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do
not apply to replacement strings.
</P>
<P>
The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
The final effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
flexibility to capture group substitution. The syntax is similar to that used
by Bash:
<pre>
${&#60;n&#62;:-&#60;string&#62;}
${&#60;n&#62;:+&#60;string1&#62;:&#60;string2&#62;}
${n:-string}
${n:+string1:string2}
</pre>
As before, &#60;n&#62; may be a group number or a name. The first form specifies a
default value. If group &#60;n&#62; is set, its value is inserted; if not, &#60;string&#62; is
expanded and the result inserted. The second form specifies strings that are
expanded and inserted when group &#60;n&#62; is set or unset, respectively. The first
form is just a convenient shorthand for
As in the simple case, <i>n</i> may be a group number or a name. The first form
specifies a default value. If group <i>n</i> is set, its value is inserted; if
not, the string is expanded and the result inserted. The second form specifies
strings that are expanded and inserted when group <i>n</i> is set or unset,
respectively. The first form is just a convenient shorthand for
<pre>
${&#60;n&#62;:+${&#60;n&#62;}:&#60;string&#62;}
${n:+${n}:string}
</pre>
Backslash can be used to escape colons and closing curly brackets in the
replacement strings. A change of the case forcing state within a replacement
@@ -3852,9 +4052,18 @@ Substitution callouts
The <b>pcre2_set_substitute_callout()</b> function can be used to specify a
callout function for <b>pcre2_substitute()</b>. This information is passed in
a match context. The callout function is called after each substitution has
been processed, but it can cause the replacement not to happen. The callout
function is not called for simulated substitutions that happen as a result of
the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option.
been processed, but it can cause the replacement not to happen.
</P>
<P>
The callout function is not called for simulated substitutions that happen as a
result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. In this mode, when
substitution processing exceeds the buffer space provided by the caller,
processing continues by counting code units. The simulation is unable to
populate the callout block, and so the simulation is pessimistic about the
required buffer size. The larger of the accepted and rejected substitution sizes
is reported as the required size. Therefore, the returned buffer length may be
an overestimate (without a substitution callout, it is normally an exact
measurement).
</P>
<P>
The first argument of the callout function is a pointer to a substitute callout
@@ -3903,6 +4112,107 @@ PCRE2_SUBSTITUTE_GLOBAL is not set), the rest of the input is copied to the
output and the call to <b>pcre2_substitute()</b> exits, returning the number of
matches so far.
</P>
<br><b>
Substitution case callouts
</b><br>
<P>
<b>int pcre2_set_substitute_case_callout(pcre2_match_context *<i>mcontext</i>,</b>
<b> PCRE2_SIZE (*<i>callout_function</i>)(PCRE2_SPTR, PCRE2_SIZE,</b>
<b> PCRE2_UCHAR *, PCRE2_SIZE,</b>
<b> int, void *),</b>
<b> void *<i>callout_data</i>);</b>
<br>
<br>
The <b>pcre2_set_substitute_case_callout()</b> function can be used to specify
a callout function for <b>pcre2_substitute()</b> to use when performing case
transformations. This does not affect any case insensitivity behaviour when
performing a match, but only the user-visible transformations performed when
processing a substitution such as:
<pre>
pcre2_substitute(..., "\\U$1", ...)
</PRE>
</P>
<P>
The default case transformations applied by PCRE2 are reasonably complete, and,
in UTF or UCP mode, perform the simple locale-invariant case transformations as
specified by Unicode. This is suitable for the internal (invisible)
case-equivalence procedures used during pattern matching, but an application
may wish to use more sophisticated locale-aware processing for the user-visible
substitution transformations.
</P>
<P>
One example implementation of the <i>callout_function</i> using the ICU
library would be:
<br>
<br>
<pre>
PCRE2_SIZE
icu_case_callout(
PCRE2_SPTR input, PCRE2_SIZE input_len,
PCRE2_UCHAR *output, PCRE2_SIZE output_cap,
int to_case, void *data_ptr)
{
UErrorCode err = U_ZERO_ERROR;
int32_t r = to_case == PCRE2_SUBSTITUTE_CASE_LOWER
? u_strToLower(output, output_cap, input, input_len, NULL, &err)
: to_case == PCRE2_SUBSTITUTE_CASE_UPPER
? u_strToUpper(output, output_cap, input, input_len, NULL, &err)
: u_strToTitle(output, output_cap, input, input_len, &first_char_only,
NULL, &err);
if (U_FAILURE(err)) return (~(PCRE2_SIZE)0);
return r;
}
</PRE>
</P>
<P>
The first and second arguments of the case callout function are the Unicode
string to transform.
</P>
<P>
The third and fourth arguments are the output buffer and its capacity.
</P>
<P>
The fifth is one of the constants PCRE2_SUBSTITUTE_CASE_LOWER,
PCRE2_SUBSTITUTE_CASE_UPPER, or PCRE2_SUBSTITUTE_CASE_TITLE_FIRST.
PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the
callout to indicate that the case of the entire callout input should be
case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that
only the first character or glyph should be transformed to Unicode titlecase
and the rest to Unicode lowercase (note that titlecasing sometimes uses Unicode
properties to titlecase each word in a string; but PCRE2 is requesting that only
the single leading character is to be titlecased).
</P>
<P>
The sixth argument is the <i>callout_data</i> supplied to
<b>pcre2_set_substitute_case_callout()</b>.
</P>
<P>
The resulting string in the destination buffer may be larger or smaller than the
input, if the casing rules merge or split characters. The return value is the
length required for the output string. If a buffer of sufficient size was
provided to the callout, then the result must be written to the buffer and the
number of code units returned. If the result does not fit in the provided
buffer, then the required capacity must be returned and PCRE2 will not make use
of the output buffer. PCRE2 provides input and output buffers which overlap, so
the callout must support this by suitable internal buffering.
</P>
<P>
Alternatively, if the callout wishes to indicate an error, then it may return
(~(PCRE2_SIZE)0). In this case pcre2_substitute() will immediately fail with
error PCRE2_ERROR_REPLACECASE.
</P>
<P>
When a case callout is combined with the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
option, there are situations when pcre2_substitute() will return an
underestimate of the required buffer size. If you call pcre2_substitute() once
with PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, and the output buffer is too small for
the replacement string to be constructed, then instead of calling the case
callout, pcre2_substitute() will make an estimate of the required buffer size.
The second call should also pass PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, because that
second call is not guaranteed to succeed either, if the case callout requires
more buffer space than expected. The caller must make repeated attempts in a
loop.
</P>
<br><a name="SEC38" href="#TOC1">DUPLICATE CAPTURE GROUP NAMES</a><br>
<P>
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
@@ -4177,7 +4487,7 @@ Cambridge, England.
</P>
<br><a name="SEC43" href="#TOC1">REVISION</a><br>
<P>
Last updated: 24 April 2024
Last updated: 26 December 2024
<br>
Copyright &copy; 1997-2024 University of Cambridge.
<br>
+1 -1
View File
@@ -643,7 +643,7 @@ Cambridge, England.
</P>
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
<P>
Last updated: 15 April 2024
Last updated: 16 April 2024
<br>
Copyright &copy; 1997-2024 University of Cambridge.
<br>
+33 -10
View File
@@ -71,7 +71,7 @@ interprets them.
7. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is
built with Unicode support (the default). The properties that can be tested
with \p and \P are limited to the general category properties such as Lu and
Nd, the derived properties Any and LC (synonym L&), script names such as Greek
Nd, the derived properties Any and Lc (synonym L&), script names such as Greek
or Han, Bidi_Class, Bidi_Control, and a few binary properties. Both PCRE2 and
Perl support the Cs (surrogate) property, but in PCRE2 its use is limited. See
the
@@ -99,7 +99,12 @@ following examples:
\Q\\E \ \\E
</pre>
The \Q...\E sequence is recognized both inside and outside character classes
by both PCRE2 and Perl.
by both PCRE2 and Perl. Another difference from Perl is that any appearance of
\Q or \E inside what might otherwise be a quantifier causes PCRE2 not to
recognize the sequence as a quantifier. Perl recognizes a quantifier if
(redundantly) either of the numbers is inside \Q...\E, but not if the
separating comma is. When not recognized as a quantifier a sequence such as
{\Q1\E,2} is treated as the literal string "{1,2}".
</P>
<P>
9. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
@@ -120,7 +125,9 @@ confined to that group; it does not extend to the surrounding pattern. This is
not always the case in Perl. In particular, if (*THEN) is present in a group
that is called as a subroutine, its action is limited to that group, even if
the group does not contain any | characters. Note that such groups are
processed as anchored at the point where they are tested.
processed as anchored at the point where they are tested. PCRE2 also confines
all control verbs within atomic assertions, again including (*THEN) in
assertions with only one branch.
</P>
<P>
12. If a pattern contains more than one backtracking control verb, the first
@@ -159,11 +166,11 @@ warning features, so it gives an error in these cases because they are almost
certainly user mistakes.
</P>
<P>
17. In PCRE2, the upper/lower case character properties Lu and Ll are not
affected when case-independent matching is specified. For example, \p{Lu}
always matches an upper case letter. I think Perl has changed in this respect;
in the release at the time of writing (5.38), \p{Lu} and \p{Ll} match all
letters, regardless of case, when case independence is specified.
17. In PCRE2, until release 10.45, the upper/lower case character properties Lu
and Ll were not affected when case-independent matching was specified. Perl has
changed in this respect, and PCRE2 has now changed to match. When caseless
matching is in force, Lu, Ll, and Lt (title case) are all treated as Lc (cased
letter).
</P>
<P>
18. From release 5.32.0, Perl locks out the use of \K in lookaround
@@ -231,6 +238,10 @@ and condition references such as (?(4)...). PCRE2 supports relative group
numbers such as +2 and -4 in all three cases. Perl supports both plus and minus
for subroutine calls, but only minus for back references, and no relative
numbering at all for conditions.
<br>
<br>
(m) The scan substring assertion (syntax (*scs:(n)...)) is a PCRE2 extension
that is not available in Perl.
</P>
<P>
20. Perl has different limits than PCRE2. See the
@@ -252,6 +263,18 @@ handled by PCRE2, either by the interpreter or the JIT. An example is
/(?:|(?0)abcd)(?(R)|\z)/, which matches a sequence of any number of repeated
"abcd" substrings at the end of the subject.
</P>
<P>
23. Both PCRE2 and Perl give an error when \x{ escapes are invalid, but Perl
tries to recover and prints a warning if the problem was that an invalid
hexadecimal digit was found. Since PCRE2 doesn't have warnings, it returns an
error instead. Additionally, Perl accepts \x{} and generates NUL, unlike PCRE2.
</P>
<P>
24. From release 10.45, PCRE2 gives an error if \x is not followed by a
hexadecimal digit or a curly bracket. It used to interpret this as the NUL
character. Perl still generates NUL, but warns when in warning mode in most
cases.
</P>
<br><b>
AUTHOR
</b><br>
@@ -267,9 +290,9 @@ Cambridge, England.
REVISION
</b><br>
<P>
Last updated: 30 November 2023
Last updated: 02 October 2024
<br>
Copyright &copy; 1997-2023 University of Cambridge.
Copyright &copy; 1997-2024 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
+1 -1
View File
@@ -182,7 +182,7 @@ Cambridge, England.
</P>
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
<P>
Last updated: 28 June 2018
Last updated: 14 November 2023
<br>
Copyright &copy; 1997-2018 University of Cambridge.
<br>
+23 -13
View File
@@ -391,9 +391,10 @@ Read patterns from the file, one per line. As is the case with patterns on the
command line, no delimiters should be used. What constitutes a newline when
reading the file is the operating system's default interpretation of \n. The
<b>--newline</b> option has no effect on this option. Trailing white space is
removed from each line, and blank lines are ignored. An empty file contains no
removed from each line, and blank lines are ignored unless the
<b>--posix-pattern-file</b> option is also provided. An empty file contains no
patterns and therefore matches nothing. Patterns read from a file in this way
may contain binary zeros, which are treated as ordinary data characters.
may contain binary zeros, which are treated as ordinary character literals.
<br>
<br>
If this option is given more than once, all the specified files are read. A
@@ -723,9 +724,9 @@ text.
<br>
<br>
$&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the captured substring of the given
decimal number; zero substitutes the whole match. If the number is greater than
the number of capturing substrings, or if the capture is unset, the replacement
is empty.
decimal number; $& (or the legacy $0) substitutes the whole match. If the
number is greater than the number of capturing substrings, or if the capture
is unset, the replacement is empty.
<br>
<br>
$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
@@ -808,6 +809,15 @@ when in UCP mode, the sequence (?aP) restricts [:word:] to ASCII letters, while
allowing \w to match Unicode letters and digits.
</P>
<P>
<b>--posix-pattern-file</b>
When patterns are provided with the <b>-f</b> option, do not trim trailing
spaces or ignore empty lines, behaving in a similar way to other grep tools. To
keep the behaviour consistent with older versions, if the pattern read was
terminated with CRLF (as character literals) then both characters won't be
included as part of it, so if you really need to have a pattern ending in '\r',
use an escape sequence or provide it by a different method.
</P>
<P>
<b>-q</b>, <b>--quiet</b>
Work quietly, that is, display nothing except error messages. The exit
status indicates whether or not any matches were found.
@@ -993,7 +1003,7 @@ scripts or echoing specific strings during matching by making use of PCRE2's
callout facility. However, this support can be completely or partially disabled
when <b>pcre2grep</b> is built. You can find out whether your binary has support
for callouts by running it with the <b>--help</b> option. If callout support is
completely disabled, all callouts in patterns are ignored by <b>pcre2grep</b>.
completely disabled, callouts in patterns are forbidden by <b>pcre2grep</b>.
If the facility is partially disabled, calling external programs is not
supported, and callouts that request it are ignored.
</P>
@@ -1015,9 +1025,9 @@ available, provided that callouts were not completely disabled when
zero-terminated string, which means it should not contain any internal binary
zeros. It is written to the output, having first been passed through the same
escape processing as text from the <b>--output</b> (<b>-O</b>) option (see
above). However, $0 cannot be used to insert a matched substring because the
match is still in progress. Instead, the single character '0' is inserted. Any
syntax errors in the string (for example, a dollar not followed by another
above). However, $0 or $& cannot be used to insert a matched substring because
the match is still in progress. Instead, the single character '0' is inserted.
Any syntax errors in the string (for example, a dollar not followed by another
character) causes the callout to be ignored. No terminator is added to the
output string, so if you want a newline, you must include it explicitly using
the escape $n. For example:
@@ -1047,9 +1057,9 @@ arguments:
</pre>
Any substring (including the executable name) may contain escape sequences
started by a dollar character. These are the same as for the <b>--output</b>
(<b>-O</b>) option documented above, except that $0 cannot insert the matched
string because the match is still in progress. Instead, the character '0'
is inserted. If you need a literal dollar or pipe character in any
(<b>-O</b>) option documented above, except that $0 or $& cannot insert the
matched string because the match is still in progress. Instead, the character
'0' is inserted. If you need a literal dollar or pipe character in any
substring, use $$ or $| respectively. Here is an example:
<pre>
echo -e "abcde\n12345" | pcre2grep \
@@ -1116,7 +1126,7 @@ Cambridge, England.
</P>
<br><a name="SEC16" href="#TOC1">REVISION</a><br>
<P>
Last updated: 22 December 2023
Last updated: 04 February 2025
<br>
Copyright &copy; 1997-2023 University of Cambridge.
<br>
+13 -4
View File
@@ -64,7 +64,7 @@ platforms:
If --enable-jit is set on an unsupported platform, compilation fails.
</P>
<P>
A client program can tell if JIT support is available by calling
A client program can tell if JIT support has been compiled by calling
<b>pcre2_config()</b> with the PCRE2_CONFIG_JIT option. The result is one if
PCRE2 was built with JIT support, and zero otherwise. However, having the JIT
code available does not guarantee that it will be used for any particular
@@ -72,11 +72,19 @@ match. One reason for this is that there are a number of options and pattern
items that are
<a href="#unsupported">not supported by JIT</a>
(see below). Another reason is that in some environments JIT is unable to get
memory in which to build its compiled code. The only guarantee from
executable memory in which to build its compiled code. The only guarantee from
<b>pcre2_config()</b> is that if it returns zero, JIT will definitely <i>not</i>
be used.
</P>
<P>
As of release 10.45 there is a more informative way to test for JIT support. If
<b>pcre2_jit_compile()</b> is called with the single option PCRE2_JIT_TEST_ALLOC
it returns zero if JIT is available and has a working allocator. Otherwise it
returns PCRE2_ERROR_NOMEMORY if JIT is available but cannot allocate executable
memory, or PCRE2_ERROR_JIT_UNSUPPORTED if JIT support is not compiled. The
code argument is ignored, so it can be a NULL value.
</P>
<P>
A simple program does not need to check availability in order to use JIT when
possible. The API is implemented in a way that falls back to the interpretive
code if JIT is not available or cannot be used for a given match. For programs
@@ -126,7 +134,8 @@ option bits. For example, you can call it once with PCRE2_JIT_COMPLETE and
PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_HARD. This time it will ignore
PCRE2_JIT_COMPLETE and just compile code for partial matching. If
<b>pcre2_jit_compile()</b> is called with no option bits set, it immediately
returns zero. This is an alternative way of testing whether JIT is available.
returns zero. This is an alternative way of testing whether JIT support has
been compiled.
</P>
<P>
At present, it is not possible to free JIT compiled code except when the entire
@@ -487,7 +496,7 @@ Cambridge, England.
</P>
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
<P>
Last updated: 21 February 2024
Last updated: 22 August 2024
<br>
Copyright &copy; 1997-2024 University of Cambridge.
<br>
+1 -1
View File
@@ -96,7 +96,7 @@ Cambridge, England.
REVISION
</b><br>
<P>
Last updated: August 2023
Last updated: 16 August 2023
<br>
Copyright &copy; 1997-2023 University of Cambridge.
<br>
+34 -25
View File
@@ -27,7 +27,7 @@ please consult the man page, in case the conversion went wrong.
This document describes the two different algorithms that are available in
PCRE2 for matching a compiled regular expression against a given subject
string. The "standard" algorithm is the one provided by the <b>pcre2_match()</b>
function. This works in the same as Perl's matching function, and provide a
function. This works in the same way as Perl's matching function, and provides a
Perl-compatible matching operation. The just-in-time (JIT) optimization that is
described in the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
@@ -42,7 +42,7 @@ these are described below.
<P>
When there is only one possible way in which a given subject string can match a
pattern, the two algorithms give the same answer. A difference arises, however,
when there are multiple possibilities. For example, if the pattern
when there are multiple possibilities. For example, if the anchored pattern
<pre>
^&#60;.*&#62;
</pre>
@@ -115,9 +115,9 @@ algorithm after the first match (which is necessarily the shortest) is found.
</P>
<P>
Note that the size of vector needed to contain all the results depends on the
number of simultaneous matches, not on the number of parentheses in the
pattern. Using <b>pcre2_match_data_create_from_pattern()</b> to create the match
data block is therefore not advisable when doing DFA matching.
number of simultaneous matches, not on the number of capturing parentheses in
the pattern. Using <b>pcre2_match_data_create_from_pattern()</b> to create the
match data block is therefore not advisable when doing DFA matching.
</P>
<P>
Note also that all the matches that are found start at the same point in the
@@ -166,37 +166,43 @@ possibilities, and PCRE2's implementation of this algorithm does not attempt to
do this. This means that no captured substrings are available.
</P>
<P>
3. Because no substrings are captured, backreferences within the pattern are
not supported.
3. Because no substrings are captured, a number of related features are not
available:
<br>
<br>
(a) Backreferences;
<br>
<br>
(b) Conditional expressions that use a backreference as the condition or test
for a specific group recursion;
<br>
<br>
(c) Script runs;
<br>
<br>
(d) Scan substring assertions.
</P>
<P>
4. For the same reason, conditional expressions that use a backreference as the
condition or test for a specific group recursion are not supported.
</P>
<P>
5. Again for the same reason, script runs are not supported.
</P>
<P>
6. Because many paths through the tree may be active, the \K escape sequence,
4. Because many paths through the tree may be active, the \K escape sequence,
which resets the start of the match when encountered (but may be on some paths
and not on others), is not supported.
</P>
<P>
7. Callouts are supported, but the value of the <i>capture_top</i> field is
5. Callouts are supported, but the value of the <i>capture_top</i> field is
always 1, and the value of the <i>capture_last</i> field is always 0.
</P>
<P>
8. The \C escape sequence, which (in the standard algorithm) always matches a
single code unit, even in a UTF mode, is not supported in these modes, because
6. The \C escape sequence, which (in the standard algorithm) always matches a
single code unit, even in a UTF mode, is not supported in UTF modes because
the alternative algorithm moves through the subject string one character (not
code unit) at a time, for all active paths through the tree.
</P>
<P>
9. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
7. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
supported. (*FAIL) is supported, and behaves like a failing negative assertion.
</P>
<P>
10. The PCRE2_MATCH_INVALID_UTF option for <b>pcre2_compile()</b> is not
8. The PCRE2_MATCH_INVALID_UTF option for <b>pcre2_compile()</b> is not
supported by <b>pcre2_dfa_match()</b>.
</P>
<br><a name="SEC5" href="#TOC1">ADVANTAGES OF THE ALTERNATIVE ALGORITHM</a><br>
@@ -223,15 +229,18 @@ because it has to search for all possible matches, but is also because it is
less susceptible to optimization.
</P>
<P>
2. Capturing parentheses, backreferences, script runs, and matching within
invalid UTF string are not supported.
2. Capturing parentheses and other features such as backreferences that rely on
them are not supported.
</P>
<P>
3. Although atomic groups are supported, their use does not provide the
3. Matching within invalid UTF strings is not supported.
</P>
<P>
4. Although atomic groups are supported, their use does not provide the
performance advantage that it does for the standard algorithm.
</P>
<P>
4. JIT optimization is not supported.
5. JIT optimization is not supported.
</P>
<br><a name="SEC7" href="#TOC1">AUTHOR</a><br>
<P>
@@ -244,7 +253,7 @@ Cambridge, England.
</P>
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
<P>
Last updated: 19 January 2024
Last updated: 30 August 2024
<br>
Copyright &copy; 1997-2024 University of Cambridge.
<br>
+1 -1
View File
@@ -399,7 +399,7 @@ Cambridge, England.
</P>
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
<P>
Last updated: 04 September 2019
Last updated: 27 November 2024
<br>
Copyright &copy; 1997-2019 University of Cambridge.
<br>
File diff suppressed because it is too large Load Diff
+1 -1
View File
@@ -271,7 +271,7 @@ Cambridge, England.
</P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P>
Last updated: 27 July 2022
Last updated: 06 December 2022
<br>
Copyright &copy; 1997-2022 University of Cambridge.
<br>
+2 -2
View File
@@ -171,7 +171,7 @@ REG_UTF. Note that REG_NOSPEC is not part of the POSIX standard.
</pre>
When a pattern that is compiled with this flag is passed to
<b>pcre2_regexec()</b> for matching, the <i>nmatch</i> and <i>pmatch</i> arguments
are ignored, and no captured strings are returned. Versions of the PCRE library
are ignored, and no captured strings are returned. Versions of the PCRE2 library
prior to 10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile option, but this
no longer happens because it disables the use of backreferences.
<pre>
@@ -370,7 +370,7 @@ Cambridge, England.
</P>
<br><a name="SEC10" href="#TOC1">REVISION</a><br>
<P>
Last updated: 19 January 2024
Last updated: 27 November 2024
<br>
Copyright &copy; 1997-2024 University of Cambridge.
<br>
+1 -1
View File
@@ -101,7 +101,7 @@ Cambridge, England.
REVISION
</b><br>
<P>
Last updated: 02 February 2016
Last updated: 14 November 2023
<br>
Copyright &copy; 1997-2016 University of Cambridge.
<br>
+1 -1
View File
@@ -203,7 +203,7 @@ Cambridge, England.
</P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P>
Last updated: 27 June 2018
Last updated: 19 January 2024
<br>
Copyright &copy; 1997-2018 University of Cambridge.
<br>
+190 -71
View File
@@ -24,34 +24,41 @@ please consult the man page, in case the conversion went wrong.
<li><a name="TOC9" href="#SEC9">SCRIPT MATCHING WITH \p AND \P</a>
<li><a name="TOC10" href="#SEC10">THE BIDI_CLASS PROPERTY FOR \p AND \P</a>
<li><a name="TOC11" href="#SEC11">CHARACTER CLASSES</a>
<li><a name="TOC12" href="#SEC12">QUANTIFIERS</a>
<li><a name="TOC13" href="#SEC13">ANCHORS AND SIMPLE ASSERTIONS</a>
<li><a name="TOC14" href="#SEC14">REPORTED MATCH POINT SETTING</a>
<li><a name="TOC15" href="#SEC15">ALTERNATION</a>
<li><a name="TOC16" href="#SEC16">CAPTURING</a>
<li><a name="TOC17" href="#SEC17">ATOMIC GROUPS</a>
<li><a name="TOC18" href="#SEC18">COMMENT</a>
<li><a name="TOC19" href="#SEC19">OPTION SETTING</a>
<li><a name="TOC20" href="#SEC20">NEWLINE CONVENTION</a>
<li><a name="TOC21" href="#SEC21">WHAT \R MATCHES</a>
<li><a name="TOC22" href="#SEC22">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
<li><a name="TOC23" href="#SEC23">NON-ATOMIC LOOKAROUND ASSERTIONS</a>
<li><a name="TOC24" href="#SEC24">SCRIPT RUNS</a>
<li><a name="TOC25" href="#SEC25">BACKREFERENCES</a>
<li><a name="TOC26" href="#SEC26">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
<li><a name="TOC27" href="#SEC27">CONDITIONAL PATTERNS</a>
<li><a name="TOC28" href="#SEC28">BACKTRACKING CONTROL</a>
<li><a name="TOC29" href="#SEC29">CALLOUTS</a>
<li><a name="TOC30" href="#SEC30">SEE ALSO</a>
<li><a name="TOC31" href="#SEC31">AUTHOR</a>
<li><a name="TOC32" href="#SEC32">REVISION</a>
<li><a name="TOC12" href="#SEC12">PERL EXTENDED CHARACTER CLASSES</a>
<li><a name="TOC13" href="#SEC13">QUANTIFIERS</a>
<li><a name="TOC14" href="#SEC14">ANCHORS AND SIMPLE ASSERTIONS</a>
<li><a name="TOC15" href="#SEC15">REPORTED MATCH POINT SETTING</a>
<li><a name="TOC16" href="#SEC16">ALTERNATION</a>
<li><a name="TOC17" href="#SEC17">CAPTURING</a>
<li><a name="TOC18" href="#SEC18">ATOMIC GROUPS</a>
<li><a name="TOC19" href="#SEC19">COMMENT</a>
<li><a name="TOC20" href="#SEC20">OPTION SETTING</a>
<li><a name="TOC21" href="#SEC21">NEWLINE CONVENTION</a>
<li><a name="TOC22" href="#SEC22">WHAT \R MATCHES</a>
<li><a name="TOC23" href="#SEC23">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
<li><a name="TOC24" href="#SEC24">NON-ATOMIC LOOKAROUND ASSERTIONS</a>
<li><a name="TOC25" href="#SEC25">SUBSTRING SCAN ASSERTION</a>
<li><a name="TOC26" href="#SEC26">SCRIPT RUNS</a>
<li><a name="TOC27" href="#SEC27">BACKREFERENCES</a>
<li><a name="TOC28" href="#SEC28">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
<li><a name="TOC29" href="#SEC29">CONDITIONAL PATTERNS</a>
<li><a name="TOC30" href="#SEC30">BACKTRACKING CONTROL</a>
<li><a name="TOC31" href="#SEC31">CALLOUTS</a>
<li><a name="TOC32" href="#SEC32">REPLACEMENT STRINGS</a>
<li><a name="TOC33" href="#SEC33">SEE ALSO</a>
<li><a name="TOC34" href="#SEC34">AUTHOR</a>
<li><a name="TOC35" href="#SEC35">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY</a><br>
<P>
The full syntax and semantics of the regular expressions that are supported by
PCRE2 are described in the
The full syntax and semantics of the regular expression patterns that are
supported by PCRE2 are described in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
documentation. This document contains a quick-reference summary of the syntax.
documentation. This document contains a quick-reference summary of the pattern
syntax, followed by the syntax of replacement strings for the substitution function.
The full description of the latter is in the
<a href="pcre2api.html"><b>pcre2api</b></a>
documentation.
</P>
<br><a name="SEC2" href="#TOC1">QUOTING</a><br>
<P>
@@ -60,7 +67,10 @@ documentation. This document contains a quick-reference summary of the syntax.
\Q...\E treat enclosed characters as literal
</pre>
Note that white space inside \Q...\E is always treated as literal, even if
PCRE2_EXTENDED is set, causing most other white space to be ignored.
PCRE2_EXTENDED is set, causing most other white space to be ignored. Note also
that PCRE2's handling of \Q...\E has some differences from Perl's. See the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
documentation for details.
</P>
<br><a name="SEC3" href="#TOC1">BRACED ITEMS</a><br>
<P>
@@ -91,6 +101,11 @@ sequence causes an error.
\xhh character with hex code hh
\x{hh..} character with hex code hh..
</pre>
\N{U+hh..} is synonymous with \x{hh..} but is not supported in environments
that use EBCDIC code (mainly IBM mainframes). Note that \N not followed by an
opening curly bracket has a different meaning (see below).
</P>
<P>
If PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set ("ALT_BSUX mode"), the
following are also recognized:
<pre>
@@ -98,7 +113,7 @@ following are also recognized:
\uhhhh character with hex code hhhh
\u{hh..} character with hex code hh.. but only for EXTRA_ALT_BSUX
</pre>
When \x is not followed by {, from zero to two hexadecimal digits are read,
When \x is not followed by {, one or two hexadecimal digits are read,
but in ALT_BSUX mode \x must be followed by two hexadecimal digits to be
recognized as a hexadecimal escape; otherwise it matches a literal "x".
Likewise, if \u (in ALT_BSUX mode) is not followed by four hexadecimal digits
@@ -112,9 +127,7 @@ a non-zero digit is complicated; for details see the section
in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
documentation, where details of escape processing in EBCDIC environments are
also given. \N{U+hh..} is synonymous with \x{hh..} in PCRE2 but is not
supported in EBCDIC environments. Note that \N not followed by an opening
curly bracket has a different meaning (see below).
also given.
</P>
<br><a name="SEC5" href="#TOC1">CHARACTER TYPES</a><br>
<P>
@@ -154,8 +167,9 @@ sequences to matching only ASCII characters.
</P>
<P>
Property descriptions in \p and \P are matched caselessly; hyphens,
underscores, and white space are ignored, in accordance with Unicode's "loose
matching" rules.
underscores, and ASCII white space characters are ignored, in accordance with
Unicode's "loose matching" rules. For example, \p{Bidi_Class=al} is the same
as \p{ bidi class = AL }.
</P>
<br><a name="SEC6" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
<P>
@@ -168,13 +182,13 @@ matching" rules.
Cs Surrogate
L Letter
Lc Cased letter, the union of Ll, Lu, and Lt
L& Synonym of Lc
Ll Lower case letter
Lm Modifier letter
Lo Other letter
Lt Title case letter
Lu Upper case letter
Lc Ll, Lu, or Lt
L& Ll, Lu, or Lt
M Mark
Mc Spacing mark
@@ -205,7 +219,9 @@ matching" rules.
Zl Line separator
Zp Paragraph separator
Zs Space separator
</PRE>
</pre>
From release 10.45, when caseless matching is set, Ll, Lu, and Lt are all
equivalent to Lc.
</P>
<br><a name="SEC7" href="#TOC1">PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P</a><br>
<P>
@@ -268,7 +284,7 @@ The recognized classes are:
RLI right-to-left isolate
RLO right-to-left override
S segment separator
WS which space
WS white space
</PRE>
</P>
<br><a name="SEC11" href="#TOC1">CHARACTER CLASSES</a><br>
@@ -299,7 +315,45 @@ In PCRE2, POSIX character set names recognize only ASCII characters by default,
but some of them use Unicode properties if PCRE2_UCP is set. You can use
\Q...\E inside a character class.
</P>
<br><a name="SEC12" href="#TOC1">QUANTIFIERS</a><br>
<P>
When PCRE2_ALT_EXTENDED_CLASS is set, UTS#18 extended character classes may be
used, allowing nested character classes, combined using set operators.
<pre>
[x&&[^y]] UTS#18 extended character class
x||y set union (OR)
x&&y set intersection (AND)
x--y set difference (AND NOT)
x~~y set symmetric difference (XOR)
</PRE>
</P>
<br><a name="SEC12" href="#TOC1">PERL EXTENDED CHARACTER CLASSES</a><br>
<P>
<pre>
(?[...]) Perl extended character class
(?[\p{Thai} & \p{Nd}]) operators; whitespace ignored
(?[(x - y) & z]) parentheses for grouping
(?[ [^3] & \p{Nd} ]) [...] is a nested ordinary class
(?[ [:alpha:] - [z] ]) POSIX set is allowed outside [...]
(?[ \d - [3] ]) backslash-escaped set is allowed outside [...]
(?[ !\n & [:ascii:] ]) backslash-escaped character is allowed outside [...]
all other characters or ranges must be enclosed in [...]
x|y, x+y set union (OR)
x&y set intersection (AND)
x-y set difference (AND NOT)
x^y set symmetric difference (XOR)
!x set complement (NOT)
</pre>
Inside a Perl extended character class, [...] switches mode to be interpreted
as an ordinary character class. Outside of a nested [...], the only items
permitted are backslash-escapes, POSIX sets, operators, and parentheses. Inside
a nested ordinary class, ^ has its usual meaning (inverts the class when used
as the first character); outside of a nested class, ^ is the XOR operator.
</P>
<br><a name="SEC13" href="#TOC1">QUANTIFIERS</a><br>
<P>
<pre>
? 0 or 1, greedy
@@ -323,7 +377,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
{,m}? zero up to m, lazy
</PRE>
</P>
<br><a name="SEC13" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
<br><a name="SEC14" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
<P>
<pre>
\b word boundary
@@ -341,7 +395,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
\G first matching position in subject
</PRE>
</P>
<br><a name="SEC14" href="#TOC1">REPORTED MATCH POINT SETTING</a><br>
<br><a name="SEC15" href="#TOC1">REPORTED MATCH POINT SETTING</a><br>
<P>
<pre>
\K set reported start of match
@@ -351,13 +405,13 @@ for compatibility with Perl. However, if the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
option is set, the previous behaviour is re-enabled. When this option is set,
\K is honoured in positive assertions, but ignored in negative ones.
</P>
<br><a name="SEC15" href="#TOC1">ALTERNATION</a><br>
<br><a name="SEC16" href="#TOC1">ALTERNATION</a><br>
<P>
<pre>
expr|expr|expr...
</PRE>
</P>
<br><a name="SEC16" href="#TOC1">CAPTURING</a><br>
<br><a name="SEC17" href="#TOC1">CAPTURING</a><br>
<P>
<pre>
(...) capture group
@@ -372,20 +426,20 @@ In non-UTF modes, names may contain underscores and ASCII letters and digits;
in UTF modes, any Unicode letters and Unicode decimal digits are permitted. In
both cases, a name must not start with a digit.
</P>
<br><a name="SEC17" href="#TOC1">ATOMIC GROUPS</a><br>
<br><a name="SEC18" href="#TOC1">ATOMIC GROUPS</a><br>
<P>
<pre>
(?&#62;...) atomic non-capture group
(*atomic:...) atomic non-capture group
</PRE>
</P>
<br><a name="SEC18" href="#TOC1">COMMENT</a><br>
<br><a name="SEC19" href="#TOC1">COMMENT</a><br>
<P>
<pre>
(?#....) comment (not nestable)
</PRE>
</P>
<br><a name="SEC19" href="#TOC1">OPTION SETTING</a><br>
<br><a name="SEC20" href="#TOC1">OPTION SETTING</a><br>
<P>
Changes of these options within a group are automatically cancelled at the end
of the group.
@@ -409,7 +463,7 @@ of the group.
(?^) unset imnrsx options
</pre>
(?aP) implies (?aT) as well, though this has no additional effect. However, it
means that (?-aP) is really (?-PT) which disables all ASCII restrictions for
means that (?-aP) also implies (?-aT) and disables all ASCII restrictions for
POSIX classes.
</P>
<P>
@@ -421,20 +475,22 @@ example (?i:...).
</P>
<P>
The following are recognized only at the very start of a pattern or after one
of the newline or \R options with similar syntax. More than one of them may
appear. For the first three, d is a decimal number.
of the newline or \R sequences or options with similar syntax. More than one
of them may appear. For the first three, d is a decimal number.
<pre>
(*LIMIT_DEPTH=d) set the backtracking limit to d
(*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes
(*LIMIT_MATCH=d) set the match limit to d
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
(*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS)
(*LIMIT_DEPTH=d) set the backtracking limit to d
(*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes
(*LIMIT_MATCH=d) set the match limit to d
(*CASELESS_RESTRICT) set PCRE2_EXTRA_CASELESS_RESTRICT when matching
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
(*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS)
(*NO_DOTSTAR_ANCHOR) no .* anchoring (PCRE2_NO_DOTSTAR_ANCHOR)
(*NO_JIT) disable JIT optimization
(*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE)
(*UTF) set appropriate UTF mode for the library in use
(*UCP) set PCRE2_UCP (use Unicode properties for \d etc)
(*NO_JIT) disable JIT optimization
(*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE)
(*TURKISH_CASING) set PCRE2_EXTRA_TURKISH_CASING when matching
(*UTF) set appropriate UTF mode for the library in use
(*UCP) set PCRE2_UCP (use Unicode properties for \d etc)
</pre>
Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the value of
the limits set by the caller of <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>,
@@ -442,7 +498,7 @@ not increase them. LIMIT_RECURSION is an obsolete synonym for LIMIT_DEPTH. The
application can lock out the use of (*UTF) and (*UCP) by setting the
PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at compile time.
</P>
<br><a name="SEC20" href="#TOC1">NEWLINE CONVENTION</a><br>
<br><a name="SEC21" href="#TOC1">NEWLINE CONVENTION</a><br>
<P>
These are recognized only at the very start of the pattern or after option
settings with a similar syntax.
@@ -455,7 +511,7 @@ settings with a similar syntax.
(*NUL) the NUL character (binary zero)
</PRE>
</P>
<br><a name="SEC21" href="#TOC1">WHAT \R MATCHES</a><br>
<br><a name="SEC22" href="#TOC1">WHAT \R MATCHES</a><br>
<P>
These are recognized only at the very start of the pattern or after option
setting with a similar syntax.
@@ -464,7 +520,7 @@ setting with a similar syntax.
(*BSR_UNICODE) any Unicode newline sequence
</PRE>
</P>
<br><a name="SEC22" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
<br><a name="SEC23" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
<P>
<pre>
(?=...) )
@@ -490,7 +546,7 @@ the maximum for each branch is limited to a value set by the caller of
(ultimate default 255). If every branch matches a fixed number of characters,
the limit for each branch is 65535 characters.
</P>
<br><a name="SEC23" href="#TOC1">NON-ATOMIC LOOKAROUND ASSERTIONS</a><br>
<br><a name="SEC24" href="#TOC1">NON-ATOMIC LOOKAROUND ASSERTIONS</a><br>
<P>
These assertions are specific to PCRE2 and are not Perl-compatible.
<pre>
@@ -503,7 +559,24 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
(*non_atomic_positive_lookbehind:...) )
</PRE>
</P>
<br><a name="SEC24" href="#TOC1">SCRIPT RUNS</a><br>
<br><a name="SEC25" href="#TOC1">SUBSTRING SCAN ASSERTION</a><br>
<P>
This feature is not Perl-compatible.
<pre>
(*scan_substring:(grouplist)...) scan captured substring
(*scs:(grouplist)...) scan captured substring
</pre>
The comma-separated list may identify groups in any of the following ways:
<pre>
n absolute reference
+n relative reference
-n relative reference
&#60;name&#62; name
'name' name
</PRE>
</P>
<br><a name="SEC26" href="#TOC1">SCRIPT RUNS</a><br>
<P>
<pre>
(*script_run:...) ) script run, can be backtracked into
@@ -513,7 +586,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
(*asr:...) )
</PRE>
</P>
<br><a name="SEC25" href="#TOC1">BACKREFERENCES</a><br>
<br><a name="SEC27" href="#TOC1">BACKREFERENCES</a><br>
<P>
<pre>
\n reference by number (can be ambiguous)
@@ -530,7 +603,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
(?P=name) reference by name (Python)
</PRE>
</P>
<br><a name="SEC26" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
<br><a name="SEC28" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
<P>
<pre>
(?R) recurse whole pattern
@@ -549,7 +622,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
\g'-n' call subroutine by relative number (PCRE2 extension)
</PRE>
</P>
<br><a name="SEC27" href="#TOC1">CONDITIONAL PATTERNS</a><br>
<br><a name="SEC29" href="#TOC1">CONDITIONAL PATTERNS</a><br>
<P>
<pre>
(?(condition)yes-pattern)
@@ -572,7 +645,7 @@ Note the ambiguity of (?(R) and (?(Rn) which might be named reference
conditions or recursion tests. Such a condition is interpreted as a reference
condition if the relevant named group exists.
</P>
<br><a name="SEC28" href="#TOC1">BACKTRACKING CONTROL</a><br>
<br><a name="SEC30" href="#TOC1">BACKTRACKING CONTROL</a><br>
<P>
All backtracking control verbs may be in the form (*VERB:NAME). For (*MARK) the
name is mandatory, for the others it is optional. (*SKIP) changes its behaviour
@@ -599,7 +672,7 @@ pattern is not anchored.
The effect of one of these verbs in a group called as a subroutine is confined
to the subroutine call.
</P>
<br><a name="SEC29" href="#TOC1">CALLOUTS</a><br>
<br><a name="SEC31" href="#TOC1">CALLOUTS</a><br>
<P>
<pre>
(?C) callout (assumed number 0)
@@ -610,12 +683,58 @@ The allowed string delimiters are ` ' " ^ % # $ (which are the same for the
start and the end), and the starting delimiter { matched with the ending
delimiter }. To encode the ending delimiter within the string, double it.
</P>
<br><a name="SEC30" href="#TOC1">SEE ALSO</a><br>
<br><a name="SEC32" href="#TOC1">REPLACEMENT STRINGS</a><br>
<P>
If the PCRE2_SUBSTITUTE_LITERAL option is set, a replacement string for
<b>pcre2_substitute()</b> is not interpreted. Otherwise, by default, the only
special character is the dollar character in one of the following forms:
<pre>
$$ insert a dollar character
$n or ${n} insert the contents of group <i>n</i>
$&#60;name&#62; insert the contents of named group
$0 or $& insert the entire matched substring
$` insert the substring that precedes the match
$' insert the substring that follows the match
$_ insert the entire input string
$*MARK or ${*MARK} insert a control verb name
</pre>
For ${n}, n can be a name or a number. If PCRE2_SUBSTITUTE_EXTENDED is set,
there is additional interpretation:
</P>
<P>
1. Backslash is an escape character, and the forms described in "ESCAPED
CHARACTERS" above are recognized. Also:
<pre>
\Q...\E can be used to suppress interpretation
\l force the next character to lower case
\u force the next character to upper case
\L force subsequent characters to lower case
\U force subsequent characters to upper case
\u\L force next character to upper case, then all lower
\l\U force next character to lower case, then all upper
\E end \L or \U case forcing
\b backspace character (note: as in character class in pattern)
\v vertical tab character (note: not the same as in a pattern)
</pre>
2. The Python form \g&#60;n&#62;, where the angle brackets are part of the syntax and
<i>n</i> is either a group name or a number, is recognized as an alternative way
of inserting the contents of a group, for example \g&#60;3&#62;.
</P>
<P>
3. Capture substitution supports the following additional forms:
<pre>
${n:-string} default for unset group
${n:+string1:string2} values for set/unset group
</pre>
The substitution strings themselves are expanded. Backslash can be used to
escape colons and closing curly brackets.
</P>
<br><a name="SEC33" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre2pattern</b>(3), <b>pcre2api</b>(3), <b>pcre2callout</b>(3),
<b>pcre2matching</b>(3), <b>pcre2</b>(3).
</P>
<br><a name="SEC31" href="#TOC1">AUTHOR</a><br>
<br><a name="SEC34" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
@@ -624,11 +743,11 @@ Retired from University Computing Service
Cambridge, England.
<br>
</P>
<br><a name="SEC32" href="#TOC1">REVISION</a><br>
<br><a name="SEC35" href="#TOC1">REVISION</a><br>
<P>
Last updated: 12 October 2023
Last updated: 27 November 2024
<br>
Copyright &copy; 1997-2023 University of Cambridge.
Copyright &copy; 1997-2024 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
+98 -38
View File
@@ -105,8 +105,8 @@ Input for the 16-bit and 32-bit libraries
<P>
When testing the 16-bit or 32-bit libraries, there is a need to be able to
generate character code points greater than 255 in the strings that are passed
to the library. For subject lines, backslash escapes can be used. In addition,
when the <b>utf</b> modifier (see
to the library. For subject lines and some patterns, backslash escapes can be
used. In addition, when the <b>utf</b> modifier (see
<a href="#optionmodifiers">"Setting compilation options"</a>
below) is set, the pattern and any following subject lines are interpreted as
UTF-8 strings and translated to UTF-16 or UTF-32 as appropriate.
@@ -125,9 +125,8 @@ UTF-8 (in its original definition) is not capable of encoding values greater
than 0x7fffffff, but such values can be handled by the 32-bit library. When
testing this library in non-UTF mode with <b>utf8_input</b> set, if any
character is preceded by the byte 0xff (which is an invalid byte in UTF-8)
0x80000000 is added to the character's value. This is the only way of passing
such code points in a pattern string. For subject strings, using an escape
sequence is preferable.
0x80000000 is added to the character's value. For subject strings, using an
escape sequence is preferable.
</P>
<br><a name="SEC4" href="#TOC1">COMMAND LINE OPTIONS</a><br>
<P>
@@ -178,8 +177,8 @@ functionality is intended for use in scripts such as <b>RunTest</b>. The
following options output the value and set the exit code as indicated:
<pre>
ebcdic-nl the code for LF (= NL) in an EBCDIC environment:
0x15 or 0x25
0 if used in an ASCII environment
either 0x15 or 0x25
0 if used in an ASCII/Unicode environment
exit code is always 0
linksize the configured internal link size (2, 3, or 4)
exit code is set to the link size
@@ -201,6 +200,16 @@ to the same value:
pcre2-8 the 8-bit library was built
unicode Unicode support is available
</pre>
Note that the availability of JIT support in the library does not guarantee
that it can actually be used because in some environments it is unable to
allocate executable memory. The option "jitusable" gives more detailed
information. It returns one of the following values:
<pre>
0 JIT is available and usable
1 JIT is available but cannot allocate executable memory
2 JIT is not available
3 Unexpected return from test call to <b>pcre2_jit_compile()</b>
</pre>
If an unknown option is given, an error message is output; the exit code is 0.
</P>
<P>
@@ -527,39 +536,48 @@ space is removed, and the line is scanned for backslash escapes, unless the
<b>subject_literal</b> modifier was set for the pattern. The following provide a
means of encoding non-printing characters in a visible way:
<pre>
\a alarm (BEL, \x07)
\b backspace (\x08)
\e escape (\x27)
\f form feed (\x0c)
\n newline (\x0a)
\r carriage return (\x0d)
\t tab (\x09)
\v vertical tab (\x0b)
\nnn octal character (up to 3 octal digits); always
a byte unless &#62; 255 in UTF-8 or 16-bit or 32-bit mode
\o{dd...} octal character (any number of octal digits}
\xhh hexadecimal byte (up to 2 hex digits)
\x{hh...} hexadecimal character (any number of hex digits)
\a alarm (BEL, \x07)
\b backspace (\x08)
\e escape (\x1b)
\f form feed (\x0c)
\n newline (\x0a)
\N{U+hh...} unicode character (any number of hex digits)
\r carriage return (\x0d)
\t tab (\x09)
\v vertical tab (\x0b)
\ddd octal number (up to 3 octal digits); represents a single
code point unless larger than 255 with the 8-bit library
\o{dd...} octal number (any number of octal digits) representing a
character in UTF mode or a code point
\xhh hexadecimal byte (up to 2 hex digits)
\x{hh...} hexadecimal number (up to 8 hex digits) representing a
character in UTF mode or a code point
</pre>
The use of \x{hh...} is not dependent on the use of the <b>utf</b> modifier on
the pattern. It is recognized always. There may be any number of hexadecimal
digits inside the braces; invalid values provoke error messages.
Invoking \N{U+hh...} or \x{hh...} doesn't require the use of the <b>utf</b>
modifier on the pattern; both are always recognized. There may be any number of
hexadecimal digits inside the braces. Invalid values provoke error messages,
except that some invalid Unicode characters given with \N{U+hh...} are
accepted with a warning instead.
</P>
<P>
Note that \xhh specifies one byte rather than one character in UTF-8 mode;
this makes it possible to construct invalid UTF-8 sequences for testing
purposes. On the other hand, \x{hh} is interpreted as a UTF-8 character in
UTF-8 mode, generating more than one byte if the value is greater than 127.
When testing the 8-bit library not in UTF-8 mode, \x{hh} generates one byte
for values less than 256, and causes an error for greater values.
Note that even in UTF-8 mode, \xhh (and depending on how large, \ddd)
describe one byte rather than one character; this makes it possible to
construct invalid UTF-8 sequences for testing purposes. On the other hand,
\x{hh...} is interpreted as a UTF-8 character in UTF-8 mode, only generating
more than one byte if the value is greater than 127. To avoid the ambiguity
it is preferred to use \N{U+hh...} when describing characters. When testing
the 8-bit library not in UTF-8 mode, \x{hh} generates one byte for values
that fit in one byte, and causes an error for greater values.
</P>
<P>
In UTF-16 mode, all 4-digit \x{hhhh} values are accepted. This makes it
possible to construct invalid UTF-16 sequences for testing purposes.
When testing the 16-bit library, not in UTF-16 mode, all 4-digit \x{hhhh}
values are accepted. This makes it possible to construct invalid UTF-16
sequences for testing purposes.
</P>
<P>
In UTF-32 mode, all 4- to 8-digit \x{...} values are accepted. This makes it
possible to construct invalid UTF-32 sequences for testing purposes.
When testing the 32-bit library, not in UTF-32 mode, all 4- to 8-digit \x{...}
values are accepted. This makes it possible to construct invalid UTF-32
sequences for testing purposes.
</P>
<P>
There is a special backslash sequence that specifies replication of one or more
@@ -625,6 +643,7 @@ for a description of the effects of these options.
allow_surrogate_escapes set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
alt_bsux set PCRE2_ALT_BSUX
alt_circumflex set PCRE2_ALT_CIRCUMFLEX
alt_extended_class set PCRE2_ALT_EXTENDED_CLASS
alt_verbnames set PCRE2_ALT_VERBNAMES
anchored set PCRE2_ANCHORED
/a ascii_all set all ASCII options
@@ -653,13 +672,17 @@ for a description of the effects of these options.
match_word set PCRE2_EXTRA_MATCH_WORD
/m multiline set PCRE2_MULTILINE
never_backslash_c set PCRE2_NEVER_BACKSLASH_C
never_callout set PCRE2_EXTRA_NEVER_CALLOUT
never_ucp set PCRE2_NEVER_UCP
never_utf set PCRE2_NEVER_UTF
/n no_auto_capture set PCRE2_NO_AUTO_CAPTURE
no_auto_possess set PCRE2_NO_AUTO_POSSESS
no_bs0 set PCRE2_EXTRA_NO_BS0
no_dotstar_anchor set PCRE2_NO_DOTSTAR_ANCHOR
no_start_optimize set PCRE2_NO_START_OPTIMIZE
no_utf_check set PCRE2_NO_UTF_CHECK
python_octal set PCRE2_EXTRA_PYTHON_OCTAL
turkish_casing set PCRE2_EXTRA_TURKISH_CASING
ucp set PCRE2_UCP
ungreedy set PCRE2_UNGREEDY
use_offset_limit set PCRE2_USE_OFFSET_LIMIT
@@ -671,6 +694,23 @@ notation. Otherwise, those less than 0x100 are output in hex without the curly
brackets. Setting <b>utf</b> in 16-bit or 32-bit mode also causes pattern and
subject strings to be translated to UTF-16 or UTF-32, respectively, before
being passed to library functions.
<br>
<br>
The following modifiers enable or disable performance optimizations by
calling <b>pcre2_set_optimize()</b> before invoking the regex compiler.
<pre>
optimization_full enable all optional optimizations
optimization_none disable all optional optimizations
auto_possess auto-possessify variable quantifiers
auto_possess_off don't auto-possessify variable quantifiers
dotstar_anchor anchor patterns starting with .*
dotstar_anchor_off don't anchor patterns starting with .*
start_optimize enable pre-scan of subject string
start_optimize_off disable pre-scan of subject string
</pre>
See the
<a href="pcre2_set_optimize.html"><b>pcre2_set_optimize</b></a>
documentation for details on these optimizations.
<a name="controlmodifiers"></a></P>
<br><b>
Setting compilation controls
@@ -680,14 +720,15 @@ The following modifiers affect the compilation process or request information
about the pattern. There are single-letter abbreviations for some that are
heavily used in the test files.
<pre>
bsr=[anycrlf|unicode] specify \R handling
/B bincode show binary code without lengths
bsr=[anycrlf|unicode] specify \R handling
callout_info show callout information
convert=&#60;options&#62; request foreign pattern conversion
convert_glob_escape=c set glob escape character
convert_glob_separator=c set glob separator character
convert_length set convert buffer length
debug same as info,fullbincode
expand expand repetition syntax in pattern
framesize show matching frame size
fullbincode show binary code with lengths
/I info show info about compiled pattern
@@ -709,6 +750,7 @@ heavily used in the test files.
posix_nosub use the POSIX API with REG_NOSUB
push push compiled pattern onto the stack
pushcopy push a copy onto the stack
pushtablescopy push a copy with tables onto the stack
stackguard=&#60;number&#62; test the stackguard feature
subject_literal treat all subject lines as literal
tables=[0|1|2|3] select internal tables
@@ -1128,6 +1170,7 @@ process.
replace=&#60;string&#62; specify a replacement string
startchar show starting character when relevant
substitute_callout use substitution callouts
substitute_case_callout use substitution case callouts
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
substitute_literal use PCRE2_SUBSTITUTE_LITERAL
substitute_matched use PCRE2_SUBSTITUTE_MATCHED
@@ -1217,10 +1260,11 @@ Setting match options
<P>
The following modifiers set options for <b>pcre2_match()</b> or
<b>pcre2_dfa_match()</b>. See
<a href="pcreapi.html"><b>pcreapi</b></a>
<a href="pcre2api.html"><b>pcre2api</b></a>
for a description of their effects.
<pre>
anchored set PCRE2_ANCHORED
copy_matched_subject set PCRE2_COPY_MATCHED_SUBJECT
endanchored set PCRE2_ENDANCHORED
dfa_restart set PCRE2_DFA_RESTART
dfa_shortest set PCRE2_DFA_SHORTEST
@@ -1271,8 +1315,8 @@ pattern, but can be overridden by modifiers on the subject.
aftertext show text after match
allaftertext show text after captures
allcaptures show all captures
allvector show the entire ovector
allusedtext show all consulted text (non-JIT only)
allvector show the entire ovector
altglobal alternative global matching
callout_capture show captures at callout time
callout_data=&#60;n&#62; set a value to pass via callouts
@@ -1306,7 +1350,8 @@ pattern, but can be overridden by modifiers on the subject.
startchar show startchar when relevant
startoffset=&#60;n&#62; same as offset=&#60;n&#62;
substitute_callout use substitution callouts
substitute_extedded use PCRE2_SUBSTITUTE_EXTENDED
substitute_case_callout use substitution case callouts
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
substitute_literal use PCRE2_SUBSTITUTE_LITERAL
substitute_matched use PCRE2_SUBSTITUTE_MATCHED
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
@@ -1592,6 +1637,21 @@ If both are set for the same number, stop takes precedence. Only a single skip
or stop is supported, which is sufficient for testing that the feature works.
</P>
<br><b>
Testing substitute case callouts
</b><br>
<P>
If the <b>substitute_case_callout</b> modifier is set, a substitution
case callout function is set up. The callout function is called for each
substituted chunk which is to be case-transformed.
</P>
<P>
The callout function passed is a fixed function with implementation for certain
behaviours: inputs which shrink when case-transformed; inputs which grow; inputs
with distinct upper/lower/titlecase forms. The characters which are not
special-cased for testing purposes are left unmodified, as if they are caseless
characters.
</P>
<br><b>
Setting the JIT stack size
</b><br>
<P>
@@ -2204,7 +2264,7 @@ Cambridge, England.
</P>
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
<P>
Last updated: 24 April 2024
Last updated: 26 December 2024
<br>
Copyright &copy; 1997-2024 University of Cambridge.
<br>
+37 -3
View File
@@ -53,7 +53,7 @@ When PCRE2 is built with Unicode support, the escape sequences \p{..},
The Unicode properties that can be tested are a subset of those that Perl
supports. Currently they are limited to the general category properties such as
Lu for an upper case letter or Nd for a decimal number, the derived properties
Any and LC (synonym L&), the Unicode script names such as Arabic or Han,
Any and Lc (synonym L&), the Unicode script names such as Arabic or Han,
Bidi_Class, Bidi_Control, and a few binary properties.
</P>
<P>
@@ -157,6 +157,40 @@ Recognition of these non-ASCII characters as case-equivalent to their ASCII
counterparts can be disabled by setting the PCRE2_EXTRA_CASELESS_RESTRICT
option. When this is set, all characters in a case equivalence must either be
ASCII or non-ASCII; there can be no mixing.
<pre>
Without PCRE2_EXTRA_CASELESS_RESTRICT:
'k' = 'K' = U+212A (Kelvin sign)
's' = 'S' = U+017F (long S)
With PCRE2_EXTRA_CASELESS_RESTRICT:
'k' = 'K'
U+212A (Kelvin sign) only case-equivalent to itself
's' = 'S'
U+017F (long S) only case-equivalent to itself
</PRE>
</P>
<P>
One language family, Turkish and Azeri, has its own case-insensitivity rules,
which can be selected by setting PCRE2_EXTRA_TURKISH_CASING. This alters the
behaviour of the 'i', 'I', U+0130 (capital I with dot above), and U+0131
(small dotless i) characters.
<pre>
Without PCRE2_EXTRA_TURKISH_CASING:
'i' = 'I'
U+0130 (capital I with dot above) only case-equivalent to itself
U+0131 (small dotless i) only case-equivalent to itself
With PCRE2_EXTRA_TURKISH_CASING:
'i' = U+0130 (capital I with dot above)
U+0131 (small dotless i) = 'I'
</PRE>
</P>
<P>
It is not allowed to specify both PCRE2_EXTRA_CASELESS_RESTRICT and
PCRE2_EXTRA_TURKISH_CASING together.
</P>
<P>
From release 10.45 the Unicode letter properties Lu (upper case), Ll (lower
case), and Lt (title case) are all treated as Lc (cased letter) when caseless
matching is set by the PCRE2_CASELESS option or (?i) within the pattern.
<a name="scriptruns"></a></P>
<br><b>
SCRIPT RUNS
@@ -513,9 +547,9 @@ Cambridge, England.
REVISION
</b><br>
<P>
Last updated: 12 October 2023
Last updated: 27 November 2024
<br>
Copyright &copy; 1997-2023 University of Cambridge.
Copyright &copy; 1997-2024 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
+9
View File
@@ -267,6 +267,9 @@ in the library.
<tr><td><a href="pcre2_set_offset_limit.html">pcre2_set_offset_limit</a></td>
<td>&nbsp;&nbsp;Set the offset limit</td></tr>
<tr><td><a href="pcre2_set_optimize.html">pcre2_set_optimize</a></td>
<td>&nbsp;&nbsp;Set an optimization directive</td></tr>
<tr><td><a href="pcre2_set_parens_nest_limit.html">pcre2_set_parens_nest_limit</a></td>
<td>&nbsp;&nbsp;Set the parentheses nesting limit</td></tr>
@@ -276,6 +279,12 @@ in the library.
<tr><td><a href="pcre2_set_recursion_memory_management.html">pcre2_set_recursion_memory_management</a></td>
<td>&nbsp;&nbsp;Obsolete function that (from 10.30 onwards) does nothing</td></tr>
<tr><td><a href="pcre2_set_substitute_callout.html">pcre2_set_substitute_callout</a></td>
<td>&nbsp;&nbsp;Set a substitution callout function</td></tr>
<tr><td><a href="pcre2_set_substitute_case_callout.html">pcre2_set_substitute_case_callout</a></td>
<td>&nbsp;&nbsp;Set a substitution case callout function</td></tr>
<tr><td><a href="pcre2_substitute.html">pcre2_substitute</a></td>
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string and do
substitutions</td></tr>
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2-CONFIG 1 "28 September 2014" "PCRE2 10.00"
.TH PCRE2-CONFIG 1 "28 September 2014" "PCRE2 10.46"
.SH NAME
pcre2-config - program to return PCRE2 configuration
.SH SYNOPSIS
+1 -2
View File
@@ -1,4 +1,3 @@
PCRE2-CONFIG(1) General Commands Manual PCRE2-CONFIG(1)
@@ -82,4 +81,4 @@ REVISION
Last updated: 28 September 2014
PCRE2 10.00 28 September 2014 PCRE2-CONFIG(1)
PCRE2 10.46 28 September 2014 PCRE2-CONFIG(1)
+13 -10
View File
@@ -1,4 +1,4 @@
.TH PCRE2 3 "27 August 2021" "PCRE2 10.38"
.TH PCRE2 3 "18 December 2024" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH INTRODUCTION
@@ -186,23 +186,26 @@ In the "man" and HTML formats, there is also a short page for each C library
function, listing its arguments and results.
.
.
.SH AUTHOR
.SH AUTHORS
.rs
.sp
.nf
Philip Hazel
Retired from University Computing Service
Cambridge, England.
.fi
The current maintainers of PCRE2 are Nicholas Wilson and Zoltan Herczeg.
.P
Putting an actual email address here is a spam magnet. If you want to email me,
use my two names separated by a dot at gmail.com.
PCRE2 was written by Philip Hazel, of the University Computing Service,
Cambridge, England. Many others have also contributed.
.P
To contact the maintainers, please use the GitHub issues tracker or PCRE2
mailing list, as described at the project page:
.\" HTML <a href="https://github.com/PCRE2Project/pcre2">
.\" </a>
https://github.com/PCRE2Project/pcre2
.\"
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 27 August 2021
Last updated: 18 December 2024
Copyright (c) 1997-2021 University of Cambridge.
.fi
+2168 -1452
View File
File diff suppressed because it is too large Load Diff
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_COMPILE 3 "23 March 2017" "PCRE2 10.30"
.TH PCRE2_COMPILE 3 "23 March 2017" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_CODE_COPY 3 "22 November 2016" "PCRE2 10.23"
.TH PCRE2_CODE_COPY 3 "22 November 2016" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_CODE_COPY 3 "22 November 2016" "PCRE2 10.23"
.TH PCRE2_CODE_COPY 3 "16 January 2017" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_CODE_FREE 3 "28 June 2018" "PCRE2 10.32"
.TH PCRE2_CODE_FREE 3 "28 June 2018" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+2 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_COMPILE 3 "19 January 2024" "PCRE2 10.43"
.TH PCRE2_COMPILE 3 "30 October 2024" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -45,6 +45,7 @@ The primary option bits are:
PCRE2_ALLOW_EMPTY_CLASS Allow empty classes
PCRE2_ALT_BSUX Alternative handling of \eu, \eU, and \ex
PCRE2_ALT_CIRCUMFLEX Alternative handling of ^ in multiline mode
PCRE2_ALT_EXTENDED_CLASS Alternative extended character class syntax
PCRE2_ALT_VERBNAMES Process backslashes in verb names
PCRE2_AUTO_CALLOUT Compile automatic callouts
PCRE2_CASELESS Do caseless matching
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_COMPILE_CONTEXT_COPY 3 "22 October 2014" "PCRE2 10.00"
.TH PCRE2_COMPILE_CONTEXT_COPY 3 "25 October 2014" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_COMPILE_CONTEXT_CREATE 3 "22 October 2014" "PCRE2 10.00"
.TH PCRE2_COMPILE_CONTEXT_CREATE 3 "25 October 2014" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_COMPILE_CONTEXT_FREE 3 "29 June 2018" "PCRE2 10.32"
.TH PCRE2_COMPILE_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_CONFIG 3 "16 September 2017" "PCRE2 10.31"
.TH PCRE2_CONFIG 3 "16 September 2017" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_CONVERT_CONTEXT_COPY 3 "10 July 2017" "PCRE2 10.30"
.TH PCRE2_CONVERT_CONTEXT_COPY 3 "12 July 2017" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_CONVERT_CONTEXT_CREATE 3 "10 July 2017" "PCRE2 10.30"
.TH PCRE2_CONVERT_CONTEXT_CREATE 3 "12 July 2017" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_CONVERT_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.32"
.TH PCRE2_CONVERT_CONTEXT_FREE 3 "13 August 2018" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_CONVERTED_PATTERN_FREE 3 "28 June 2018" "PCRE2 10.32"
.TH PCRE2_CONVERTED_PATTERN_FREE 3 "13 August 2018" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_DFA_MATCH 3 "28 August 2021" "PCRE2 10.38"
.TH PCRE2_DFA_MATCH 3 "31 August 2021" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_GENERAL_CONTEXT_COPY 3 "22 October 2014" "PCRE2 10.00"
.TH PCRE2_GENERAL_CONTEXT_COPY 3 "25 October 2014" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_GENERAL_CONTEXT_CREATE 3 "22 October 2014" "PCRE2 10.00"
.TH PCRE2_GENERAL_CONTEXT_CREATE 3 "23 January 2023" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_GENERAL_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.32"
.TH PCRE2_GENERAL_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_GET_ERROR_MESSAGE 3 "24 March 2017" "PCRE2 10.30"
.TH PCRE2_GET_ERROR_MESSAGE 3 "24 March 2017" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_GET_MARK 3 "13 October 2017" "PCRE2 10.31"
.TH PCRE2_GET_MARK 3 "13 January 2018" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -1,4 +1,4 @@
.TH PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE 3 "13 January 2023" "PCRE2 10.43"
.TH PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE 3 "18 January 2023" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_GET_MATCH_DATA_SIZE 3 "16 July 2019" "PCRE2 10.34"
.TH PCRE2_GET_MATCH_DATA_SIZE 3 "17 October 2019" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_GET_OVECTOR_COUNT 3 "24 October 2014" "PCRE2 10.00"
.TH PCRE2_GET_OVECTOR_COUNT 3 "25 October 2014" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_GET_OVECTOR_POINTER 3 "24 October 2014" "PCRE2 10.00"
.TH PCRE2_GET_OVECTOR_POINTER 3 "25 October 2014" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+1 -1
View File
@@ -1,4 +1,4 @@
.TH PCRE2_GET_STARTCHAR 3 "24 October 2014" "PCRE2 10.00"
.TH PCRE2_GET_STARTCHAR 3 "25 October 2014" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
+19 -9
View File
@@ -1,4 +1,4 @@
.TH PCRE2_JIT_COMPILE 3 "29 July 2019" "PCRE2 10.34"
.TH PCRE2_JIT_COMPILE 3 "22 August 2024" "PCRE2 10.46"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -22,9 +22,17 @@ details are given in the
.\"
documentation.
.P
The first argument is a pointer that was returned by a successful call to
\fBpcre2_compile()\fP, and the second must contain one or more of the following
bits:
The availability of JIT support can be tested by calling
\fBpcre2_jit_compile()\fP with a single option PCRE2_JIT_TEST_ALLOC (the
code argument is ignored, so a NULL value is accepted). Such a call
returns zero if JIT is available and has a working allocator. Otherwise
it returns PCRE2_ERROR_NOMEMORY if JIT is available but cannot allocate
executable memory, or PCRE2_ERROR_JIT_UNSUPPORTED if JIT support is not
compiled.
.P
Otherwise, the first argument must be a pointer that was returned by a
successful call to \fBpcre2_compile()\fP, and the second must contain one or
more of the following bits:
.sp
PCRE2_JIT_COMPLETE compile code for full matching
PCRE2_JIT_PARTIAL_SOFT compile code for soft partial matching
@@ -34,11 +42,13 @@ There is also an obsolete option called PCRE2_JIT_INVALID_UTF, which has been
superseded by the \fBpcre2_compile()\fP option PCRE2_MATCH_INVALID_UTF. The old
option is deprecated and may be removed in the future.
.P
The yield of the function is 0 for success, or a negative error code otherwise.
In particular, PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or
if an unknown bit is set in \fIoptions\fP. The function can also return
PCRE2_ERROR_NOMEMORY if JIT is unable to allocate executable memory for the
compiler, even if it was because of a system security restriction.
The yield of the function when called with any of the three options above is 0
for success, or a negative error code otherwise. In particular,
PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or if an unknown
bit is set in \fIoptions\fP. The function can also return PCRE2_ERROR_NOMEMORY
if JIT is unable to allocate executable memory for the compiler, even if it was
because of a system security restriction. In a few cases, the function may
return with PCRE2_ERROR_JIT_UNSUPPORTED for unsupported features.
.P
There is a complete description of the PCRE2 native API in the
.\" HREF

Some files were not shown because too many files have changed in this diff Show More