mirror of
https://github.com/vxcontrol/lualibs-lrexlib.git
synced 2026-07-01 09:25:08 -04:00
Minor changes in code, PCRE2 additions to the manual.
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
License of Lrexlib release
|
||||
--------------------------
|
||||
|
||||
Copyright (C) Reuben Thomas 2000-2015
|
||||
Copyright (C) Shmuel Zeigerman 2004-2015
|
||||
Copyright (C) Reuben Thomas 2000-2017
|
||||
Copyright (C) Shmuel Zeigerman 2004-2017
|
||||
|
||||
Permission is hereby granted, free of charge, to any person
|
||||
obtaining a copy of this software and associated
|
||||
|
||||
+4
-3
@@ -5,16 +5,17 @@ Lrexlib
|
||||
| and Shmuel Zeigerman (shmuz@013net.net)
|
||||
|
||||
**Lrexlib** provides bindings of six regular expression library APIs
|
||||
(POSIX_, PCRE_, GNU_, TRE_ and Oniguruma_) to Lua_ >= 5.1.
|
||||
(POSIX_, PCRE_, PCRE2_, GNU_, TRE_ and Oniguruma_) to Lua_ >= 5.1.
|
||||
The bindings for TRE and Oniguruma are not currently complete.
|
||||
|
||||
**Lrexlib** is copyright Reuben Thomas 2000-2015 and copyright Shmuel
|
||||
Zeigerman 2004-2015, and is released under the same license as Lua,
|
||||
**Lrexlib** is copyright Reuben Thomas 2000-2017 and copyright Shmuel
|
||||
Zeigerman 2004-2017, and is released under the same license as Lua,
|
||||
the MIT_ license (otherwise known as the revised BSD license). There
|
||||
is no warranty.
|
||||
|
||||
.. _POSIX: http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html
|
||||
.. _PCRE: http://www.pcre.org/pcre.txt
|
||||
.. _PCRE2: http://www.pcre.org/pcre2.txt
|
||||
.. _GNU: ftp://ftp.gnu.org/old-gnu/regex/
|
||||
.. _Oniguruma: https://github.com/kkos/oniguruma
|
||||
.. _TRE: http://laurikari.net/tre/documentation/
|
||||
|
||||
+171
-35
@@ -8,8 +8,8 @@ Lrexlib Reference Manual
|
||||
------------------------------------------------------------
|
||||
|
||||
**Lrexlib** builds into shared libraries called by default *rex_posix.so*,
|
||||
*rex_pcre.so*, *rex_gnu.so*, *rex_tre.so* and *rex_onig.so*, which can be used with
|
||||
*require*.
|
||||
*rex_pcre.so*, *rex_pcre2.so*, *rex_gnu.so*, *rex_tre.so* and *rex_onig.so*,
|
||||
which can be used with *require*.
|
||||
|
||||
------------------------------------------------------------
|
||||
|
||||
@@ -28,8 +28,9 @@ Notes
|
||||
MyFunc (arg1, arg2, [arg3], [arg4])
|
||||
|
||||
3. Throughout this document (unless it causes ambiguity), the identifier **rex**
|
||||
is used in place of either *rex_posix*, *rex_pcre*, *rex_gnu*, *rex_onig* or
|
||||
*rex_tre*, which are the default namespaces for the corresponding libraries.
|
||||
is used in place of either *rex_posix*, *rex_pcre*, *rex_pcre2*, *rex_gnu*,
|
||||
*rex_onig* or *rex_tre*, which are the default namespaces for the corresponding
|
||||
libraries.
|
||||
|
||||
4. All functions that take a regular expression pattern as an argument will
|
||||
generate an error if that pattern is found invalid by the regex library.
|
||||
@@ -53,35 +54,34 @@ Notes
|
||||
7. The default value for *compilation flags* (*cf*) that Lrexlib uses when
|
||||
the parameter is not supplied or ``nil`` is:
|
||||
|
||||
* REG_EXTENDED for POSIX and TRE
|
||||
* 0 for PCRE
|
||||
* ONIG_OPTION_NONE for Oniguruma
|
||||
* SYNTAX_POSIX_EXTENDED for GNU
|
||||
* ``REG_EXTENDED`` for POSIX and TRE
|
||||
* ``0`` for PCRE and PCRE2
|
||||
* ``ONIG_OPTION_NONE`` for Oniguruma
|
||||
* ``SYNTAX_POSIX_EXTENDED`` for GNU
|
||||
|
||||
**PCRE**, **Oniguruma**: *cf* may also be supplied as a string, whose
|
||||
characters stand for compilation flags. Combinations of the following
|
||||
**PCRE**, **PCRE2**, **Oniguruma**: *cf* may also be supplied as a string,
|
||||
whose characters stand for compilation flags. Combinations of the following
|
||||
characters (case sensitive) are supported:
|
||||
|
||||
=============== ================== ==============================
|
||||
**Character** **PCRE flag** **Oniguruma flag**
|
||||
=============== ================== ==============================
|
||||
**i** PCRE_CASELESS ONIG_OPTION_IGNORECASE
|
||||
**m** PCRE_MULTILINE ONIG_OPTION_NEGATE_SINGLELINE
|
||||
**s** PCRE_DOTALL ONIG_OPTION_MULTILINE
|
||||
**x** PCRE_EXTENDED ONIG_OPTION_EXTEND
|
||||
**U** PCRE_UNGREEDY n/a
|
||||
**X** PCRE_EXTRA n/a
|
||||
=============== ================== ==============================
|
||||
=============== ================== ================== ==============================
|
||||
**Character** **PCRE flag** **PCRE2 flag** **Oniguruma flag**
|
||||
=============== ================== ================== ==============================
|
||||
**i** PCRE_CASELESS PCRE2_CASELESS ONIG_OPTION_IGNORECASE
|
||||
**m** PCRE_MULTILINE PCRE2_MULTILINE ONIG_OPTION_NEGATE_SINGLELINE
|
||||
**s** PCRE_DOTALL PCRE2_DOTALL ONIG_OPTION_MULTILINE
|
||||
**x** PCRE_EXTENDED PCRE2_EXTENDED ONIG_OPTION_EXTEND
|
||||
**U** PCRE_UNGREEDY PCRE2_UNGREEDY n/a
|
||||
**X** PCRE_EXTRA n/a n/a
|
||||
=============== ================== ================== ==============================
|
||||
|
||||
.. _ef:
|
||||
|
||||
8. The default value for *execution flags* (*ef*) that Lrexlib uses when
|
||||
the parameter is not supplied or ``nil``, is:
|
||||
|
||||
* 0 for standard POSIX regex library
|
||||
* REG_STARTEND for those POSIX regex libraries that support it,
|
||||
e.g. Spencer's.
|
||||
* 0 for PCRE, Oniguruma and TRE
|
||||
* ``0`` for standard POSIX regex library
|
||||
* ``REG_STARTEND`` for those POSIX regex libraries that support it, e.g. Spencer's
|
||||
* ``0`` for PCRE, PCRE2, Oniguruma and TRE
|
||||
|
||||
.. _larg:
|
||||
|
||||
@@ -432,8 +432,8 @@ The keys in the `tb` table are formed from the names of the corresponding
|
||||
constants in the used library. They are formed as follows:
|
||||
|
||||
* **POSIX**, **TRE**: prefix REG\_ is omitted, e.g. REG_ICASE becomes ``"ICASE"``.
|
||||
* **PCRE:** prefix PCRE\_ is omitted, e.g. PCRE_CASELESS becomes
|
||||
``"CASELESS"``.
|
||||
* **PCRE:** prefix PCRE\_ is omitted, e.g. PCRE_CASELESS becomes ``"CASELESS"``.
|
||||
* **PCRE2:** prefix PCRE2\_ is omitted, e.g. PCRE2_CASELESS becomes ``"CASELESS"``.
|
||||
* **Oniguruma:** names of constants are converted to strings with no alteration,
|
||||
but for ONIG_OPTION_xxx constants, alias strings are created additionally,
|
||||
e.g., the value of ONIG_OPTION_IGNORECASE constant becomes accessible via
|
||||
@@ -504,9 +504,9 @@ string *subj*, starting from offset *init*, subject to execution flags *ef*.
|
||||
result, in a table. This table contains ``false`` in the positions where the
|
||||
corresponding sub-pattern did not participate in the match.
|
||||
|
||||
1. **PCRE**, **Oniguruma**: if *named subpatterns* are used then the table
|
||||
also contains substring matches keyed by their correspondent subpattern
|
||||
names (strings).
|
||||
1. **PCRE**, **PCRE2**, **Oniguruma**: if *named subpatterns* are used then
|
||||
the table also contains substring matches keyed by their corresponding
|
||||
subpattern names (strings).
|
||||
|
||||
**Returns on failure:**
|
||||
1. ``nil``
|
||||
@@ -542,9 +542,9 @@ string *subj*, starting from offset *init*, subject to execution flags *ef*.
|
||||
positions where the corresponding sub-pattern did not participate in the
|
||||
match.
|
||||
|
||||
1. **PCRE**, **Oniguruma**: if *named subpatterns* are used then the table
|
||||
also contains substring matches keyed by their correspondent subpattern
|
||||
names (strings).
|
||||
1. **PCRE**, **PCRE2**, **Oniguruma**: if *named subpatterns* are used then
|
||||
the table also contains substring matches keyed by their corresponding
|
||||
subpattern names (strings).
|
||||
|
||||
**Returns on failure:**
|
||||
1. ``nil``
|
||||
@@ -565,10 +565,12 @@ new
|
||||
:funcdef:`rex.new (patt, [cf], [lo])`
|
||||
|
||||
The locale (*lo*) can be either a string (e.g., "French_France.1252"), or a
|
||||
userdata obtained from a call to maketables_. The default value, used when the
|
||||
parameter is not supplied or ``nil``, is the built-in PCRE set of character
|
||||
userdata obtained from a call to maketables__. The default value, used when
|
||||
the parameter is not supplied or ``nil``, is the built-in PCRE set of character
|
||||
tables.
|
||||
|
||||
__ maketables_pcre_
|
||||
|
||||
------------------------------------------------------------
|
||||
|
||||
fullinfo
|
||||
@@ -584,6 +586,8 @@ The keys are strings formed in the following way:
|
||||
|
||||
------------------------------------------------------------
|
||||
|
||||
.. _dfa_exec_pcre:
|
||||
|
||||
dfa_exec
|
||||
--------
|
||||
|
||||
@@ -627,6 +631,8 @@ string *subj*, using a DFA matching algorithm.
|
||||
|
||||
------------------------------------------------------------
|
||||
|
||||
.. _maketables_pcre:
|
||||
|
||||
maketables
|
||||
----------
|
||||
|
||||
@@ -668,6 +674,134 @@ and its release date.
|
||||
|
||||
------------------------------------------------------------
|
||||
|
||||
PCRE2-only functions and methods
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
new
|
||||
---
|
||||
|
||||
:funcdef:`rex.new (patt, [cf], [lo])`
|
||||
|
||||
The locale (*lo*) can be either a string (e.g., "French_France.1252"), or a
|
||||
userdata obtained from a call to maketables__. The default value, used when
|
||||
the parameter is not supplied or ``nil``, is the built-in PCRE2 set of character
|
||||
tables.
|
||||
|
||||
__ maketables_pcre2_
|
||||
|
||||
------------------------------------------------------------
|
||||
|
||||
patterninfo
|
||||
-----------
|
||||
|
||||
[See *pcre2_pattern_info* in the PCRE2 docs.]
|
||||
|
||||
:funcdef:`r:patterninfo ()`
|
||||
|
||||
This function returns a table containing information about the compiled pattern.
|
||||
The keys are strings formed in the following way:
|
||||
``PCRE2_INFO_CAPTURECOUNT`` -> ``"CAPTURECOUNT"``. The values are numbers.
|
||||
|
||||
------------------------------------------------------------
|
||||
|
||||
dfa_exec
|
||||
--------
|
||||
|
||||
[See *pcre2_dfa_match* in the PCRE2 docs.]
|
||||
|
||||
:funcdef:`r:dfa_exec (subj, [init], [ef], [ovecsize], [wscount])`
|
||||
|
||||
The method matches a compiled regular expression *r* against a given subject
|
||||
string *subj*, using a DFA matching algorithm.
|
||||
|
||||
+----------+-------------------------------------+--------+-------------+
|
||||
|Parameter | Description | Type |Default Value|
|
||||
+==========+=====================================+========+=============+
|
||||
| r |regex object produced by new |userdata| n/a |
|
||||
+----------+-------------------------------------+--------+-------------+
|
||||
| subj |subject | string | n/a |
|
||||
+----------+-------------------------------------+--------+-------------+
|
||||
| [init] |start offset in the subject | number | 1 |
|
||||
| |(can be negative) | | |
|
||||
+----------+-------------------------------------+--------+-------------+
|
||||
| [ef] |execution flags (bitwise OR) | number | ef_ |
|
||||
+----------+-------------------------------------+--------+-------------+
|
||||
|[ovecsize]|size of the array for result offsets | number | 100 |
|
||||
+----------+-------------------------------------+--------+-------------+
|
||||
|[wscount] |number of elements in the working | number | 50 |
|
||||
| |space array | | |
|
||||
+----------+-------------------------------------+--------+-------------+
|
||||
|
||||
**Returns on success (either full or partial match):**
|
||||
1. The start point of the matches found (a number).
|
||||
2. A table containing the end points of the matches found, the longer matches
|
||||
first.
|
||||
3. The return value of the underlying *pcre2_dfa_match* call (a number).
|
||||
|
||||
**Returns on failure (no match):**
|
||||
1. ``nil``
|
||||
|
||||
**Example:**
|
||||
If there are 3 matches found starting at offset 10 and ending at offsets 15, 20
|
||||
and 25 then the function returns the following: 10, { 25,20,15 }, 3.
|
||||
|
||||
------------------------------------------------------------
|
||||
|
||||
jit_compile
|
||||
-----------
|
||||
|
||||
[See *pcre2_jit_compile* in the PCRE2 docs.]
|
||||
|
||||
:funcdef:`r:jit_compile ([options])`
|
||||
|
||||
Parameter *options* is a number (a bitwise OR of separate options;
|
||||
it defaults to ``PCRE2_JIT_COMPLETE``).
|
||||
|
||||
The method returns ``true`` on success, or ``false`` plus an error message string on failure.
|
||||
|
||||
------------------------------------------------------------
|
||||
|
||||
.. _maketables_pcre2:
|
||||
|
||||
maketables
|
||||
----------
|
||||
|
||||
[See *pcre2_maketables* in the PCRE2 docs.]
|
||||
|
||||
:funcdef:`rex_pcre2.maketables ()`
|
||||
|
||||
Creates a set of character tables corresponding to the current locale and
|
||||
returns it as a userdata. The returned value can be passed to any Lrexlib
|
||||
function accepting the *locale* parameter.
|
||||
|
||||
------------------------------------------------------------
|
||||
|
||||
config
|
||||
------
|
||||
|
||||
[See *pcre2_config* in the PCRE2 docs.]
|
||||
|
||||
:funcdef:`rex_pcre2.config ([tb])`
|
||||
|
||||
This function returns a table containing the values of the configuration
|
||||
parameters used at PCRE2 library build-time. Those parameters (numbers) are
|
||||
keyed by their names (strings). If the table argument *tb* is supplied then it
|
||||
is used as the output table, else a new table is created.
|
||||
|
||||
------------------------------------------------------------
|
||||
|
||||
version
|
||||
-------
|
||||
|
||||
[See *pcre2_config(PCRE2_CONFIG_VERSION)* in the PCRE2 docs.]
|
||||
|
||||
:funcdef:`rex_pcre2.version ()`
|
||||
|
||||
This function returns a string containing the version of the used PCRE2 library
|
||||
and its release date.
|
||||
|
||||
------------------------------------------------------------
|
||||
|
||||
GNU-only functions and methods
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
@@ -919,10 +1053,12 @@ Incompatibilities with previous versions
|
||||
|
||||
**Incompatibilities between versions 2.1 and 2.0:**
|
||||
|
||||
1. match_, find_, tfind_, exec_, dfa_exec_: only one value (a ``nil``) is
|
||||
1. match_, find_, tfind_, exec_, dfa_exec__: only one value (a ``nil``) is
|
||||
returned when the subject does not match the pattern. Any other failure
|
||||
generates an error.
|
||||
|
||||
__ dfa_exec_pcre_
|
||||
|
||||
**Incompatibilities between versions 2.0 and 1.19:**
|
||||
|
||||
1. Lua 5.1 is required
|
||||
|
||||
+3
-2
@@ -6,8 +6,8 @@
|
||||
#include <locale.h>
|
||||
#include <ctype.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <pcre2.h>
|
||||
|
||||
#include "lua.h"
|
||||
#include "lauxlib.h"
|
||||
#include "../common.h"
|
||||
@@ -202,9 +202,10 @@ static int compile_regex (lua_State *L, const TArgComp *argC, TPcre2 **pud) {
|
||||
TPcre2 *ud;
|
||||
|
||||
ud = (TPcre2*)lua_newuserdata (L, sizeof (TPcre2));
|
||||
memset (ud, 0, sizeof (TPcre2)); /* initialize all members to 0 */
|
||||
lua_pushvalue (L, ALG_ENVIRONINDEX);
|
||||
lua_setmetatable (L, -2);
|
||||
memset (ud, 0, sizeof (TPcre2)); /* initialize all members to 0 */
|
||||
|
||||
ud->ccontext = pcre2_compile_context_create(NULL);
|
||||
if (ud->ccontext == NULL)
|
||||
return luaL_error (L, "malloc failed");
|
||||
|
||||
Reference in New Issue
Block a user