mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-04-05 09:11:55 +00:00
Add regular expression matching support, based on OpenBSD regexec()/regcomp()
implementation. llvm-svn: 80493
This commit is contained in:
parent
dd71643cf1
commit
21897521c1
@ -66,3 +66,4 @@ Autoconf llvm/autoconf
|
|||||||
llvm/projects/sample/autoconf
|
llvm/projects/sample/autoconf
|
||||||
CellSPU backend llvm/lib/Target/CellSPU/README.txt
|
CellSPU backend llvm/lib/Target/CellSPU/README.txt
|
||||||
Google Test llvm/utils/unittest/googletest
|
Google Test llvm/utils/unittest/googletest
|
||||||
|
OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex}
|
||||||
|
756
docs/re_format.7
Normal file
756
docs/re_format.7
Normal file
@ -0,0 +1,756 @@
|
|||||||
|
.\" $OpenBSD: re_format.7,v 1.14 2007/05/31 19:19:30 jmc Exp $
|
||||||
|
.\"
|
||||||
|
.\" Copyright (c) 1997, Phillip F Knaack. All rights reserved.
|
||||||
|
.\"
|
||||||
|
.\" Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||||
|
.\" Copyright (c) 1992, 1993, 1994
|
||||||
|
.\" The Regents of the University of California. All rights reserved.
|
||||||
|
.\"
|
||||||
|
.\" This code is derived from software contributed to Berkeley by
|
||||||
|
.\" Henry Spencer.
|
||||||
|
.\"
|
||||||
|
.\" Redistribution and use in source and binary forms, with or without
|
||||||
|
.\" modification, are permitted provided that the following conditions
|
||||||
|
.\" are met:
|
||||||
|
.\" 1. Redistributions of source code must retain the above copyright
|
||||||
|
.\" notice, this list of conditions and the following disclaimer.
|
||||||
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
.\" notice, this list of conditions and the following disclaimer in the
|
||||||
|
.\" documentation and/or other materials provided with the distribution.
|
||||||
|
.\" 3. Neither the name of the University nor the names of its contributors
|
||||||
|
.\" may be used to endorse or promote products derived from this software
|
||||||
|
.\" without specific prior written permission.
|
||||||
|
.\"
|
||||||
|
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
.\" SUCH DAMAGE.
|
||||||
|
.\"
|
||||||
|
.\" @(#)re_format.7 8.3 (Berkeley) 3/20/94
|
||||||
|
.\"
|
||||||
|
.Dd $Mdocdate: May 31 2007 $
|
||||||
|
.Dt RE_FORMAT 7
|
||||||
|
.Os
|
||||||
|
.Sh NAME
|
||||||
|
.Nm re_format
|
||||||
|
.Nd POSIX regular expressions
|
||||||
|
.Sh DESCRIPTION
|
||||||
|
Regular expressions (REs),
|
||||||
|
as defined in
|
||||||
|
.St -p1003.1-2004 ,
|
||||||
|
come in two forms:
|
||||||
|
basic regular expressions
|
||||||
|
(BREs)
|
||||||
|
and extended regular expressions
|
||||||
|
(EREs).
|
||||||
|
Both forms of regular expressions are supported
|
||||||
|
by the interfaces described in
|
||||||
|
.Xr regex 3 .
|
||||||
|
Applications dealing with regular expressions
|
||||||
|
may use one or the other form
|
||||||
|
(or indeed both).
|
||||||
|
For example,
|
||||||
|
.Xr ed 1
|
||||||
|
uses BREs,
|
||||||
|
whilst
|
||||||
|
.Xr egrep 1
|
||||||
|
talks EREs.
|
||||||
|
Consult the manual page for the specific application to find out which
|
||||||
|
it uses.
|
||||||
|
.Pp
|
||||||
|
POSIX leaves some aspects of RE syntax and semantics open;
|
||||||
|
.Sq **
|
||||||
|
marks decisions on these aspects that
|
||||||
|
may not be fully portable to other POSIX implementations.
|
||||||
|
.Pp
|
||||||
|
This manual page first describes regular expressions in general,
|
||||||
|
specifically extended regular expressions,
|
||||||
|
and then discusses differences between them and basic regular expressions.
|
||||||
|
.Sh EXTENDED REGULAR EXPRESSIONS
|
||||||
|
An ERE is one** or more non-empty**
|
||||||
|
.Em branches ,
|
||||||
|
separated by
|
||||||
|
.Sq \*(Ba .
|
||||||
|
It matches anything that matches one of the branches.
|
||||||
|
.Pp
|
||||||
|
A branch is one** or more
|
||||||
|
.Em pieces ,
|
||||||
|
concatenated.
|
||||||
|
It matches a match for the first, followed by a match for the second, etc.
|
||||||
|
.Pp
|
||||||
|
A piece is an
|
||||||
|
.Em atom
|
||||||
|
possibly followed by a single**
|
||||||
|
.Sq * ,
|
||||||
|
.Sq + ,
|
||||||
|
.Sq ?\& ,
|
||||||
|
or
|
||||||
|
.Em bound .
|
||||||
|
An atom followed by
|
||||||
|
.Sq *
|
||||||
|
matches a sequence of 0 or more matches of the atom.
|
||||||
|
An atom followed by
|
||||||
|
.Sq +
|
||||||
|
matches a sequence of 1 or more matches of the atom.
|
||||||
|
An atom followed by
|
||||||
|
.Sq ?\&
|
||||||
|
matches a sequence of 0 or 1 matches of the atom.
|
||||||
|
.Pp
|
||||||
|
A bound is
|
||||||
|
.Sq {
|
||||||
|
followed by an unsigned decimal integer,
|
||||||
|
possibly followed by
|
||||||
|
.Sq ,\&
|
||||||
|
possibly followed by another unsigned decimal integer,
|
||||||
|
always followed by
|
||||||
|
.Sq } .
|
||||||
|
The integers must lie between 0 and
|
||||||
|
.Dv RE_DUP_MAX
|
||||||
|
(255**) inclusive,
|
||||||
|
and if there are two of them, the first may not exceed the second.
|
||||||
|
An atom followed by a bound containing one integer
|
||||||
|
.Ar i
|
||||||
|
and no comma matches
|
||||||
|
a sequence of exactly
|
||||||
|
.Ar i
|
||||||
|
matches of the atom.
|
||||||
|
An atom followed by a bound
|
||||||
|
containing one integer
|
||||||
|
.Ar i
|
||||||
|
and a comma matches
|
||||||
|
a sequence of
|
||||||
|
.Ar i
|
||||||
|
or more matches of the atom.
|
||||||
|
An atom followed by a bound
|
||||||
|
containing two integers
|
||||||
|
.Ar i
|
||||||
|
and
|
||||||
|
.Ar j
|
||||||
|
matches a sequence of
|
||||||
|
.Ar i
|
||||||
|
through
|
||||||
|
.Ar j
|
||||||
|
(inclusive) matches of the atom.
|
||||||
|
.Pp
|
||||||
|
An atom is a regular expression enclosed in
|
||||||
|
.Sq ()
|
||||||
|
(matching a part of the regular expression),
|
||||||
|
an empty set of
|
||||||
|
.Sq ()
|
||||||
|
(matching the null string)**,
|
||||||
|
a
|
||||||
|
.Em bracket expression
|
||||||
|
(see below),
|
||||||
|
.Sq .\&
|
||||||
|
(matching any single character),
|
||||||
|
.Sq ^
|
||||||
|
(matching the null string at the beginning of a line),
|
||||||
|
.Sq $
|
||||||
|
(matching the null string at the end of a line),
|
||||||
|
a
|
||||||
|
.Sq \e
|
||||||
|
followed by one of the characters
|
||||||
|
.Sq ^.[$()|*+?{\e
|
||||||
|
(matching that character taken as an ordinary character),
|
||||||
|
a
|
||||||
|
.Sq \e
|
||||||
|
followed by any other character**
|
||||||
|
(matching that character taken as an ordinary character,
|
||||||
|
as if the
|
||||||
|
.Sq \e
|
||||||
|
had not been present**),
|
||||||
|
or a single character with no other significance (matching that character).
|
||||||
|
A
|
||||||
|
.Sq {
|
||||||
|
followed by a character other than a digit is an ordinary character,
|
||||||
|
not the beginning of a bound**.
|
||||||
|
It is illegal to end an RE with
|
||||||
|
.Sq \e .
|
||||||
|
.Pp
|
||||||
|
A bracket expression is a list of characters enclosed in
|
||||||
|
.Sq [] .
|
||||||
|
It normally matches any single character from the list (but see below).
|
||||||
|
If the list begins with
|
||||||
|
.Sq ^ ,
|
||||||
|
it matches any single character
|
||||||
|
.Em not
|
||||||
|
from the rest of the list
|
||||||
|
(but see below).
|
||||||
|
If two characters in the list are separated by
|
||||||
|
.Sq - ,
|
||||||
|
this is shorthand for the full
|
||||||
|
.Em range
|
||||||
|
of characters between those two (inclusive) in the
|
||||||
|
collating sequence, e.g.\&
|
||||||
|
.Sq [0-9]
|
||||||
|
in ASCII matches any decimal digit.
|
||||||
|
It is illegal** for two ranges to share an endpoint, e.g.\&
|
||||||
|
.Sq a-c-e .
|
||||||
|
Ranges are very collating-sequence-dependent,
|
||||||
|
and portable programs should avoid relying on them.
|
||||||
|
.Pp
|
||||||
|
To include a literal
|
||||||
|
.Sq ]\&
|
||||||
|
in the list, make it the first character
|
||||||
|
(following a possible
|
||||||
|
.Sq ^ ) .
|
||||||
|
To include a literal
|
||||||
|
.Sq - ,
|
||||||
|
make it the first or last character,
|
||||||
|
or the second endpoint of a range.
|
||||||
|
To use a literal
|
||||||
|
.Sq -
|
||||||
|
as the first endpoint of a range,
|
||||||
|
enclose it in
|
||||||
|
.Sq [.
|
||||||
|
and
|
||||||
|
.Sq .]
|
||||||
|
to make it a collating element (see below).
|
||||||
|
With the exception of these and some combinations using
|
||||||
|
.Sq [
|
||||||
|
(see next paragraphs),
|
||||||
|
all other special characters, including
|
||||||
|
.Sq \e ,
|
||||||
|
lose their special significance within a bracket expression.
|
||||||
|
.Pp
|
||||||
|
Within a bracket expression, a collating element
|
||||||
|
(a character,
|
||||||
|
a multi-character sequence that collates as if it were a single character,
|
||||||
|
or a collating-sequence name for either)
|
||||||
|
enclosed in
|
||||||
|
.Sq [.
|
||||||
|
and
|
||||||
|
.Sq .]
|
||||||
|
stands for the sequence of characters of that collating element.
|
||||||
|
The sequence is a single element of the bracket expression's list.
|
||||||
|
A bracket expression containing a multi-character collating element
|
||||||
|
can thus match more than one character,
|
||||||
|
e.g. if the collating sequence includes a
|
||||||
|
.Sq ch
|
||||||
|
collating element,
|
||||||
|
then the RE
|
||||||
|
.Sq [[.ch.]]*c
|
||||||
|
matches the first five characters of
|
||||||
|
.Sq chchcc .
|
||||||
|
.Pp
|
||||||
|
Within a bracket expression, a collating element enclosed in
|
||||||
|
.Sq [=
|
||||||
|
and
|
||||||
|
.Sq =]
|
||||||
|
is an equivalence class, standing for the sequences of characters
|
||||||
|
of all collating elements equivalent to that one, including itself.
|
||||||
|
(If there are no other equivalent collating elements,
|
||||||
|
the treatment is as if the enclosing delimiters were
|
||||||
|
.Sq [.
|
||||||
|
and
|
||||||
|
.Sq .] . )
|
||||||
|
For example, if
|
||||||
|
.Sq x
|
||||||
|
and
|
||||||
|
.Sq y
|
||||||
|
are the members of an equivalence class,
|
||||||
|
then
|
||||||
|
.Sq [[=x=]] ,
|
||||||
|
.Sq [[=y=]] ,
|
||||||
|
and
|
||||||
|
.Sq [xy]
|
||||||
|
are all synonymous.
|
||||||
|
An equivalence class may not** be an endpoint of a range.
|
||||||
|
.Pp
|
||||||
|
Within a bracket expression, the name of a
|
||||||
|
.Em character class
|
||||||
|
enclosed
|
||||||
|
in
|
||||||
|
.Sq [:
|
||||||
|
and
|
||||||
|
.Sq :]
|
||||||
|
stands for the list of all characters belonging to that class.
|
||||||
|
Standard character class names are:
|
||||||
|
.Bd -literal -offset indent
|
||||||
|
alnum digit punct
|
||||||
|
alpha graph space
|
||||||
|
blank lower upper
|
||||||
|
cntrl print xdigit
|
||||||
|
.Ed
|
||||||
|
.Pp
|
||||||
|
These stand for the character classes defined in
|
||||||
|
.Xr ctype 3 .
|
||||||
|
A locale may provide others.
|
||||||
|
A character class may not be used as an endpoint of a range.
|
||||||
|
.Pp
|
||||||
|
There are two special cases** of bracket expressions:
|
||||||
|
the bracket expressions
|
||||||
|
.Sq [[:<:]]
|
||||||
|
and
|
||||||
|
.Sq [[:>:]]
|
||||||
|
match the null string at the beginning and end of a word, respectively.
|
||||||
|
A word is defined as a sequence of
|
||||||
|
characters starting and ending with a word character
|
||||||
|
which is neither preceded nor followed by
|
||||||
|
word characters.
|
||||||
|
A word character is an
|
||||||
|
.Em alnum
|
||||||
|
character (as defined by
|
||||||
|
.Xr ctype 3 )
|
||||||
|
or an underscore.
|
||||||
|
This is an extension,
|
||||||
|
compatible with but not specified by POSIX,
|
||||||
|
and should be used with
|
||||||
|
caution in software intended to be portable to other systems.
|
||||||
|
.Pp
|
||||||
|
In the event that an RE could match more than one substring of a given
|
||||||
|
string,
|
||||||
|
the RE matches the one starting earliest in the string.
|
||||||
|
If the RE could match more than one substring starting at that point,
|
||||||
|
it matches the longest.
|
||||||
|
Subexpressions also match the longest possible substrings, subject to
|
||||||
|
the constraint that the whole match be as long as possible,
|
||||||
|
with subexpressions starting earlier in the RE taking priority over
|
||||||
|
ones starting later.
|
||||||
|
Note that higher-level subexpressions thus take priority over
|
||||||
|
their lower-level component subexpressions.
|
||||||
|
.Pp
|
||||||
|
Match lengths are measured in characters, not collating elements.
|
||||||
|
A null string is considered longer than no match at all.
|
||||||
|
For example,
|
||||||
|
.Sq bb*
|
||||||
|
matches the three middle characters of
|
||||||
|
.Sq abbbc ;
|
||||||
|
.Sq (wee|week)(knights|nights)
|
||||||
|
matches all ten characters of
|
||||||
|
.Sq weeknights ;
|
||||||
|
when
|
||||||
|
.Sq (.*).*
|
||||||
|
is matched against
|
||||||
|
.Sq abc ,
|
||||||
|
the parenthesized subexpression matches all three characters;
|
||||||
|
and when
|
||||||
|
.Sq (a*)*
|
||||||
|
is matched against
|
||||||
|
.Sq bc ,
|
||||||
|
both the whole RE and the parenthesized subexpression match the null string.
|
||||||
|
.Pp
|
||||||
|
If case-independent matching is specified,
|
||||||
|
the effect is much as if all case distinctions had vanished from the
|
||||||
|
alphabet.
|
||||||
|
When an alphabetic that exists in multiple cases appears as an
|
||||||
|
ordinary character outside a bracket expression, it is effectively
|
||||||
|
transformed into a bracket expression containing both cases,
|
||||||
|
e.g.\&
|
||||||
|
.Sq x
|
||||||
|
becomes
|
||||||
|
.Sq [xX] .
|
||||||
|
When it appears inside a bracket expression,
|
||||||
|
all case counterparts of it are added to the bracket expression,
|
||||||
|
so that, for example,
|
||||||
|
.Sq [x]
|
||||||
|
becomes
|
||||||
|
.Sq [xX]
|
||||||
|
and
|
||||||
|
.Sq [^x]
|
||||||
|
becomes
|
||||||
|
.Sq [^xX] .
|
||||||
|
.Pp
|
||||||
|
No particular limit is imposed on the length of REs**.
|
||||||
|
Programs intended to be portable should not employ REs longer
|
||||||
|
than 256 bytes,
|
||||||
|
as an implementation can refuse to accept such REs and remain
|
||||||
|
POSIX-compliant.
|
||||||
|
.Pp
|
||||||
|
The following is a list of extended regular expressions:
|
||||||
|
.Bl -tag -width Ds
|
||||||
|
.It Ar c
|
||||||
|
Any character
|
||||||
|
.Ar c
|
||||||
|
not listed below matches itself.
|
||||||
|
.It \e Ns Ar c
|
||||||
|
Any backslash-escaped character
|
||||||
|
.Ar c
|
||||||
|
matches itself.
|
||||||
|
.It \&.
|
||||||
|
Matches any single character that is not a newline
|
||||||
|
.Pq Sq \en .
|
||||||
|
.It Bq Ar char-class
|
||||||
|
Matches any single character in
|
||||||
|
.Ar char-class .
|
||||||
|
To include a
|
||||||
|
.Ql \&]
|
||||||
|
in
|
||||||
|
.Ar char-class ,
|
||||||
|
it must be the first character.
|
||||||
|
A range of characters may be specified by separating the end characters
|
||||||
|
of the range with a
|
||||||
|
.Ql - ;
|
||||||
|
e.g.\&
|
||||||
|
.Ar a-z
|
||||||
|
specifies the lower case characters.
|
||||||
|
The following literal expressions can also be used in
|
||||||
|
.Ar char-class
|
||||||
|
to specify sets of characters:
|
||||||
|
.Bd -unfilled -offset indent
|
||||||
|
[:alnum:] [:cntrl:] [:lower:] [:space:]
|
||||||
|
[:alpha:] [:digit:] [:print:] [:upper:]
|
||||||
|
[:blank:] [:graph:] [:punct:] [:xdigit:]
|
||||||
|
.Ed
|
||||||
|
.Pp
|
||||||
|
If
|
||||||
|
.Ql -
|
||||||
|
appears as the first or last character of
|
||||||
|
.Ar char-class ,
|
||||||
|
then it matches itself.
|
||||||
|
All other characters in
|
||||||
|
.Ar char-class
|
||||||
|
match themselves.
|
||||||
|
.Pp
|
||||||
|
Patterns in
|
||||||
|
.Ar char-class
|
||||||
|
of the form
|
||||||
|
.Eo [.
|
||||||
|
.Ar col-elm
|
||||||
|
.Ec .]\&
|
||||||
|
or
|
||||||
|
.Eo [=
|
||||||
|
.Ar col-elm
|
||||||
|
.Ec =]\& ,
|
||||||
|
where
|
||||||
|
.Ar col-elm
|
||||||
|
is a collating element, are interpreted according to
|
||||||
|
.Xr setlocale 3
|
||||||
|
.Pq not currently supported .
|
||||||
|
.It Bq ^ Ns Ar char-class
|
||||||
|
Matches any single character, other than newline, not in
|
||||||
|
.Ar char-class .
|
||||||
|
.Ar char-class
|
||||||
|
is defined as above.
|
||||||
|
.It ^
|
||||||
|
If
|
||||||
|
.Sq ^
|
||||||
|
is the first character of a regular expression, then it
|
||||||
|
anchors the regular expression to the beginning of a line.
|
||||||
|
Otherwise, it matches itself.
|
||||||
|
.It $
|
||||||
|
If
|
||||||
|
.Sq $
|
||||||
|
is the last character of a regular expression,
|
||||||
|
it anchors the regular expression to the end of a line.
|
||||||
|
Otherwise, it matches itself.
|
||||||
|
.It [[:<:]]
|
||||||
|
Anchors the single character regular expression or subexpression
|
||||||
|
immediately following it to the beginning of a word.
|
||||||
|
.It [[:>:]]
|
||||||
|
Anchors the single character regular expression or subexpression
|
||||||
|
immediately following it to the end of a word.
|
||||||
|
.It Pq Ar re
|
||||||
|
Defines a subexpression
|
||||||
|
.Ar re .
|
||||||
|
Any set of characters enclosed in parentheses
|
||||||
|
matches whatever the set of characters without parentheses matches
|
||||||
|
(that is a long-winded way of saying the constructs
|
||||||
|
.Sq (re)
|
||||||
|
and
|
||||||
|
.Sq re
|
||||||
|
match identically).
|
||||||
|
.It *
|
||||||
|
Matches the single character regular expression or subexpression
|
||||||
|
immediately preceding it zero or more times.
|
||||||
|
If
|
||||||
|
.Sq *
|
||||||
|
is the first character of a regular expression or subexpression,
|
||||||
|
then it matches itself.
|
||||||
|
The
|
||||||
|
.Sq *
|
||||||
|
operator sometimes yields unexpected results.
|
||||||
|
For example, the regular expression
|
||||||
|
.Ar b*
|
||||||
|
matches the beginning of the string
|
||||||
|
.Qq abbb
|
||||||
|
(as opposed to the substring
|
||||||
|
.Qq bbb ) ,
|
||||||
|
since a null match is the only leftmost match.
|
||||||
|
.It +
|
||||||
|
Matches the single character regular expression
|
||||||
|
or subexpression immediately preceding it
|
||||||
|
one or more times.
|
||||||
|
.It ?
|
||||||
|
Matches the single character regular expression
|
||||||
|
or subexpression immediately preceding it
|
||||||
|
0 or 1 times.
|
||||||
|
.Sm off
|
||||||
|
.It Xo
|
||||||
|
.Pf { Ar n , m No }\ \&
|
||||||
|
.Pf { Ar n , No }\ \&
|
||||||
|
.Pf { Ar n No }
|
||||||
|
.Xc
|
||||||
|
.Sm on
|
||||||
|
Matches the single character regular expression or subexpression
|
||||||
|
immediately preceding it at least
|
||||||
|
.Ar n
|
||||||
|
and at most
|
||||||
|
.Ar m
|
||||||
|
times.
|
||||||
|
If
|
||||||
|
.Ar m
|
||||||
|
is omitted, then it matches at least
|
||||||
|
.Ar n
|
||||||
|
times.
|
||||||
|
If the comma is also omitted, then it matches exactly
|
||||||
|
.Ar n
|
||||||
|
times.
|
||||||
|
.It \*(Ba
|
||||||
|
Used to separate patterns.
|
||||||
|
For example,
|
||||||
|
the pattern
|
||||||
|
.Sq cat\*(Badog
|
||||||
|
matches either
|
||||||
|
.Sq cat
|
||||||
|
or
|
||||||
|
.Sq dog .
|
||||||
|
.El
|
||||||
|
.Sh BASIC REGULAR EXPRESSIONS
|
||||||
|
Basic regular expressions differ in several respects:
|
||||||
|
.Bl -bullet -offset 3n
|
||||||
|
.It
|
||||||
|
.Sq \*(Ba ,
|
||||||
|
.Sq + ,
|
||||||
|
and
|
||||||
|
.Sq ?\&
|
||||||
|
are ordinary characters and there is no equivalent
|
||||||
|
for their functionality.
|
||||||
|
.It
|
||||||
|
The delimiters for bounds are
|
||||||
|
.Sq \e{
|
||||||
|
and
|
||||||
|
.Sq \e} ,
|
||||||
|
with
|
||||||
|
.Sq {
|
||||||
|
and
|
||||||
|
.Sq }
|
||||||
|
by themselves ordinary characters.
|
||||||
|
.It
|
||||||
|
The parentheses for nested subexpressions are
|
||||||
|
.Sq \e(
|
||||||
|
and
|
||||||
|
.Sq \e) ,
|
||||||
|
with
|
||||||
|
.Sq (
|
||||||
|
and
|
||||||
|
.Sq )\&
|
||||||
|
by themselves ordinary characters.
|
||||||
|
.It
|
||||||
|
.Sq ^
|
||||||
|
is an ordinary character except at the beginning of the
|
||||||
|
RE or** the beginning of a parenthesized subexpression.
|
||||||
|
.It
|
||||||
|
.Sq $
|
||||||
|
is an ordinary character except at the end of the
|
||||||
|
RE or** the end of a parenthesized subexpression.
|
||||||
|
.It
|
||||||
|
.Sq *
|
||||||
|
is an ordinary character if it appears at the beginning of the
|
||||||
|
RE or the beginning of a parenthesized subexpression
|
||||||
|
(after a possible leading
|
||||||
|
.Sq ^ ) .
|
||||||
|
.It
|
||||||
|
Finally, there is one new type of atom, a
|
||||||
|
.Em back-reference :
|
||||||
|
.Sq \e
|
||||||
|
followed by a non-zero decimal digit
|
||||||
|
.Ar d
|
||||||
|
matches the same sequence of characters matched by the
|
||||||
|
.Ar d Ns th
|
||||||
|
parenthesized subexpression
|
||||||
|
(numbering subexpressions by the positions of their opening parentheses,
|
||||||
|
left to right),
|
||||||
|
so that, for example,
|
||||||
|
.Sq \e([bc]\e)\e1
|
||||||
|
matches
|
||||||
|
.Sq bb\&
|
||||||
|
or
|
||||||
|
.Sq cc
|
||||||
|
but not
|
||||||
|
.Sq bc .
|
||||||
|
.El
|
||||||
|
.Pp
|
||||||
|
The following is a list of basic regular expressions:
|
||||||
|
.Bl -tag -width Ds
|
||||||
|
.It Ar c
|
||||||
|
Any character
|
||||||
|
.Ar c
|
||||||
|
not listed below matches itself.
|
||||||
|
.It \e Ns Ar c
|
||||||
|
Any backslash-escaped character
|
||||||
|
.Ar c ,
|
||||||
|
except for
|
||||||
|
.Sq { ,
|
||||||
|
.Sq } ,
|
||||||
|
.Sq \&( ,
|
||||||
|
and
|
||||||
|
.Sq \&) ,
|
||||||
|
matches itself.
|
||||||
|
.It \&.
|
||||||
|
Matches any single character that is not a newline
|
||||||
|
.Pq Sq \en .
|
||||||
|
.It Bq Ar char-class
|
||||||
|
Matches any single character in
|
||||||
|
.Ar char-class .
|
||||||
|
To include a
|
||||||
|
.Ql \&]
|
||||||
|
in
|
||||||
|
.Ar char-class ,
|
||||||
|
it must be the first character.
|
||||||
|
A range of characters may be specified by separating the end characters
|
||||||
|
of the range with a
|
||||||
|
.Ql - ;
|
||||||
|
e.g.\&
|
||||||
|
.Ar a-z
|
||||||
|
specifies the lower case characters.
|
||||||
|
The following literal expressions can also be used in
|
||||||
|
.Ar char-class
|
||||||
|
to specify sets of characters:
|
||||||
|
.Bd -unfilled -offset indent
|
||||||
|
[:alnum:] [:cntrl:] [:lower:] [:space:]
|
||||||
|
[:alpha:] [:digit:] [:print:] [:upper:]
|
||||||
|
[:blank:] [:graph:] [:punct:] [:xdigit:]
|
||||||
|
.Ed
|
||||||
|
.Pp
|
||||||
|
If
|
||||||
|
.Ql -
|
||||||
|
appears as the first or last character of
|
||||||
|
.Ar char-class ,
|
||||||
|
then it matches itself.
|
||||||
|
All other characters in
|
||||||
|
.Ar char-class
|
||||||
|
match themselves.
|
||||||
|
.Pp
|
||||||
|
Patterns in
|
||||||
|
.Ar char-class
|
||||||
|
of the form
|
||||||
|
.Eo [.
|
||||||
|
.Ar col-elm
|
||||||
|
.Ec .]\&
|
||||||
|
or
|
||||||
|
.Eo [=
|
||||||
|
.Ar col-elm
|
||||||
|
.Ec =]\& ,
|
||||||
|
where
|
||||||
|
.Ar col-elm
|
||||||
|
is a collating element, are interpreted according to
|
||||||
|
.Xr setlocale 3
|
||||||
|
.Pq not currently supported .
|
||||||
|
.It Bq ^ Ns Ar char-class
|
||||||
|
Matches any single character, other than newline, not in
|
||||||
|
.Ar char-class .
|
||||||
|
.Ar char-class
|
||||||
|
is defined as above.
|
||||||
|
.It ^
|
||||||
|
If
|
||||||
|
.Sq ^
|
||||||
|
is the first character of a regular expression, then it
|
||||||
|
anchors the regular expression to the beginning of a line.
|
||||||
|
Otherwise, it matches itself.
|
||||||
|
.It $
|
||||||
|
If
|
||||||
|
.Sq $
|
||||||
|
is the last character of a regular expression,
|
||||||
|
it anchors the regular expression to the end of a line.
|
||||||
|
Otherwise, it matches itself.
|
||||||
|
.It [[:<:]]
|
||||||
|
Anchors the single character regular expression or subexpression
|
||||||
|
immediately following it to the beginning of a word.
|
||||||
|
.It [[:>:]]
|
||||||
|
Anchors the single character regular expression or subexpression
|
||||||
|
immediately following it to the end of a word.
|
||||||
|
.It \e( Ns Ar re Ns \e)
|
||||||
|
Defines a subexpression
|
||||||
|
.Ar re .
|
||||||
|
Subexpressions may be nested.
|
||||||
|
A subsequent backreference of the form
|
||||||
|
.Pf \e Ns Ar n ,
|
||||||
|
where
|
||||||
|
.Ar n
|
||||||
|
is a number in the range [1,9], expands to the text matched by the
|
||||||
|
.Ar n Ns th
|
||||||
|
subexpression.
|
||||||
|
For example, the regular expression
|
||||||
|
.Ar \e(.*\e)\e1
|
||||||
|
matches any string consisting of identical adjacent substrings.
|
||||||
|
Subexpressions are ordered relative to their left delimiter.
|
||||||
|
.It *
|
||||||
|
Matches the single character regular expression or subexpression
|
||||||
|
immediately preceding it zero or more times.
|
||||||
|
If
|
||||||
|
.Sq *
|
||||||
|
is the first character of a regular expression or subexpression,
|
||||||
|
then it matches itself.
|
||||||
|
The
|
||||||
|
.Sq *
|
||||||
|
operator sometimes yields unexpected results.
|
||||||
|
For example, the regular expression
|
||||||
|
.Ar b*
|
||||||
|
matches the beginning of the string
|
||||||
|
.Qq abbb
|
||||||
|
(as opposed to the substring
|
||||||
|
.Qq bbb ) ,
|
||||||
|
since a null match is the only leftmost match.
|
||||||
|
.Sm off
|
||||||
|
.It Xo
|
||||||
|
.Pf \e{ Ar n , m No \e}\ \&
|
||||||
|
.Pf \e{ Ar n , No \e}\ \&
|
||||||
|
.Pf \e{ Ar n No \e}
|
||||||
|
.Xc
|
||||||
|
.Sm on
|
||||||
|
Matches the single character regular expression or subexpression
|
||||||
|
immediately preceding it at least
|
||||||
|
.Ar n
|
||||||
|
and at most
|
||||||
|
.Ar m
|
||||||
|
times.
|
||||||
|
If
|
||||||
|
.Ar m
|
||||||
|
is omitted, then it matches at least
|
||||||
|
.Ar n
|
||||||
|
times.
|
||||||
|
If the comma is also omitted, then it matches exactly
|
||||||
|
.Ar n
|
||||||
|
times.
|
||||||
|
.El
|
||||||
|
.Sh SEE ALSO
|
||||||
|
.Xr ctype 3 ,
|
||||||
|
.Xr regex 3
|
||||||
|
.Sh STANDARDS
|
||||||
|
.St -p1003.1-2004 :
|
||||||
|
Base Definitions, Chapter 9 (Regular Expressions).
|
||||||
|
.Sh BUGS
|
||||||
|
Having two kinds of REs is a botch.
|
||||||
|
.Pp
|
||||||
|
The current POSIX spec says that
|
||||||
|
.Sq )\&
|
||||||
|
is an ordinary character in the absence of an unmatched
|
||||||
|
.Sq ( ;
|
||||||
|
this was an unintentional result of a wording error,
|
||||||
|
and change is likely.
|
||||||
|
Avoid relying on it.
|
||||||
|
.Pp
|
||||||
|
Back-references are a dreadful botch,
|
||||||
|
posing major problems for efficient implementations.
|
||||||
|
They are also somewhat vaguely defined
|
||||||
|
(does
|
||||||
|
.Sq a\e(\e(b\e)*\e2\e)*d
|
||||||
|
match
|
||||||
|
.Sq abbbd ? ) .
|
||||||
|
Avoid using them.
|
||||||
|
.Pp
|
||||||
|
POSIX's specification of case-independent matching is vague.
|
||||||
|
The
|
||||||
|
.Dq one case implies all cases
|
||||||
|
definition given above
|
||||||
|
is the current consensus among implementors as to the right interpretation.
|
||||||
|
.Pp
|
||||||
|
The syntax for word boundaries is incredibly ugly.
|
64
include/llvm/Support/Regex.h
Normal file
64
include/llvm/Support/Regex.h
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
//===-- Regex.h - Regular Expression matcher implementation -*- C++ -*-----===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file implements a POSIX regular expression matcher.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "llvm/ADT/SmallVector.h"
|
||||||
|
#include "llvm/ADT/StringRef.h"
|
||||||
|
|
||||||
|
struct llvm_regex;
|
||||||
|
namespace llvm {
|
||||||
|
/// Regex - Compiles a POSIX Extended Regular Expression at construction and
/// matches it against strings through match().
class Regex {
|
||||||
|
public:
|
||||||
|
enum {
|
||||||
|
/// Compile with support for subgroup matches; this is just to make
|
||||||
|
/// constructs like Regex("...", 0) more readable as Regex("...", Sub).
|
||||||
|
Sub=0,
|
||||||
|
/// Compile for matching that ignores upper/lower case distinctions.
|
||||||
|
IgnoreCase=1,
|
||||||
|
/// Compile for matching that need only report success or failure,
|
||||||
|
/// not what was matched.
|
||||||
|
NoSub=2,
|
||||||
|
/// Compile for newline-sensitive matching. With this flag '[^' bracket
|
||||||
|
/// expressions and '.' never match newline. A ^ anchor matches the
|
||||||
|
/// null string after any newline in the string in addition to its normal
|
||||||
|
/// function, and the $ anchor matches the null string before any
|
||||||
|
/// newline in the string in addition to its normal function.
|
||||||
|
Newline=4
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Compiles the given POSIX Extended Regular Expression \arg Regex.
|
||||||
|
/// This implementation supports regexes and matching strings with embedded
|
||||||
|
/// NUL characters.
|
||||||
|
Regex(const StringRef &Regex, unsigned Flags=NoSub);
|
||||||
|
~Regex();
|
||||||
|
|
||||||
|
/// isValid - returns the error encountered during regex compilation, or
|
||||||
|
/// matching, if any.
|
||||||
|
bool isValid(std::string &Error);
|
||||||
|
|
||||||
|
/// match - Match the regex against a given \arg String.
|
||||||
|
///
|
||||||
|
/// \param Matches - If given, on a successful match this will be filled in
|
||||||
|
/// with references to the matched group expressions (inside \arg String),
|
||||||
|
/// the first group is always the entire pattern.
|
||||||
|
/// By default the regex is compiled with NoSub, which disables support for
|
||||||
|
/// Matches.
|
||||||
|
/// For this feature to be enabled you must construct the regex using the
|
||||||
|
/// Regex("...", Regex::Sub) constructor.
|
||||||
|
|
||||||
|
bool match(const StringRef &String, SmallVectorImpl<StringRef> *Matches=0);
|
||||||
|
private:
|
||||||
|
// NOTE(review): raw pointer plus a user-declared destructor, but no copy
|
||||||
// constructor/assignment is declared here - confirm Regex objects are never
// copied (ownership of preg is presumably released in ~Regex(); verify).
struct llvm_regex *preg;
|
||||||
|
int error;
|
||||||
|
bool sub;
|
||||||
|
};
|
||||||
|
}
|
@ -32,6 +32,12 @@ add_llvm_library(LLVMSupport
|
|||||||
Twine.cpp
|
Twine.cpp
|
||||||
raw_os_ostream.cpp
|
raw_os_ostream.cpp
|
||||||
raw_ostream.cpp
|
raw_ostream.cpp
|
||||||
|
Regex.cpp
|
||||||
|
regcomp.c
|
||||||
|
regerror.c
|
||||||
|
regexec.c
|
||||||
|
regfree.c
|
||||||
|
regstrlcpy.c
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries (LLVMSupport LLVMSystem)
|
target_link_libraries (LLVMSupport LLVMSystem)
|
||||||
|
54
lib/Support/COPYRIGHT.regex
Normal file
54
lib/Support/COPYRIGHT.regex
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
$OpenBSD: COPYRIGHT,v 1.3 2003/06/02 20:18:36 millert Exp $
|
||||||
|
|
||||||
|
Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved.
|
||||||
|
This software is not subject to any license of the American Telephone
|
||||||
|
and Telegraph Company or of the Regents of the University of California.
|
||||||
|
|
||||||
|
Permission is granted to anyone to use this software for any purpose on
|
||||||
|
any computer system, and to alter it and redistribute it, subject
|
||||||
|
to the following restrictions:
|
||||||
|
|
||||||
|
1. The author is not responsible for the consequences of use of this
|
||||||
|
software, no matter how awful, even if they arise from flaws in it.
|
||||||
|
|
||||||
|
2. The origin of this software must not be misrepresented, either by
|
||||||
|
explicit claim or by omission. Since few users ever read sources,
|
||||||
|
credits must appear in the documentation.
|
||||||
|
|
||||||
|
3. Altered versions must be plainly marked as such, and must not be
|
||||||
|
misrepresented as being the original software. Since few users
|
||||||
|
ever read sources, credits must appear in the documentation.
|
||||||
|
|
||||||
|
4. This notice may not be removed or altered.
|
||||||
|
|
||||||
|
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1994
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)COPYRIGHT 8.1 (Berkeley) 3/16/94
|
||||||
|
*/
|
97
lib/Support/Regex.cpp
Normal file
97
lib/Support/Regex.cpp
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
//===-- Regex.cpp - Regular Expression matcher implementation -------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file implements a POSIX regular expression matcher.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
#include "llvm/Support/Regex.h"
|
||||||
|
#include "llvm/Support/ErrorHandling.h"
|
||||||
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
#include "regex_impl.h"
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
/// Compile \arg regex as a POSIX extended regular expression.
///
/// \param regex - The pattern text. Its end pointer is handed to the engine
/// through re_endp together with REG_PEND, so the pattern length comes from
/// the StringRef rather than from a terminating NUL.
/// \param Flags - Bitwise OR of the Regex flag enumerators (IgnoreCase,
/// NoSub, Newline); each is translated to the matching REG_* compile flag.
///
/// The result of llvm_regcomp() is stored in \c error; callers query it
/// through isValid().
Regex::Regex(const StringRef &regex, unsigned Flags)
{
  unsigned flags = 0;
  preg = new struct llvm_regex;
  // REG_PEND: the pattern ends at re_endp, not at the first NUL byte.
  preg->re_endp = regex.end();
  if (Flags & IgnoreCase)
    flags |= REG_ICASE;
  if (Flags & NoSub) {
    flags |= REG_NOSUB;
    sub = false;
  } else {
    sub = true;
  }
  if (Flags & Newline)
    flags |= REG_NEWLINE;
  error = llvm_regcomp(preg, regex.data(), flags|REG_EXTENDED|REG_PEND);
}
|
||||||
|
|
||||||
|
bool Regex::isValid(std::string &Error)
|
||||||
|
{
|
||||||
|
if (!error)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
size_t len = llvm_regerror(error, preg, NULL, 0);
|
||||||
|
char *errbuff = new char[len];
|
||||||
|
llvm_regerror(error, preg, errbuff, len);
|
||||||
|
Error.assign(errbuff);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tear down the compiled expression and the heap object that holds it.
Regex::~Regex() {
  // Let the regex engine release whatever it attached to the handle first...
  llvm_regfree(preg);

  // ...then give back the handle allocated by the constructor.
  delete preg;
}
|
||||||
|
|
||||||
|
bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches)
|
||||||
|
{
|
||||||
|
unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
|
||||||
|
|
||||||
|
if (Matches) {
|
||||||
|
assert(sub && "Substring matching requested but pattern compiled without");
|
||||||
|
Matches->clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
// pmatch needs to have at least one element.
|
||||||
|
SmallVector<llvm_regmatch_t, 2> pm;
|
||||||
|
pm.resize(nmatch > 0 ? nmatch : 1);
|
||||||
|
pm[0].rm_so = 0;
|
||||||
|
pm[0].rm_eo = String.size();
|
||||||
|
|
||||||
|
int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND);
|
||||||
|
|
||||||
|
if (rc == REG_NOMATCH)
|
||||||
|
return false;
|
||||||
|
if (rc != 0) {
|
||||||
|
// regexec can fail due to invalid pattern or running out of memory.
|
||||||
|
error = rc;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// There was a match.
|
||||||
|
|
||||||
|
if (Matches) { // match position requested
|
||||||
|
for (unsigned i=0;i<nmatch; i++) {
|
||||||
|
if (pm[i].rm_so == -1) {
|
||||||
|
// this group didn't match
|
||||||
|
Matches->push_back(StringRef());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
assert(pm[i].rm_eo > pm[i].rm_so);
|
||||||
|
Matches->push_back(StringRef(String.data()+pm[i].rm_so,
|
||||||
|
pm[i].rm_eo-pm[i].rm_so));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
70
lib/Support/regcclass.h
Normal file
70
lib/Support/regcclass.h
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
/*-
|
||||||
|
* This code is derived from OpenBSD's libc/regex, original license follows:
|
||||||
|
*
|
||||||
|
* This code is derived from OpenBSD's libc/regex, original license follows:
|
||||||
|
*
|
||||||
|
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||||
|
* Copyright (c) 1992, 1993, 1994
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Henry Spencer.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)cclass.h 8.3 (Berkeley) 3/20/94
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* character-class table */
|
||||||
|
static struct cclass {
|
||||||
|
const char *name;
|
||||||
|
const char *chars;
|
||||||
|
const char *multis;
|
||||||
|
} cclasses[] = {
|
||||||
|
{ "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
|
||||||
|
0123456789", ""} ,
|
||||||
|
{ "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
|
||||||
|
""} ,
|
||||||
|
{ "blank", " \t", ""} ,
|
||||||
|
{ "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
|
||||||
|
\25\26\27\30\31\32\33\34\35\36\37\177", ""} ,
|
||||||
|
{ "digit", "0123456789", ""} ,
|
||||||
|
{ "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
|
||||||
|
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
|
||||||
|
""} ,
|
||||||
|
{ "lower", "abcdefghijklmnopqrstuvwxyz",
|
||||||
|
""} ,
|
||||||
|
{ "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
|
||||||
|
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
|
||||||
|
""} ,
|
||||||
|
{ "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
|
||||||
|
""} ,
|
||||||
|
{ "space", "\t\n\v\f\r ", ""} ,
|
||||||
|
{ "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
|
||||||
|
""} ,
|
||||||
|
{ "xdigit", "0123456789ABCDEFabcdef",
|
||||||
|
""} ,
|
||||||
|
{ NULL, 0, "" }
|
||||||
|
};
|
139
lib/Support/regcname.h
Normal file
139
lib/Support/regcname.h
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
/*-
|
||||||
|
* This code is derived from OpenBSD's libc/regex, original license follows:
|
||||||
|
*
|
||||||
|
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||||
|
* Copyright (c) 1992, 1993, 1994
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Henry Spencer.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)cname.h 8.3 (Berkeley) 3/20/94
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* character-name table */
|
||||||
|
static struct cname {
|
||||||
|
const char *name;
|
||||||
|
char code;
|
||||||
|
} cnames[] = {
|
||||||
|
{ "NUL", '\0' },
|
||||||
|
{ "SOH", '\001' },
|
||||||
|
{ "STX", '\002' },
|
||||||
|
{ "ETX", '\003' },
|
||||||
|
{ "EOT", '\004' },
|
||||||
|
{ "ENQ", '\005' },
|
||||||
|
{ "ACK", '\006' },
|
||||||
|
{ "BEL", '\007' },
|
||||||
|
{ "alert", '\007' },
|
||||||
|
{ "BS", '\010' },
|
||||||
|
{ "backspace", '\b' },
|
||||||
|
{ "HT", '\011' },
|
||||||
|
{ "tab", '\t' },
|
||||||
|
{ "LF", '\012' },
|
||||||
|
{ "newline", '\n' },
|
||||||
|
{ "VT", '\013' },
|
||||||
|
{ "vertical-tab", '\v' },
|
||||||
|
{ "FF", '\014' },
|
||||||
|
{ "form-feed", '\f' },
|
||||||
|
{ "CR", '\015' },
|
||||||
|
{ "carriage-return", '\r' },
|
||||||
|
{ "SO", '\016' },
|
||||||
|
{ "SI", '\017' },
|
||||||
|
{ "DLE", '\020' },
|
||||||
|
{ "DC1", '\021' },
|
||||||
|
{ "DC2", '\022' },
|
||||||
|
{ "DC3", '\023' },
|
||||||
|
{ "DC4", '\024' },
|
||||||
|
{ "NAK", '\025' },
|
||||||
|
{ "SYN", '\026' },
|
||||||
|
{ "ETB", '\027' },
|
||||||
|
{ "CAN", '\030' },
|
||||||
|
{ "EM", '\031' },
|
||||||
|
{ "SUB", '\032' },
|
||||||
|
{ "ESC", '\033' },
|
||||||
|
{ "IS4", '\034' },
|
||||||
|
{ "FS", '\034' },
|
||||||
|
{ "IS3", '\035' },
|
||||||
|
{ "GS", '\035' },
|
||||||
|
{ "IS2", '\036' },
|
||||||
|
{ "RS", '\036' },
|
||||||
|
{ "IS1", '\037' },
|
||||||
|
{ "US", '\037' },
|
||||||
|
{ "space", ' ' },
|
||||||
|
{ "exclamation-mark", '!' },
|
||||||
|
{ "quotation-mark", '"' },
|
||||||
|
{ "number-sign", '#' },
|
||||||
|
{ "dollar-sign", '$' },
|
||||||
|
{ "percent-sign", '%' },
|
||||||
|
{ "ampersand", '&' },
|
||||||
|
{ "apostrophe", '\'' },
|
||||||
|
{ "left-parenthesis", '(' },
|
||||||
|
{ "right-parenthesis", ')' },
|
||||||
|
{ "asterisk", '*' },
|
||||||
|
{ "plus-sign", '+' },
|
||||||
|
{ "comma", ',' },
|
||||||
|
{ "hyphen", '-' },
|
||||||
|
{ "hyphen-minus", '-' },
|
||||||
|
{ "period", '.' },
|
||||||
|
{ "full-stop", '.' },
|
||||||
|
{ "slash", '/' },
|
||||||
|
{ "solidus", '/' },
|
||||||
|
{ "zero", '0' },
|
||||||
|
{ "one", '1' },
|
||||||
|
{ "two", '2' },
|
||||||
|
{ "three", '3' },
|
||||||
|
{ "four", '4' },
|
||||||
|
{ "five", '5' },
|
||||||
|
{ "six", '6' },
|
||||||
|
{ "seven", '7' },
|
||||||
|
{ "eight", '8' },
|
||||||
|
{ "nine", '9' },
|
||||||
|
{ "colon", ':' },
|
||||||
|
{ "semicolon", ';' },
|
||||||
|
{ "less-than-sign", '<' },
|
||||||
|
{ "equals-sign", '=' },
|
||||||
|
{ "greater-than-sign", '>' },
|
||||||
|
{ "question-mark", '?' },
|
||||||
|
{ "commercial-at", '@' },
|
||||||
|
{ "left-square-bracket", '[' },
|
||||||
|
{ "backslash", '\\' },
|
||||||
|
{ "reverse-solidus", '\\' },
|
||||||
|
{ "right-square-bracket", ']' },
|
||||||
|
{ "circumflex", '^' },
|
||||||
|
{ "circumflex-accent", '^' },
|
||||||
|
{ "underscore", '_' },
|
||||||
|
{ "low-line", '_' },
|
||||||
|
{ "grave-accent", '`' },
|
||||||
|
{ "left-brace", '{' },
|
||||||
|
{ "left-curly-bracket", '{' },
|
||||||
|
{ "vertical-line", '|' },
|
||||||
|
{ "right-brace", '}' },
|
||||||
|
{ "right-curly-bracket", '}' },
|
||||||
|
{ "tilde", '~' },
|
||||||
|
{ "DEL", '\177' },
|
||||||
|
{ NULL, 0 }
|
||||||
|
};
|
1524
lib/Support/regcomp.c
Normal file
1524
lib/Support/regcomp.c
Normal file
File diff suppressed because it is too large
Load Diff
1021
lib/Support/regengine.inc
Normal file
1021
lib/Support/regengine.inc
Normal file
File diff suppressed because it is too large
Load Diff
131
lib/Support/regerror.c
Normal file
131
lib/Support/regerror.c
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
/*-
|
||||||
|
* This code is derived from OpenBSD's libc/regex, original license follows:
|
||||||
|
*
|
||||||
|
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||||
|
* Copyright (c) 1992, 1993, 1994
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Henry Spencer.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)regerror.c 8.4 (Berkeley) 3/20/94
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "regex_impl.h"
|
||||||
|
|
||||||
|
#include "regutils.h"
|
||||||
|
|
||||||
|
static const char *regatoi(const llvm_regex_t *, char *, int);
|
||||||
|
|
||||||
|
static struct rerr {
|
||||||
|
int code;
|
||||||
|
const char *name;
|
||||||
|
const char *explain;
|
||||||
|
} rerrs[] = {
|
||||||
|
{ REG_NOMATCH, "REG_NOMATCH", "llvm_regexec() failed to match" },
|
||||||
|
{ REG_BADPAT, "REG_BADPAT", "invalid regular expression" },
|
||||||
|
{ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
|
||||||
|
{ REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
|
||||||
|
{ REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" },
|
||||||
|
{ REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
|
||||||
|
{ REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" },
|
||||||
|
{ REG_EPAREN, "REG_EPAREN", "parentheses not balanced" },
|
||||||
|
{ REG_EBRACE, "REG_EBRACE", "braces not balanced" },
|
||||||
|
{ REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
|
||||||
|
{ REG_ERANGE, "REG_ERANGE", "invalid character range" },
|
||||||
|
{ REG_ESPACE, "REG_ESPACE", "out of memory" },
|
||||||
|
{ REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" },
|
||||||
|
{ REG_EMPTY, "REG_EMPTY", "empty (sub)expression" },
|
||||||
|
{ REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
|
||||||
|
{ REG_INVARG, "REG_INVARG", "invalid argument to regex routine" },
|
||||||
|
{ 0, "", "*** unknown regexp error code ***" }
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
- llvm_regerror - the interface to error numbers
|
||||||
|
= extern size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
|
||||||
|
*/
|
||||||
|
/* ARGSUSED */
|
||||||
|
size_t
|
||||||
|
llvm_regerror(int errcode, const llvm_regex_t *preg, char *errbuf, size_t errbuf_size)
|
||||||
|
{
|
||||||
|
struct rerr *r;
|
||||||
|
size_t len;
|
||||||
|
int target = errcode &~ REG_ITOA;
|
||||||
|
const char *s;
|
||||||
|
char convbuf[50];
|
||||||
|
|
||||||
|
if (errcode == REG_ATOI)
|
||||||
|
s = regatoi(preg, convbuf, sizeof convbuf);
|
||||||
|
else {
|
||||||
|
for (r = rerrs; r->code != 0; r++)
|
||||||
|
if (r->code == target)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (errcode®_ITOA) {
|
||||||
|
if (r->code != 0) {
|
||||||
|
assert(strlen(r->name) < sizeof(convbuf));
|
||||||
|
(void) llvm_strlcpy(convbuf, r->name, sizeof convbuf);
|
||||||
|
} else
|
||||||
|
(void)snprintf(convbuf, sizeof convbuf,
|
||||||
|
"REG_0x%x", target);
|
||||||
|
s = convbuf;
|
||||||
|
} else
|
||||||
|
s = r->explain;
|
||||||
|
}
|
||||||
|
|
||||||
|
len = strlen(s) + 1;
|
||||||
|
if (errbuf_size > 0) {
|
||||||
|
llvm_strlcpy(errbuf, s, errbuf_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
return(len);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
- regatoi - internal routine to implement REG_ATOI
|
||||||
|
*/
|
||||||
|
static const char *
|
||||||
|
regatoi(const llvm_regex_t *preg, char *localbuf, int localbufsize)
|
||||||
|
{
|
||||||
|
struct rerr *r;
|
||||||
|
|
||||||
|
for (r = rerrs; r->code != 0; r++)
|
||||||
|
if (strcmp(r->name, preg->re_endp) == 0)
|
||||||
|
break;
|
||||||
|
if (r->code == 0)
|
||||||
|
return("0");
|
||||||
|
|
||||||
|
(void)snprintf(localbuf, localbufsize, "%d", r->code);
|
||||||
|
return(localbuf);
|
||||||
|
}
|
157
lib/Support/regex2.h
Normal file
157
lib/Support/regex2.h
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
/*-
|
||||||
|
* This code is derived from OpenBSD's libc/regex, original license follows:
|
||||||
|
*
|
||||||
|
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||||
|
* Copyright (c) 1992, 1993, 1994
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Henry Spencer.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)regex2.h 8.4 (Berkeley) 3/20/94
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* internals of regex_t
|
||||||
|
*/
|
||||||
|
#define MAGIC1 ((('r'^0200)<<8) | 'e')
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The internal representation is a *strip*, a sequence of
|
||||||
|
* operators ending with an endmarker. (Some terminology etc. is a
|
||||||
|
* historical relic of earlier versions which used multiple strips.)
|
||||||
|
* Certain oddities in the representation are there to permit running
|
||||||
|
* the machinery backwards; in particular, any deviation from sequential
|
||||||
|
* flow must be marked at both its source and its destination. Some
|
||||||
|
* fine points:
|
||||||
|
*
|
||||||
|
* - OPLUS_ and O_PLUS are *inside* the loop they create.
|
||||||
|
* - OQUEST_ and O_QUEST are *outside* the bypass they create.
|
||||||
|
* - OCH_ and O_CH are *outside* the multi-way branch they create, while
|
||||||
|
* OOR1 and OOR2 are respectively the end and the beginning of one of
|
||||||
|
* the branches. Note that there is an implicit OOR2 following OCH_
|
||||||
|
* and an implicit OOR1 preceding O_CH.
|
||||||
|
*
|
||||||
|
* In state representations, an operator's bit is on to signify a state
|
||||||
|
* immediately *preceding* "execution" of that operator.
|
||||||
|
*/
|
||||||
|
typedef unsigned long sop; /* strip operator */
|
||||||
|
typedef long sopno;
|
||||||
|
#define OPRMASK 0xf8000000LU
|
||||||
|
#define OPDMASK 0x07ffffffLU
|
||||||
|
#define OPSHIFT ((unsigned)27)
|
||||||
|
#define OP(n) ((n)&OPRMASK)
|
||||||
|
#define OPND(n) ((n)&OPDMASK)
|
||||||
|
#define SOP(op, opnd) ((op)|(opnd))
|
||||||
|
/* operators meaning operand */
|
||||||
|
/* (back, fwd are offsets) */
|
||||||
|
#define OEND (1LU<<OPSHIFT) /* endmarker - */
|
||||||
|
#define OCHAR (2LU<<OPSHIFT) /* character unsigned char */
|
||||||
|
#define OBOL (3LU<<OPSHIFT) /* left anchor - */
|
||||||
|
#define OEOL (4LU<<OPSHIFT) /* right anchor - */
|
||||||
|
#define OANY (5LU<<OPSHIFT) /* . - */
|
||||||
|
#define OANYOF (6LU<<OPSHIFT) /* [...] set number */
|
||||||
|
#define OBACK_ (7LU<<OPSHIFT) /* begin \d paren number */
|
||||||
|
#define O_BACK (8LU<<OPSHIFT) /* end \d paren number */
|
||||||
|
#define OPLUS_ (9LU<<OPSHIFT) /* + prefix fwd to suffix */
|
||||||
|
#define O_PLUS (10LU<<OPSHIFT) /* + suffix back to prefix */
|
||||||
|
#define OQUEST_ (11LU<<OPSHIFT) /* ? prefix fwd to suffix */
|
||||||
|
#define O_QUEST (12LU<<OPSHIFT) /* ? suffix back to prefix */
|
||||||
|
#define OLPAREN (13LU<<OPSHIFT) /* ( fwd to ) */
|
||||||
|
#define ORPAREN (14LU<<OPSHIFT) /* ) back to ( */
|
||||||
|
#define OCH_ (15LU<<OPSHIFT) /* begin choice fwd to OOR2 */
|
||||||
|
#define OOR1 (16LU<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
|
||||||
|
#define OOR2 (17LU<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
|
||||||
|
#define O_CH (18LU<<OPSHIFT) /* end choice back to OOR1 */
|
||||||
|
#define OBOW (19LU<<OPSHIFT) /* begin word - */
|
||||||
|
#define OEOW (20LU<<OPSHIFT) /* end word - */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Structure for [] character-set representation. Character sets are
|
||||||
|
* done as bit vectors, grouped 8 to a byte vector for compactness.
|
||||||
|
* The individual set therefore has both a pointer to the byte vector
|
||||||
|
* and a mask to pick out the relevant bit of each byte. A hash code
|
||||||
|
* simplifies testing whether two sets could be identical.
|
||||||
|
*
|
||||||
|
* This will get trickier for multicharacter collating elements. As
|
||||||
|
* preliminary hooks for dealing with such things, we also carry along
|
||||||
|
* a string of multi-character elements, and decide the size of the
|
||||||
|
* vectors at run time.
|
||||||
|
*/
|
||||||
|
typedef struct {
|
||||||
|
uch *ptr; /* -> uch [csetsize] */
|
||||||
|
uch mask; /* bit within array */
|
||||||
|
uch hash; /* hash code */
|
||||||
|
size_t smultis;
|
||||||
|
char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */
|
||||||
|
} cset;
|
||||||
|
/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
|
||||||
|
#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
|
||||||
|
#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
|
||||||
|
#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
|
||||||
|
#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* llvm_regcomp() internal fns */
|
||||||
|
#define MCsub(p, cs, cp) mcsub(p, cs, cp)
|
||||||
|
#define MCin(p, cs, cp) mcin(p, cs, cp)
|
||||||
|
|
||||||
|
/* stuff for character categories */
|
||||||
|
typedef unsigned char cat_t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* main compiled-expression structure
|
||||||
|
*/
|
||||||
|
struct re_guts {
|
||||||
|
int magic;
|
||||||
|
# define MAGIC2 ((('R'^0200)<<8)|'E')
|
||||||
|
sop *strip; /* malloced area for strip */
|
||||||
|
int csetsize; /* number of bits in a cset vector */
|
||||||
|
int ncsets; /* number of csets in use */
|
||||||
|
cset *sets; /* -> cset [ncsets] */
|
||||||
|
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
|
||||||
|
int cflags; /* copy of llvm_regcomp() cflags argument */
|
||||||
|
sopno nstates; /* = number of sops */
|
||||||
|
sopno firststate; /* the initial OEND (normally 0) */
|
||||||
|
sopno laststate; /* the final OEND */
|
||||||
|
int iflags; /* internal flags */
|
||||||
|
# define USEBOL 01 /* used ^ */
|
||||||
|
# define USEEOL 02 /* used $ */
|
||||||
|
# define REGEX_BAD 04 /* something wrong */
|
||||||
|
int nbol; /* number of ^ used */
|
||||||
|
int neol; /* number of $ used */
|
||||||
|
int ncategories; /* how many character categories */
|
||||||
|
cat_t *categories; /* ->catspace[-CHAR_MIN] */
|
||||||
|
char *must; /* match must contain this string */
|
||||||
|
int mlen; /* length of must */
|
||||||
|
size_t nsub; /* copy of re_nsub */
|
||||||
|
int backrefs; /* does it use back references? */
|
||||||
|
sopno nplus; /* how deep does it nest +s? */
|
||||||
|
/* catspace must be last */
|
||||||
|
cat_t catspace[1]; /* actually [NC] */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* misc utilities */
|
||||||
|
#define OUT (CHAR_MAX+1) /* a non-character value */
|
||||||
|
#define ISWORD(c) (isalnum(c&0xff) || (c) == '_')
|
108
lib/Support/regex_impl.h
Normal file
108
lib/Support/regex_impl.h
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
/*-
|
||||||
|
* This code is derived from OpenBSD's libc/regex, original license follows:
|
||||||
|
*
|
||||||
|
* Copyright (c) 1992 Henry Spencer.
|
||||||
|
* Copyright (c) 1992, 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Henry Spencer of the University of Toronto.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)regex.h 8.1 (Berkeley) 6/2/93
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _REGEX_H_
|
||||||
|
#define _REGEX_H_
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
typedef off_t llvm_regoff_t;
|
||||||
|
/* Offsets delimiting one match; llvm_regexec() takes an array of these (pmatch[]). */
typedef struct {
|
||||||
|
llvm_regoff_t rm_so; /* start of match */
|
||||||
|
llvm_regoff_t rm_eo; /* end of match */
|
||||||
|
} llvm_regmatch_t;
|
||||||
|
|
||||||
|
/*
 * Public handle for a compiled regular expression.  llvm_regexec() and
 * llvm_regfree() both refuse to work on a handle whose re_magic is not
 * MAGIC1; llvm_regfree() resets re_magic to 0 once it has released re_g.
 */
typedef struct llvm_regex {
|
||||||
|
int re_magic; /* validity tag, compared against MAGIC1 before use */
|
||||||
|
size_t re_nsub; /* number of parenthesized subexpressions */
|
||||||
|
const char *re_endp; /* end pointer for REG_PEND */
|
||||||
|
struct re_guts *re_g; /* none of your business :-) (opaque internals, freed by llvm_regfree()) */
|
||||||
|
} llvm_regex_t;
|
||||||
|
|
||||||
|
/* llvm_regcomp() flags */
|
||||||
|
#define REG_BASIC 0000
|
||||||
|
#define REG_EXTENDED 0001
|
||||||
|
#define REG_ICASE 0002
|
||||||
|
#define REG_NOSUB 0004
|
||||||
|
#define REG_NEWLINE 0010
|
||||||
|
#define REG_NOSPEC 0020
|
||||||
|
#define REG_PEND 0040
|
||||||
|
#define REG_DUMP 0200
|
||||||
|
|
||||||
|
/* llvm_regerror() flags */
|
||||||
|
#define REG_NOMATCH 1
|
||||||
|
#define REG_BADPAT 2
|
||||||
|
#define REG_ECOLLATE 3
|
||||||
|
#define REG_ECTYPE 4
|
||||||
|
#define REG_EESCAPE 5
|
||||||
|
#define REG_ESUBREG 6
|
||||||
|
#define REG_EBRACK 7
|
||||||
|
#define REG_EPAREN 8
|
||||||
|
#define REG_EBRACE 9
|
||||||
|
#define REG_BADBR 10
|
||||||
|
#define REG_ERANGE 11
|
||||||
|
#define REG_ESPACE 12
|
||||||
|
#define REG_BADRPT 13
|
||||||
|
#define REG_EMPTY 14
|
||||||
|
#define REG_ASSERT 15
|
||||||
|
#define REG_INVARG 16
|
||||||
|
#define REG_ATOI 255 /* convert name to number (!) */
|
||||||
|
#define REG_ITOA 0400 /* convert number to name (!) */
|
||||||
|
|
||||||
|
/* llvm_regexec() flags */
|
||||||
|
#define REG_NOTBOL 00001
|
||||||
|
#define REG_NOTEOL 00002
|
||||||
|
#define REG_STARTEND 00004
|
||||||
|
#define REG_TRACE 00400 /* tracing of execution */
|
||||||
|
#define REG_LARGE 01000 /* force large representation */
|
||||||
|
#define REG_BACKR 02000 /* force use of backref code */
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Implemented outside this header; the int argument presumably carries the
 * REG_* llvm_regcomp() flags -- TODO confirm against the definition. */
int llvm_regcomp(llvm_regex_t *, const char *, int);
|
||||||
|
/* Implemented outside this header; the int argument presumably is one of the
 * REG_* error codes -- TODO confirm against the definition. */
size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
|
||||||
|
/* Match a string against a compiled regex: 0 on success, REG_NOMATCH on
 * failure, REG_BADPAT if the handle fails its magic-number checks.  The
 * final int takes the llvm_regexec() flags. */
int llvm_regexec(const llvm_regex_t *, const char *, size_t,
|
||||||
|
llvm_regmatch_t [], int);
|
||||||
|
/* Release the storage behind a compiled regex and mark the handle invalid. */
void llvm_regfree(llvm_regex_t *);
|
||||||
|
/* Bounded copy: copies at most siz-1 characters, NUL-terminates unless
 * siz == 0, and returns strlen(src). */
size_t llvm_strlcpy(char *dst, const char *src, size_t siz);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* !_REGEX_H_ */
|
161
lib/Support/regexec.c
Normal file
161
lib/Support/regexec.c
Normal file
@ -0,0 +1,161 @@
|
|||||||
|
/*-
|
||||||
|
* This code is derived from OpenBSD's libc/regex, original license follows:
|
||||||
|
*
|
||||||
|
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||||
|
* Copyright (c) 1992, 1993, 1994
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Henry Spencer.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)regexec.c 8.3 (Berkeley) 3/20/94
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* the outer shell of llvm_regexec()
|
||||||
|
*
|
||||||
|
* This file includes regengine.inc *twice*, after muchos fiddling with the
|
||||||
|
* macros that code uses. This lets the same code operate on two different
|
||||||
|
* representations for state sets.
|
||||||
|
*/
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include "regex_impl.h"
|
||||||
|
|
||||||
|
#include "regutils.h"
|
||||||
|
#include "regex2.h"
|
||||||
|
|
||||||
|
/* macros for manipulating states, small version */
|
||||||
|
#define states long
|
||||||
|
#define states1 states /* for later use in llvm_regexec() decision */
|
||||||
|
#define CLEAR(v) ((v) = 0)
|
||||||
|
#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
|
||||||
|
#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
|
||||||
|
#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0)
|
||||||
|
#define ASSIGN(d, s) ((d) = (s))
|
||||||
|
#define EQ(a, b) ((a) == (b))
|
||||||
|
#define STATEVARS long dummy /* dummy version */
|
||||||
|
#define STATESETUP(m, n) /* nothing */
|
||||||
|
#define STATETEARDOWN(m) /* nothing */
|
||||||
|
#define SETUP(v) ((v) = 0)
|
||||||
|
#define onestate long
|
||||||
|
#define INIT(o, n) ((o) = (unsigned long)1 << (n))
|
||||||
|
#define INC(o) ((o) <<= 1)
|
||||||
|
#define ISSTATEIN(v, o) (((v) & (o)) != 0)
|
||||||
|
/* some abbreviations; note that some of these know variable names! */
|
||||||
|
/* do "if I'm here, I can also be there" etc without branches */
|
||||||
|
#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n))
|
||||||
|
#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n))
|
||||||
|
#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0)
|
||||||
|
/* function names */
|
||||||
|
#define SNAMES /* regengine.inc looks after details */
|
||||||
|
|
||||||
|
#include "regengine.inc"
|
||||||
|
|
||||||
|
/* now undo things */
|
||||||
|
#undef states
|
||||||
|
#undef CLEAR
|
||||||
|
#undef SET0
|
||||||
|
#undef SET1
|
||||||
|
#undef ISSET
|
||||||
|
#undef ASSIGN
|
||||||
|
#undef EQ
|
||||||
|
#undef STATEVARS
|
||||||
|
#undef STATESETUP
|
||||||
|
#undef STATETEARDOWN
|
||||||
|
#undef SETUP
|
||||||
|
#undef onestate
|
||||||
|
#undef INIT
|
||||||
|
#undef INC
|
||||||
|
#undef ISSTATEIN
|
||||||
|
#undef FWD
|
||||||
|
#undef BACK
|
||||||
|
#undef ISSETBACK
|
||||||
|
#undef SNAMES
|
||||||
|
|
||||||
|
/* macros for manipulating states, large version */
|
||||||
|
#define states char *
|
||||||
|
#define CLEAR(v) memset(v, 0, m->g->nstates)
|
||||||
|
#define SET0(v, n) ((v)[n] = 0)
|
||||||
|
#define SET1(v, n) ((v)[n] = 1)
|
||||||
|
#define ISSET(v, n) ((v)[n])
|
||||||
|
#define ASSIGN(d, s) memmove(d, s, m->g->nstates)
|
||||||
|
#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
|
||||||
|
#define STATEVARS long vn; char *space
|
||||||
|
/*
 * Allocate a single buffer of nv * nstates bytes in (m)->space and reset the
 * (m)->vn counter.  NOTE: on malloc failure the expansion executes
 * return(REG_ESPACE), i.e. it returns from the function that uses the macro.
 */
#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \
|
||||||
|
if ((m)->space == NULL) return(REG_ESPACE); \
|
||||||
|
(m)->vn = 0; }
|
||||||
|
#define STATETEARDOWN(m) { free((m)->space); }
|
||||||
|
#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
|
||||||
|
#define onestate long
|
||||||
|
#define INIT(o, n) ((o) = (n))
|
||||||
|
#define INC(o) ((o)++)
|
||||||
|
#define ISSTATEIN(v, o) ((v)[o])
|
||||||
|
/* some abbreviations; note that some of these know variable names! */
|
||||||
|
/* do "if I'm here, I can also be there" etc without branches */
|
||||||
|
#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here])
|
||||||
|
#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here])
|
||||||
|
#define ISSETBACK(v, n) ((v)[here - (n)])
|
||||||
|
/* function names */
|
||||||
|
#define LNAMES /* flag */
|
||||||
|
|
||||||
|
#include "regengine.inc"
|
||||||
|
|
||||||
|
/*
|
||||||
|
- llvm_regexec - interface for matching
|
||||||
|
*
|
||||||
|
* We put this here so we can exploit knowledge of the state representation
|
||||||
|
* when choosing which matcher to call. Also, by this point the matchers
|
||||||
|
* have been prototyped.
|
||||||
|
*/
|
||||||
|
int /* 0 success, REG_NOMATCH failure */
|
||||||
|
llvm_regexec(const llvm_regex_t *preg, const char *string, size_t nmatch,
|
||||||
|
llvm_regmatch_t pmatch[], int eflags)
|
||||||
|
{
|
||||||
|
struct re_guts *g = preg->re_g;
|
||||||
|
#ifdef REDEBUG
|
||||||
|
# define GOODFLAGS(f) (f)
|
||||||
|
#else
|
||||||
|
# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
|
||||||
|
return(REG_BADPAT);
|
||||||
|
assert(!(g->iflags®EX_BAD));
|
||||||
|
if (g->iflags®EX_BAD) /* backstop for no-debug case */
|
||||||
|
return(REG_BADPAT);
|
||||||
|
eflags = GOODFLAGS(eflags);
|
||||||
|
|
||||||
|
if (g->nstates <= (long)(CHAR_BIT*sizeof(states1)) && !(eflags®_LARGE))
|
||||||
|
return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
|
||||||
|
else
|
||||||
|
return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
|
||||||
|
}
|
72
lib/Support/regfree.c
Normal file
72
lib/Support/regfree.c
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
/*-
|
||||||
|
* This code is derived from OpenBSD's libc/regex, original license follows:
|
||||||
|
*
|
||||||
|
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||||
|
* Copyright (c) 1992, 1993, 1994
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Henry Spencer.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)regfree.c 8.3 (Berkeley) 3/20/94
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "regex_impl.h"
|
||||||
|
|
||||||
|
#include "regutils.h"
|
||||||
|
#include "regex2.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
- llvm_regfree - free everything
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
llvm_regfree(llvm_regex_t *preg)
|
||||||
|
{
|
||||||
|
struct re_guts *g;
|
||||||
|
|
||||||
|
if (preg->re_magic != MAGIC1) /* oops */
|
||||||
|
return; /* nice to complain, but hard */
|
||||||
|
|
||||||
|
g = preg->re_g;
|
||||||
|
if (g == NULL || g->magic != MAGIC2) /* oops again */
|
||||||
|
return;
|
||||||
|
preg->re_magic = 0; /* mark it invalid */
|
||||||
|
g->magic = 0; /* mark it invalid */
|
||||||
|
|
||||||
|
if (g->strip != NULL)
|
||||||
|
free((char *)g->strip);
|
||||||
|
if (g->sets != NULL)
|
||||||
|
free((char *)g->sets);
|
||||||
|
if (g->setbits != NULL)
|
||||||
|
free((char *)g->setbits);
|
||||||
|
if (g->must != NULL)
|
||||||
|
free(g->must);
|
||||||
|
free((char *)g);
|
||||||
|
}
|
52
lib/Support/regstrlcpy.c
Normal file
52
lib/Support/regstrlcpy.c
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
/*
|
||||||
|
* This code is derived from OpenBSD's libc, original license follows:
|
||||||
|
*
|
||||||
|
* Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
|
||||||
|
*
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose with or without fee is hereby granted, provided that the above
|
||||||
|
* copyright notice and this permission notice appear in all copies.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||||
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||||
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||||
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||||
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||||
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "regex_impl.h"
|
||||||
|
/*
 * Bounded string copy.
 *
 * Stores at most siz-1 characters of src into dst and, whenever siz is
 * non-zero, terminates dst with a NUL.  The return value is always
 * strlen(src); a result >= siz tells the caller the copy was truncated.
 */
size_t
llvm_strlcpy(char *dst, const char *src, size_t siz)
{
	size_t pos = 0;		/* index of the next character to examine */

	if (siz != 0) {
		/* Leave one slot free for the terminator. */
		while (pos + 1 < siz && src[pos] != '\0') {
			dst[pos] = src[pos];
			pos++;
		}
		dst[pos] = '\0';
	}

	/* Walk whatever is left of src so the full length can be reported. */
	while (src[pos] != '\0')
		pos++;

	return(pos);		/* count does not include NUL */
}
|
55
lib/Support/regutils.h
Normal file
55
lib/Support/regutils.h
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
/*-
|
||||||
|
* This code is derived from OpenBSD's libc/regex, original license follows:
|
||||||
|
*
|
||||||
|
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||||
|
* Copyright (c) 1992, 1993, 1994
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Henry Spencer.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)utils.h 8.3 (Berkeley) 3/20/94
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* utility definitions */
|
||||||
|
#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */
|
||||||
|
/* One more than DUPMAX; presumably the "no upper bound" marker for repeat
 * counts -- TODO confirm against its users.  NOTE(review): C99 <math.h> also
 * defines a macro named INFINITY; confirm no file includes both. */
#define INFINITY (DUPMAX + 1)
|
||||||
|
#define NC (CHAR_MAX - CHAR_MIN + 1)
|
||||||
|
typedef unsigned char uch;
|
||||||
|
|
||||||
|
/* switch off assertions (if not already off) if no REDEBUG */
|
||||||
|
#ifndef REDEBUG
|
||||||
|
#ifndef NDEBUG
|
||||||
|
#define NDEBUG /* no assertions please */
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
/* for old systems with bcopy() but no memmove() */
|
||||||
|
#ifdef USEBCOPY
|
||||||
|
#define memmove(d, s, c) bcopy(s, d, c)
|
||||||
|
#endif
|
64
unittests/Support/RegexTest.cpp
Normal file
64
unittests/Support/RegexTest.cpp
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
//===- llvm/unittest/Support/RegexTest.cpp - Regex tests --===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
#include "llvm/Support/Regex.h"
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
class RegexTest : public ::testing::Test {
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_F(RegexTest, Basics) {
|
||||||
|
Regex r1("^[0-9]+$");
|
||||||
|
EXPECT_TRUE(r1.match("916"));
|
||||||
|
EXPECT_TRUE(r1.match("9"));
|
||||||
|
EXPECT_FALSE(r1.match("9a"));
|
||||||
|
|
||||||
|
SmallVector<StringRef, 1> Matches;
|
||||||
|
Regex r2("[0-9]+", Regex::Sub);
|
||||||
|
EXPECT_TRUE(r2.match("aa216b", &Matches));
|
||||||
|
EXPECT_EQ(1u, Matches.size());
|
||||||
|
EXPECT_EQ("216", Matches[0].str());
|
||||||
|
|
||||||
|
Regex r3("[0-9]+([a-f])?:([0-9]+)", Regex::Sub);
|
||||||
|
EXPECT_TRUE(r3.match("9a:513b", &Matches));
|
||||||
|
EXPECT_EQ(3u, Matches.size());
|
||||||
|
EXPECT_EQ("9a:513", Matches[0].str());
|
||||||
|
EXPECT_EQ("a", Matches[1].str());
|
||||||
|
EXPECT_EQ("513", Matches[2].str());
|
||||||
|
|
||||||
|
EXPECT_TRUE(r3.match("9:513b", &Matches));
|
||||||
|
EXPECT_EQ(3u, Matches.size());
|
||||||
|
EXPECT_EQ("9:513", Matches[0].str());
|
||||||
|
EXPECT_EQ("", Matches[1].str());
|
||||||
|
EXPECT_EQ("513", Matches[2].str());
|
||||||
|
|
||||||
|
Regex r4("a[^b]+b", Regex::Sub);
|
||||||
|
std::string String="axxb";
|
||||||
|
String[2] = '\0';
|
||||||
|
EXPECT_FALSE(r4.match("abb"));
|
||||||
|
EXPECT_TRUE(r4.match(String, &Matches));
|
||||||
|
EXPECT_EQ(1u, Matches.size());
|
||||||
|
EXPECT_EQ(String, Matches[0].str());
|
||||||
|
|
||||||
|
|
||||||
|
std::string NulPattern="X[0-9]+X([a-f])?:([0-9]+)";
|
||||||
|
String="YX99a:513b";
|
||||||
|
NulPattern[7] = '\0';
|
||||||
|
Regex r5(NulPattern, Regex::Sub);
|
||||||
|
EXPECT_FALSE(r5.match(String));
|
||||||
|
EXPECT_FALSE(r5.match("X9"));
|
||||||
|
String[3]='\0';
|
||||||
|
EXPECT_TRUE(r5.match(String));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user