mirror of
https://github.com/openharmony/third_party_re2.git
synced 2026-07-01 09:20:39 -04:00
720274a4cd
Signed-off-by:ganchuantao1<ganchuantao1@huawei.com> Signed-off-by: ganchuantao1 <ganchuantao1@huawei.com>
117 lines
2.4 KiB
Raku
117 lines
2.4 KiB
Raku
#!/usr/bin/perl
|
|
# Copyright 2008 The RE2 Authors. All Rights Reserved.
|
|
# Use of this source code is governed by a BSD-style
|
|
# license that can be found in the LICENSE file.
|
|
|
|
# Generate table entries giving character ranges
|
|
# for POSIX/Perl character classes. Rather than
|
|
# figure out what the definition is, it is easier to ask
|
|
# Perl about each letter from 0-128 and write down
|
|
# its answer.
|
|
|
|
# POSIX bracket-class names to tabulate, in the order their tables
# are written to the generated file.
@posixclasses = qw(
  [:alnum:]
  [:alpha:]
  [:ascii:]
  [:blank:]
  [:cntrl:]
  [:digit:]
  [:graph:]
  [:lower:]
  [:print:]
  [:punct:]
  [:space:]
  [:upper:]
  [:word:]
  [:xdigit:]
);
|
|
|
|
# Perl shorthand classes to tabulate.  Each element is a backslash
# followed by one letter, exactly as it would appear inside a regexp.
@perlclasses = ("\\d", "\\s", "\\w");
|
|
|
|
# Forced answers that take priority over what the running Perl reports.
# Keys have the form "<class name>:<character code>"; the value is the
# membership result to use for that character.
%overrides = ();

# Prior to Perl 5.18, \s did not match vertical tab (code 11).
# RE2 preserves that original behaviour.
$overrides{"\\s:11"} = 0;
|
|
|
|
# ComputeClass($cname)
#
# Asks Perl which character codes 0..128 belong to the class $cname
# (wrapped in a bracket expression) and returns the maximal runs of
# members as a list of [lo, hi] array refs.  An entry in %overrides
# keyed "$cname:$code" replaces Perl's answer for that code.
sub ComputeClass($) {
  my ($cname) = @_;
  my $matcher = qr/[$cname]/;
  my @ranges;
  my $first;    # first code of the run in progress; undef when outside a run

  # 256 is a sentinel that is never a member: it closes a run that is
  # still open after 128, giving that run an upper bound of 255.
  foreach my $code (0 .. 128, 256) {
    my $member = $code <= 128
      && ($overrides{"$cname:$code"} // chr($code) =~ $matcher);

    if ($member) {
      $first //= $code;
      next;
    }

    push @ranges, [$first, $code - 1] if defined $first;
    undef $first;
  }

  return @ranges;
}
|
|
|
|
# PrintClass($cnum, $cname, @ranges)
#
# Prints the C++ array `static const URange16 code<cnum>[]` with one
# { lo, hi } row per range, then returns two UGroup initializer strings
# that refer to that array: the class itself (sign +1) followed by its
# negation (sign -1).
#
#   $cnum   - number used to form the array name code<cnum>
#   $cname  - class name as written in a regexp, e.g. "[:alpha:]" or "\d"
#   @ranges - array refs of the form [lo, hi]
sub PrintClass($$@) {
  my ($cnum, $cname, @ranges) = @_;

  print "static const URange16 code${cnum}[] = { /* $cname */\n";
  foreach my $range (@ranges) {
    printf "\t{ 0x%x, 0x%x },\n", $range->[0], $range->[1];
  }
  print "};\n";

  my $n = @ranges;

  # Double every backslash so the name is a valid C++ string literal.
  my $escname = $cname;
  $escname =~ s/\\/\\\\/g;

  # Spelling of the negated class: [:foo:] becomes [:^foo:], while a
  # backslash class has its letter upper-cased (\d becomes \D).
  # Declared lexically so calls do not write a package global.
  my $negname = $escname;
  if ($negname =~ /:/) {
    $negname =~ s/:/:^/;
  } else {
    $negname =~ y/a-z/A-Z/;
  }

  return "{ \"$escname\", +1, code$cnum, $n, 0, 0 }",
         "{ \"$negname\", -1, code$cnum, $n, 0, 0 }";
}
|
|
|
|
# Running table number.  It persists across PrintClasses calls so that
# every code<N> array written to the output gets a distinct name.
my $cnum = 0;

# PrintClasses($pname, @classes)
#
# For each class name, computes its ranges and prints its range table,
# then prints the `<pname>_groups` UGroup array listing every class and
# its negation, followed by the `num_<pname>_groups` element count.
sub PrintClasses($@) {
  my ($pname, @classes) = @_;

  # The range tables are printed here, ahead of the array that uses them.
  my @entries;
  for my $class (@classes) {
    push @entries, PrintClass(++$cnum, $class, ComputeClass($class));
  }

  print "const UGroup ${pname}_groups[] = {\n";
  print "\t$_,\n" for @entries;
  print "};\n";

  my $total = scalar @entries;
  print "const int num_${pname}_groups = $total;\n";
}
|
|
|
|
# Driver: write the generated C++ file to standard output -- a fixed
# preamble, the Perl class tables, the POSIX class tables, and the
# closing of the namespace.
print <<'HEADER';
// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
// make_perl_groups.pl >perl_groups.cc

#include "re2/unicode_groups.h"

namespace re2 {

HEADER

PrintClasses("perl", @perlclasses);
PrintClasses("posix", @posixclasses);

print <<'FOOTER';

} // namespace re2
FOOTER
|