// Mirror of https://github.com/openharmony/third_party_re2.git
// (synced 2026-07-01 09:20:39 -04:00, commit 720274a4cd).
// Signed-off-by: ganchuantao1 <ganchuantao1@huawei.com>
// 202 lines, 5.9 KiB, C++
// Copyright 2009 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <string>
|
|
|
|
#include "absl/base/macros.h"
|
|
#include "gtest/gtest.h"
|
|
#include "re2/prog.h"
|
|
#include "re2/regexp.h"
|
|
|
|
namespace re2 {
|
|
|
|
// One row of a table-driven required-prefix test.
//
// Rows are written as positional aggregate initializers, so the member order
// below must not change. Members omitted from a row (for example
// `{ "", false }`) are value-initialized, i.e. null pointers and `false`;
// the tests only read them when `return_value` is true.
struct PrefixTest {
  // Pattern handed to Regexp::Parse().
  const char* regexp;
  // Expected result of the prefix query on the parsed regexp.
  bool return_value;
  // Expected prefix string; compared only when return_value is true.
  const char* prefix;
  // Expected fold-case flag; compared only when return_value is true.
  bool foldcase;
  // Expected ToString() of the suffix regexp; compared only by the
  // RequiredPrefix() test, and only when return_value is true.
  const char* suffix;
};
|
|
|
|
// Cases for Regexp::RequiredPrefix(). The table is read-only, hence const.
// Rows with return_value == false leave prefix/foldcase/suffix unset.
static const PrefixTest tests[] = {
  // Empty cases.
  { "", false },
  { "(?m)^", false },
  { "(?-m)^", false },

  // If the regexp has no ^, there's no required prefix.
  { "abc", false },

  // If the regexp immediately goes into
  // something not a literal match, there's no required prefix.
  { "^a*", false },
  { "^(abc)", false },

  // Otherwise, it should work.
  { "^abc$", true, "abc", false, "(?-m:$)" },
  { "^abc", true, "abc", false, "" },
  { "^(?i)abc", true, "abc", true, "" },
  { "^abcd*", true, "abc", false, "d*" },
  { "^[Aa][Bb]cd*", true, "ab", true, "cd*" },
  { "^ab[Cc]d*", true, "ab", false, "[Cc]d*" },
  { "^☺abc", true, "☺abc", false, "" },
};
|
|
|
|
// Runs every row of `tests` through Regexp::RequiredPrefix(), once parsed
// with the Latin-1 flag and once without it (reported as "utf8"), and checks
// the return value and, for rows expected to succeed, the prefix, the
// fold-case flag and the textual form of the suffix regexp.
//
// The test stops at the first failing row, but always releases the parsed
// regexp and the suffix regexp before leaving so a failure does not leak.
TEST(RequiredPrefix, SimpleTests) {
  for (size_t i = 0; i < ABSL_ARRAYSIZE(tests); i++) {
    const PrefixTest& t = tests[i];
    for (size_t j = 0; j < 2; j++) {
      const char* encoding = (j == 0 ? "latin1" : "utf8");
      Regexp::ParseFlags flags = Regexp::LikePerl;
      if (j == 0)
        flags = flags | Regexp::Latin1;
      Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
      ASSERT_TRUE(re != NULL) << " " << t.regexp;

      // Give the out-parameters defined values in case the callee leaves
      // them untouched.
      std::string p;
      bool f = false;
      Regexp* s = NULL;
      const bool got = re->RequiredPrefix(&p, &f, &s);

      // Deliberately non-fatal: the references held below must be dropped
      // before the test is allowed to return.
      EXPECT_EQ(t.return_value, got)
          << " " << t.regexp << " " << encoding
          << " " << re->Dump();

      // Snapshot what is needed from the suffix, then release everything.
      const bool has_suffix = got && s != NULL;
      std::string suffix;
      if (has_suffix) {
        suffix = s->ToString();
        s->Decref();
      }
      re->Decref();

      // Same stop-at-first-failure behaviour as a fatal assertion.
      if (got != t.return_value)
        return;

      if (got) {
        ASSERT_EQ(p, std::string(t.prefix))
            << " " << t.regexp << " " << encoding;
        ASSERT_EQ(f, t.foldcase)
            << " " << t.regexp << " " << encoding;
        ASSERT_TRUE(has_suffix)
            << " " << t.regexp << " " << encoding;
        ASSERT_EQ(suffix, std::string(t.suffix))
            << " " << t.regexp << " " << encoding;
      }
    }
  }
}
|
|
|
|
// Cases for Regexp::RequiredPrefixForAccel(). The table is read-only, hence
// const. The `suffix` member is never set or read for these rows.
static const PrefixTest for_accel_tests[] = {
  // Empty cases.
  { "", false },
  { "(?m)^", false },
  { "(?-m)^", false },

  // If the regexp has a ^, there's no required prefix.
  { "^abc", false },

  // If the regexp immediately goes into
  // something not a literal match, there's no required prefix.
  { "a*", false },

  // Unlike RequiredPrefix(), RequiredPrefixForAccel() can "see through"
  // capturing groups, but doesn't try to glue prefix fragments together.
  { "(a?)def", false },
  { "(ab?)def", true, "a", false },
  { "(abc?)def", true, "ab", false },
  { "(()a)def", false },
  { "((a)b)def", true, "a", false },
  { "((ab)c)def", true, "ab", false },

  // Otherwise, it should work.
  { "abc$", true, "abc", false },
  { "abc", true, "abc", false },
  { "(?i)abc", true, "abc", true },
  { "abcd*", true, "abc", false },
  { "[Aa][Bb]cd*", true, "ab", true },
  { "ab[Cc]d*", true, "ab", false },
  { "☺abc", true, "☺abc", false },
};
|
|
|
|
// Runs every row of `for_accel_tests` through
// Regexp::RequiredPrefixForAccel(), once parsed with the Latin-1 flag and
// once without it (reported as "utf8"), and checks the return value and, for
// rows expected to succeed, the prefix and the fold-case flag.
//
// The test stops at the first failing row, but always releases the parsed
// regexp before leaving so a failure does not leak it.
TEST(RequiredPrefixForAccel, SimpleTests) {
  for (size_t i = 0; i < ABSL_ARRAYSIZE(for_accel_tests); i++) {
    const PrefixTest& t = for_accel_tests[i];
    for (size_t j = 0; j < 2; j++) {
      const char* encoding = (j == 0 ? "latin1" : "utf8");
      Regexp::ParseFlags flags = Regexp::LikePerl;
      if (j == 0)
        flags = flags | Regexp::Latin1;
      Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
      ASSERT_TRUE(re != NULL) << " " << t.regexp;

      // Give the out-parameters defined values in case the callee leaves
      // them untouched.
      std::string p;
      bool f = false;
      const bool got = re->RequiredPrefixForAccel(&p, &f);

      // Deliberately non-fatal: `re` is still needed for Dump() in the
      // failure message and must be released before the test may return.
      EXPECT_EQ(t.return_value, got)
          << " " << t.regexp << " " << encoding
          << " " << re->Dump();
      re->Decref();

      // Same stop-at-first-failure behaviour as a fatal assertion.
      if (got != t.return_value)
        return;

      if (got) {
        ASSERT_EQ(p, std::string(t.prefix))
            << " " << t.regexp << " " << encoding;
        ASSERT_EQ(f, t.foldcase)
            << " " << t.regexp << " " << encoding;
      }
    }
  }
}
|
|
|
|
// Checks how `(?i)` interacts with the letters 'k' and 's' when computing the
// prefix for acceleration, under Latin-1 and under UTF-8 parsing.
//
// Each case uses fresh out-parameters and releases the parsed regexp before
// asserting, so a failing case neither leaks nor sees a previous case's data.
TEST(RequiredPrefixForAccel, CaseFoldingForKAndS) {
  struct FoldCase {
    const char* regexp;
    bool latin1;         // Parse with Regexp::Latin1 in addition to LikePerl.
    const char* prefix;  // Expected prefix, or NULL if none is expected.
  };
  static const FoldCase kCases[] = {
    // With Latin-1 encoding, `(?i)` prefixes can include 'k' and 's'.
    { "(?i)KLM", true, "klm" },
    { "(?i)STU", true, "stu" },

    // With UTF-8 encoding, `(?i)` prefixes can't include 'k' and 's'.
    // This is because they match U+212A and U+017F, respectively, and
    // so the parser ends up emitting character classes, not literals.
    { "(?i)KLM", false, NULL },
    { "(?i)STU", false, NULL },
  };

  for (size_t i = 0; i < ABSL_ARRAYSIZE(kCases); i++) {
    const FoldCase& c = kCases[i];
    const char* encoding = (c.latin1 ? "latin1" : "utf8");
    Regexp::ParseFlags flags = Regexp::LikePerl;
    if (c.latin1)
      flags = flags | Regexp::Latin1;
    Regexp* re = Regexp::Parse(c.regexp, flags, NULL);
    ASSERT_TRUE(re != NULL) << " " << c.regexp << " " << encoding;

    std::string p;
    bool f = false;
    const bool got = re->RequiredPrefixForAccel(&p, &f);
    // The results live in `p` and `f`; the regexp is no longer needed.
    re->Decref();

    if (c.prefix == NULL) {
      ASSERT_FALSE(got) << " " << c.regexp << " " << encoding;
      continue;
    }
    ASSERT_TRUE(got) << " " << c.regexp << " " << encoding;
    ASSERT_EQ(p, std::string(c.prefix)) << " " << c.regexp << " " << encoding;
    ASSERT_TRUE(f) << " " << c.regexp << " " << encoding;
  }
}
|
|
|
|
// Patterns exercised by TEST(PrefixAccel, SimpleTests): the same literal
// prefix, once case-sensitive and once under `(?i)`. The table is read-only,
// so the pointers themselves are const as well.
static const char* const prefix_accel_tests[] = {
  "aababc\\d+",
  "(?i)AABABC\\d+",
};
|
|
|
|
// For each pattern in `prefix_accel_tests`, compiles it to a Prog and checks
// Prog::PrefixAccel():
//   * on a text of j 'a' characters it must return NULL;
//   * once "aababc" is appended after those j characters, it must return a
//     pointer to offset j, however many 'a' characters follow.
//
// The Prog and the Regexp are released on every exit path, and a NULL result
// is reported as a failure instead of being used in pointer arithmetic.
TEST(PrefixAccel, SimpleTests) {
  for (size_t i = 0; i < ABSL_ARRAYSIZE(prefix_accel_tests); i++) {
    const char* pattern = prefix_accel_tests[i];
    Regexp* re = Regexp::Parse(pattern, Regexp::LikePerl, NULL);
    ASSERT_TRUE(re != NULL) << " " << pattern;

    Prog* prog = re->CompileToProg(0);
    if (prog == NULL || !prog->can_prefix_accel()) {
      // Clean up before failing fatally; FAIL() returns from the test.
      const bool compiled = (prog != NULL);
      delete prog;
      re->Decref();
      FAIL() << " " << pattern
             << (compiled ? ": compiled program cannot use prefix acceleration"
                          : ": CompileToProg() returned NULL");
    }

    for (int j = 0; j < 100; j++) {
      std::string text(j, 'a');
      const char* p = reinterpret_cast<const char*>(
          prog->PrefixAccel(text.data(), text.size()));
      EXPECT_TRUE(p == NULL) << " " << pattern << " j=" << j;

      text.append("aababc");
      for (int k = 0; k < 100; k++) {
        // The tail grows cumulatively: k more 'a' characters each iteration.
        text.append(k, 'a');
        p = reinterpret_cast<const char*>(
            prog->PrefixAccel(text.data(), text.size()));
        if (p == NULL) {
          // Subtracting from a null pointer would be undefined behaviour.
          ADD_FAILURE() << " " << pattern << " j=" << j << " k=" << k
                        << ": PrefixAccel() returned NULL";
          continue;
        }
        EXPECT_EQ(j, p - text.data())
            << " " << pattern << " j=" << j << " k=" << k;
      }
    }

    delete prog;
    re->Decref();
  }
}
|
|
|
|
} // namespace re2
|