Regex: Make "match" and "sub" const member functions

Summary:
The Regex "match" and "sub" member functions were previously not "const"
because they wrote to the "error" member variable. This commit removes
those assignments, and instead assumes that the validity of the regex
is already known after the initial compilation of the regular
expression. As a result, it became possible to make these member functions
"const". This makes it easier to do things like pre-compile Regexes
up-front, and makes "match" and "sub" thread-safe. The error status is
now returned as an optional output, which also makes the API of "match"
and "sub" more consistent with each other.

Also, some uses of Regex that could be refactored to be const were made const.

Patch by Nicolas Guillemot

Reviewers: jankratochvil, thopre

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D67241

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@372764 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Thomas Preud'homme 2019-09-24 14:42:36 +00:00
parent 3a0728c761
commit 2c032d2797
6 changed files with 42 additions and 23 deletions

View File

@ -57,8 +57,8 @@ namespace llvm {
Regex(Regex &&regex);
~Regex();
/// isValid - returns the error encountered during regex compilation, or
/// matching, if any.
/// isValid - returns the error encountered during regex compilation, if
/// any.
bool isValid(std::string &Error) const;
bool isValid() const { return !error; }
@ -73,8 +73,12 @@ namespace llvm {
/// with references to the matched group expressions (inside \p String),
/// the first group is always the entire pattern.
///
/// \param Error - If non-null, any errors in the matching will be recorded
/// as a non-empty string. If there is no error, it will be an empty string.
///
/// This returns true on a successful match.
bool match(StringRef String, SmallVectorImpl<StringRef> *Matches = nullptr);
bool match(StringRef String, SmallVectorImpl<StringRef> *Matches = nullptr,
std::string *Error = nullptr) const;
/// sub - Return the result of replacing the first match of the regex in
/// \p String with the \p Repl string. Backreferences like "\0" in the
@ -85,9 +89,9 @@ namespace llvm {
///
/// \param Error If non-null, any errors in the substitution (invalid
/// backreferences, trailing backslashes) will be recorded as a non-empty
/// string.
/// string. If there is no error, it will be an empty string.
std::string sub(StringRef Repl, StringRef String,
std::string *Error = nullptr);
std::string *Error = nullptr) const;
/// If this function returns true, ^Str$ is an extended regular
/// expression that matches Str and only Str.

View File

@ -522,7 +522,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
F->arg_begin()->getType());
return true;
}
Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
if (vldRegex.match(Name)) {
auto fArgs = F->getFunctionType()->params();
SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
@ -533,7 +533,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
"llvm." + Name + ".p0i8", F->getParent());
return true;
}
Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
if (vstRegex.match(Name)) {
static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
Intrinsic::arm_neon_vst2,
@ -598,7 +598,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
case 'e': {
SmallVector<StringRef, 2> Groups;
Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
if (R.match(Name, &Groups)) {
Intrinsic::ID ID = Intrinsic::not_intrinsic;
if (Groups[1] == "fadd")

View File

@ -209,7 +209,7 @@ static cl::opt<bool> EnableSyntheticCounts(
cl::desc("Run synthetic function entry count generation "
"pass"));
static Regex DefaultAliasRegex(
static const Regex DefaultAliasRegex(
"^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
// This option is used in simplifying testing SampleFDO optimizations for

View File

@ -1712,7 +1712,7 @@ FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
// A check prefix must contain only alphanumeric, hyphens and underscores.
static bool ValidateCheckPrefix(StringRef CheckPrefix) {
Regex Validator("^[a-zA-Z0-9_-]*$");
static const Regex Validator("^[a-zA-Z0-9_-]*$");
return Validator.match(CheckPrefix);
}

View File

@ -52,14 +52,24 @@ Regex::~Regex() {
}
}
bool Regex::isValid(std::string &Error) const {
if (!error)
return true;
namespace {
/// Utility to convert a regex error code into a human-readable string.
void RegexErrorToString(int error, struct llvm_regex *preg,
std::string &Error) {
size_t len = llvm_regerror(error, preg, nullptr, 0);
Error.resize(len - 1);
llvm_regerror(error, preg, &Error[0], len);
}
} // namespace
bool Regex::isValid(std::string &Error) const {
if (!error)
return true;
RegexErrorToString(error, preg, Error);
return false;
}
@ -69,8 +79,14 @@ unsigned Regex::getNumMatches() const {
return preg->re_nsub;
}
bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
if (error)
bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches,
std::string *Error) const {
// Reset error, if given.
if (Error && !Error->empty())
*Error = "";
// Check if the regex itself didn't successfully compile.
if (Error ? !isValid(*Error) : !isValid())
return false;
unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
@ -83,11 +99,13 @@ bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND);
// Failure to match is not an error, it's just a normal return value.
// Any other error code is considered abnormal, and is logged in the Error.
if (rc == REG_NOMATCH)
return false;
if (rc != 0) {
// regexec can fail due to invalid pattern or running out of memory.
error = rc;
if (Error)
RegexErrorToString(error, preg, *Error);
return false;
}
@ -112,14 +130,11 @@ bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
}
std::string Regex::sub(StringRef Repl, StringRef String,
std::string *Error) {
std::string *Error) const {
SmallVector<StringRef, 8> Matches;
// Reset error, if given.
if (Error && !Error->empty()) *Error = "";
// Return the input if there was no match.
if (!match(String, &Matches))
if (!match(String, &Matches, Error))
return String;
// Otherwise splice in the replacement string, starting with the prefix before

View File

@ -125,7 +125,7 @@ namespace llvm {
uint32_t AArch64SysReg::parseGenericRegister(StringRef Name) {
// Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name
Regex GenericRegPattern("^S([0-3])_([0-7])_C([0-9]|1[0-5])_C([0-9]|1[0-5])_([0-7])$");
static const Regex GenericRegPattern("^S([0-3])_([0-7])_C([0-9]|1[0-5])_C([0-9]|1[0-5])_([0-7])$");
std::string UpperName = Name.upper();
SmallVector<StringRef, 5> Ops;