[Utility] Reimplement RegularExpression on top of llvm::Regex

Originally I wanted to remove the RegularExpression class in Utility and
replace it with llvm::Regex. However, during that transition I noticed
that there are several places where we need the regular expression string.
So instead I propose to keep the RegularExpression class and make it a
thin wrapper around llvm::Regex.

This patch also removes the workaround for empty regular expressions.
The result is that we are now (more or less) POSIX conformant.

Differential revision: https://reviews.llvm.org/D66174

llvm-svn: 369153
This commit is contained in:
Jonas Devlieghere 2019-08-16 21:25:36 +00:00
parent 250aafa2c4
commit 3af3f1e8e2
28 changed files with 272 additions and 465 deletions

View File

@ -24,7 +24,7 @@ namespace lldb_private {
class BreakpointResolverFileRegex : public BreakpointResolver {
public:
BreakpointResolverFileRegex(
Breakpoint *bkpt, RegularExpression &regex,
Breakpoint *bkpt, RegularExpression regex,
const std::unordered_set<std::string> &func_name_set, bool exact_match);
static BreakpointResolver *

View File

@ -44,7 +44,7 @@ public:
// Creates a function breakpoint by regular expression. Takes over control
// of the lifespan of func_regex.
BreakpointResolverName(Breakpoint *bkpt, RegularExpression &func_regex,
BreakpointResolverName(Breakpoint *bkpt, RegularExpression func_regex,
lldb::LanguageType language, lldb::addr_t offset,
bool skip_prologue);

View File

@ -31,7 +31,7 @@ public:
// Creates a function breakpoint by regular expression. Takes over control
// of the lifespan of func_regex.
AddressResolverName(RegularExpression &func_regex);
AddressResolverName(RegularExpression func_regex);
AddressResolverName(const char *class_name, const char *method,
AddressResolver::MatchType type);

View File

@ -36,7 +36,7 @@ public:
VarSetOperationType = eVarSetOperationAssign) = delete;
bool Clear() override {
m_regex.Clear();
m_regex = RegularExpression();
m_value_was_set = false;
return true;
}
@ -52,7 +52,7 @@ public:
if (value && value[0])
m_regex.Compile(llvm::StringRef(value));
else
m_regex.Clear();
m_regex = RegularExpression();
}
bool IsValid() const { return m_regex.IsValid(); }

View File

@ -9,102 +9,28 @@
#ifndef liblldb_RegularExpression_h_
#define liblldb_RegularExpression_h_
#ifdef _WIN32
#include "../lib/Support/regex_impl.h"
typedef llvm_regmatch_t regmatch_t;
typedef llvm_regex_t regex_t;
inline int regcomp(llvm_regex_t *a, const char *b, int c) {
return llvm_regcomp(a, b, c);
}
inline size_t regerror(int a, const llvm_regex_t *b, char *c, size_t d) {
return llvm_regerror(a, b, c, d);
}
inline int regexec(const llvm_regex_t *a, const char *b, size_t c,
llvm_regmatch_t d[], int e) {
return llvm_regexec(a, b, c, d, e);
}
inline void regfree(llvm_regex_t *a) { llvm_regfree(a); }
#else
#ifdef __ANDROID__
#include <regex>
#endif
#include <regex.h>
#endif
#include <string>
#include <vector>
#include <stddef.h>
#include <stdint.h>
namespace llvm {
class StringRef;
} // namespace llvm
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Regex.h"
namespace lldb_private {
/// \class RegularExpression RegularExpression.h
/// "lldb/Utility/RegularExpression.h"
/// A C++ wrapper class for regex.
///
/// This regular expression class wraps the posix regex functions \c
/// regcomp(), \c regerror(), \c regexec(), and \c regfree() from the header
/// file in \c /usr/include/regex\.h.
class RegularExpression {
class RegularExpression : public llvm::Regex {
public:
class Match {
public:
Match(uint32_t max_matches) : m_matches() {
if (max_matches > 0)
m_matches.resize(max_matches + 1);
}
void Clear() {
const size_t num_matches = m_matches.size();
regmatch_t invalid_match = {-1, -1};
for (size_t i = 0; i < num_matches; ++i)
m_matches[i] = invalid_match;
}
size_t GetSize() const { return m_matches.size(); }
regmatch_t *GetData() {
return (m_matches.empty() ? nullptr : m_matches.data());
}
bool GetMatchAtIndex(llvm::StringRef s, uint32_t idx,
std::string &match_str) const;
bool GetMatchAtIndex(llvm::StringRef s, uint32_t idx,
llvm::StringRef &match_str) const;
protected:
std::vector<regmatch_t>
m_matches; ///< Where parenthesized subexpressions results are stored
};
/// Default constructor.
///
/// The default constructor that initializes the object state such that it
/// contains no compiled regular expression.
RegularExpression();
RegularExpression() = default;
explicit RegularExpression(llvm::StringRef string);
/// Destructor.
///
/// Any previously compiled regular expression contained in this object will
/// be freed.
~RegularExpression();
~RegularExpression() = default;
RegularExpression(const RegularExpression &rhs);
RegularExpression(RegularExpression &&rhs) = default;
const RegularExpression &operator=(const RegularExpression &rhs);
RegularExpression &operator=(RegularExpression &&rhs) = default;
RegularExpression &operator=(const RegularExpression &rhs) = default;
/// Compile a regular expression.
///
@ -118,11 +44,9 @@ public:
/// A NULL terminated C string that represents the regular
/// expression to compile.
///
/// \return
/// \b true if the regular expression compiles successfully,
/// \b false otherwise.
/// \return \b true if the regular expression compiles successfully, \b false
/// otherwise.
bool Compile(llvm::StringRef string);
bool Compile(const char *) = delete;
/// Executes a regular expression.
///
@ -140,19 +64,10 @@ public:
/// properly initialized with the desired number of maximum
/// matches, or nullptr if no parenthesized matching is needed.
///
/// \return
/// \b true if \a string matches the compiled regular
/// expression, \b false otherwise.
bool Execute(llvm::StringRef string, Match *match = nullptr) const;
bool Execute(const char *, Match * = nullptr) = delete;
size_t GetErrorAsCString(char *err_str, size_t err_str_max_len) const;
/// Free the compiled regular expression.
///
/// If this object contains a valid compiled regular expression, this
/// function will free any resources it was consuming.
void Free();
/// \return \b true if \a string matches the compiled regular expression, \b
/// false otherwise.
bool Execute(llvm::StringRef string,
llvm::SmallVectorImpl<llvm::StringRef> *matches = nullptr) const;
/// Access the regular expression text.
///
@ -168,26 +83,18 @@ public:
///
/// Test if this object contains a valid regular expression.
///
/// \return
/// \b true if the regular expression compiled and is ready
/// for execution, \b false otherwise.
/// \return \b true if the regular expression compiled and is ready for
/// execution, \b false otherwise.
bool IsValid() const;
void Clear() {
Free();
m_re.clear();
m_comp_err = 1;
}
int GetErrorCode() const { return m_comp_err; }
bool operator<(const RegularExpression &rhs) const;
/// Return an error if the regular expression failed to compile.
llvm::Error GetError() const;
private:
// Member variables
std::string m_re; ///< A copy of the original regular expression text
int m_comp_err; ///< Status code for the regular expression compilation
regex_t m_preg; ///< The compiled regular expression
/// A copy of the original regular expression text.
std::string m_regex_text;
/// The compiled regular expression.
mutable llvm::Regex m_regex;
};
} // namespace lldb_private

View File

@ -20,11 +20,11 @@ using namespace lldb_private;
// BreakpointResolverFileRegex:
BreakpointResolverFileRegex::BreakpointResolverFileRegex(
Breakpoint *bkpt, RegularExpression &regex,
Breakpoint *bkpt, RegularExpression regex,
const std::unordered_set<std::string> &func_names, bool exact_match)
: BreakpointResolver(bkpt, BreakpointResolver::FileRegexResolver),
m_regex(regex), m_exact_match(exact_match), m_function_names(func_names) {
}
m_regex(std::move(regex)), m_exact_match(exact_match),
m_function_names(func_names) {}
BreakpointResolverFileRegex::~BreakpointResolverFileRegex() {}
@ -69,7 +69,8 @@ BreakpointResolver *BreakpointResolverFileRegex::CreateFromStructuredData(
}
}
return new BreakpointResolverFileRegex(bkpt, regex, names_set, exact_match);
return new BreakpointResolverFileRegex(bkpt, std::move(regex), names_set,
exact_match);
}
StructuredData::ObjectSP

View File

@ -70,12 +70,12 @@ BreakpointResolverName::BreakpointResolverName(Breakpoint *bkpt,
}
BreakpointResolverName::BreakpointResolverName(Breakpoint *bkpt,
RegularExpression &func_regex,
RegularExpression func_regex,
lldb::LanguageType language,
lldb::addr_t offset,
bool skip_prologue)
: BreakpointResolver(bkpt, BreakpointResolver::NameResolver, offset),
m_class_name(nullptr), m_regex(func_regex),
m_class_name(nullptr), m_regex(std::move(func_regex)),
m_match_type(Breakpoint::Regexp), m_language(language),
m_skip_prologue(skip_prologue) {}

View File

@ -682,12 +682,10 @@ protected:
// name
{
RegularExpression regexp(m_options.m_func_regexp);
if (!regexp.IsValid()) {
char err_str[1024];
regexp.GetErrorAsCString(err_str, sizeof(err_str));
if (llvm::Error err = regexp.GetError()) {
result.AppendErrorWithFormat(
"Function name regular expression could not be compiled: \"%s\"",
err_str);
llvm::toString(std::move(err)).c_str());
result.SetStatus(eReturnStatusFailed);
return false;
}
@ -718,12 +716,10 @@ protected:
}
RegularExpression regexp(m_options.m_source_text_regexp);
if (!regexp.IsValid()) {
char err_str[1024];
regexp.GetErrorAsCString(err_str, sizeof(err_str));
if (llvm::Error err = regexp.GetError()) {
result.AppendErrorWithFormat(
"Source text regular expression could not be compiled: \"%s\"",
err_str);
llvm::toString(std::move(err)).c_str());
result.SetStatus(eReturnStatusFailed);
return false;
}

View File

@ -573,9 +573,9 @@ protected:
entry.c_str());
}
} else {
char regex_error[1024];
if (regex.GetErrorAsCString(regex_error, sizeof(regex_error)))
result.GetErrorStream().Printf("error: %s\n", regex_error);
if (llvm::Error err = regex.GetError())
result.GetErrorStream().Printf(
"error: %s\n", llvm::toString(std::move(err)).c_str());
else
result.GetErrorStream().Printf(
"error: unknown regex error when compiling '%s'\n",

View File

@ -46,9 +46,9 @@ AddressResolverName::AddressResolverName(const char *func_name,
}
}
AddressResolverName::AddressResolverName(RegularExpression &func_regex)
AddressResolverName::AddressResolverName(RegularExpression func_regex)
: AddressResolver(), m_func_name(nullptr), m_class_name(nullptr),
m_regex(func_regex), m_match_type(AddressResolver::Regexp) {}
m_regex(std::move(func_regex)), m_match_type(AddressResolver::Regexp) {}
AddressResolverName::AddressResolverName(const char *class_name,
const char *method,

View File

@ -355,12 +355,9 @@ bool Disassembler::ElideMixedSourceAndDisassemblyLine(
const char *function_name =
sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments)
.GetCString();
if (function_name) {
RegularExpression::Match regex_match(1);
if (avoid_regex->Execute(function_name, &regex_match)) {
// skip this source line
return true;
}
if (function_name && avoid_regex->Execute(function_name)) {
// skip this source line
return true;
}
}
// don't skip this source line
@ -793,10 +790,9 @@ OptionValueSP Instruction::ReadArray(FILE *in_file, Stream *out_stream,
std::string value;
static RegularExpression g_reg_exp(
llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$"));
RegularExpression::Match regex_match(1);
bool reg_exp_success = g_reg_exp.Execute(line, &regex_match);
if (reg_exp_success)
regex_match.GetMatchAtIndex(line.c_str(), 1, value);
llvm::SmallVector<llvm::StringRef, 2> matches;
if (g_reg_exp.Execute(line, &matches))
value = matches[1].str();
else
value = line;
@ -856,14 +852,15 @@ OptionValueSP Instruction::ReadDictionary(FILE *in_file, Stream *out_stream) {
if (!line.empty()) {
static RegularExpression g_reg_exp(llvm::StringRef(
"^[ \t]*([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*=[ \t]*(.*)[ \t]*$"));
RegularExpression::Match regex_match(2);
bool reg_exp_success = g_reg_exp.Execute(line, &regex_match);
llvm::SmallVector<llvm::StringRef, 3> matches;
bool reg_exp_success = g_reg_exp.Execute(line, &matches);
std::string key;
std::string value;
if (reg_exp_success) {
regex_match.GetMatchAtIndex(line.c_str(), 1, key);
regex_match.GetMatchAtIndex(line.c_str(), 2, value);
key = matches[1].str();
value = matches[2].str();
} else {
out_stream->Printf("Instruction::ReadDictionary: Failure executing "
"regular expression.\n");

View File

@ -282,27 +282,25 @@ bool Socket::DecodeHostAndPort(llvm::StringRef host_and_port,
int32_t &port, Status *error_ptr) {
static RegularExpression g_regex(
llvm::StringRef("([^:]+|\\[[0-9a-fA-F:]+.*\\]):([0-9]+)"));
RegularExpression::Match regex_match(2);
if (g_regex.Execute(host_and_port, &regex_match)) {
if (regex_match.GetMatchAtIndex(host_and_port, 1, host_str) &&
regex_match.GetMatchAtIndex(host_and_port, 2, port_str)) {
// IPv6 addresses are wrapped in [] when specified with ports
if (host_str.front() == '[' && host_str.back() == ']')
host_str = host_str.substr(1, host_str.size() - 2);
bool ok = false;
port = StringConvert::ToUInt32(port_str.c_str(), UINT32_MAX, 10, &ok);
if (ok && port <= UINT16_MAX) {
if (error_ptr)
error_ptr->Clear();
return true;
}
// port is too large
llvm::SmallVector<llvm::StringRef, 3> matches;
if (g_regex.Execute(host_and_port, &matches)) {
host_str = matches[1].str();
port_str = matches[2].str();
// IPv6 addresses are wrapped in [] when specified with ports
if (host_str.front() == '[' && host_str.back() == ']')
host_str = host_str.substr(1, host_str.size() - 2);
bool ok = false;
port = StringConvert::ToUInt32(port_str.c_str(), UINT32_MAX, 10, &ok);
if (ok && port <= UINT16_MAX) {
if (error_ptr)
error_ptr->SetErrorStringWithFormat(
"invalid host:port specification: '%s'",
host_and_port.str().c_str());
return false;
error_ptr->Clear();
return true;
}
// port is too large
if (error_ptr)
error_ptr->SetErrorStringWithFormat(
"invalid host:port specification: '%s'", host_and_port.str().c_str());
return false;
}
// If this was unsuccessful, then check if it's simply a signed 32-bit

View File

@ -1064,7 +1064,7 @@ CommandObject::ArgumentTableEntry CommandObject::g_arguments_data[] = {
{ eArgTypePythonScript, "python-script", CommandCompletions::eNoCompletion, { nullptr, false }, "Source code written in Python." },
{ eArgTypeQueueName, "queue-name", CommandCompletions::eNoCompletion, { nullptr, false }, "The name of the thread queue." },
{ eArgTypeRegisterName, "register-name", CommandCompletions::eNoCompletion, { RegisterNameHelpTextCallback, true }, nullptr },
{ eArgTypeRegularExpression, "regular-expression", CommandCompletions::eNoCompletion, { nullptr, false }, "A regular expression." },
{ eArgTypeRegularExpression, "regular-expression", CommandCompletions::eNoCompletion, { nullptr, false }, "A POSIX-compliant extended regular expression." },
{ eArgTypeRunArgs, "run-args", CommandCompletions::eNoCompletion, { nullptr, false }, "Arguments to be passed to the target program when it starts executing." },
{ eArgTypeRunMode, "run-mode", CommandCompletions::eNoCompletion, { nullptr, false }, "Help text goes here." },
{ eArgTypeScriptedCommandSynchronicity, "script-cmd-synchronicity", CommandCompletions::eNoCompletion, { nullptr, false }, "The synchronicity to use to run scripted commands with regard to LLDB event system." },

View File

@ -30,15 +30,14 @@ bool CommandObjectRegexCommand::DoExecute(llvm::StringRef command,
CommandReturnObject &result) {
EntryCollection::const_iterator pos, end = m_entries.end();
for (pos = m_entries.begin(); pos != end; ++pos) {
RegularExpression::Match regex_match(m_max_matches);
if (pos->regex.Execute(command, &regex_match)) {
llvm::SmallVector<llvm::StringRef, 4> matches;
if (pos->regex.Execute(command, &matches)) {
std::string new_command(pos->command);
std::string match_str;
char percent_var[8];
size_t idx, percent_var_idx;
for (uint32_t match_idx = 1; match_idx <= m_max_matches; ++match_idx) {
if (regex_match.GetMatchAtIndex(command, match_idx, match_str)) {
if (match_idx < matches.size()) {
const std::string match_str = matches[match_idx].str();
const int percent_var_len =
::snprintf(percent_var, sizeof(percent_var), "%%%u", match_idx);
for (idx = 0; (percent_var_idx = new_command.find(

View File

@ -211,29 +211,21 @@ lldb::addr_t OptionArgParser::ToAddress(const ExecutionContext *exe_ctx,
// pointer types.
static RegularExpression g_symbol_plus_offset_regex(
"^(.*)([-\\+])[[:space:]]*(0x[0-9A-Fa-f]+|[0-9]+)[[:space:]]*$");
RegularExpression::Match regex_match(3);
if (g_symbol_plus_offset_regex.Execute(sref, &regex_match)) {
uint64_t offset = 0;
bool add = true;
std::string name;
std::string str;
if (regex_match.GetMatchAtIndex(s, 1, name)) {
if (regex_match.GetMatchAtIndex(s, 2, str)) {
add = str[0] == '+';
if (regex_match.GetMatchAtIndex(s, 3, str)) {
if (!llvm::StringRef(str).getAsInteger(0, offset)) {
Status error;
addr = ToAddress(exe_ctx, name.c_str(), LLDB_INVALID_ADDRESS,
&error);
if (addr != LLDB_INVALID_ADDRESS) {
if (add)
return addr + offset;
else
return addr - offset;
}
}
}
llvm::SmallVector<llvm::StringRef, 4> matches;
if (g_symbol_plus_offset_regex.Execute(sref, &matches)) {
uint64_t offset = 0;
std::string name = matches[1].str();
std::string sign = matches[2].str();
std::string str_offset = matches[3].str();
if (!llvm::StringRef(str_offset).getAsInteger(0, offset)) {
Status error;
addr = ToAddress(exe_ctx, name.c_str(), LLDB_INVALID_ADDRESS, &error);
if (addr != LLDB_INVALID_ADDRESS) {
if (sign[0] == '+')
return addr + offset;
else
return addr - offset;
}
}
}

View File

@ -49,13 +49,10 @@ Status OptionValueRegex::SetValueFromString(llvm::StringRef value,
if (m_regex.Compile(value)) {
m_value_was_set = true;
NotifyValueChanged();
} else if (llvm::Error err = m_regex.GetError()) {
error.SetErrorString(llvm::toString(std::move(err)));
} else {
char regex_error[1024];
if (m_regex.GetErrorAsCString(regex_error, sizeof(regex_error)))
error.SetErrorString(regex_error);
else
error.SetErrorStringWithFormat("regex error %u",
m_regex.GetErrorCode());
error.SetErrorString("regex error");
}
break;
}

View File

@ -381,11 +381,10 @@ public:
static RegularExpression s_regex(
llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?"));
RegularExpression::Match matches(3);
llvm::SmallVector<llvm::StringRef, 4> matches;
if (s_regex.Execute(out_string, &matches)) {
matches.GetMatchAtIndex(out_string.c_str(), 1, m_opcode_name);
matches.GetMatchAtIndex(out_string.c_str(), 2, m_mnemonics);
m_opcode_name = matches[1].str();
m_mnemonics = matches[2].str();
}
}
}

View File

@ -443,27 +443,28 @@ bool ParseCoordinate(llvm::StringRef coord_s, RSCoordinate &coord) {
// returned, `true` otherwise
RegularExpression regex;
RegularExpression::Match regex_match(3);
llvm::SmallVector<llvm::StringRef, 4> matches;
bool matched = false;
if (regex.Compile(llvm::StringRef("^([0-9]+),([0-9]+),([0-9]+)$")) &&
regex.Execute(coord_s, &regex_match))
regex.Execute(coord_s, &matches))
matched = true;
else if (regex.Compile(llvm::StringRef("^([0-9]+),([0-9]+)$")) &&
regex.Execute(coord_s, &regex_match))
regex.Execute(coord_s, &matches))
matched = true;
else if (regex.Compile(llvm::StringRef("^([0-9]+)$")) &&
regex.Execute(coord_s, &regex_match))
regex.Execute(coord_s, &matches))
matched = true;
if (!matched)
return false;
auto get_index = [&](int idx, uint32_t &i) -> bool {
auto get_index = [&](size_t idx, uint32_t &i) -> bool {
std::string group;
errno = 0;
if (regex_match.GetMatchAtIndex(coord_s.str().c_str(), idx + 1, group))
return !llvm::StringRef(group).getAsInteger<uint32_t>(10, i);
if (idx + 1 < matches.size()) {
return !llvm::StringRef(matches[idx + 1]).getAsInteger<uint32_t>(10, i);
}
return true;
};
@ -4147,13 +4148,12 @@ public:
// Matching a comma separated list of known words is fairly
// straightforward with PCRE, but we're using ERE, so we end up with a
// little ugliness...
RegularExpression::Match match(/* max_matches */ 5);
RegularExpression match_type_list(
llvm::StringRef("^([[:alpha:]]+)(,[[:alpha:]]+){0,4}$"));
assert(match_type_list.IsValid());
if (!match_type_list.Execute(option_val, &match)) {
if (!match_type_list.Execute(option_val)) {
err_str.PutCString(
"a comma-separated list of kernel types is required");
return false;

View File

@ -137,76 +137,67 @@ DynamicRegisterInfo::SetRegisterInfo(const StructuredData::Dictionary &dict,
// ends at
static RegularExpression g_bitfield_regex(
llvm::StringRef("([A-Za-z_][A-Za-z0-9_]*)\\[([0-9]+):([0-9]+)\\]"));
RegularExpression::Match regex_match(3);
if (g_bitfield_regex.Execute(slice_str, &regex_match)) {
llvm::StringRef reg_name_str;
std::string msbit_str;
std::string lsbit_str;
if (regex_match.GetMatchAtIndex(slice_str, 1, reg_name_str) &&
regex_match.GetMatchAtIndex(slice_str, 2, msbit_str) &&
regex_match.GetMatchAtIndex(slice_str, 3, lsbit_str)) {
const uint32_t msbit =
StringConvert::ToUInt32(msbit_str.c_str(), UINT32_MAX);
const uint32_t lsbit =
StringConvert::ToUInt32(lsbit_str.c_str(), UINT32_MAX);
if (msbit != UINT32_MAX && lsbit != UINT32_MAX) {
if (msbit > lsbit) {
const uint32_t msbyte = msbit / 8;
const uint32_t lsbyte = lsbit / 8;
llvm::SmallVector<llvm::StringRef, 4> matches;
if (g_bitfield_regex.Execute(slice_str, &matches)) {
std::string reg_name_str = matches[1].str();
std::string msbit_str = matches[2].str();
std::string lsbit_str = matches[3].str();
const uint32_t msbit =
StringConvert::ToUInt32(msbit_str.c_str(), UINT32_MAX);
const uint32_t lsbit =
StringConvert::ToUInt32(lsbit_str.c_str(), UINT32_MAX);
if (msbit != UINT32_MAX && lsbit != UINT32_MAX) {
if (msbit > lsbit) {
const uint32_t msbyte = msbit / 8;
const uint32_t lsbyte = lsbit / 8;
ConstString containing_reg_name(reg_name_str);
ConstString containing_reg_name(reg_name_str);
const RegisterInfo *containing_reg_info =
GetRegisterInfo(containing_reg_name);
if (containing_reg_info) {
const uint32_t max_bit = containing_reg_info->byte_size * 8;
if (msbit < max_bit && lsbit < max_bit) {
m_invalidate_regs_map[containing_reg_info
->kinds[eRegisterKindLLDB]]
.push_back(i);
m_value_regs_map[i].push_back(
containing_reg_info->kinds[eRegisterKindLLDB]);
m_invalidate_regs_map[i].push_back(
containing_reg_info->kinds[eRegisterKindLLDB]);
const RegisterInfo *containing_reg_info =
GetRegisterInfo(containing_reg_name);
if (containing_reg_info) {
const uint32_t max_bit = containing_reg_info->byte_size * 8;
if (msbit < max_bit && lsbit < max_bit) {
m_invalidate_regs_map[containing_reg_info
->kinds[eRegisterKindLLDB]]
.push_back(i);
m_value_regs_map[i].push_back(
containing_reg_info->kinds[eRegisterKindLLDB]);
m_invalidate_regs_map[i].push_back(
containing_reg_info->kinds[eRegisterKindLLDB]);
if (byte_order == eByteOrderLittle) {
success = true;
reg_info.byte_offset =
containing_reg_info->byte_offset + lsbyte;
} else if (byte_order == eByteOrderBig) {
success = true;
reg_info.byte_offset =
containing_reg_info->byte_offset + msbyte;
} else {
llvm_unreachable("Invalid byte order");
}
if (byte_order == eByteOrderLittle) {
success = true;
reg_info.byte_offset =
containing_reg_info->byte_offset + lsbyte;
} else if (byte_order == eByteOrderBig) {
success = true;
reg_info.byte_offset =
containing_reg_info->byte_offset + msbyte;
} else {
if (msbit > max_bit)
printf("error: msbit (%u) must be less than the bitsize "
"of the register (%u)\n",
msbit, max_bit);
else
printf("error: lsbit (%u) must be less than the bitsize "
"of the register (%u)\n",
lsbit, max_bit);
llvm_unreachable("Invalid byte order");
}
} else {
printf("error: invalid concrete register \"%s\"\n",
containing_reg_name.GetCString());
if (msbit > max_bit)
printf("error: msbit (%u) must be less than the bitsize "
"of the register (%u)\n",
msbit, max_bit);
else
printf("error: lsbit (%u) must be less than the bitsize "
"of the register (%u)\n",
lsbit, max_bit);
}
} else {
printf("error: msbit (%u) must be greater than lsbit (%u)\n",
msbit, lsbit);
printf("error: invalid concrete register \"%s\"\n",
containing_reg_name.GetCString());
}
} else {
printf("error: msbit (%u) and lsbit (%u) must be valid\n", msbit,
lsbit);
printf("error: msbit (%u) must be greater than lsbit (%u)\n",
msbit, lsbit);
}
} else {
// TODO: print error invalid slice string that doesn't follow the
// format
printf("error: failed to extract regex matches for parsing the "
"register bitfield regex\n");
printf("error: msbit (%u) and lsbit (%u) must be valid\n", msbit,
lsbit);
}
} else {
// TODO: print error invalid slice string that doesn't follow the

View File

@ -288,11 +288,8 @@ private:
// Instantiate the regex so we can report any errors.
auto regex = RegularExpression(op_arg);
if (!regex.IsValid()) {
char error_text[256];
error_text[0] = '\0';
regex.GetErrorAsCString(error_text, sizeof(error_text));
error.SetErrorString(error_text);
if (llvm::Error err = regex.GetError()) {
error.SetErrorString(llvm::toString(std::move(err)));
return FilterRuleSP();
}

View File

@ -540,19 +540,15 @@ void DWARFUnit::ParseProducerInfo() {
} else if (strstr(producer_cstr, "clang")) {
static RegularExpression g_clang_version_regex(
llvm::StringRef("clang-([0-9]+)\\.([0-9]+)\\.([0-9]+)"));
RegularExpression::Match regex_match(3);
llvm::SmallVector<llvm::StringRef, 4> matches;
if (g_clang_version_regex.Execute(llvm::StringRef(producer_cstr),
&regex_match)) {
std::string str;
if (regex_match.GetMatchAtIndex(producer_cstr, 1, str))
m_producer_version_major =
StringConvert::ToUInt32(str.c_str(), UINT32_MAX, 10);
if (regex_match.GetMatchAtIndex(producer_cstr, 2, str))
m_producer_version_minor =
StringConvert::ToUInt32(str.c_str(), UINT32_MAX, 10);
if (regex_match.GetMatchAtIndex(producer_cstr, 3, str))
m_producer_version_update =
StringConvert::ToUInt32(str.c_str(), UINT32_MAX, 10);
&matches)) {
m_producer_version_major =
StringConvert::ToUInt32(matches[1].str().c_str(), UINT32_MAX, 10);
m_producer_version_minor =
StringConvert::ToUInt32(matches[2].str().c_str(), UINT32_MAX, 10);
m_producer_version_update =
StringConvert::ToUInt32(matches[3].str().c_str(), UINT32_MAX, 10);
}
m_producer = eProducerClang;
} else if (strstr(producer_cstr, "GNU"))

View File

@ -575,18 +575,15 @@ bool ObjectFile::SplitArchivePathWithObject(const char *path_with_object,
FileSpec &archive_file,
ConstString &archive_object,
bool must_exist) {
llvm::SmallVector<llvm::StringRef, 3> matches;
RegularExpression g_object_regex(llvm::StringRef("(.*)\\(([^\\)]+)\\)$"));
RegularExpression::Match regex_match(2);
if (g_object_regex.Execute(llvm::StringRef::withNullAsEmpty(path_with_object),
&regex_match)) {
std::string path;
std::string obj;
if (regex_match.GetMatchAtIndex(path_with_object, 1, path) &&
regex_match.GetMatchAtIndex(path_with_object, 2, obj)) {
archive_file.SetFile(path, FileSpec::Style::native);
archive_object.SetCString(obj.c_str());
return !(must_exist && !FileSystem::Instance().Exists(archive_file));
}
&matches)) {
std::string path = matches[1].str();
std::string obj = matches[2].str();
archive_file.SetFile(path, FileSpec::Style::native);
archive_object.SetCString(obj.c_str());
return !(must_exist && !FileSystem::Instance().Exists(archive_file));
}
return false;
}

View File

@ -390,21 +390,15 @@ Status Variable::GetValuesForVariableExpressionPath(
default: {
static RegularExpression g_regex(
llvm::StringRef("^([A-Za-z_:][A-Za-z_0-9:]*)(.*)"));
RegularExpression::Match regex_match(1);
std::string variable_name;
llvm::SmallVector<llvm::StringRef, 2> matches;
variable_list.Clear();
if (!g_regex.Execute(variable_expr_path, &regex_match)) {
error.SetErrorStringWithFormat(
"unable to extract a variable name from '%s'",
variable_expr_path.str().c_str());
return error;
}
if (!regex_match.GetMatchAtIndex(variable_expr_path, 1, variable_name)) {
if (!g_regex.Execute(variable_expr_path, &matches)) {
error.SetErrorStringWithFormat(
"unable to extract a variable name from '%s'",
variable_expr_path.str().c_str());
return error;
}
std::string variable_name = matches[1].str();
if (!callback(baton, variable_name.c_str(), variable_list)) {
error.SetErrorString("unknown error");
return error;

View File

@ -361,26 +361,17 @@ bool ThreadPlanStepInRange::FrameMatchesAvoidCriteria() {
sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments)
.GetCString();
if (frame_function_name) {
size_t num_matches = 0;
Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_STEP));
if (log)
num_matches = 1;
RegularExpression::Match regex_match(num_matches);
llvm::SmallVector<llvm::StringRef, 2> matches;
bool return_value =
avoid_regexp_to_use->Execute(frame_function_name, &regex_match);
if (return_value) {
if (log) {
std::string match;
regex_match.GetMatchAtIndex(frame_function_name, 0, match);
LLDB_LOGF(log,
"Stepping out of function \"%s\" because it matches "
"the avoid regexp \"%s\" - match substring: \"%s\".",
frame_function_name,
avoid_regexp_to_use->GetText().str().c_str(),
match.c_str());
}
avoid_regexp_to_use->Execute(frame_function_name, &matches);
if (return_value && matches.size() > 1) {
std::string match = matches[1].str();
LLDB_LOGF(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_STEP),
"Stepping out of function \"%s\" because it matches "
"the avoid regexp \"%s\" - match substring: \"%s\".",
frame_function_name,
avoid_regexp_to_use->GetText().str().c_str(),
match.c_str());
}
return return_value;
}

View File

@ -8,151 +8,40 @@
#include "lldb/Utility/RegularExpression.h"
#include "llvm/ADT/StringRef.h"
#include <string>
// Enable enhanced mode if it is available. This allows for things like \d for
// digit, \s for space, and many more, but it isn't available everywhere.
#if defined(REG_ENHANCED)
#define DEFAULT_COMPILE_FLAGS (REG_ENHANCED | REG_EXTENDED)
#else
#define DEFAULT_COMPILE_FLAGS (REG_EXTENDED)
#endif
using namespace lldb_private;
RegularExpression::RegularExpression() : m_re(), m_comp_err(1), m_preg() {
memset(&m_preg, 0, sizeof(m_preg));
}
// Constructor that compiles "re" using "flags" and stores the resulting
// compiled regular expression into this object.
RegularExpression::RegularExpression(llvm::StringRef str)
: RegularExpression() {
Compile(str);
}
RegularExpression::RegularExpression(llvm::StringRef str) { Compile(str); }
RegularExpression::RegularExpression(const RegularExpression &rhs)
: RegularExpression() {
Compile(rhs.GetText());
}
const RegularExpression &RegularExpression::
operator=(const RegularExpression &rhs) {
if (&rhs != this)
Compile(rhs.GetText());
return *this;
}
// Destructor
//
// Any previously compiled regular expression contained in this object will be
// freed.
RegularExpression::~RegularExpression() { Free(); }
// Compile a regular expression using the supplied regular expression text and
// flags. The compiled regular expression lives in this object so that it can
// be readily used for regular expression matches. Execute() can be called
// after the regular expression is compiled. Any previously compiled regular
// expression contained in this object will be freed.
//
// RETURNS
// True if the regular expression compiles successfully, false
// otherwise.
bool RegularExpression::Compile(llvm::StringRef str) {
Free();
// regcomp() on darwin does not recognize "" as a valid regular expression,
// so we substitute it with an equivalent non-empty one.
m_re = str.empty() ? "()" : str;
m_comp_err = ::regcomp(&m_preg, m_re.c_str(), DEFAULT_COMPILE_FLAGS);
return m_comp_err == 0;
m_regex_text = str;
m_regex = llvm::Regex(str);
return IsValid();
}
// Execute a regular expression match using the compiled regular expression
// that is already in this object against the match string "s". If any parens
// are used for regular expression matches "match_count" should indicate the
// number of regmatch_t values that are present in "match_ptr". The regular
// expression will be executed using the "execute_flags".
bool RegularExpression::Execute(llvm::StringRef str, Match *match) const {
int err = 1;
if (m_comp_err == 0) {
// Argument to regexec must be null-terminated.
std::string reg_str = str;
if (match) {
err = ::regexec(&m_preg, reg_str.c_str(), match->GetSize(),
match->GetData(), 0);
} else {
err = ::regexec(&m_preg, reg_str.c_str(), 0, nullptr, 0);
}
}
if (err != 0) {
// The regular expression didn't compile, so clear the matches
if (match)
match->Clear();
return false;
}
return true;
// Match "str" against the compiled expression held in m_regex. "matches" is
// forwarded unchanged to llvm::Regex::match() and may be null.
//
// Returns whatever llvm::Regex::match() reports.
bool RegularExpression::Execute(
    llvm::StringRef str,
    llvm::SmallVectorImpl<llvm::StringRef> *matches) const {
  const bool matched = m_regex.match(str, matches);
  return matched;
}
bool RegularExpression::Match::GetMatchAtIndex(llvm::StringRef s, uint32_t idx,
std::string &match_str) const {
llvm::StringRef match_str_ref;
if (GetMatchAtIndex(s, idx, match_str_ref)) {
match_str = match_str_ref.str();
return true;
}
return false;
// Returns true if the underlying llvm::Regex reports itself as valid. The
// error text that isValid() produces is not needed here and is dropped.
bool RegularExpression::IsValid() const {
  std::string ignored_error;
  const bool valid = m_regex.isValid(ignored_error);
  return valid;
}
bool RegularExpression::Match::GetMatchAtIndex(
llvm::StringRef s, uint32_t idx, llvm::StringRef &match_str) const {
if (idx < m_matches.size()) {
if (m_matches[idx].rm_eo == -1 && m_matches[idx].rm_so == -1)
return false;
// Returns the regular expression text stored in m_regex_text.
llvm::StringRef RegularExpression::GetText() const {
  return m_regex_text;
}
if (m_matches[idx].rm_eo == m_matches[idx].rm_so) {
// Matched the empty string...
match_str = llvm::StringRef();
return true;
} else if (m_matches[idx].rm_eo > m_matches[idx].rm_so) {
match_str = s.substr(m_matches[idx].rm_so,
m_matches[idx].rm_eo - m_matches[idx].rm_so);
return true;
}
}
return false;
}
// Reports whether a regular expression is compiled and ready for execution,
// i.e. whether the recorded compile status m_comp_err is zero.
bool RegularExpression::IsValid() const {
  const bool compiled_ok = (m_comp_err == 0);
  return compiled_ok;
}
// Accessor for the regular expression text stored in m_re.
llvm::StringRef RegularExpression::GetText() const {
  return m_re;
}
// Release the compiled regular expression held by this object, if there is
// one. Does nothing when no expression is currently compiled.
void RegularExpression::Free() {
  // A non-zero compile status means there is nothing to release.
  if (m_comp_err != 0)
    return;

  m_re.clear();
  regfree(&m_preg);
  // Record a compile error so the object no longer reports a valid regex.
  m_comp_err = 1;
}
// Write a description of the last compile error into "err_str", a buffer of
// "err_str_max_len" bytes.
//
// When there is a compile error, the work is delegated to ::regerror() and
// its return value is passed back. When there is none, the buffer (if one
// with non-zero length was supplied) is set to the empty string and 0 is
// returned.
size_t RegularExpression::GetErrorAsCString(char *err_str,
                                            size_t err_str_max_len) const {
  if (m_comp_err != 0)
    return ::regerror(m_comp_err, &m_preg, err_str, err_str_max_len);

  // No error recorded: hand back an empty string when there is room for it.
  if (err_str && err_str_max_len)
    *err_str = '\0';
  return 0;
}
bool RegularExpression::operator<(const RegularExpression &rhs) const {
return (m_re < rhs.m_re);
// Returns llvm::Error::success() when the underlying llvm::Regex is valid.
// Otherwise returns an llvm::StringError carrying the message produced by
// llvm::Regex::isValid().
llvm::Error RegularExpression::GetError() const {
  std::string message;
  if (m_regex.isValid(message))
    return llvm::Error::success();

  return llvm::make_error<llvm::StringError>(llvm::inconvertibleErrorCode(),
                                             message);
}

View File

@ -21,8 +21,9 @@ add_lldb_unittest(UtilityTests
RangeMapTest.cpp
RangeTest.cpp
RegisterValueTest.cpp
ReproducerTest.cpp
RegularExpressionTest.cpp
ReproducerInstrumentationTest.cpp
ReproducerTest.cpp
ScalarTest.cpp
StateTest.cpp
StatusTest.cpp

View File

@ -49,8 +49,8 @@ TEST(NameMatchesTest, EndsWith) {
TEST(NameMatchesTest, RegularExpression) {
EXPECT_TRUE(NameMatches("foobar", NameMatch::RegularExpression, "foo"));
EXPECT_TRUE(NameMatches("foobar", NameMatch::RegularExpression, "f[oa]o"));
EXPECT_TRUE(NameMatches("foo", NameMatch::RegularExpression, ""));
EXPECT_TRUE(NameMatches("", NameMatch::RegularExpression, ""));
EXPECT_FALSE(NameMatches("foo", NameMatch::RegularExpression, ""));
EXPECT_FALSE(NameMatches("", NameMatch::RegularExpression, ""));
EXPECT_FALSE(NameMatches("foo", NameMatch::RegularExpression, "b"));
EXPECT_FALSE(NameMatches("", NameMatch::RegularExpression, "b"));
EXPECT_FALSE(NameMatches("^a", NameMatch::RegularExpression, "^a"));

View File

@ -0,0 +1,65 @@
//===-- RegularExpressionTest.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "lldb/Utility/RegularExpression.h"
#include "llvm/ADT/SmallVector.h"
#include "gtest/gtest.h"
using namespace lldb_private;
using namespace llvm;
// A well-formed pattern must compile without error, keep its text, and match
// an input that satisfies it.
TEST(RegularExpression, Valid) {
  const StringRef pattern = "^[0-9]+$";
  RegularExpression regex(pattern);

  cantFail(regex.GetError());
  EXPECT_TRUE(regex.IsValid());
  EXPECT_EQ(pattern, regex.GetText());
  EXPECT_TRUE(regex.Execute("916"));
}
// Copies of a valid regular expression must themselves be valid, carry the
// same text and match the same input.
TEST(RegularExpression, CopyAssignment) {
  RegularExpression r1("^[0-9]+$");

  // Default-construct first so that `r2 = r1` really goes through operator=.
  // `RegularExpression r2 = r1;` is copy-initialization and would only
  // exercise the copy constructor.
  RegularExpression r2;
  r2 = r1;
  cantFail(r2.GetError());
  EXPECT_TRUE(r2.IsValid());
  EXPECT_EQ("^[0-9]+$", r2.GetText());
  EXPECT_TRUE(r2.Execute("916"));

  // Keep the copy-constructor coverage the test had before.
  RegularExpression r3 = r1;
  cantFail(r3.GetError());
  EXPECT_TRUE(r3.IsValid());
  EXPECT_EQ("^[0-9]+$", r3.GetText());
  EXPECT_TRUE(r3.Execute("916"));
}
// An empty pattern is expected to be rejected: GetError() reports a failure,
// IsValid() is false, the text stays empty and nothing matches.
TEST(RegularExpression, Empty) {
  RegularExpression regex("");

  Error error = regex.GetError();
  const bool has_error = static_cast<bool>(error);
  // The error must be consumed before it goes out of scope.
  consumeError(std::move(error));
  EXPECT_TRUE(has_error);

  EXPECT_FALSE(regex.IsValid());
  EXPECT_EQ("", regex.GetText());
  EXPECT_FALSE(regex.Execute("916"));
}
// A malformed pattern (unterminated bracket expression) must be reported as
// an error, keep its original text and never match.
TEST(RegularExpression, Invalid) {
  const StringRef pattern = "a[b-";
  RegularExpression regex(pattern);

  Error error = regex.GetError();
  const bool has_error = static_cast<bool>(error);
  // The error must be consumed before it goes out of scope.
  consumeError(std::move(error));
  EXPECT_TRUE(has_error);

  EXPECT_FALSE(regex.IsValid());
  EXPECT_EQ(pattern, regex.GetText());
  EXPECT_FALSE(regex.Execute("ab"));
}
// Execute() with a match vector must return the whole match followed by one
// entry per capture group.
TEST(RegularExpression, Match) {
  RegularExpression r1("[0-9]+([a-f])?:([0-9]+)");
  cantFail(r1.GetError());
  EXPECT_TRUE(r1.IsValid());
  EXPECT_EQ("[0-9]+([a-f])?:([0-9]+)", r1.GetText());

  SmallVector<StringRef, 3> matches;
  // Use ASSERT_* here: the element checks below index into `matches`, so the
  // test must stop if the match failed or produced the wrong number of
  // entries instead of reading out of bounds.
  ASSERT_TRUE(r1.Execute("9a:513b", &matches));
  ASSERT_EQ(3u, matches.size());
  EXPECT_EQ("9a:513", matches[0].str());
  EXPECT_EQ("a", matches[1].str());
  EXPECT_EQ("513", matches[2].str());
}