Extend SpecialCaseList to allow users to blame matches on entries in the file.

Summary:
Extends SCL functionality to allow users to find the line number in the file the SCL is built from through SpecialCaseList::inSectionBlame(...).

Also removes the need to compile the SCL before use. As the matcher now contains a list of regexes to test against instead of a single regex, the regexes can be individually built on each insertion rather than one large compilation at the end of construction.

This change also fixes a bug where blank lines would cause the parser to become out-of-sync with the line number. An error on line `k` was being reported as being on line `k - num_blank_lines_before_k`.

Note: This change has a cyclical dependency on D39486. Both these changes must be submitted at the same time to avoid a build breakage.

Reviewers: vlad.tsyrklevich

Reviewed By: vlad.tsyrklevich

Subscribers: kcc, pcc, llvm-commits

Differential Revision: https://reviews.llvm.org/D39485

llvm-svn: 317617
This commit is contained in:
Mitch Phillips 2017-11-07 21:16:46 +00:00
parent 67127eb114
commit c4d16d7686
3 changed files with 85 additions and 63 deletions

View File

@ -89,6 +89,17 @@ public:
bool inSection(StringRef Section, StringRef Prefix, StringRef Query,
StringRef Category = StringRef()) const;
/// Returns the line number corresponding to the special case list entry if
/// the special case list contains a line
/// \code
/// @Prefix:<E>=@Category
/// \endcode
/// where @Query satisfies wildcard expression <E> in a given @Section.
/// Returns zero if there is no blacklist entry corresponding to this
/// expression.
unsigned inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query,
StringRef Category = StringRef()) const;
protected:
// Implementations of the create*() functions that can also be used by derived
// classes.
@ -96,25 +107,25 @@ protected:
std::string &Error);
bool createInternal(const MemoryBuffer *MB, std::string &Error);
SpecialCaseList() = default;
SpecialCaseList(SpecialCaseList const &) = delete;
SpecialCaseList &operator=(SpecialCaseList const &) = delete;
/// Represents a set of regular expressions. Regular expressions which are
/// "literal" (i.e. no regex metacharacters) are stored in Strings, while all
/// others are represented as a single pipe-separated regex in RegEx. The
/// reason for doing so is efficiency; StringSet is much faster at matching
/// "literal" (i.e. no regex metacharacters) are stored in Strings. The
/// reason for doing so is efficiency; StringMap is much faster at matching
/// literal strings than Regex.
class Matcher {
public:
bool insert(std::string Regexp, std::string &REError);
void compile();
bool match(StringRef Query) const;
bool insert(std::string Regexp, unsigned LineNumber, std::string &REError);
// Returns the line number in the source file that this query matches to.
// Returns zero if no match is found.
unsigned match(StringRef Query) const;
private:
StringSet<> Strings;
StringMap<unsigned> Strings;
TrigramIndex Trigrams;
std::unique_ptr<Regex> RegEx;
std::string UncompiledRegEx;
std::vector<std::pair<std::unique_ptr<Regex>, unsigned>> RegExes;
};
using SectionEntries = StringMap<StringMap<Matcher>>;
@ -127,19 +138,15 @@ protected:
};
std::vector<Section> Sections;
bool IsCompiled;
SpecialCaseList();
/// Parses just-constructed SpecialCaseList entries from a memory buffer.
bool parse(const MemoryBuffer *MB, StringMap<size_t> &SectionsMap,
std::string &Error);
/// compile() should be called once, after parsing all the memory buffers.
void compile();
// Helper method for derived classes to search by Prefix, Query, and Category
// once they have already resolved a section entry.
bool inSection(const SectionEntries &Entries, StringRef Prefix,
StringRef Query, StringRef Category) const;
unsigned inSectionBlame(const SectionEntries &Entries, StringRef Prefix,
StringRef Query, StringRef Category) const;
};
} // namespace llvm

View File

@ -27,6 +27,7 @@
namespace llvm {
bool SpecialCaseList::Matcher::insert(std::string Regexp,
unsigned LineNumber,
std::string &REError) {
if (Regexp.empty()) {
REError = "Supplied regexp was blank";
@ -34,7 +35,7 @@ bool SpecialCaseList::Matcher::insert(std::string Regexp,
}
if (Regex::isLiteralERE(Regexp)) {
Strings.insert(Regexp);
Strings[Regexp] = LineNumber;
return true;
}
Trigrams.insert(Regexp);
@ -45,34 +46,30 @@ bool SpecialCaseList::Matcher::insert(std::string Regexp,
Regexp.replace(pos, strlen("*"), ".*");
}
Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
// Check that the regexp is valid.
Regex CheckRE(Regexp);
if (!CheckRE.isValid(REError))
return false;
if (!UncompiledRegEx.empty())
UncompiledRegEx += "|";
UncompiledRegEx += "^(" + Regexp + ")$";
RegExes.emplace_back(
std::make_pair(make_unique<Regex>(std::move(CheckRE)), LineNumber));
return true;
}
void SpecialCaseList::Matcher::compile() {
if (!UncompiledRegEx.empty()) {
RegEx.reset(new Regex(UncompiledRegEx));
UncompiledRegEx.clear();
}
}
bool SpecialCaseList::Matcher::match(StringRef Query) const {
if (Strings.count(Query))
return true;
unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
auto It = Strings.find(Query);
if (It != Strings.end())
return It->second;
if (Trigrams.isDefinitelyOut(Query))
return false;
return RegEx && RegEx->match(Query);
for (auto& RegExKV : RegExes)
if (RegExKV.first->match(Query))
return RegExKV.second;
return 0;
}
SpecialCaseList::SpecialCaseList() : Sections(), IsCompiled(false) {}
std::unique_ptr<SpecialCaseList>
SpecialCaseList::create(const std::vector<std::string> &Paths,
std::string &Error) {
@ -114,7 +111,6 @@ bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
return false;
}
}
compile();
return true;
}
@ -123,7 +119,6 @@ bool SpecialCaseList::createInternal(const MemoryBuffer *MB,
StringMap<size_t> Sections;
if (!parse(MB, Sections, Error))
return false;
compile();
return true;
}
@ -132,11 +127,13 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB,
std::string &Error) {
// Iterate through each line in the blacklist file.
SmallVector<StringRef, 16> Lines;
SplitString(MB->getBuffer(), Lines, "\n\r");
MB->getBuffer().split(Lines, '\n');
int LineNo = 1;
unsigned LineNo = 1;
StringRef Section = "*";
for (auto I = Lines.begin(), E = Lines.end(); I != E; ++I, ++LineNo) {
*I = I->trim();
// Ignore empty lines and lines starting with "#"
if (I->empty() || I->startswith("#"))
continue;
@ -181,11 +178,10 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB,
if (SectionsMap.find(Section) == SectionsMap.end()) {
std::unique_ptr<Matcher> M = make_unique<Matcher>();
std::string REError;
if (!M->insert(Section, REError)) {
if (!M->insert(Section, LineNo, REError)) {
Error = (Twine("malformed section ") + Section + ": '" + REError).str();
return false;
}
M->compile();
SectionsMap[Section] = Sections.size();
Sections.emplace_back(std::move(M));
@ -193,7 +189,7 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB,
auto &Entry = Sections[SectionsMap[Section]].Entries[Prefix][Category];
std::string REError;
if (!Entry.insert(std::move(Regexp), REError)) {
if (!Entry.insert(std::move(Regexp), LineNo, REError)) {
Error = (Twine("malformed regex in line ") + Twine(LineNo) + ": '" +
SplitLine.second + "': " + REError).str();
return false;
@ -202,38 +198,33 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB,
return true;
}
void SpecialCaseList::compile() {
assert(!IsCompiled && "compile() should only be called once");
// Iterate through every section compiling regular expressions for every query
// and creating Section entries.
for (auto &Section : Sections)
for (auto &Prefix : Section.Entries)
for (auto &Category : Prefix.getValue())
Category.getValue().compile();
IsCompiled = true;
}
SpecialCaseList::~SpecialCaseList() {}
bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix,
StringRef Query, StringRef Category) const {
assert(IsCompiled && "SpecialCaseList::compile() was not called!");
for (auto &SectionIter : Sections)
if (SectionIter.SectionMatcher->match(Section) &&
inSection(SectionIter.Entries, Prefix, Query, Category))
return true;
return false;
return inSectionBlame(Section, Prefix, Query, Category);
}
bool SpecialCaseList::inSection(const SectionEntries &Entries, StringRef Prefix,
StringRef Query, StringRef Category) const {
unsigned SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
StringRef Query,
StringRef Category) const {
for (auto &SectionIter : Sections)
if (SectionIter.SectionMatcher->match(Section)) {
unsigned Blame =
inSectionBlame(SectionIter.Entries, Prefix, Query, Category);
if (Blame)
return Blame;
}
return 0;
}
unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries,
StringRef Prefix, StringRef Query,
StringRef Category) const {
SectionEntries::const_iterator I = Entries.find(Prefix);
if (I == Entries.end()) return false;
if (I == Entries.end()) return 0;
StringMap<Matcher>::const_iterator II = I->second.find(Category);
if (II == I->second.end()) return false;
if (II == I->second.end()) return 0;
return II->getValue().match(Query);
}

View File

@ -58,6 +58,30 @@ TEST_F(SpecialCaseListTest, Basic) {
EXPECT_FALSE(SCL->inSection("", "src", "hi"));
EXPECT_FALSE(SCL->inSection("", "fun", "hello"));
EXPECT_FALSE(SCL->inSection("", "src", "hello", "category"));
EXPECT_EQ(3u, SCL->inSectionBlame("", "src", "hello"));
EXPECT_EQ(4u, SCL->inSectionBlame("", "src", "bye"));
EXPECT_EQ(5u, SCL->inSectionBlame("", "src", "hi", "category"));
EXPECT_EQ(6u, SCL->inSectionBlame("", "src", "zzzz", "category"));
EXPECT_EQ(0u, SCL->inSectionBlame("", "src", "hi"));
EXPECT_EQ(0u, SCL->inSectionBlame("", "fun", "hello"));
EXPECT_EQ(0u, SCL->inSectionBlame("", "src", "hello", "category"));
}
TEST_F(SpecialCaseListTest, CorrectErrorLineNumberWithBlankLine) {
std::string Error;
EXPECT_EQ(nullptr, makeSpecialCaseList("# This is a comment.\n"
"\n"
"[not valid\n",
Error));
EXPECT_TRUE(
((StringRef)Error).startswith("malformed section header on line 3:"));
EXPECT_EQ(nullptr, makeSpecialCaseList("\n\n\n"
"[not valid\n",
Error));
EXPECT_TRUE(
((StringRef)Error).startswith("malformed section header on line 4:"));
}
TEST_F(SpecialCaseListTest, SectionRegexErrorHandling) {