mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-04-06 17:51:44 +00:00

Summary: It's often the case that the rules in the SpecialCaseList are of the form hel.o*bar. That gives us a chance to build a trigram index to quickly discard 99% of inputs without running a full regex. A similar idea was used in Google Code Search, as described in the blog post: https://swtch.com/~rsc/regexp/regexp4.html The check is defeated if there's at least one regex more complicated than that. In this case, all inputs will go through the regex. That said, the real-world rules are often simple or can be simplified. That considerably speeds up compiling Chromium with CFI and UBSan. As measured on Chromium's content_message_generator.cc: before, CFI: 44 s; after, CFI: 23 s; after, CFI, no blacklist: 23 s (~1% slower, but 3 runs were unable to show the difference); after, regular compilation to bitcode: 23 s. Reviewers: pcc Subscribers: mgorny, llvm-commits Differential Revision: https://reviews.llvm.org/D27188 llvm-svn: 288303
113 lines
3.2 KiB
C++
113 lines
3.2 KiB
C++
//===- TrigramIndexTest.cpp - Unit tests for TrigramIndex -----------------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/Support/TrigramIndex.h"
|
|
#include "gtest/gtest.h"
|
|
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
using namespace llvm;
|
|
|
|
namespace {
|
|
|
|
/// Test fixture providing a helper that builds a TrigramIndex from a list of
/// rules.
class TrigramIndexTest : public ::testing::Test {
protected:
  /// Creates a new TrigramIndex and inserts each entry of \p Rules into it,
  /// in the order given. Ownership of the index is returned to the caller.
  std::unique_ptr<TrigramIndex> makeTrigramIndex(
      std::vector<std::string> Rules) {
    auto Index = make_unique<TrigramIndex>();
    for (auto &Pattern : Rules)
      Index->insert(Pattern);
    return Index;
  }
};
|
|
|
|
// An index built from no rules is not defeated and reports "foo" as
// definitely out.
TEST_F(TrigramIndexTest, Empty) {
  auto Index = makeTrigramIndex({});
  EXPECT_FALSE(Index->isDefeated());
  EXPECT_TRUE(Index->isDefinitelyOut("foo"));
}
|
|
|
|
// Two simple wildcard rules leave the index usable (not defeated), and an
// unrelated query such as "foo" is reported as definitely out.
TEST_F(TrigramIndexTest, Basic) {
  auto Index = makeTrigramIndex({"*hello*", "*wor.d*"});
  EXPECT_FALSE(Index->isDefeated());
  EXPECT_TRUE(Index->isDefinitelyOut("foo"));
}
|
|
|
|
// As the test name indicates, neither rule here provides a trigram. The index
// is expected to be defeated, after which no query is reported as definitely
// out.
TEST_F(TrigramIndexTest, NoTrigramsInRules) {
  auto Index = makeTrigramIndex({"b.r", "za*az"});
  EXPECT_TRUE(Index->isDefeated());
  EXPECT_FALSE(Index->isDefinitelyOut("foo"));
  EXPECT_FALSE(Index->isDefinitelyOut("bar"));
  EXPECT_FALSE(Index->isDefinitelyOut("zakaz"));
}
|
|
|
|
// Per the test name, one of the two rules ("*wo.ld*") provides no trigram.
// The index is expected to be defeated and to no longer rule out "foo".
TEST_F(TrigramIndexTest, NoTrigramsInARule) {
  auto Index = makeTrigramIndex({"*hello*", "*wo.ld*"});
  EXPECT_TRUE(Index->isDefeated());
  EXPECT_FALSE(Index->isDefinitelyOut("foo"));
}
|
|
|
|
// Rules that repeat "bar" several times do not defeat the index. Queries
// "foo" and a single "bar" are definitely out, while "barbara" and "bar+bar"
// are not.
TEST_F(TrigramIndexTest, RepetitiveRule) {
  auto Index = makeTrigramIndex({"*bar*bar*bar*bar*bar", "bar*bar"});
  EXPECT_FALSE(Index->isDefeated());
  EXPECT_TRUE(Index->isDefinitelyOut("foo"));
  EXPECT_TRUE(Index->isDefinitelyOut("bar"));
  EXPECT_FALSE(Index->isDefinitelyOut("barbara"));
  EXPECT_FALSE(Index->isDefinitelyOut("bar+bar"));
}
|
|
|
|
// Five rules made only of runs of 'a' are expected to defeat the index.
// NOTE(review): presumably because the trigram they share occurs in too many
// rules, as the test name suggests -- confirm against TrigramIndex.
TEST_F(TrigramIndexTest, PopularTrigram) {
  auto Index =
      makeTrigramIndex({"*aaa*", "*aaaa*", "*aaaaa*", "*aaaaa*", "*aaaaaa*"});
  EXPECT_TRUE(Index->isDefeated());
}
|
|
|
|
// Five near-identical "class*.h" rules are expected to defeat the index.
// NOTE(review): presumably due to trigrams shared by all five rules, as the
// test name suggests -- confirm against TrigramIndex.
TEST_F(TrigramIndexTest, PopularTrigram2) {
  auto Index = makeTrigramIndex(
      {"class1.h", "class2.h", "class3.h", "class4.h", "class.h"});
  EXPECT_TRUE(Index->isDefeated());
}
|
|
|
|
// A rule using a character class with a '+' quantifier is expected to defeat
// the index.
TEST_F(TrigramIndexTest, TooComplicatedRegex) {
  auto Index = makeTrigramIndex({"[0-9]+"});
  EXPECT_TRUE(Index->isDefeated());
}
|
|
|
|
// A rule using alternation ('|') is expected to defeat the index.
TEST_F(TrigramIndexTest, TooComplicatedRegex2) {
  auto Index = makeTrigramIndex({"foo|bar"});
  EXPECT_TRUE(Index->isDefeated());
}
|
|
|
|
// A rule containing backslash-escaped '+' characters is expected to defeat
// the index.
TEST_F(TrigramIndexTest, SpecialSymbol) {
  auto Index = makeTrigramIndex({"*c\\+\\+*"});
  EXPECT_TRUE(Index->isDefeated());
}
|
|
|
|
// Four "classN.h" rules do not defeat the index. The query "class1" is not
// ruled out, whereas "class.h" and "class" are reported as definitely out.
TEST_F(TrigramIndexTest, Sequence) {
  auto Index =
      makeTrigramIndex({"class1.h", "class2.h", "class3.h", "class4.h"});
  EXPECT_FALSE(Index->isDefeated());
  EXPECT_FALSE(Index->isDefinitelyOut("class1"));
  EXPECT_TRUE(Index->isDefinitelyOut("class.h"));
  EXPECT_TRUE(Index->isDefinitelyOut("class"));
}
|
|
|
|
} // namespace
|