mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-23 01:20:03 +00:00
Move GlobPattern class from LLD to llvm/Support.
GlobPattern is a class to handle glob pattern matching. Currently only LLD is using that, but technically that feature is not specific to linkers, so in this patch I move that file to LLVM. Differential Revision: https://reviews.llvm.org/D27969 llvm-svn: 290212
This commit is contained in:
parent
e909a6ed35
commit
f7a7ab59af
@ -21,134 +21,14 @@ using namespace llvm;
|
||||
using namespace lld;
|
||||
using namespace lld::elf;
|
||||
|
||||
// This is a scanner for the glob pattern.
|
||||
// A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
|
||||
// (which is a negative form of "[<chars>]"), or a non-meta character.
|
||||
// This function returns the first token in S.
|
||||
BitVector GlobPattern::scan(StringRef &S) {
|
||||
switch (S[0]) {
|
||||
case '*':
|
||||
S = S.substr(1);
|
||||
// '*' is represented by an empty bitvector.
|
||||
// All other bitvectors are 256-bit long.
|
||||
return BitVector();
|
||||
case '?':
|
||||
S = S.substr(1);
|
||||
return BitVector(256, true);
|
||||
case '[': {
|
||||
size_t End = S.find(']', 1);
|
||||
if (End == StringRef::npos) {
|
||||
error("invalid glob pattern: " + Original);
|
||||
S = "";
|
||||
return BitVector(256, false);
|
||||
}
|
||||
StringRef Chars = S.substr(1, End - 1);
|
||||
S = S.substr(End + 1);
|
||||
if (Chars.startswith("^"))
|
||||
return expand(Chars.substr(1)).flip();
|
||||
return expand(Chars);
|
||||
}
|
||||
default:
|
||||
BitVector BV(256, false);
|
||||
BV[S[0]] = true;
|
||||
S = S.substr(1);
|
||||
return BV;
|
||||
}
|
||||
}
|
||||
|
||||
// Expands character ranges and returns a bitmap.
|
||||
// For example, "a-cf-hz" is expanded to "abcfghz".
|
||||
BitVector GlobPattern::expand(StringRef S) {
|
||||
BitVector BV(256, false);
|
||||
|
||||
// Expand "x-y".
|
||||
for (;;) {
|
||||
if (S.size() < 3)
|
||||
break;
|
||||
|
||||
// If it doesn't start with something like "x-y",
|
||||
// consume the first character and proceed.
|
||||
if (S[1] != '-') {
|
||||
BV[S[0]] = true;
|
||||
S = S.substr(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
// It must be in the form of "x-y".
|
||||
// Validate it and then interpret the range.
|
||||
if (S[0] > S[2]) {
|
||||
error("invalid glob pattern: " + Original);
|
||||
return BV;
|
||||
}
|
||||
for (int C = S[0]; C <= S[2]; ++C)
|
||||
BV[C] = true;
|
||||
S = S.substr(3);
|
||||
}
|
||||
|
||||
for (char C : S)
|
||||
BV[C] = true;
|
||||
return BV;
|
||||
}
|
||||
|
||||
GlobPattern::GlobPattern(StringRef S) : Original(S) {
|
||||
if (!hasWildcard(S)) {
|
||||
// S doesn't contain any metacharacter,
|
||||
// so the regular string comparison should work.
|
||||
Exact = S;
|
||||
} else if (S.endswith("*") && !hasWildcard(S.drop_back())) {
|
||||
// S is something like "foo*". We can use startswith().
|
||||
Prefix = S.drop_back();
|
||||
} else if (S.startswith("*") && !hasWildcard(S.drop_front())) {
|
||||
// S is something like "*foo". We can use endswith().
|
||||
Suffix = S.drop_front();
|
||||
} else {
|
||||
// Otherwise, we need to do real glob pattern matching.
|
||||
// Parse the pattern now.
|
||||
while (!S.empty())
|
||||
Tokens.push_back(scan(S));
|
||||
}
|
||||
}
|
||||
|
||||
bool GlobPattern::match(StringRef S) const {
|
||||
if (Exact)
|
||||
return S == *Exact;
|
||||
if (Prefix)
|
||||
return S.startswith(*Prefix);
|
||||
if (Suffix)
|
||||
return S.endswith(*Suffix);
|
||||
return matchOne(Tokens, S);
|
||||
}
|
||||
|
||||
// Runs glob pattern Pats against string S.
|
||||
bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
|
||||
for (;;) {
|
||||
if (Pats.empty())
|
||||
return S.empty();
|
||||
|
||||
// If Pats[0] is '*', try to match Pats[1..] against all possible
|
||||
// substrings of S to see at least one pattern succeeds.
|
||||
if (Pats[0].size() == 0) {
|
||||
Pats = Pats.slice(1);
|
||||
if (Pats.empty())
|
||||
// Fast path. If a pattern is '*', it matches anything.
|
||||
return true;
|
||||
for (size_t I = 0, E = S.size(); I < E; ++I)
|
||||
if (matchOne(Pats, S.substr(I)))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// If Pats[0] is not '*', it must consume one character.
|
||||
if (S.empty() || !Pats[0][S[0]])
|
||||
return false;
|
||||
Pats = Pats.slice(1);
|
||||
S = S.substr(1);
|
||||
}
|
||||
}
|
||||
|
||||
StringMatcher::StringMatcher(const std::vector<StringRef> &Pat) {
|
||||
for (StringRef S : Pat)
|
||||
Patterns.push_back(GlobPattern(S));
|
||||
for (StringRef S : Pat) {
|
||||
Expected<GlobPattern> Pat = GlobPattern::create(S);
|
||||
if (!Pat)
|
||||
error(toString(Pat.takeError()));
|
||||
else
|
||||
Patterns.push_back(*Pat);
|
||||
}
|
||||
}
|
||||
|
||||
bool StringMatcher::match(StringRef S) const {
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/GlobPattern.h"
|
||||
#include <vector>
|
||||
|
||||
namespace lld {
|
||||
@ -56,30 +57,6 @@ private:
|
||||
mutable size_t Size;
|
||||
};
|
||||
|
||||
// This class represents a glob pattern. Supported metacharacters
|
||||
// are "*", "?", "[<chars>]" and "[^<chars>]".
|
||||
class GlobPattern {
|
||||
public:
|
||||
explicit GlobPattern(StringRef Pat);
|
||||
bool match(StringRef S) const;
|
||||
|
||||
private:
|
||||
bool matchOne(ArrayRef<llvm::BitVector> Pat, StringRef S) const;
|
||||
llvm::BitVector scan(StringRef &S);
|
||||
llvm::BitVector expand(StringRef S);
|
||||
|
||||
// Parsed glob pattern.
|
||||
std::vector<llvm::BitVector> Tokens;
|
||||
|
||||
// A glob pattern given to this class. This is for error reporting.
|
||||
StringRef Original;
|
||||
|
||||
// The following members are for optimization.
|
||||
llvm::Optional<StringRef> Exact;
|
||||
llvm::Optional<StringRef> Prefix;
|
||||
llvm::Optional<StringRef> Suffix;
|
||||
};
|
||||
|
||||
// This class represents multiple glob patterns.
|
||||
class StringMatcher {
|
||||
public:
|
||||
@ -89,7 +66,7 @@ public:
|
||||
bool match(StringRef S) const;
|
||||
|
||||
private:
|
||||
std::vector<GlobPattern> Patterns;
|
||||
std::vector<llvm::GlobPattern> Patterns;
|
||||
};
|
||||
|
||||
// Returns a demangled C++ symbol name. If Name is not a mangled
|
||||
|
48
llvm/include/llvm/Support/GlobPattern.h
Normal file
48
llvm/include/llvm/Support/GlobPattern.h
Normal file
@ -0,0 +1,48 @@
|
||||
//===-- GlobPattern.h - glob pattern matcher implementation -*- C++ -*-----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements a glob pattern matcher. The glob pattern is the
|
||||
// rule used by the shell.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_SUPPORT_GLOB_PATTERN_H
|
||||
#define LLVM_SUPPORT_GLOB_PATTERN_H
|
||||
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include <vector>
|
||||
|
||||
// This class represents a glob pattern. Supported metacharacters
|
||||
// are "*", "?", "[<chars>]" and "[^<chars>]".
|
||||
namespace llvm {
|
||||
class BitVector;
|
||||
template <typename T> class ArrayRef;
|
||||
|
||||
class GlobPattern {
|
||||
public:
|
||||
static Expected<GlobPattern> create(StringRef Pat);
|
||||
bool match(StringRef S) const;
|
||||
|
||||
private:
|
||||
bool matchOne(ArrayRef<BitVector> Pat, StringRef S) const;
|
||||
|
||||
// Parsed glob pattern.
|
||||
std::vector<BitVector> Tokens;
|
||||
|
||||
// The following members are for optimization.
|
||||
Optional<StringRef> Exact;
|
||||
Optional<StringRef> Prefix;
|
||||
Optional<StringRef> Suffix;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // LLVM_SUPPORT_GLOB_PATTERN_H
|
@ -56,6 +56,7 @@ add_llvm_library(LLVMSupport
|
||||
FoldingSet.cpp
|
||||
FormattedStream.cpp
|
||||
FormatVariadic.cpp
|
||||
GlobPattern.cpp
|
||||
GraphWriter.cpp
|
||||
Hashing.cpp
|
||||
IntEqClasses.cpp
|
||||
|
167
llvm/lib/Support/GlobPattern.cpp
Normal file
167
llvm/lib/Support/GlobPattern.cpp
Normal file
@ -0,0 +1,167 @@
|
||||
//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements a glob pattern matcher.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Support/GlobPattern.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/Errc.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static bool hasWildcard(StringRef S) {
|
||||
return S.find_first_of("?*[") != StringRef::npos;
|
||||
}
|
||||
|
||||
// Expands character ranges and returns a bitmap.
|
||||
// For example, "a-cf-hz" is expanded to "abcfghz".
|
||||
static Expected<BitVector> expand(StringRef S, StringRef Original) {
|
||||
BitVector BV(256, false);
|
||||
|
||||
// Expand X-Y.
|
||||
for (;;) {
|
||||
if (S.size() < 3)
|
||||
break;
|
||||
|
||||
// If it doesn't start with something like X-Y,
|
||||
// consume the first character and proceed.
|
||||
if (S[1] != '-') {
|
||||
BV[S[0]] = true;
|
||||
S = S.substr(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
// It must be in the form of X-Y.
|
||||
// Validate it and then interpret the range.
|
||||
if (S[0] > S[2])
|
||||
return make_error<StringError>("invalid glob pattern: " + Original,
|
||||
errc::invalid_argument);
|
||||
|
||||
for (int C = S[0]; C <= S[2]; ++C)
|
||||
BV[C] = true;
|
||||
S = S.substr(3);
|
||||
}
|
||||
|
||||
for (char C : S)
|
||||
BV[C] = true;
|
||||
return BV;
|
||||
}
|
||||
|
||||
// This is a scanner for the glob pattern.
|
||||
// A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
|
||||
// (which is a negative form of "[<chars>]"), or a non-meta character.
|
||||
// This function returns the first token in S.
|
||||
static Expected<BitVector> scan(StringRef &S, StringRef Original) {
|
||||
switch (S[0]) {
|
||||
case '*':
|
||||
S = S.substr(1);
|
||||
// '*' is represented by an empty bitvector.
|
||||
// All other bitvectors are 256-bit long.
|
||||
return BitVector();
|
||||
case '?':
|
||||
S = S.substr(1);
|
||||
return BitVector(256, true);
|
||||
case '[': {
|
||||
size_t End = S.find(']', 1);
|
||||
if (End == StringRef::npos)
|
||||
return make_error<StringError>("invalid glob pattern: " + Original,
|
||||
errc::invalid_argument);
|
||||
|
||||
StringRef Chars = S.substr(1, End - 1);
|
||||
S = S.substr(End + 1);
|
||||
if (Chars.startswith("^")) {
|
||||
Expected<BitVector> BV = expand(Chars.substr(1), Original);
|
||||
if (!BV)
|
||||
return BV.takeError();
|
||||
return BV->flip();
|
||||
}
|
||||
return expand(Chars, Original);
|
||||
}
|
||||
default:
|
||||
BitVector BV(256, false);
|
||||
BV[S[0]] = true;
|
||||
S = S.substr(1);
|
||||
return BV;
|
||||
}
|
||||
}
|
||||
|
||||
Expected<GlobPattern> GlobPattern::create(StringRef S) {
|
||||
GlobPattern Pat;
|
||||
|
||||
// S doesn't contain any metacharacter,
|
||||
// so the regular string comparison should work.
|
||||
if (!hasWildcard(S)) {
|
||||
Pat.Exact = S;
|
||||
return Pat;
|
||||
}
|
||||
|
||||
// S is something like "foo*". We can use startswith().
|
||||
if (S.endswith("*") && !hasWildcard(S.drop_back())) {
|
||||
Pat.Prefix = S.drop_back();
|
||||
return Pat;
|
||||
}
|
||||
|
||||
// S is something like "*foo". We can use endswith().
|
||||
if (S.startswith("*") && !hasWildcard(S.drop_front())) {
|
||||
Pat.Suffix = S.drop_front();
|
||||
return Pat;
|
||||
}
|
||||
|
||||
// Otherwise, we need to do real glob pattern matching.
|
||||
// Parse the pattern now.
|
||||
StringRef Original = S;
|
||||
while (!S.empty()) {
|
||||
Expected<BitVector> BV = scan(S, Original);
|
||||
if (!BV)
|
||||
return BV.takeError();
|
||||
Pat.Tokens.push_back(*BV);
|
||||
}
|
||||
return Pat;
|
||||
}
|
||||
|
||||
bool GlobPattern::match(StringRef S) const {
|
||||
if (Exact)
|
||||
return S == *Exact;
|
||||
if (Prefix)
|
||||
return S.startswith(*Prefix);
|
||||
if (Suffix)
|
||||
return S.endswith(*Suffix);
|
||||
return matchOne(Tokens, S);
|
||||
}
|
||||
|
||||
// Runs glob pattern Pats against string S.
|
||||
bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
|
||||
for (;;) {
|
||||
if (Pats.empty())
|
||||
return S.empty();
|
||||
|
||||
// If Pats[0] is '*', try to match Pats[1..] against all possible
|
||||
// tail strings of S to see at least one pattern succeeds.
|
||||
if (Pats[0].size() == 0) {
|
||||
Pats = Pats.slice(1);
|
||||
if (Pats.empty())
|
||||
// Fast path. If a pattern is '*', it matches anything.
|
||||
return true;
|
||||
for (size_t I = 0, E = S.size(); I < E; ++I)
|
||||
if (matchOne(Pats, S.substr(I)))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// If Pats[0] is not '*', it must consume one character.
|
||||
if (S.empty() || !Pats[0][S[0]])
|
||||
return false;
|
||||
Pats = Pats.slice(1);
|
||||
S = S.substr(1);
|
||||
}
|
||||
}
|
@ -17,10 +17,11 @@ add_llvm_unittest(SupportTests
|
||||
DwarfTest.cpp
|
||||
EndianStreamTest.cpp
|
||||
EndianTest.cpp
|
||||
ErrorTest.cpp
|
||||
ErrorOrTest.cpp
|
||||
ErrorTest.cpp
|
||||
FileOutputBufferTest.cpp
|
||||
FormatVariadicTest.cpp
|
||||
GlobPatternTest.cpp
|
||||
Host.cpp
|
||||
LEB128Test.cpp
|
||||
LineIteratorTest.cpp
|
||||
|
70
llvm/unittests/Support/GlobPatternTest.cpp
Normal file
70
llvm/unittests/Support/GlobPatternTest.cpp
Normal file
@ -0,0 +1,70 @@
|
||||
//===- llvm/unittest/Support/GlobPatternTest.cpp --------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Support/GlobPattern.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace llvm;
|
||||
namespace {
|
||||
|
||||
class GlobPatternTest : public ::testing::Test {};
|
||||
|
||||
TEST_F(GlobPatternTest, Basics) {
|
||||
Expected<GlobPattern> Pat1 = GlobPattern::create("");
|
||||
EXPECT_TRUE((bool)Pat1);
|
||||
EXPECT_TRUE(Pat1->match(""));
|
||||
EXPECT_FALSE(Pat1->match("a"));
|
||||
|
||||
Expected<GlobPattern> Pat2 = GlobPattern::create("ab*c*def");
|
||||
EXPECT_TRUE((bool)Pat2);
|
||||
EXPECT_TRUE(Pat2->match("abcdef"));
|
||||
EXPECT_TRUE(Pat2->match("abxcxdef"));
|
||||
EXPECT_FALSE(Pat2->match(""));
|
||||
EXPECT_FALSE(Pat2->match("xabcdef"));
|
||||
EXPECT_FALSE(Pat2->match("abcdefx"));
|
||||
|
||||
Expected<GlobPattern> Pat3 = GlobPattern::create("a??c");
|
||||
EXPECT_TRUE((bool)Pat3);
|
||||
EXPECT_TRUE(Pat3->match("axxc"));
|
||||
EXPECT_FALSE(Pat3->match("axxx"));
|
||||
EXPECT_FALSE(Pat3->match(""));
|
||||
|
||||
Expected<GlobPattern> Pat4 = GlobPattern::create("[abc-fy-z]");
|
||||
EXPECT_TRUE((bool)Pat4);
|
||||
EXPECT_TRUE(Pat4->match("a"));
|
||||
EXPECT_TRUE(Pat4->match("b"));
|
||||
EXPECT_TRUE(Pat4->match("c"));
|
||||
EXPECT_TRUE(Pat4->match("d"));
|
||||
EXPECT_TRUE(Pat4->match("e"));
|
||||
EXPECT_TRUE(Pat4->match("f"));
|
||||
EXPECT_TRUE(Pat4->match("y"));
|
||||
EXPECT_TRUE(Pat4->match("z"));
|
||||
EXPECT_FALSE(Pat4->match("g"));
|
||||
EXPECT_FALSE(Pat4->match(""));
|
||||
|
||||
Expected<GlobPattern> Pat5 = GlobPattern::create("[^abc-fy-z]");
|
||||
EXPECT_TRUE((bool)Pat5);
|
||||
EXPECT_TRUE(Pat5->match("g"));
|
||||
EXPECT_FALSE(Pat5->match("a"));
|
||||
EXPECT_FALSE(Pat5->match("b"));
|
||||
EXPECT_FALSE(Pat5->match("c"));
|
||||
EXPECT_FALSE(Pat5->match("d"));
|
||||
EXPECT_FALSE(Pat5->match("e"));
|
||||
EXPECT_FALSE(Pat5->match("f"));
|
||||
EXPECT_FALSE(Pat5->match("y"));
|
||||
EXPECT_FALSE(Pat5->match("z"));
|
||||
EXPECT_FALSE(Pat5->match(""));
|
||||
}
|
||||
|
||||
TEST_F(GlobPatternTest, Invalid) {
|
||||
Expected<GlobPattern> Pat1 = GlobPattern::create("[");
|
||||
EXPECT_FALSE((bool)Pat1);
|
||||
handleAllErrors(Pat1.takeError(), [&](ErrorInfoBase &EIB) {});
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user