Avoid O(n*m) complexity in StringRef::find_first(_not)_of(StringRef).

- Cache used characters in a bitset to reduce memory overhead to just 32 bytes.
- On my core2 this code is faster except when the checked string was very short
  (smaller than the list of delimiters).

llvm-svn: 111817
This commit is contained in:
Benjamin Kramer 2010-08-23 18:16:08 +00:00
parent ee69c0112d
commit 926ba6058b
2 changed files with 15 additions and 6 deletions

View File

@ -238,7 +238,7 @@ namespace llvm {
/// find_first_of - Find the first character in the string that is in \arg
/// Chars, or npos if not found.
///
/// Note: O(size() * Chars.size())
/// Note: O(size() + Chars.size())
size_type find_first_of(StringRef Chars, size_t From = 0) const;
/// find_first_not_of - Find the first character in the string that is not
@ -248,7 +248,7 @@ namespace llvm {
/// find_first_not_of - Find the first character in the string that is not
/// in the string \arg Chars, or npos if not found.
///
/// Note: O(size() * Chars.size())
/// Note: O(size() + Chars.size())
size_type find_first_not_of(StringRef Chars, size_t From = 0) const;
/// @}

View File

@ -9,6 +9,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/APInt.h"
#include <bitset>
using namespace llvm;
@ -153,11 +154,15 @@ size_t StringRef::rfind(StringRef Str) const {
/// find_first_of - Find the first character in the string that is in \arg
/// Chars, or npos if not found.
///
/// Note: O(size() * Chars.size())
/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_first_of(StringRef Chars,
size_t From) const {
std::bitset<1 << CHAR_BIT> CharBits;
for (size_type i = 0; i != Chars.size(); ++i)
CharBits.set((unsigned char)Chars[i]);
for (size_type i = min(From, Length), e = Length; i != e; ++i)
if (Chars.find(Data[i]) != npos)
if (CharBits.test((unsigned char)Data[i]))
return i;
return npos;
}
@ -174,11 +179,15 @@ StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
/// find_first_not_of - Find the first character in the string that is not
/// in the string \arg Chars, or npos if not found.
///
/// Note: O(size() * Chars.size())
/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
size_t From) const {
std::bitset<1 << CHAR_BIT> CharBits;
for (size_type i = 0; i != Chars.size(); ++i)
CharBits.set((unsigned char)Chars[i]);
for (size_type i = min(From, Length), e = Length; i != e; ++i)
if (Chars.find(Data[i]) == npos)
if (!CharBits.test((unsigned char)Data[i]))
return i;
return npos;
}