From 301af636d3e0202d1f4d557ede00c6d87946ac1e Mon Sep 17 00:00:00 2001 From: Douglas Gregor Date: Wed, 30 Dec 2009 17:23:44 +0000 Subject: [PATCH] Implement edit distance for StringRef llvm-svn: 92309 --- include/llvm/ADT/StringRef.h | 16 ++++++++++++++++ lib/Support/StringRef.cpp | 31 +++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h index a7442666d67..1c738369d75 100644 --- a/include/llvm/ADT/StringRef.h +++ b/include/llvm/ADT/StringRef.h @@ -133,6 +133,22 @@ namespace llvm { /// compare_lower - Compare two strings, ignoring case. int compare_lower(StringRef RHS) const; + /// \brief Determine the edit distance between this string and another + /// string. + /// + /// \param Other the string to compare this string against. + /// + /// \param AllowReplacements whether to allow character + /// replacements (change one character into another) as a single + /// operation, rather than as two operations (an insertion and a + /// removal). + /// + /// \returns the minimum number of character insertions, removals, + /// or (if \p AllowReplacements is \c true) replacements needed to + /// transform one of the given strings into the other. If zero, + /// the strings are identical. + unsigned edit_distance(StringRef Other, bool AllowReplacements = true); + /// str - Get the contents as an std::string. std::string str() const { return std::string(Data, Length); } diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index 2d023e4895d..9084ea6ece0 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringRef.h" +#include using namespace llvm; // MSVC emits references to this into the translation units which reference it. @@ -35,6 +36,36 @@ int StringRef::compare_lower(StringRef RHS) const { return Length < RHS.Length ? -1 : 1; } +/// \brief Compute the edit distance between the two given strings. +unsigned StringRef::edit_distance(llvm::StringRef Other, + bool AllowReplacements) { + size_type m = size(); + size_type n = Other.size(); + + std::vector previous(n+1, 0); + for (std::vector::size_type i = 0; i <= n; ++i) + previous[i] = i; + + std::vector current(n+1, 0); + for (size_type y = 1; y <= m; ++y) { + current.assign(n+1, 0); + current[0] = y; + for (size_type x = 1; x <= n; ++x) { + if (AllowReplacements) { + current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u), + min(current[x-1], previous[x])+1); + } + else { + if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1]; + else current[x] = min(current[x-1], previous[x]) + 1; + } + } + current.swap(previous); + } + + return previous[n]; +} + //===----------------------------------------------------------------------===// // String Searching //===----------------------------------------------------------------------===//