UUID: Add support for arbitrary-sized module IDs

Summary: The data structure is optimized for the case where the UUID size is <= 20 bytes (standard length emitted by the GNU linkers), but larger sizes are also possible. I've modified the string conversion function to support the new sizes as well. For standard UUIDs it maintains the traditional formatting (4-2-2-2-6). If a UUID is shorter, we just cut this sequence short, and for longer UUIDs it will just repeat the last 6-byte block as long as necessary. I've also modified ObjectFileELF to take advantage of the new UUIDs and avoid manually padding the UUID to 16 bytes. While there, I also made sure the computed UUID does not depend on host endianness. Reviewers: clayborg, lemo, sas, davide, espindola Subscribers: emaste, arichardson, lldb-commits Differential Revision: https://reviews.llvm.org/D48633 llvm-svn: 335963
2024-11-24 06:10:12 +00:00 · 2018-06-29 11:20:29 +00:00 · 2018-06-29 11:20:29 +00:00 · 77c397f465
commit 77c397f465
parent 3b599d75d5
5 changed files with 96 additions and 127 deletions
--- a/lldb/include/lldb/Utility/UUID.h
+++ b/lldb/include/lldb/Utility/UUID.h
@ -27,12 +27,6 @@ namespace lldb_private {

 class UUID {
 public:
-  // Most UUIDs are 16 bytes, but some Linux build-ids (SHA1) are 20.
-  typedef uint8_t ValueType[20];
-
-  //------------------------------------------------------------------
-  // Constructors and Destructors
-  //------------------------------------------------------------------
  UUID() = default;

  /// Creates a UUID from the data pointed to by the bytes argument. No special
@ -64,18 +58,16 @@ public:
    return UUID(bytes);
  }

-  void Clear() { m_num_uuid_bytes = 0; }
+  void Clear() { m_bytes.clear(); }

  void Dump(Stream *s) const;

-  llvm::ArrayRef<uint8_t> GetBytes() const {
-    return {m_uuid, m_num_uuid_bytes};
-  }
+  llvm::ArrayRef<uint8_t> GetBytes() const { return m_bytes; }

  explicit operator bool() const { return IsValid(); }
-  bool IsValid() const { return m_num_uuid_bytes > 0; }
+  bool IsValid() const { return !m_bytes.empty(); }

-  std::string GetAsString(const char *separator = nullptr) const;
+  std::string GetAsString(llvm::StringRef separator = "-") const;

  size_t SetFromStringRef(llvm::StringRef str, uint32_t num_uuid_bytes = 16);

@ -99,24 +91,34 @@ public:
  ///     The original string, with all decoded bytes removed.
  //------------------------------------------------------------------
  static llvm::StringRef
-  DecodeUUIDBytesFromString(llvm::StringRef str, ValueType &uuid_bytes,
-                            uint32_t &bytes_decoded,
+  DecodeUUIDBytesFromString(llvm::StringRef str,
+                            llvm::SmallVectorImpl<uint8_t> &uuid_bytes,
                            uint32_t num_uuid_bytes = 16);

 private:
-  UUID(llvm::ArrayRef<uint8_t> bytes);
+  UUID(llvm::ArrayRef<uint8_t> bytes) : m_bytes(bytes.begin(), bytes.end()) {}

-  uint32_t m_num_uuid_bytes = 0; // Should be 0, 16 or 20
-  ValueType m_uuid;
+  // GNU ld generates 20-byte build-ids. Size chosen to avoid heap allocations
+  // for this case.
+  llvm::SmallVector<uint8_t, 20> m_bytes;
+
+  friend bool operator==(const UUID &LHS, const UUID &RHS) {
+    return LHS.m_bytes == RHS.m_bytes;
+  }
+  friend bool operator!=(const UUID &LHS, const UUID &RHS) {
+    return !(LHS == RHS);
+  }
+  friend bool operator<(const UUID &LHS, const UUID &RHS) {
+    return LHS.m_bytes < RHS.m_bytes;
+  }
+  friend bool operator<=(const UUID &LHS, const UUID &RHS) {
+    return !(RHS < LHS);
+  }
+  friend bool operator>(const UUID &LHS, const UUID &RHS) { return RHS < LHS; }
+  friend bool operator>=(const UUID &LHS, const UUID &RHS) {
+    return !(LHS < RHS);
+  }
 };
-
-bool operator==(const UUID &lhs, const UUID &rhs);
-bool operator!=(const UUID &lhs, const UUID &rhs);
-bool operator<(const UUID &lhs, const UUID &rhs);
-bool operator<=(const UUID &lhs, const UUID &rhs);
-bool operator>(const UUID &lhs, const UUID &rhs);
-bool operator>=(const UUID &lhs, const UUID &rhs);
-
 } // namespace lldb_private

 #endif // LLDB_UTILITY_UUID_H
--- a/lldb/source/Interpreter/OptionValueUUID.cpp
+++ b/lldb/source/Interpreter/OptionValueUUID.cpp
@ -76,21 +76,17 @@ size_t OptionValueUUID::AutoComplete(CommandInterpreter &interpreter,
  ExecutionContext exe_ctx(interpreter.GetExecutionContext());
  Target *target = exe_ctx.GetTargetPtr();
  if (target) {
-    const size_t num_modules = target->GetImages().GetSize();
-    if (num_modules > 0) {
-      UUID::ValueType uuid_bytes;
-      uint32_t num_bytes_decoded = 0;
-      UUID::DecodeUUIDBytesFromString(s, uuid_bytes, num_bytes_decoded);
+    llvm::SmallVector<uint8_t, 20> uuid_bytes;
+    if (UUID::DecodeUUIDBytesFromString(s, uuid_bytes).empty()) {
+      const size_t num_modules = target->GetImages().GetSize();
      for (size_t i = 0; i < num_modules; ++i) {
        ModuleSP module_sp(target->GetImages().GetModuleAtIndex(i));
        if (module_sp) {
          const UUID &module_uuid = module_sp->GetUUID();
          if (module_uuid.IsValid()) {
-            llvm::ArrayRef<uint8_t> decoded_bytes(uuid_bytes,
-                                                  num_bytes_decoded);
            llvm::ArrayRef<uint8_t> module_bytes = module_uuid.GetBytes();
-            if (module_bytes.size() >= num_bytes_decoded &&
-                module_bytes.take_front(num_bytes_decoded) == decoded_bytes) {
+            if (module_bytes.size() >= uuid_bytes.size() &&
+                module_bytes.take_front(uuid_bytes.size()).equals(uuid_bytes)) {
              matches.AppendString(module_uuid.GetAsString());
            }
          }
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@ -730,16 +730,17 @@ size_t ObjectFileELF::GetModuleSpecifications(
                    data.GetDataStart(), data.GetByteSize());
              }
            }
+            using u32le = llvm::support::ulittle32_t;
            if (gnu_debuglink_crc) {
              // Use 4 bytes of crc from the .gnu_debuglink section.
-              uint32_t uuidt[4] = {gnu_debuglink_crc, 0, 0, 0};
-              uuid = UUID::fromData(uuidt, sizeof(uuidt));
+              u32le data(gnu_debuglink_crc);
+              uuid = UUID::fromData(&data, sizeof(data));
            } else if (core_notes_crc) {
              // Use 8 bytes - first 4 bytes for *magic* prefix, mainly to make
              // it look different form .gnu_debuglink crc followed by 4 bytes
              // of note segments crc.
-              uint32_t uuidt[4] = {g_core_uuid_magic, core_notes_crc, 0, 0};
-              uuid = UUID::fromData(uuidt, sizeof(uuidt));
+              u32le data[] = {u32le(g_core_uuid_magic), u32le(core_notes_crc)};
+              uuid = UUID::fromData(data, sizeof(data));
            }
          }

@ -909,6 +910,7 @@ bool ObjectFileELF::GetUUID(lldb_private::UUID *uuid) {
  if (!ParseSectionHeaders() && GetType() != ObjectFile::eTypeCoreFile)
    return false;

+  using u32le = llvm::support::ulittle32_t;
  if (m_uuid.IsValid()) {
    // We have the full build id uuid.
    *uuid = m_uuid;
@ -925,8 +927,8 @@ bool ObjectFileELF::GetUUID(lldb_private::UUID *uuid) {
      // Use 8 bytes - first 4 bytes for *magic* prefix, mainly to make it look
      // different form .gnu_debuglink crc - followed by 4 bytes of note
      // segments crc.
-      uint32_t uuidt[4] = {g_core_uuid_magic, core_notes_crc, 0, 0};
-      m_uuid = UUID::fromData(uuidt, sizeof(uuidt));
+      u32le data[] = {u32le(g_core_uuid_magic), u32le(core_notes_crc)};
+      m_uuid = UUID::fromData(data, sizeof(data));
    }
  } else {
    if (!m_gnu_debuglink_crc)
@ -934,8 +936,8 @@ bool ObjectFileELF::GetUUID(lldb_private::UUID *uuid) {
          calc_gnu_debuglink_crc32(m_data.GetDataStart(), m_data.GetByteSize());
    if (m_gnu_debuglink_crc) {
      // Use 4 bytes of crc from the .gnu_debuglink section.
-      uint32_t uuidt[4] = {m_gnu_debuglink_crc, 0, 0, 0};
-      m_uuid = UUID::fromData(uuidt, sizeof(uuidt));
+      u32le data(m_gnu_debuglink_crc);
+      m_uuid = UUID::fromData(&data, sizeof(data));
    }
  }

@ -1273,18 +1275,16 @@ ObjectFileELF::RefineModuleDetailsFromNote(lldb_private::DataExtractor &data,
        // Only bother processing this if we don't already have the uuid set.
        if (!uuid.IsValid()) {
          // 16 bytes is UUID|MD5, 20 bytes is SHA1. Other linkers may produce a
-          // build-id of a different
-          // length. Accept it as long as it's at least 4 bytes as it will be
-          // better than our own crc32.
-          if (note.n_descsz >= 4 && note.n_descsz <= 20) {
-            uint8_t uuidbuf[20];
-            if (data.GetU8(&offset, &uuidbuf, note.n_descsz) == nullptr) {
+          // build-id of a different length. Accept it as long as it's at least
+          // 4 bytes as it will be better than our own crc32.
+          if (note.n_descsz >= 4) {
+            if (const uint8_t *buf = data.PeekData(offset, note.n_descsz)) {
+              // Save the build id as the UUID for the module.
+              uuid = UUID::fromData(buf, note.n_descsz);
+            } else {
              error.SetErrorString("failed to read GNU_BUILD_ID note payload");
              return error;
            }
-
-            // Save the build id as the UUID for the module.
-            uuid = UUID::fromData(uuidbuf, note.n_descsz);
          }
        }
        break;
--- a/lldb/source/Utility/UUID.cpp
+++ b/lldb/source/Utility/UUID.cpp
@ -13,6 +13,7 @@
 // Project includes
 #include "lldb/Utility/Stream.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Format.h"

 // C Includes
 #include <ctype.h>
@ -21,35 +22,35 @@

 using namespace lldb_private;

-UUID::UUID(llvm::ArrayRef<uint8_t> bytes) {
-  if (bytes.size() != 20 && bytes.size() != 16)
-    bytes = {};
+// Whether to put a separator after count uuid bytes.
+// For the first 16 bytes we follow the traditional UUID format. After that, we
+// simply put a dash after every 6 bytes.
+static inline bool separate(size_t count) {
+  if (count >= 10)
+    return (count - 10) % 6 == 0;

-  m_num_uuid_bytes = bytes.size();
-  std::memcpy(m_uuid, bytes.data(), bytes.size());
+  switch (count) {
+  case 4:
+  case 6:
+  case 8:
+    return true;
+  default:
+    return false;
+  }
 }

-std::string UUID::GetAsString(const char *separator) const {
+std::string UUID::GetAsString(llvm::StringRef separator) const {
  std::string result;
-  char buf[256];
-  if (!separator)
-    separator = "-";
-  const uint8_t *u = GetBytes().data();
-  if (sizeof(buf) >
-      (size_t)snprintf(buf, sizeof(buf), "%2.2X%2.2X%2.2X%2.2X%s%2.2X%2.2X%s%2."
-                                         "2X%2.2X%s%2.2X%2.2X%s%2.2X%2.2X%2.2X%"
-                                         "2.2X%2.2X%2.2X",
-                       u[0], u[1], u[2], u[3], separator, u[4], u[5], separator,
-                       u[6], u[7], separator, u[8], u[9], separator, u[10],
-                       u[11], u[12], u[13], u[14], u[15])) {
-    result.append(buf);
-    if (m_num_uuid_bytes == 20) {
-      if (sizeof(buf) > (size_t)snprintf(buf, sizeof(buf),
-                                         "%s%2.2X%2.2X%2.2X%2.2X", separator,
-                                         u[16], u[17], u[18], u[19]))
-        result.append(buf);
-    }
+  llvm::raw_string_ostream os(result);
+
+  for (auto B : llvm::enumerate(GetBytes())) {
+    if (separate(B.index()))
+      os << separator;
+
+    os << llvm::format_hex_no_prefix(B.value(), 2, true);
  }
+  os.flush();
+
  return result;
 }

@ -64,25 +65,24 @@ static inline int xdigit_to_int(char ch) {
  return ch - '0';
 }

-llvm::StringRef UUID::DecodeUUIDBytesFromString(llvm::StringRef p,
-                                                ValueType &uuid_bytes,
-                                                uint32_t &bytes_decoded,
-                                                uint32_t num_uuid_bytes) {
-  ::memset(uuid_bytes, 0, sizeof(uuid_bytes));
-  size_t uuid_byte_idx = 0;
+llvm::StringRef
+UUID::DecodeUUIDBytesFromString(llvm::StringRef p,
+                                llvm::SmallVectorImpl<uint8_t> &uuid_bytes,
+                                uint32_t num_uuid_bytes) {
+  uuid_bytes.clear();
  while (!p.empty()) {
    if (isxdigit(p[0]) && isxdigit(p[1])) {
      int hi_nibble = xdigit_to_int(p[0]);
      int lo_nibble = xdigit_to_int(p[1]);
      // Translate the two hex nibble characters into a byte
-      uuid_bytes[uuid_byte_idx] = (hi_nibble << 4) + lo_nibble;
+      uuid_bytes.push_back((hi_nibble << 4) + lo_nibble);

      // Skip both hex digits
      p = p.drop_front(2);

      // Increment the byte that we are decoding within the UUID value and
      // break out if we are done
-      if (++uuid_byte_idx == num_uuid_bytes)
+      if (uuid_bytes.size() == num_uuid_bytes)
        break;
    } else if (p.front() == '-') {
      // Skip dashes
@ -92,11 +92,6 @@ llvm::StringRef UUID::DecodeUUIDBytesFromString(llvm::StringRef p,
      break;
    }
  }
-
-  // Clear trailing bytes to 0.
-  for (uint32_t i = uuid_byte_idx; i < sizeof(ValueType); i++)
-    uuid_bytes[i] = 0;
-  bytes_decoded = uuid_byte_idx;
  return p;
 }

@ -106,52 +101,17 @@ size_t UUID::SetFromStringRef(llvm::StringRef str, uint32_t num_uuid_bytes) {
  // Skip leading whitespace characters
  p = p.ltrim();

-  ValueType bytes;
-  uint32_t bytes_decoded = 0;
+  llvm::SmallVector<uint8_t, 20> bytes;
  llvm::StringRef rest =
-      UUID::DecodeUUIDBytesFromString(p, bytes, bytes_decoded, num_uuid_bytes);
+      UUID::DecodeUUIDBytesFromString(p, bytes, num_uuid_bytes);

  // If we successfully decoded a UUID, return the amount of characters that
  // were consumed
-  if (bytes_decoded == num_uuid_bytes) {
-    *this = fromData(bytes, bytes_decoded);
+  if (bytes.size() == num_uuid_bytes) {
+    *this = fromData(bytes);
    return str.size() - rest.size();
  }

  // Else return zero to indicate we were not able to parse a UUID value
  return 0;
 }
-
-bool lldb_private::operator==(const lldb_private::UUID &lhs,
-                              const lldb_private::UUID &rhs) {
-  return lhs.GetBytes() == rhs.GetBytes();
-}
-
-bool lldb_private::operator!=(const lldb_private::UUID &lhs,
-                              const lldb_private::UUID &rhs) {
-  return !(lhs == rhs);
-}
-
-bool lldb_private::operator<(const lldb_private::UUID &lhs,
-                             const lldb_private::UUID &rhs) {
-  if (lhs.GetBytes().size() != rhs.GetBytes().size())
-    return lhs.GetBytes().size() < rhs.GetBytes().size();
-
-  return std::memcmp(lhs.GetBytes().data(), rhs.GetBytes().data(),
-                     lhs.GetBytes().size());
-}
-
-bool lldb_private::operator<=(const lldb_private::UUID &lhs,
-                              const lldb_private::UUID &rhs) {
-  return !(lhs > rhs);
-}
-
-bool lldb_private::operator>(const lldb_private::UUID &lhs,
-                             const lldb_private::UUID &rhs) {
-  return rhs < lhs;
-}
-
-bool lldb_private::operator>=(const lldb_private::UUID &lhs,
-                              const lldb_private::UUID &rhs) {
-  return !(lhs < rhs);
-}
--- a/lldb/unittests/Utility/UUIDTest.cpp
+++ b/lldb/unittests/Utility/UUIDTest.cpp
@ -71,3 +71,14 @@ TEST(UUIDTest, SetFromStringRef) {
      32u, u.SetFromStringRef("404142434445464748494a4b4c4d4e4f-50515253", 16));
  EXPECT_EQ(UUID::fromData("@ABCDEFGHIJKLMNO", 16), u);
 }
+
+TEST(UUIDTest, StringConverion) {
+  EXPECT_EQ("40414243", UUID::fromData("@ABC", 4).GetAsString());
+  EXPECT_EQ("40414243-4445-4647", UUID::fromData("@ABCDEFG", 8).GetAsString());
+  EXPECT_EQ("40414243-4445-4647-4849-4A4B",
+            UUID::fromData("@ABCDEFGHIJK", 12).GetAsString());
+  EXPECT_EQ("40414243-4445-4647-4849-4A4B4C4D4E4F",
+            UUID::fromData("@ABCDEFGHIJKLMNO", 16).GetAsString());
+  EXPECT_EQ("40414243-4445-4647-4849-4A4B4C4D4E4F-50515253",
+            UUID::fromData("@ABCDEFGHIJKLMNOPQRS", 20).GetAsString());
+}