[lld-macho] Have dead-stripping work with literal sections

Literal sections are not atomically live or dead. Rather, liveness is tracked for each individual literal they contain. CStrings have their liveness tracked via a `live` bit in StringPiece, and fixed-width literals have theirs tracked via a BitVector.

The live-marking code now needs to track the offset within each section that is to be marked live, in order to identify the literal at that particular offset.

Numbers for linking chromium_framework on my 3.2 GHz 16-Core Intel Xeon W with both `-dead_strip` and `--deduplicate-literals`, with and without this diff applied:

```
    N           Min           Max        Median           Avg        Stddev
x  20          4.32          4.44         4.375         4.372    0.03105174
+  20           4.3          4.39          4.36        4.3595   0.023277502
No difference proven at 95.0% confidence
```

This gives us size savings of about 0.4%.

Reviewed By: #lld-macho, thakis

Differential Revision: https://reviews.llvm.org/D103979
2024-11-23 22:00:10 +00:00 · 2021-06-11 19:49:54 -04:00 · 2021-06-11 19:49:54 -04:00 · 464d3dc3d1
commit 464d3dc3d1
parent 681cfeb591
5 changed files with 122 additions and 26 deletions
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@ -108,7 +108,7 @@ void CStringInputSection::splitIntoPieces() {
  }
 }

-const StringPiece &CStringInputSection::getStringPiece(uint64_t off) const {
+StringPiece &CStringInputSection::getStringPiece(uint64_t off) {
  if (off >= data.size())
    fatal(toString(this) + ": offset is outside the section");

@ -117,6 +117,10 @@ const StringPiece &CStringInputSection::getStringPiece(uint64_t off) const {
  return it[-1];
 }

+const StringPiece &CStringInputSection::getStringPiece(uint64_t off) const {
+  return const_cast<CStringInputSection *>(this)->getStringPiece(off);
+}
+
 uint64_t CStringInputSection::getFileOffset(uint64_t off) const {
  return parent->fileOff + getOffset(off);
 }
@ -132,7 +136,23 @@ WordLiteralInputSection::WordLiteralInputSection(StringRef segname,
                                                 InputFile *file,
                                                 ArrayRef<uint8_t> data,
                                                 uint32_t align, uint32_t flags)
-    : InputSection(WordLiteralKind, segname, name, file, data, align, flags) {}
+    : InputSection(WordLiteralKind, segname, name, file, data, align, flags) {
+  switch (sectionType(flags)) {
+  case S_4BYTE_LITERALS:
+    power2LiteralSize = 2;
+    break;
+  case S_8BYTE_LITERALS:
+    power2LiteralSize = 3;
+    break;
+  case S_16BYTE_LITERALS:
+    power2LiteralSize = 4;
+    break;
+  default:
+    llvm_unreachable("invalid literal section type");
+  }
+
+  live.resize(data.size() >> power2LiteralSize, !config->deadStrip);
+}

 uint64_t WordLiteralInputSection::getFileOffset(uint64_t off) const {
  return parent->fileOff + getOffset(off);
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@ -14,6 +14,7 @@

 #include "lld/Common/LLVM.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/BinaryFormat/MachO.h"

@ -43,6 +44,7 @@ public:
  uint64_t getVA(uint64_t off) const;
  // Whether the data at \p off in this InputSection is live.
  virtual bool isLive(uint64_t off) const = 0;
+  virtual void markLive(uint64_t off) = 0;

  void writeTo(uint8_t *buf);

@ -91,6 +93,7 @@ public:
  uint64_t getVA() const { return InputSection::getVA(0); }
  // ConcatInputSections are entirely live or dead, so the offset is irrelevant.
  bool isLive(uint64_t off) const override { return live; }
+  void markLive(uint64_t off) override { live = true; }
  bool isCoalescedWeak() const { return wasCoalesced && numRefs == 0; }
  bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }

@ -112,17 +115,21 @@ public:
 };

 // We allocate a lot of these and binary search on them, so they should be as
-// compact as possible. Hence the use of 32 rather than 64 bits for the hash.
+// compact as possible. Hence the use of 31 rather than 64 bits for the hash.
 struct StringPiece {
  // Offset from the start of the containing input section.
  uint32_t inSecOff;
-  uint32_t hash;
+  uint32_t live : 1;
+  uint32_t hash : 31;
  // Offset from the start of the containing output section.
  uint64_t outSecOff = 0;

-  StringPiece(uint64_t off, uint32_t hash) : inSecOff(off), hash(hash) {}
+  StringPiece(uint64_t off, uint32_t hash)
+      : inSecOff(off), live(!config->deadStrip), hash(hash) {}
 };

+static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!");
+
 // CStringInputSections are composed of multiple null-terminated string
 // literals, which we represent using StringPieces. These literals can be
 // deduplicated and tail-merged, so translating offsets between the input and
@ -141,9 +148,10 @@ public:
                     flags) {}
  uint64_t getFileOffset(uint64_t off) const override;
  uint64_t getOffset(uint64_t off) const override;
-  // FIXME implement this
-  bool isLive(uint64_t off) const override { return true; }
+  bool isLive(uint64_t off) const override { return getStringPiece(off).live; }
+  void markLive(uint64_t off) override { getStringPiece(off).live = true; }
  // Find the StringPiece that contains this offset.
+  StringPiece &getStringPiece(uint64_t off);
  const StringPiece &getStringPiece(uint64_t off) const;
  // Split at each null byte.
  void splitIntoPieces();
@ -172,12 +180,19 @@ public:
                          uint32_t flags);
  uint64_t getFileOffset(uint64_t off) const override;
  uint64_t getOffset(uint64_t off) const override;
-  // FIXME implement this
-  bool isLive(uint64_t off) const override { return true; }
+  bool isLive(uint64_t off) const override {
+    return live[off >> power2LiteralSize];
+  }
+  void markLive(uint64_t off) override { live[off >> power2LiteralSize] = 1; }

  static bool classof(const InputSection *isec) {
    return isec->kind() == WordLiteralKind;
  }
+
+private:
+  unsigned power2LiteralSize;
+  // The liveness of data[off] is tracked by live[off >> power2LiteralSize].
+  llvm::BitVector live;
 };

 inline uint8_t sectionType(uint32_t flags) {
--- a/lld/MachO/MarkLive.cpp
+++ b/lld/MachO/MarkLive.cpp
@ -34,12 +34,12 @@ void markLive() {
  // store ConcatInputSections in our worklist.
  SmallVector<ConcatInputSection *, 256> worklist;

-  auto enqueue = [&](InputSection *isec) {
+  auto enqueue = [&](InputSection *isec, uint64_t off) {
+    if (isec->isLive(off))
+      return;
+    isec->markLive(off);
    if (auto s = dyn_cast<ConcatInputSection>(isec)) {
      assert(!s->isCoalescedWeak());
-      if (s->live)
-        return;
-      s->live = true;
      worklist.push_back(s);
    }
  };
@ -48,7 +48,7 @@ void markLive() {
    s->used = true;
    if (auto *d = dyn_cast<Defined>(s))
      if (d->isec)
-        enqueue(d->isec);
+        enqueue(d->isec, d->value);
  };

  // Add GC roots.
@ -104,14 +104,16 @@ void markLive() {
  for (InputSection *isec : inputSections) {
    // Sections marked no_dead_strip
    if (isec->flags & S_ATTR_NO_DEAD_STRIP) {
-      enqueue(isec);
+      assert(isa<ConcatInputSection>(isec));
+      enqueue(isec, 0);
      continue;
    }

    // mod_init_funcs, mod_term_funcs sections
    if (sectionType(isec->flags) == S_MOD_INIT_FUNC_POINTERS ||
        sectionType(isec->flags) == S_MOD_TERM_FUNC_POINTERS) {
-      enqueue(isec);
+      assert(isa<ConcatInputSection>(isec));
+      enqueue(isec, 0);
      continue;
    }

@ -138,7 +140,7 @@ void markLive() {
        if (auto *s = r.referent.dyn_cast<Symbol *>())
          addSym(s);
        else
-          enqueue(r.referent.get<InputSection *>());
+          enqueue(r.referent.get<InputSection *>(), r.addend);
      }
      continue;
    }
@ -155,7 +157,7 @@ void markLive() {
        if (auto *s = r.referent.dyn_cast<Symbol *>())
          addSym(s);
        else
-          enqueue(r.referent.get<InputSection *>());
+          enqueue(r.referent.get<InputSection *>(), r.addend);
      }
    }

@ -177,7 +179,7 @@ void markLive() {
        else
          referentLive = r.referent.get<InputSection *>()->isLive(r.addend);
        if (referentLive)
-          enqueue(isec);
+          enqueue(isec, 0);
      }
    }

--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@ -1121,6 +1121,7 @@ void CStringSection::finalize() {
  // contents.
  for (const CStringInputSection *isec : inputs)
    for (size_t i = 0, e = isec->pieces.size(); i != e; ++i)
+      if (isec->pieces[i].live)
        builder.add(isec->getCachedHashStringRef(i));

  // Fix the string table content. After this, the contents will never change.
@ -1131,6 +1132,8 @@ void CStringSection::finalize() {
  // to a corresponding SectionPiece for easy access.
  for (CStringInputSection *isec : inputs) {
    for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) {
+      if (!isec->pieces[i].live)
+        continue;
      isec->pieces[i].outSecOff =
          builder.getOffset(isec->getCachedHashStringRef(i));
      isec->isFinal = true;
@ -1155,22 +1158,28 @@ void WordLiteralSection::addInput(WordLiteralInputSection *isec) {
  const uint8_t *buf = isec->data.data();
  switch (sectionType(isec->flags)) {
  case S_4BYTE_LITERALS: {
-    for (size_t i = 0, e = isec->data.size() / 4; i < e; ++i) {
-      uint32_t value = *reinterpret_cast<const uint32_t *>(buf + i * 4);
+    for (size_t off = 0, e = isec->data.size(); off < e; off += 4) {
+      if (!isec->isLive(off))
+        continue;
+      uint32_t value = *reinterpret_cast<const uint32_t *>(buf + off);
      literal4Map.emplace(value, literal4Map.size());
    }
    break;
  }
  case S_8BYTE_LITERALS: {
-    for (size_t i = 0, e = isec->data.size() / 8; i < e; ++i) {
-      uint64_t value = *reinterpret_cast<const uint64_t *>(buf + i * 8);
+    for (size_t off = 0, e = isec->data.size(); off < e; off += 8) {
+      if (!isec->isLive(off))
+        continue;
+      uint64_t value = *reinterpret_cast<const uint64_t *>(buf + off);
      literal8Map.emplace(value, literal8Map.size());
    }
    break;
  }
  case S_16BYTE_LITERALS: {
-    for (size_t i = 0, e = isec->data.size() / 16; i < e; ++i) {
-      UInt128 value = *reinterpret_cast<const UInt128 *>(buf + i * 16);
+    for (size_t off = 0, e = isec->data.size(); off < e; off += 16) {
+      if (!isec->isLive(off))
+        continue;
+      UInt128 value = *reinterpret_cast<const UInt128 *>(buf + off);
      literal16Map.emplace(value, literal16Map.size());
    }
    break;
--- a/lld/test/MachO/dead-strip.s
+++ b/lld/test/MachO/dead-strip.s
@ -253,6 +253,20 @@
 # EXECSTABS:     N_FUN {{.*}} '_main'
 # EXECSTABS-NOT: N_FUN {{.*}} '_unref'

+# RUN: llvm-mc -g -filetype=obj -triple=x86_64-apple-macos \
+# RUN:     %t/literals.s -o %t/literals.o
+# RUN: %lld -dylib -dead_strip --deduplicate-literals %t/literals.o -o %t/literals
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --section="__DATA,str_ptrs" \
+# RUN:   --section="__TEXT,__literals" %t/literals | FileCheck %s --check-prefix=LIT
+
+# LIT:      Contents of (__TEXT,__cstring) section
+# LIT-NEXT: foobar
+# LIT-NEXT: Contents of (__DATA,str_ptrs) section
+# LIT-NEXT: __TEXT:__cstring:bar
+# LIT-NEXT: __TEXT:__cstring:bar
+# LIT-NEXT: Contents of (__TEXT,__literals) section
+# LIT-NEXT: ef be ad de {{$}}
+
 #--- basics.s
 .comm _ref_com, 1
 .comm _unref_com, 1
@ -736,3 +750,39 @@ _main:
  retq

 .subsections_via_symbols
+
+#--- literals.s
+.cstring
+_unref_foo:
+  .ascii "foo"
+_bar:
+Lbar:
+  .asciz "bar"
+_unref_baz:
+  .asciz "baz"
+
+.literal4
+.p2align 2
+L._foo4:
+  .long 0xdeadbeef
+L._bar4:
+  .long 0xdeadbeef
+L._unref:
+  .long 0xfeedface
+
+.section __DATA,str_ptrs,literal_pointers
+.globl _data
+_data:
+  .quad _bar
+  .quad Lbar
+
+## The output binary has these integer literals put into a section that isn't
+## marked with a S_*BYTE_LITERALS flag, so we don't mark word_ptrs with the
+## S_LITERAL_POINTERS flag in order not to confuse llvm-objdump.
+.section __DATA,word_ptrs
+.globl _more_data
+_more_data:
+  .quad L._foo4
+  .quad L._bar4
+
+.subsections_via_symbols