Support GNU-style ZLIB-compressed input sections.

Previously, we supported only SHF_COMPRESSED sections because that format is newer and is the ELF standard. But there are object files compressed in the GNU style out there, so we have to support that style as well. Sections compressed in the GNU style have names that start with ".zdebug_" and contain a different header from the ELF standard's one. In this patch, getRawCompressedData is responsible for handling it. A tricky thing about GNU-style compressed sections is that we have to rename them when creating output sections. The ".zdebug_" prefix implies that the section is compressed. We need to rename ".zdebug_" to ".debug_" because our output sections are not compressed. We do that in this patch. llvm-svn: 284068
2024-12-14 11:39:35 +00:00 · 2016-10-12 22:36:31 +00:00 · 2016-10-12 22:36:31 +00:00 · 05384080df
commit 05384080df
parent 3137c81e56
6 changed files with 143 additions and 70 deletions
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@ -23,6 +23,7 @@
 using namespace llvm;
 using namespace llvm::ELF;
 using namespace llvm::object;
+using namespace llvm::support;
 using namespace llvm::support::endian;

 using namespace lld;
@ -40,12 +41,19 @@ static ArrayRef<uint8_t> getSectionContents(elf::ObjectFile<ELFT> *File,
  return check(File->getObj().getSectionContents(Hdr));
 }

+// ELF supports ZLIB-compressed section. Returns true if the section
+// is compressed.
+template <class ELFT>
+static bool isCompressed(const typename ELFT::Shdr *Hdr, StringRef Name) {
+  return (Hdr->sh_flags & SHF_COMPRESSED) || Name.startswith(".zdebug");
+}
+
 template <class ELFT>
 InputSectionBase<ELFT>::InputSectionBase(elf::ObjectFile<ELFT> *File,
                                         const Elf_Shdr *Hdr, StringRef Name,
                                         Kind SectionKind)
    : InputSectionData(SectionKind, Name, getSectionContents(File, Hdr),
-                       Hdr->sh_flags & SHF_COMPRESSED, !Config->GcSections),
+                       isCompressed<ELFT>(Hdr, Name), !Config->GcSections),
      Header(Hdr), File(File), Repl(this) {
  // The ELF spec states that a value of 0 means the section has
  // no alignment constraits.
@ -100,30 +108,62 @@ typename ELFT::uint InputSectionBase<ELFT>::getOffset(uintX_t Offset) const {
  llvm_unreachable("invalid section kind");
 }

+// Returns compressed data and its size when uncompressed.
+template <class ELFT>
+std::pair<ArrayRef<uint8_t>, uint64_t>
+InputSectionBase<ELFT>::getElfCompressedData(ArrayRef<uint8_t> Data) {
+  // Compressed section with Elf_Chdr is the ELF standard.
+  if (Data.size() < sizeof(Elf_Chdr))
+    fatal(getName(this) + ": corrupted compressed section");
+  auto *Hdr = reinterpret_cast<const Elf_Chdr *>(Data.data());
+  if (Hdr->ch_type != ELFCOMPRESS_ZLIB)
+    fatal(getName(this) + ": unsupported compression type");
+  return {Data.slice(sizeof(*Hdr)), Hdr->ch_size};
+}
+
+// Returns compressed data and its size when uncompressed.
+template <class ELFT>
+std::pair<ArrayRef<uint8_t>, uint64_t>
+InputSectionBase<ELFT>::getRawCompressedData(ArrayRef<uint8_t> Data) {
+  // Compressed sections without Elf_Chdr header contain this header
+  // instead. This is a GNU extension.
+  struct ZlibHeader {
+    char magic[4]; // should be "ZLIB"
+    char Size[8];  // Uncompressed size in big-endian
+  };
+
+  if (Data.size() < sizeof(ZlibHeader))
+    fatal(getName(this) + ": corrupted compressed section");
+  auto *Hdr = reinterpret_cast<const ZlibHeader *>(Data.data());
+  if (memcmp(Hdr->magic, "ZLIB", 4))
+    fatal(getName(this) + ": broken ZLIB-compressed section");
+  return {Data.slice(sizeof(*Hdr)), read64be(Hdr->Size)};
+}
+
 template <class ELFT> void InputSectionBase<ELFT>::uncompress() {
  if (!zlib::isAvailable())
    fatal(getName(this) +
          ": build lld with zlib to enable compressed sections support");

-  // A compressed section consists of a header of Elf_Chdr type
-  // followed by compressed data.
-  if (Data.size() < sizeof(Elf_Chdr))
-    fatal("corrupt compressed section");
+  // This section is compressed. Here we decompress it. Ideally, all
+  // compressed sections have SHF_COMPRESSED bit and their contents
+  // start with headers of Elf_Chdr type. However, sections whose
+  // names start with ".zdebug_" don't have the bit and contains a raw
+  // ZLIB-compressed data (which is a bad thing because section names
+  // shouldn't be significant in ELF.) We need to be able to read both.
+  ArrayRef<uint8_t> Buf; // Compressed data
+  size_t Size;           // Uncompressed size
+  if (Header->sh_flags & SHF_COMPRESSED)
+    std::tie(Buf, Size) = getElfCompressedData(Data);
+  else
+    std::tie(Buf, Size) = getRawCompressedData(Data);

-  auto *Hdr = reinterpret_cast<const Elf_Chdr *>(Data.data());
-  Data = Data.slice(sizeof(Elf_Chdr));
-
-  if (Hdr->ch_type != ELFCOMPRESS_ZLIB)
-    fatal(getName(this) + ": unsupported compression type");
-
-  StringRef Buf((const char *)Data.data(), Data.size());
-  size_t UncompressedDataSize = Hdr->ch_size;
-  UncompressedData.reset(new char[UncompressedDataSize]);
-  if (zlib::uncompress(Buf, UncompressedData.get(), UncompressedDataSize) !=
-      zlib::StatusOK)
-    fatal(getName(this) + ": error uncompressing section");
-  Data = ArrayRef<uint8_t>((uint8_t *)UncompressedData.get(),
-                           UncompressedDataSize);
+  // Uncompress Buf.
+  UncompressedData.reset(new uint8_t[Size]);
+  if (zlib::uncompress(StringRef((const char *)Buf.data(), Buf.size()),
+                       (char *)UncompressedData.get(), Size) != zlib::StatusOK)
+    fatal(getName(this) + ": error while uncompressing section");
+  Data = ArrayRef<uint8_t>(UncompressedData.get(), Size);
 }

 template <class ELFT>
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@ -67,7 +67,7 @@ public:
  ArrayRef<uint8_t> getData(const SectionPiece &P) const;

  // If a section is compressed, this has the uncompressed section data.
-  std::unique_ptr<char[]> UncompressedData;
+  std::unique_ptr<uint8_t[]> UncompressedData;

  std::vector<Relocation> Relocations;
 };
@ -118,6 +118,13 @@ public:
  void uncompress();

  void relocate(uint8_t *Buf, uint8_t *BufEnd);
+
+private:
+  std::pair<ArrayRef<uint8_t>, uint64_t>
+  getElfCompressedData(ArrayRef<uint8_t> Data);
+
+  std::pair<ArrayRef<uint8_t>, uint64_t>
+  getRawCompressedData(ArrayRef<uint8_t> Data);
 };

 template <class ELFT> InputSectionBase<ELFT> InputSectionBase<ELFT>::Discarded;
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@ -351,7 +351,7 @@ void LinkerScript<ELFT>::createSections(OutputSectionFactory<ELFT> &Factory) {
  for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles())
    for (InputSectionBase<ELFT> *S : F->getSections())
      if (!isDiscarded(S) && !S->OutSec)
-        addSection(Factory, S, getOutputSectionName(S->Name));
+        addSection(Factory, S, getOutputSectionName(S->Name, Opt.Alloc));
 }

 // Sets value of a section-defined symbol. Two kinds of
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@ -91,7 +91,7 @@ private:
 };
 } // anonymous namespace

-StringRef elf::getOutputSectionName(StringRef Name) {
+StringRef elf::getOutputSectionName(StringRef Name, BumpPtrAllocator &Alloc) {
  if (Config->Relocatable)
    return Name;

@ -103,6 +103,11 @@ StringRef elf::getOutputSectionName(StringRef Name) {
    if (Name.startswith(V) || Name == Prefix)
      return Prefix;
  }
+
+  // ".zdebug_" is a prefix for ZLIB-compressed sections.
+  // Because we decompressed input sections, we want to remove 'z'.
+  if (Name.startswith(".zdebug_"))
+    return StringSaver(Alloc).save(Twine(".") + Name.substr(2));
  return Name;
 }

@ -699,7 +704,8 @@ template <class ELFT> void Writer<ELFT>::createSections() {
      }
      OutputSectionBase<ELFT> *Sec;
      bool IsNew;
-      std::tie(Sec, IsNew) = Factory.create(IS, getOutputSectionName(IS->Name));
+      StringRef OutsecName = getOutputSectionName(IS->Name, Alloc);
+      std::tie(Sec, IsNew) = Factory.create(IS, OutsecName);
      if (IsNew)
        OutputSections.push_back(Sec);
      Sec->addSection(IS);
--- a/lld/ELF/Writer.h
+++ b/lld/ELF/Writer.h
@ -10,13 +10,11 @@
 #ifndef LLD_ELF_WRITER_H
 #define LLD_ELF_WRITER_H

+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
 #include <cstdint>
 #include <memory>

-namespace llvm {
-  class StringRef;
-}
-
 namespace lld {
 namespace elf {
 template <class ELFT> class OutputSectionBase;
@ -41,7 +39,8 @@ struct PhdrEntry {
  bool HasLMA = false;
 };

-llvm::StringRef getOutputSectionName(llvm::StringRef Name);
+llvm::StringRef getOutputSectionName(llvm::StringRef Name,
+                                     llvm::BumpPtrAllocator &Alloc);

 template <class ELFT> void reportDiscarded(InputSectionBase<ELFT> *IS);

--- a/lld/test/ELF/compressed-debug-input.s
+++ b/lld/test/ELF/compressed-debug-input.s
@ -1,52 +1,73 @@
 # REQUIRES: zlib

 # RUN: llvm-mc -compress-debug-sections=zlib -filetype=obj -triple=x86_64-unknown-linux %s -o %t
-# RUN: llvm-readobj -sections %t | FileCheck -check-prefix=COMPRESSED %s
+# RUN: llvm-readobj -sections %t | FileCheck -check-prefix=ZLIB %s
+# ZLIB:      Section {
+# ZLIB:        Index: 2
+# ZLIB:        Name: .debug_str
+# ZLIB-NEXT:   Type: SHT_PROGBITS
+# ZLIB-NEXT:   Flags [
+# ZLIB-NEXT:     SHF_COMPRESSED (0x800)
+# ZLIB-NEXT:     SHF_MERGE (0x10)
+# ZLIB-NEXT:     SHF_STRINGS (0x20)
+# ZLIB-NEXT:   ]
+# ZLIB-NEXT:   Address:
+# ZLIB-NEXT:   Offset:
+# ZLIB-NEXT:   Size:
+# ZLIB-NEXT:   Link:
+# ZLIB-NEXT:   Info:
+# ZLIB-NEXT:   AddressAlignment: 1
+# ZLIB-NEXT:   EntrySize: 1
+# ZLIB-NEXT: }

-# COMPRESSED:      Section {
-# COMPRESSED:        Index: 2
-# COMPRESSED:        Name: .debug_str
-# COMPRESSED-NEXT:   Type: SHT_PROGBITS
-# COMPRESSED-NEXT:   Flags [
-# COMPRESSED-NEXT:     SHF_COMPRESSED (0x800)
-# COMPRESSED-NEXT:     SHF_MERGE (0x10)
-# COMPRESSED-NEXT:     SHF_STRINGS (0x20)
-# COMPRESSED-NEXT:   ]
-# COMPRESSED-NEXT:   Address:
-# COMPRESSED-NEXT:   Offset:
-# COMPRESSED-NEXT:   Size: 66
-# COMPRESSED-NEXT:   Link:
-# COMPRESSED-NEXT:   Info:
-# COMPRESSED-NEXT:   AddressAlignment: 1
-# COMPRESSED-NEXT:   EntrySize: 1
-# COMPRESSED-NEXT: }
+# RUN: llvm-mc -compress-debug-sections=zlib-gnu -filetype=obj -triple=x86_64-unknown-linux %s -o %t2
+# RUN: llvm-readobj -sections %t2 | FileCheck -check-prefix=GNU %s
+# GNU:      Section {
+# GNU:        Index: 2
+# GNU:        Name: .zdebug_str
+# GNU-NEXT:   Type: SHT_PROGBITS
+# GNU-NEXT:   Flags [
+# GNU-NEXT:     SHF_MERGE (0x10)
+# GNU-NEXT:     SHF_STRINGS (0x20)
+# GNU-NEXT:   ]
+# GNU-NEXT:   Address:
+# GNU-NEXT:   Offset:
+# GNU-NEXT:   Size:
+# GNU-NEXT:   Link:
+# GNU-NEXT:   Info:
+# GNU-NEXT:   AddressAlignment: 1
+# GNU-NEXT:   EntrySize: 1
+# GNU-NEXT: }

 # RUN: ld.lld %t -o %t.so -shared
-# RUN: llvm-readobj -sections -section-data %t.so | FileCheck -check-prefix=UNCOMPRESSED %s
+# RUN: llvm-readobj -sections -section-data %t.so | FileCheck -check-prefix=DATA %s

-# UNCOMPRESSED:      Section {
-# UNCOMPRESSED:        Index: 6
-# UNCOMPRESSED:        Name: .debug_str
-# UNCOMPRESSED-NEXT:   Type: SHT_PROGBITS
-# UNCOMPRESSED-NEXT:   Flags [
-# UNCOMPRESSED-NEXT:     SHF_MERGE (0x10)
-# UNCOMPRESSED-NEXT:     SHF_STRINGS (0x20)
-# UNCOMPRESSED-NEXT:   ]
-# UNCOMPRESSED-NEXT:   Address: 0x0
-# UNCOMPRESSED-NEXT:   Offset: 0x1060
-# UNCOMPRESSED-NEXT:   Size: 69
-# UNCOMPRESSED-NEXT:   Link: 0
-# UNCOMPRESSED-NEXT:   Info: 0
-# UNCOMPRESSED-NEXT:   AddressAlignment: 1
-# UNCOMPRESSED-NEXT:   EntrySize: 1
-# UNCOMPRESSED-NEXT:   SectionData (
-# UNCOMPRESSED-NEXT:     0000: 73686F72 7420756E 7369676E 65642069  |short unsigned i|
-# UNCOMPRESSED-NEXT:     0010: 6E740075 6E736967 6E656420 696E7400  |nt.unsigned int.|
-# UNCOMPRESSED-NEXT:     0020: 6C6F6E67 20756E73 69676E65 6420696E  |long unsigned in|
-# UNCOMPRESSED-NEXT:     0030: 74006368 61720075 6E736967 6E656420  |t.char.unsigned |
-# UNCOMPRESSED-NEXT:     0040: 63686172 00                          |char.|
-# UNCOMPRESSED-NEXT:   )
-# UNCOMPRESSED-NEXT: }
+# RUN: ld.lld %t2 -o %t2.so -shared
+# RUN: llvm-readobj -sections -section-data %t2.so | FileCheck -check-prefix=DATA %s
+
+# DATA:      Section {
+# DATA:        Index: 6
+# DATA:        Name: .debug_str
+# DATA-NEXT:   Type: SHT_PROGBITS
+# DATA-NEXT:   Flags [
+# DATA-NEXT:     SHF_MERGE (0x10)
+# DATA-NEXT:     SHF_STRINGS (0x20)
+# DATA-NEXT:   ]
+# DATA-NEXT:   Address: 0x0
+# DATA-NEXT:   Offset: 0x1060
+# DATA-NEXT:   Size: 69
+# DATA-NEXT:   Link: 0
+# DATA-NEXT:   Info: 0
+# DATA-NEXT:   AddressAlignment: 1
+# DATA-NEXT:   EntrySize: 1
+# DATA-NEXT:   SectionData (
+# DATA-NEXT:     0000: 73686F72 7420756E 7369676E 65642069  |short unsigned i|
+# DATA-NEXT:     0010: 6E740075 6E736967 6E656420 696E7400  |nt.unsigned int.|
+# DATA-NEXT:     0020: 6C6F6E67 20756E73 69676E65 6420696E  |long unsigned in|
+# DATA-NEXT:     0030: 74006368 61720075 6E736967 6E656420  |t.char.unsigned |
+# DATA-NEXT:     0040: 63686172 00                          |char.|
+# DATA-NEXT:   )
+# DATA-NEXT: }

 .section .debug_str,"MS",@progbits,1
 .LASF2: