Pavel Labath a7c457d288 [CodeGen] Refactor AppleAccelTable
Summary:
This commit separates the abstract accelerator table data structure
from the code for writing out an on-disk representation of a specific
accelerator table format. The idea is that former (now called
AccelTable<T>) can be reused for the DWARF v5 accelerator tables
as-is, without any further customizations.

Some bits of the emission code (now living in the EmissionContext class)
can be reused for DWARF v5 as well, but the subtle differences in the
layout of various subtables mean the sharing is not always possible.
(Also, the individual emit*** functions are fairly simple so there's a
tradeoff between making a bigger general-purpose function, and two
smaller targeted functions.)

Another advantage of this setup is that more of the serialization logic
can be hidden in the .cpp file -- I have moved declarations of the
header and all the emission functions there.

Reviewers: JDevlieghere, aprantl, probinson, dblaikie

Subscribers: echristo, clayborg, vleschuk, llvm-commits

Differential Revision: https://reviews.llvm.org/D43285

llvm-svn: 325516
2018-02-19 16:12:20 +00:00

432 lines
14 KiB
C++

//===- llvm/CodeGen/AsmPrinter/AccelTable.cpp - Accelerator Tables --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing accelerator tables.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/AccelTable.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <vector>
using namespace llvm;
void AccelTableBase::computeBucketCount() {
// First get the number of unique hashes.
std::vector<uint32_t> Uniques;
Uniques.reserve(Entries.size());
for (const auto &E : Entries)
Uniques.push_back(E.second.HashValue);
array_pod_sort(Uniques.begin(), Uniques.end());
std::vector<uint32_t>::iterator P =
std::unique(Uniques.begin(), Uniques.end());
UniqueHashCount = std::distance(Uniques.begin(), P);
if (UniqueHashCount > 1024)
BucketCount = UniqueHashCount / 4;
else if (UniqueHashCount > 16)
BucketCount = UniqueHashCount / 2;
else
BucketCount = std::max<uint32_t>(UniqueHashCount, 1);
}
void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) {
// Create the individual hash data outputs.
for (auto &E : Entries) {
// Unique the entries.
std::stable_sort(E.second.Values.begin(), E.second.Values.end(),
[](const AccelTableData *A, const AccelTableData *B) {
return *A < *B;
});
E.second.Values.erase(
std::unique(E.second.Values.begin(), E.second.Values.end()),
E.second.Values.end());
}
// Figure out how many buckets we need, then compute the bucket contents and
// the final ordering. The hashes and offsets can be emitted by walking these
// data structures. We add temporary symbols to the data so they can be
// referenced when emitting the offsets.
computeBucketCount();
// Compute bucket contents and final ordering.
Buckets.resize(BucketCount);
for (auto &E : Entries) {
uint32_t Bucket = E.second.HashValue % BucketCount;
Buckets[Bucket].push_back(&E.second);
E.second.Sym = Asm->createTempSymbol(Prefix);
}
// Sort the contents of the buckets by hash value so that hash collisions end
// up together. Stable sort makes testing easier and doesn't cost much more.
for (auto &Bucket : Buckets)
std::stable_sort(Bucket.begin(), Bucket.end(),
[](HashData *LHS, HashData *RHS) {
return LHS->HashValue < RHS->HashValue;
});
}
namespace {
class AccelTableEmitter {
protected:
AsmPrinter *const Asm; ///< Destination.
const AccelTableBase &Contents; ///< Data to emit.
/// Controls whether to emit duplicate hash and offset table entries for names
/// with identical hashes. Apple tables don't emit duplicate entries, DWARF v5
/// tables do.
const bool SkipIdenticalHashes;
void emitHashes() const;
/// Emit offsets to lists of entries with identical names. The offsets are
/// relative to the Base argument.
void emitOffsets(const MCSymbol *Base) const;
public:
AccelTableEmitter(AsmPrinter *Asm, const AccelTableBase &Contents,
bool SkipIdenticalHashes)
: Asm(Asm), Contents(Contents), SkipIdenticalHashes(SkipIdenticalHashes) {
}
};
class AppleAccelTableEmitter : public AccelTableEmitter {
using Atom = AppleAccelTableData::Atom;
/// The fixed header of an Apple Accelerator Table.
struct Header {
uint32_t Magic = MagicHash;
uint16_t Version = 1;
uint16_t HashFunction = dwarf::DW_hash_function_djb;
uint32_t BucketCount;
uint32_t HashCount;
uint32_t HeaderDataLength;
/// 'HASH' magic value to detect endianness.
static const uint32_t MagicHash = 0x48415348;
Header(uint32_t BucketCount, uint32_t UniqueHashCount, uint32_t DataLength)
: BucketCount(BucketCount), HashCount(UniqueHashCount),
HeaderDataLength(DataLength) {}
void emit(AsmPrinter *Asm) const;
#ifndef NDEBUG
void print(raw_ostream &OS) const;
void dump() const { print(dbgs()); }
#endif
};
/// The HeaderData describes the structure of an Apple accelerator table
/// through a list of Atoms.
struct HeaderData {
/// In the case of data that is referenced via DW_FORM_ref_* the offset
/// base is used to describe the offset for all forms in the list of atoms.
uint32_t DieOffsetBase;
const SmallVector<Atom, 4> Atoms;
HeaderData(ArrayRef<Atom> AtomList, uint32_t Offset = 0)
: DieOffsetBase(Offset), Atoms(AtomList.begin(), AtomList.end()) {}
void emit(AsmPrinter *Asm) const;
#ifndef NDEBUG
void print(raw_ostream &OS) const;
void dump() const { print(dbgs()); }
#endif
};
Header Header;
HeaderData HeaderData;
const MCSymbol *SecBegin;
void emitBuckets() const;
void emitData() const;
public:
AppleAccelTableEmitter(AsmPrinter *Asm, const AccelTableBase &Contents,
ArrayRef<Atom> Atoms, const MCSymbol *SecBegin)
: AccelTableEmitter(Asm, Contents, true),
Header(Contents.getBucketCount(), Contents.getUniqueHashCount(),
8 + (Atoms.size() * 4)),
HeaderData(Atoms), SecBegin(SecBegin) {}
void emit() const;
#ifndef NDEBUG
void print(raw_ostream &OS) const;
void dump() const { print(dbgs()); }
#endif
};
} // namespace
void AccelTableEmitter::emitHashes() const {
uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
unsigned BucketIdx = 0;
for (auto &Bucket : Contents.getBuckets()) {
for (auto &Hash : Bucket) {
uint32_t HashValue = Hash->HashValue;
if (SkipIdenticalHashes && PrevHash == HashValue)
continue;
Asm->OutStreamer->AddComment("Hash in Bucket " + Twine(BucketIdx));
Asm->EmitInt32(HashValue);
PrevHash = HashValue;
}
BucketIdx++;
}
}
void AccelTableEmitter::emitOffsets(const MCSymbol *Base) const {
const auto &Buckets = Contents.getBuckets();
uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
for (auto *Hash : Buckets[i]) {
uint32_t HashValue = Hash->HashValue;
if (SkipIdenticalHashes && PrevHash == HashValue)
continue;
PrevHash = HashValue;
Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i));
Asm->EmitLabelDifference(Hash->Sym, Base, sizeof(uint32_t));
}
}
}
void AppleAccelTableEmitter::Header::emit(AsmPrinter *Asm) const {
Asm->OutStreamer->AddComment("Header Magic");
Asm->EmitInt32(Magic);
Asm->OutStreamer->AddComment("Header Version");
Asm->EmitInt16(Version);
Asm->OutStreamer->AddComment("Header Hash Function");
Asm->EmitInt16(HashFunction);
Asm->OutStreamer->AddComment("Header Bucket Count");
Asm->EmitInt32(BucketCount);
Asm->OutStreamer->AddComment("Header Hash Count");
Asm->EmitInt32(HashCount);
Asm->OutStreamer->AddComment("Header Data Length");
Asm->EmitInt32(HeaderDataLength);
}
void AppleAccelTableEmitter::HeaderData::emit(AsmPrinter *Asm) const {
Asm->OutStreamer->AddComment("HeaderData Die Offset Base");
Asm->EmitInt32(DieOffsetBase);
Asm->OutStreamer->AddComment("HeaderData Atom Count");
Asm->EmitInt32(Atoms.size());
for (const Atom &A : Atoms) {
Asm->OutStreamer->AddComment(dwarf::AtomTypeString(A.Type));
Asm->EmitInt16(A.Type);
Asm->OutStreamer->AddComment(dwarf::FormEncodingString(A.Form));
Asm->EmitInt16(A.Form);
}
}
void AppleAccelTableEmitter::emitBuckets() const {
const auto &Buckets = Contents.getBuckets();
unsigned index = 0;
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
Asm->OutStreamer->AddComment("Bucket " + Twine(i));
if (!Buckets[i].empty())
Asm->EmitInt32(index);
else
Asm->EmitInt32(std::numeric_limits<uint32_t>::max());
// Buckets point in the list of hashes, not to the data. Do not increment
// the index multiple times in case of hash collisions.
uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
for (auto *HD : Buckets[i]) {
uint32_t HashValue = HD->HashValue;
if (PrevHash != HashValue)
++index;
PrevHash = HashValue;
}
}
}
void AppleAccelTableEmitter::emitData() const {
const auto &Buckets = Contents.getBuckets();
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
for (auto &Hash : Buckets[i]) {
// Terminate the previous entry if there is no hash collision with the
// current one.
if (PrevHash != std::numeric_limits<uint64_t>::max() &&
PrevHash != Hash->HashValue)
Asm->EmitInt32(0);
// Remember to emit the label for our offset.
Asm->OutStreamer->EmitLabel(Hash->Sym);
Asm->OutStreamer->AddComment(Hash->Name.getString());
Asm->emitDwarfStringOffset(Hash->Name);
Asm->OutStreamer->AddComment("Num DIEs");
Asm->EmitInt32(Hash->Values.size());
for (const auto *V : Hash->Values)
static_cast<const AppleAccelTableData *>(V)->emit(Asm);
PrevHash = Hash->HashValue;
}
// Emit the final end marker for the bucket.
if (!Buckets[i].empty())
Asm->EmitInt32(0);
}
}
void AppleAccelTableEmitter::emit() const {
Header.emit(Asm);
HeaderData.emit(Asm);
emitBuckets();
emitHashes();
emitOffsets(SecBegin);
emitData();
}
void llvm::emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents,
StringRef Prefix, const MCSymbol *SecBegin,
ArrayRef<AppleAccelTableData::Atom> Atoms) {
Contents.finalize(Asm, Prefix);
AppleAccelTableEmitter(Asm, Contents, Atoms, SecBegin).emit();
}
void AppleAccelTableOffsetData::emit(AsmPrinter *Asm) const {
Asm->EmitInt32(Die->getDebugSectionOffset());
}
void AppleAccelTableTypeData::emit(AsmPrinter *Asm) const {
Asm->EmitInt32(Die->getDebugSectionOffset());
Asm->EmitInt16(Die->getTag());
Asm->EmitInt8(0);
}
void AppleAccelTableStaticOffsetData::emit(AsmPrinter *Asm) const {
Asm->EmitInt32(Offset);
}
void AppleAccelTableStaticTypeData::emit(AsmPrinter *Asm) const {
Asm->EmitInt32(Offset);
Asm->EmitInt16(Tag);
Asm->EmitInt8(ObjCClassIsImplementation ? dwarf::DW_FLAG_type_implementation
: 0);
Asm->EmitInt32(QualifiedNameHash);
}
#ifndef _MSC_VER
// The lines below are rejected by older versions (TBD) of MSVC.
constexpr AppleAccelTableData::Atom AppleAccelTableTypeData::Atoms[];
constexpr AppleAccelTableData::Atom AppleAccelTableOffsetData::Atoms[];
constexpr AppleAccelTableData::Atom AppleAccelTableStaticOffsetData::Atoms[];
constexpr AppleAccelTableData::Atom AppleAccelTableStaticTypeData::Atoms[];
#else
// FIXME: Erase this path once the minimum MSCV version has been bumped.
const SmallVector<AppleAccelTableData::Atom, 4>
AppleAccelTableOffsetData::Atoms = {
Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
const SmallVector<AppleAccelTableData::Atom, 4> AppleAccelTableTypeData::Atoms =
{Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
const SmallVector<AppleAccelTableData::Atom, 4>
AppleAccelTableStaticOffsetData::Atoms = {
Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
const SmallVector<AppleAccelTableData::Atom, 4>
AppleAccelTableStaticTypeData::Atoms = {
Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)};
#endif
#ifndef NDEBUG
void AppleAccelTableEmitter::Header::print(raw_ostream &OS) const {
OS << "Magic: " << format("0x%x", Magic) << "\n"
<< "Version: " << Version << "\n"
<< "Hash Function: " << HashFunction << "\n"
<< "Bucket Count: " << BucketCount << "\n"
<< "Header Data Length: " << HeaderDataLength << "\n";
}
void AppleAccelTableData::Atom::print(raw_ostream &OS) const {
OS << "Type: " << dwarf::AtomTypeString(Type) << "\n"
<< "Form: " << dwarf::FormEncodingString(Form) << "\n";
}
void AppleAccelTableEmitter::HeaderData::print(raw_ostream &OS) const {
OS << "DIE Offset Base: " << DieOffsetBase << "\n";
for (auto Atom : Atoms)
Atom.print(OS);
}
void AppleAccelTableEmitter::print(raw_ostream &OS) const {
Header.print(OS);
HeaderData.print(OS);
Contents.print(OS);
SecBegin->print(OS, nullptr);
}
void AccelTableBase::HashData::print(raw_ostream &OS) const {
OS << "Name: " << Name.getString() << "\n";
OS << " Hash Value: " << format("0x%x", HashValue) << "\n";
OS << " Symbol: ";
if (Sym)
OS << *Sym;
else
OS << "<none>";
OS << "\n";
for (auto *Value : Values)
Value->print(OS);
}
void AccelTableBase::print(raw_ostream &OS) const {
// Print Content.
OS << "Entries: \n";
for (const auto &Entry : Entries) {
OS << "Name: " << Entry.first() << "\n";
for (auto *V : Entry.second.Values)
V->print(OS);
}
OS << "Buckets and Hashes: \n";
for (auto &Bucket : Buckets)
for (auto &Hash : Bucket)
Hash->print(OS);
OS << "Data: \n";
for (auto &E : Entries)
E.second.print(OS);
}
void AppleAccelTableOffsetData::print(raw_ostream &OS) const {
OS << " Offset: " << Die->getOffset() << "\n";
}
void AppleAccelTableTypeData::print(raw_ostream &OS) const {
OS << " Offset: " << Die->getOffset() << "\n";
OS << " Tag: " << dwarf::TagString(Die->getTag()) << "\n";
}
void AppleAccelTableStaticOffsetData::print(raw_ostream &OS) const {
OS << " Static Offset: " << Offset << "\n";
}
void AppleAccelTableStaticTypeData::print(raw_ostream &OS) const {
OS << " Static Offset: " << Offset << "\n";
OS << " QualifiedNameHash: " << format("%x\n", QualifiedNameHash) << "\n";
OS << " Tag: " << dwarf::TagString(Tag) << "\n";
OS << " ObjCClassIsImplementation: "
<< (ObjCClassIsImplementation ? "true" : "false");
OS << "\n";
}
#endif