[BOLT][DWARF] Add ability to insert new entries into a DIE

Added the ability to append new entries to a DIE. This is useful to standardize
DWARF4 Split DWARF, and to simplify the implementation of DWARF5.
Multiple DIEs can share an abbrev, so the current limitation is that only unique
attributes can be added.

Reviewed By: maksfb

Differential Revision: https://reviews.llvm.org/D119577
This commit is contained in:
Alexander Yermolovich 2022-02-15 18:06:03 -08:00
parent 13b6f31548
commit bd1ebe9d04
4 changed files with 262 additions and 69 deletions

View File

@ -494,7 +494,8 @@ public:
PatchValueVariable,
ReferencePatchValue,
DWARFUnitOffsetBaseLabel,
DestinationReferenceLabel
DestinationReferenceLabel,
NewDebugEntry
};
struct Patch {
@ -605,6 +606,22 @@ public:
}
};
/// Patch describing a brand-new, already-encoded entry to be inserted at
/// Offset. Every instance records the value of a shared counter at
/// construction time in CurrentOrder; the patch sort uses it to order new
/// entries that share the same offset.
struct NewDebugEntry : public Patch {
  NewDebugEntry() = delete;

  /// \param O offset at which the encoded bytes are inserted.
  /// \param V encoded bytes of the new entry; ownership is taken.
  NewDebugEntry(uint32_t O, std::string &&V)
      : Patch(O, DebugPatchKind::NewDebugEntry),
        CurrentOrder(NewDebugEntry::OrderCounter++), Value(std::move(V)) {}

  /// LLVM-style RTTI hook: true when \p Writer is a NewDebugEntry patch.
  static bool classof(const Patch *Writer) {
    return DebugPatchKind::NewDebugEntry == Writer->getKind();
  }

  /// Shared counter, bumped once per constructed NewDebugEntry.
  static uint32_t OrderCounter;
  /// Snapshot of OrderCounter taken when this entry was created.
  uint32_t CurrentOrder;
  /// Encoded bytes to insert.
  std::string Value;
};
/// Reports the concrete patcher kind of this object, which is
/// PatcherKind::DebugInfoBinaryPatcher.
PatcherKind getKind() const override {
  // `override` already implies virtual; the redundant keyword is dropped.
  return PatcherKind::DebugInfoBinaryPatcher;
}
@ -646,6 +663,12 @@ public:
void addReferenceToPatch(uint64_t Offset, uint32_t DestinationOffset,
uint32_t OldValueSize, dwarf::Form Form);
/// Inserts a new uint32_t \p Value at the end of \p DIE .
void insertNewEntry(const DWARFDie &DIE, uint32_t);
/// Inserts a new encoded \p Value at the end of \p DIE .
void insertNewEntry(const DWARFDie &DIE, std::string &&Value);
/// Clears unordered set for DestinationLabels.
void clearDestinationLabels() { DestinationLabels.clear(); }
@ -685,6 +708,9 @@ private:
case DebugPatchKind::DestinationReferenceLabel:
delete reinterpret_cast<DestinationReferenceLabel *>(P);
break;
case DebugPatchKind::NewDebugEntry:
delete reinterpret_cast<NewDebugEntry *>(P);
break;
}
}
};
@ -728,10 +754,19 @@ class DebugAbbrevWriter {
uint8_t NewAttrForm;
};
/// An (attribute, form) pair queued to be appended to an abbreviation
/// declaration when the abbrev section is written out.
struct AbbrevEntry {
// Attribute to append to the abbreviation declaration.
dwarf::Attribute Attr;
// Form emitted for that attribute.
dwarf::Form Form;
};
using PatchesTy = std::unordered_map<const DWARFAbbreviationDeclaration *,
SmallVector<PatchInfo, 2>>;
std::unordered_map<const DWARFUnit *, PatchesTy> Patches;
using AbbrevEntryTy = std::unordered_map<const DWARFAbbreviationDeclaration *,
SmallVector<AbbrevEntry, 2>>;
std::unordered_map<const DWARFUnit *, AbbrevEntryTy> NewAbbrevEntries;
/// DWARF context containing abbreviations.
DWARFContext &Context;
@ -777,6 +812,27 @@ public:
PatchInfo{AttrTag, NewAttrTag, NewAttrForm});
}
/// Adds attribute \p AttrTag and \p NewAttrForm in abbreviation declaration
/// \p Abbrev belonging to CU \p Unit .
void addAttribute(const DWARFUnit &Unit,
const DWARFAbbreviationDeclaration *Abbrev,
dwarf::Attribute AttrTag, dwarf::Form AttrForm) {
assert(&Unit.getContext() == &Context &&
"cannot update attribute from a different DWARF context");
std::lock_guard<std::mutex> Lock(WriterMutex);
bool AlreadyAdded = false;
for (AbbrevEntry &E : NewAbbrevEntries[&Unit][Abbrev])
if (E.Attr == AttrTag) {
AlreadyAdded = true;
break;
}
if (AlreadyAdded)
return;
NewAbbrevEntries[&Unit][Abbrev].emplace_back(
AbbrevEntry{AttrTag, AttrForm});
}
/// Return a buffer with concatenated abbrev sections for all CUs and TUs
/// in the associated DWARF context. Section offsets could be queried using
/// getAbbreviationsOffsetForUnit() interface. For DWP, we are using DWOId
@ -882,6 +938,17 @@ public:
}
};
/// Result of looking up a single attribute of a DIE.
struct AttrInfo {
// Form value extracted for the attribute.
DWARFFormValue V;
// Offset of the attribute, as computed by getAttributeOffsetFromIndex().
uint64_t Offset;
uint32_t Size; // Size of the attribute.
};
Optional<AttrInfo>
findAttributeInfo(const DWARFDie DIE,
const DWARFAbbreviationDeclaration *AbbrevDecl,
uint32_t Index);
} // namespace bolt
} // namespace llvm

View File

@ -39,6 +39,43 @@ class MCSymbol;
namespace bolt {
/// Looks up the attribute at position \p Index of \p AbbrevDecl inside
/// \p DIE and reports its form value, offset and size.
///
/// \param DIE die to look up in.
/// \param AbbrevDecl abbreviation declaration describing the DIE's attributes.
/// \param Index the attribute index to extract.
/// \return an optional AttrInfo with DWARFFormValue, Offset and Size; None
/// when the value cannot be extracted.
Optional<AttrInfo>
findAttributeInfo(const DWARFDie DIE,
                  const DWARFAbbreviationDeclaration *AbbrevDecl,
                  uint32_t Index) {
  const DWARFUnit &Unit = *DIE.getDwarfUnit();
  const uint64_t AttrOffset =
      AbbrevDecl->getAttributeOffsetFromIndex(Index, DIE.getOffset(), Unit);
  Optional<DWARFFormValue> FormValue =
      AbbrevDecl->getAttributeValueFromOffset(Index, AttrOffset, Unit);
  if (!FormValue)
    return None;

  // Forms with a known byte size can be answered straight from the spec.
  const DWARFAbbreviationDeclaration::AttributeSpec *Spec =
      AbbrevDecl->attributes().begin() + Index;
  if (Optional<int64_t> FixedSize = Spec->getByteSize(Unit))
    return AttrInfo{*FormValue, AttrOffset,
                    static_cast<uint32_t>(*FixedSize)};

  // Otherwise skip over the encoded value and measure how far we moved.
  // This includes entire size of the entry, which might not be just the
  // encoding part. For example for DW_AT_loc it will include expression
  // location.
  DWARFDataExtractor InfoData = Unit.getDebugInfoExtractor();
  uint64_t EndOffset = AttrOffset;
  DWARFFormValue::skipValue(FormValue->getForm(), InfoData, &EndOffset,
                            Unit.getFormParams());
  return AttrInfo{*FormValue, AttrOffset,
                  static_cast<uint32_t>(EndOffset - AttrOffset)};
}
const DebugLineTableRowRef DebugLineTableRowRef::NULL_ROW{0, 0};
namespace {
@ -384,6 +421,40 @@ void DebugInfoBinaryPatcher::addDestinationReferenceLabel(uint64_t Offset) {
DebugPatches.emplace_back(new DestinationReferenceLabel(Offset));
}
/// Encodes \p NewValue as \p ByteSize little-endian bytes (least significant
/// byte first). Bytes beyond the width of the value are zero; a value wider
/// than \p ByteSize is truncated to its low bytes.
static std::string encodeLE(size_t ByteSize, uint64_t NewValue) {
  std::string Bytes;
  Bytes.reserve(ByteSize);
  for (uint64_t Rest = NewValue; Bytes.size() < ByteSize; Rest >>= 8)
    Bytes.push_back(static_cast<char>(Rest & 0xff));
  return Bytes;
}
/// Convenience overload: encodes \p Value as 4 little-endian bytes and
/// forwards to the string-based insertNewEntry().
void DebugInfoBinaryPatcher::insertNewEntry(const DWARFDie &DIE,
                                            uint32_t Value) {
  insertNewEntry(DIE, encodeLE(4, Value));
}
void DebugInfoBinaryPatcher::insertNewEntry(const DWARFDie &DIE,
std::string &&Value) {
const DWARFAbbreviationDeclaration *AbbrevDecl =
DIE.getAbbreviationDeclarationPtr();
// In case this DIE has no attributes.
uint32_t Offset = DIE.getOffset() + 1;
size_t NumOfAttributes = AbbrevDecl->getNumAttributes();
if (NumOfAttributes) {
Optional<AttrInfo> Val =
findAttributeInfo(DIE, AbbrevDecl, NumOfAttributes - 1);
assert(Val && "Invalid Value.");
Offset = Val->Offset + Val->Size - DWPUnitOffset;
}
std::lock_guard<std::mutex> Lock(WriterMutex);
DebugPatches.emplace_back(new NewDebugEntry(Offset, std::move(Value)));
}
void DebugInfoBinaryPatcher::addReferenceToPatch(uint64_t Offset,
uint32_t DestinationOffset,
uint32_t OldValueSize,
@ -430,15 +501,6 @@ void SimpleBinaryPatcher::addBytePatch(uint64_t Offset, uint8_t Value) {
Patches.emplace_back(Offset, std::move(Str));
}
/// Produces the little-endian encoding of \p NewValue using exactly
/// \p ByteSize bytes: the low byte comes first, higher bytes follow, and any
/// byte past the value's width is zero.
static std::string encodeLE(size_t ByteSize, uint64_t NewValue) {
  std::string Encoded;
  Encoded.reserve(ByteSize);
  size_t Remaining = ByteSize;
  while (Remaining-- > 0) {
    Encoded += static_cast<char>(NewValue & 0xff);
    NewValue >>= 8;
  }
  return Encoded;
}
void SimpleBinaryPatcher::addLEPatch(uint64_t Offset, uint64_t NewValue,
size_t ByteSize) {
Patches.emplace_back(Offset, encodeLE(ByteSize, NewValue));
@ -481,6 +543,18 @@ CUOffsetMap DebugInfoBinaryPatcher::computeNewOffsets(DWARFContext &DWCtx,
CUOffsetMap CUMap;
std::sort(DebugPatches.begin(), DebugPatches.end(),
[](const UniquePatchPtrType &V1, const UniquePatchPtrType &V2) {
if (V1.get()->Offset == V2.get()->Offset) {
if (V1->Kind == DebugPatchKind::NewDebugEntry &&
V2->Kind == DebugPatchKind::NewDebugEntry)
return reinterpret_cast<const NewDebugEntry *>(V1.get())
->CurrentOrder <
reinterpret_cast<const NewDebugEntry *>(V2.get())
->CurrentOrder;
// This is a case where we are modifying first entry of next
// DIE, and adding a new one.
return V1->Kind == DebugPatchKind::NewDebugEntry;
}
return V1.get()->Offset < V2.get()->Offset;
});
@ -541,12 +615,19 @@ CUOffsetMap DebugInfoBinaryPatcher::computeNewOffsets(DWARFContext &DWCtx,
CUMap[PreviousOffset].Length += PreviousChangeInSize;
PreviousChangeInSize = 0;
PreviousOffset = CUOffset;
break;
}
case DebugPatchKind::NewDebugEntry: {
NewDebugEntry *NDE = reinterpret_cast<NewDebugEntry *>(P);
PreviousChangeInSize += NDE->Value.size();
break;
}
}
}
CUMap[PreviousOffset].Length += PreviousChangeInSize;
return CUMap;
}
uint32_t DebugInfoBinaryPatcher::NewDebugEntry::OrderCounter = 0;
std::string DebugInfoBinaryPatcher::patchBinary(StringRef BinaryContents) {
std::string NewBinaryContents;
@ -644,9 +725,17 @@ std::string DebugInfoBinaryPatcher::patchBinary(StringRef BinaryContents) {
LengthPatches.push_back({NewCUOffset, 0});
break;
}
case DebugPatchKind::NewDebugEntry: {
NewDebugEntry *NDE = reinterpret_cast<NewDebugEntry *>(P);
Offset = NDE->Offset;
OldValueSize = 0;
ByteSequence = NDE->Value;
break;
}
}
assert(Offset + ByteSequence.size() <= BinaryContents.size() &&
assert((P->Kind == DebugPatchKind::NewDebugEntry ||
Offset + ByteSequence.size() <= BinaryContents.size()) &&
"Applied patch runs over binary size.");
uint32_t Length = Offset - StartOffset;
NewBinaryContents.append(BinaryContents.substr(StartOffset, Length).data(),
@ -699,6 +788,7 @@ void DebugAbbrevWriter::addUnitAbbreviations(DWARFUnit &Unit) {
return;
const PatchesTy &UnitPatches = Patches[&Unit];
const AbbrevEntryTy &AbbrevEntries = NewAbbrevEntries[&Unit];
// We are duplicating abbrev sections, to handle the case where for one CU we
// modify it, but for another we don't.
@ -706,6 +796,7 @@ void DebugAbbrevWriter::addUnitAbbreviations(DWARFUnit &Unit) {
AbbrevData &UnitData = *UnitDataPtr.get();
UnitData.Buffer = std::make_unique<DebugBufferVector>();
UnitData.Stream = std::make_unique<raw_svector_ostream>(*UnitData.Buffer);
raw_svector_ostream &OS = *UnitData.Stream.get();
// Returns true if AbbrevData is re-used, false otherwise.
@ -724,7 +815,7 @@ void DebugAbbrevWriter::addUnitAbbreviations(DWARFUnit &Unit) {
};
// Take a fast path if there are no patches to apply. Simply copy the original
// contents.
if (UnitPatches.empty()) {
if (UnitPatches.empty() && AbbrevEntries.empty()) {
StringRef AbbrevSectionContents =
Unit.isDWOUnit() ? Unit.getContext().getDWARFObj().getAbbrevDWOSection()
: Unit.getContext().getDWARFObj().getAbbrevSection();
@ -808,7 +899,14 @@ void DebugAbbrevWriter::addUnitAbbreviations(DWARFUnit &Unit) {
if (AttrSpec.isImplicitConst())
encodeSLEB128(AttrSpec.getImplicitConstValue(), OS);
}
const auto Entries = AbbrevEntries.find(&Abbrev);
// Adding new Abbrevs for inserted entries.
if (Entries != AbbrevEntries.end()) {
for (const AbbrevEntry &Entry : Entries->second) {
encodeULEB128(Entry.Attr, OS);
encodeULEB128(Entry.Form, OS);
}
}
encodeULEB128(0, OS);
encodeULEB128(0, OS);
}

View File

@ -51,49 +51,8 @@ static void printDie(const DWARFDie &DIE) {
DIE.dump(dbgs(), 0, DumpOpts);
}
/// Result of looking up a single attribute of a DIE.
struct AttrInfo {
// Form value extracted for the attribute.
DWARFFormValue V;
// Offset of the attribute, as computed by getAttributeOffsetFromIndex().
uint64_t Offset;
uint32_t Size; // Size of the attribute.
};
/// Looks up the attribute at position \p Index of \p AbbrevDecl inside
/// \p DIE and reports its form value, offset and size.
///
/// \param DIE die to look up in.
/// \param AbbrevDecl abbreviation declaration describing the DIE's attributes.
/// \param Index the attribute index to extract.
/// \return an optional AttrInfo with DWARFFormValue, Offset and Size; None
/// when the value cannot be extracted.
static Optional<AttrInfo>
findAttributeInfo(const DWARFDie DIE,
                  const DWARFAbbreviationDeclaration *AbbrevDecl,
                  uint32_t Index) {
  const DWARFUnit &Unit = *DIE.getDwarfUnit();
  const uint64_t AttrOffset =
      AbbrevDecl->getAttributeOffsetFromIndex(Index, DIE.getOffset(), Unit);
  Optional<DWARFFormValue> FormValue =
      AbbrevDecl->getAttributeValueFromOffset(Index, AttrOffset, Unit);
  if (!FormValue)
    return None;

  // Forms with a known byte size can be answered straight from the spec.
  const DWARFAbbreviationDeclaration::AttributeSpec *Spec =
      AbbrevDecl->attributes().begin() + Index;
  if (Optional<int64_t> FixedSize = Spec->getByteSize(Unit))
    return AttrInfo{*FormValue, AttrOffset,
                    static_cast<uint32_t>(*FixedSize)};

  // Otherwise skip over the encoded value and measure how far we moved.
  // This includes entire size of the entry, which might not be just the
  // encoding part. For example for DW_AT_loc it will include expression
  // location.
  DWARFDataExtractor InfoData = Unit.getDebugInfoExtractor();
  uint64_t EndOffset = AttrOffset;
  DWARFFormValue::skipValue(FormValue->getForm(), InfoData, &EndOffset,
                            Unit.getFormParams());
  return AttrInfo{*FormValue, AttrOffset,
                  static_cast<uint32_t>(EndOffset - AttrOffset)};
}
namespace llvm {
namespace bolt {
/// Finds attributes FormValue and Offset.
///
/// \param DIE die to look up in.
@ -112,6 +71,8 @@ static Optional<AttrInfo> findAttributeInfo(const DWARFDie DIE,
return None;
return findAttributeInfo(DIE, AbbrevDecl, *Index);
}
} // namespace bolt
} // namespace llvm
using namespace llvm;
using namespace llvm::support::endian;
@ -691,12 +652,11 @@ void DWARFRewriter::updateDWARFObjectAddressRanges(
return;
}
AbbrevWriter.addAttributePatch(
*DIE.getDwarfUnit(), AbbreviationDecl, dwarf::DW_AT_low_pc,
dwarf::DW_AT_GNU_ranges_base, dwarf::DW_FORM_indirect);
DebugInfoPatcher.addUDataPatch(LowPCAttrInfo->Offset, dwarf::DW_FORM_udata,
1);
DebugInfoPatcher.addUDataPatch(LowPCAttrInfo->Offset + 1, *RangesBase, 7);
AbbrevWriter.addAttribute(*DIE.getDwarfUnit(), AbbreviationDecl,
dwarf::DW_AT_GNU_ranges_base,
dwarf::DW_FORM_sec_offset);
reinterpret_cast<DebugInfoBinaryPatcher &>(DebugInfoPatcher)
.insertNewEntry(DIE, *RangesBase);
return;
}
@ -1458,9 +1418,8 @@ void DWARFRewriter::convertToRangesPatchAbbrev(
// there.
if (RangesBase) {
assert(LowPCForm != dwarf::DW_FORM_GNU_addr_index);
AbbrevWriter.addAttributePatch(Unit, Abbrev, dwarf::DW_AT_low_pc,
dwarf::DW_AT_GNU_ranges_base,
dwarf::DW_FORM_sec_offset);
AbbrevWriter.addAttribute(Unit, Abbrev, dwarf::DW_AT_GNU_ranges_base,
dwarf::DW_FORM_sec_offset);
}
AbbrevWriter.addAttributePatch(Unit, Abbrev, dwarf::DW_AT_high_pc,
@ -1486,11 +1445,11 @@ void DWARFRewriter::convertToRangesPatchDebugInfo(
// Ranges are relative to DW_AT_GNU_ranges_base.
BaseOffset = DebugInfoPatcher.getRangeBase();
} else {
// If case DW_AT_low_pc was converted into DW_AT_GNU_ranges_base
DebugInfoPatcher.addLE64Patch(LowPCOffset, 0);
// If DW_AT_GNU_ranges_base was inserted.
if (RangesBase)
DebugInfoPatcher.addLE32Patch(LowPCOffset, *RangesBase, 8);
else
DebugInfoPatcher.addLE64Patch(LowPCOffset, 0);
reinterpret_cast<DebugInfoBinaryPatcher &>(DebugInfoPatcher)
.insertNewEntry(DIE, *RangesBase);
}
DebugInfoPatcher.addLE32Patch(HighPCOffset, RangesSectionOffset - BaseOffset,
HighPCVal->Size);

View File

@ -0,0 +1,69 @@
; RUN: rm -rf %t
; RUN: mkdir %t
; RUN: cd %t
; RUN: llc -split-dwarf-file=foo.dwo -split-dwarf-output=foo.dwo -O0 -mtriple=x86_64-unknown-linux-gnu -filetype=obj %s -o=foo.o
; RUN: %clang %cflags foo.o -o foo.exe
; RUN: llvm-bolt foo.exe -o foo.exe.bolt --update-debug-sections
; RUN: llvm-dwarfdump --debug-info foo.exe | FileCheck -check-prefix=PRE-BOLT %s
; RUN: llvm-dwarfdump --debug-info foo.exe.bolt | FileCheck %s
; This test checks that DW_AT_GNU_ranges_base is added at the end of the CU.
; PRE-BOLT: DW_AT_GNU_addr_base
; PRE-BOLT-NOT: DW_AT_GNU_ranges_base
; CHECK: DW_AT_GNU_addr_base
; CHECK-NEXT: DW_AT_GNU_ranges_base
; int foo() {
; return 3;
; }
;
; int main() {
; return foo();
; }
; ModuleID = 'main.cpp'
source_filename = "main.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; foo(): returns the constant 3; the return carries debug location !13.
; Function Attrs: mustprogress noinline nounwind optnone uwtable
define dso_local noundef i32 @_Z3foov() #0 !dbg !8 {
entry:
ret i32 3, !dbg !13
}
; main(): zero-initializes its %retval slot, calls foo() and returns the
; call's result.
; Function Attrs: mustprogress noinline norecurse nounwind optnone uwtable
define dso_local noundef i32 @main() #1 !dbg !14 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval, align 4
%call = call noundef i32 @_Z3foov(), !dbg !15
ret i32 %call, !dbg !16
}
attributes #0 = { mustprogress noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { mustprogress noinline norecurse nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3, !4, !5, !6}
!llvm.ident = !{!7}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, splitDebugFilename: "main.dwo", emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: GNU)
!1 = !DIFile(filename: "main.cpp", directory: ".")
!2 = !{i32 7, !"Dwarf Version", i32 4}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 7, !"uwtable", i32 1}
!6 = !{i32 7, !"frame-pointer", i32 2}
!7 = !{!"clang"}
!8 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
!9 = !DISubroutineType(types: !10)
!10 = !{!11}
!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!12 = !{}
!13 = !DILocation(line: 2, column: 3, scope: !8)
!14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !9, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
!15 = !DILocation(line: 6, column: 10, scope: !14)
!16 = !DILocation(line: 6, column: 3, scope: !14)