Improve DWARF parsing speed by improving DWARFAbbreviationDeclaration

This patch gets a DWARF parsing speed improvement by having DWARFAbbreviationDeclaration instances know if they have a fixed byte size. If an abbreviation has a fixed byte size that can be calculated given a DWARFUnit, then parsing a DIE becomes two steps: parse ULEB128 abbrev code, and then add constant size to the offset.

This patch also adds a fixed byte size to each DWARFAbbreviationDeclaration::AttributeSpec so that attribute values can be skipped quickly, without needing to look up the fixed form size each time.

Notable improvements:

- DWARFAbbreviationDeclaration::findAttributeIndex() now returns an Optional<uint32_t> instead of a uint32_t and we no longer have to look for the magic -1U return value
- Optional<uint32_t> DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute attr) const;
- DWARFAbbreviationDeclaration now has a getAttributeValue() function that extracts an attribute value given a DIE offset that takes advantage of the DWARFAbbreviationDeclaration::AttributeSpec::ByteSize
- bool DWARFAbbreviationDeclaration::getAttributeValue(const uint32_t DIEOffset, const dwarf::Attribute Attr, const DWARFUnit &U, DWARFFormValue &FormValue) const;
- A DWARFAbbreviationDeclaration instance can return a fixed byte size for itself so DWARF parsing is faster:
- Optional<size_t> DWARFAbbreviationDeclaration::getFixedAttributesByteSize(const DWARFUnit &U) const;
- Any functions that used to take a "const DWARFUnit *U" that would crash if U was NULL now take a "const DWARFUnit &U" and are only called with a valid DWARFUnit

Differential Revision: https://reviews.llvm.org/D26567



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286924 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Greg Clayton 2016-11-15 01:23:06 +00:00
parent 648ba5f925
commit b07cdeaee2
7 changed files with 215 additions and 48 deletions

View File

@ -10,20 +10,34 @@
#ifndef LLVM_LIB_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
#define LLVM_LIB_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Dwarf.h"
namespace llvm {
class DWARFUnit;
class DWARFFormValue;
class raw_ostream;
class DWARFAbbreviationDeclaration {
public:
/// One (attribute, form) pair of an abbreviation declaration, together with
/// the byte size of the form when that size is known to be fixed.
struct AttributeSpec {
/// Construct a spec without a byte size; ByteSize is not initialized here
/// and is therefore left without a value.
AttributeSpec(dwarf::Attribute A, dwarf::Form F) : Attr(A), Form(F) {}
/// Construct a spec whose optional fixed byte size is \p S.
AttributeSpec(dwarf::Attribute A, dwarf::Form F, Optional<uint8_t> S)
: Attr(A), Form(F), ByteSize(S) {}
dwarf::Attribute Attr;
dwarf::Form Form;
/// If ByteSize has a value, then it contains the fixed size in bytes for
/// the Form in this object. If ByteSize doesn't have a value, then the
/// byte size of Form either varies according to the DWARFUnit that it is
/// contained in or the value size varies and must be decoded from the
/// debug information in order to determine its size.
Optional<uint8_t> ByteSize;
/// Get the fixed byte size of this Form if possible. This function might
/// use the DWARFUnit to calculate the size of the Form, like for
/// DW_FORM_addr and DW_FORM_ref_addr, so this isn't just an accessor for
/// the ByteSize member.
Optional<uint8_t> getByteSize(const DWARFUnit &U) const;
};
typedef SmallVector<AttributeSpec, 8> AttributeSpecVector;
@ -46,18 +60,70 @@ public:
return dwarf::Form(0);
}
uint32_t findAttributeIndex(dwarf::Attribute attr) const;
/// Get the index of the specified attribute.
///
/// Searches this abbreviation declaration for the index of the specified
/// attribute.
///
/// \param Attr DWARF attribute to search for.
/// \returns Optional index of the attribute if found, None otherwise.
Optional<uint32_t> findAttributeIndex(dwarf::Attribute attr) const;
/// Extract a DWARF form value from a DIE specified by DIE offset.
///
/// Extract an attribute value for a DWARFUnit given the DIE offset and the
/// attribute.
///
/// \param DIEOffset the DIE offset that points to the ULEB128 abbreviation
/// code in the .debug_info data.
/// \param Attr DWARF attribute to search for.
/// \param U the DWARFUnit that contains the DIE.
/// \param FormValue the form value that will be filled in.
/// \returns true if the attribute was extracted into \p FormValue.
bool getAttributeValue(const uint32_t DIEOffset, const dwarf::Attribute Attr,
const DWARFUnit &U, DWARFFormValue &FormValue) const;
bool extract(DataExtractor Data, uint32_t* OffsetPtr);
void dump(raw_ostream &OS) const;
// Return an optional byte size of all attribute data in this abbreviation
// if a constant byte size can be calculated given a DWARFUnit. This allows
// DWARF parsing to be faster as many DWARF DIEs have a fixed byte size.
Optional<size_t> getFixedAttributesByteSize(const DWARFUnit &U) const;
private:
void clear();
/// A helper structure that can quickly determine the size in bytes of an
/// abbreviation declaration.
struct FixedSizeInfo {
/// The fixed byte size for fixed size forms.
uint16_t NumBytes;
/// Number of DW_FORM_addr forms in this abbreviation declaration.
uint8_t NumAddrs;
/// Number of DW_FORM_ref_addr forms in this abbreviation declaration.
uint8_t NumRefAddrs;
/// Number of 4 byte in DWARF32 and 8 byte in DWARF64 forms.
uint8_t NumDwarfOffsets;
/// Constructor. Starts with every count and the byte total at zero.
FixedSizeInfo()
: NumBytes(0), NumAddrs(0), NumRefAddrs(0), NumDwarfOffsets(0) {}
/// Calculate the fixed size in bytes given a DWARFUnit.
///
/// \param U the DWARFUnit to use when determining the byte size.
/// \returns the size in bytes for all attribute data in this abbreviation.
/// The returned size does not include bytes for the ULEB128 abbreviation
/// code
size_t getByteSize(const DWARFUnit &U) const;
};
uint32_t Code;
dwarf::Tag Tag;
uint8_t CodeByteSize;
bool HasChildren;
AttributeSpecVector AttributeSpecs;
/// If this abbreviation has a fixed byte size then FixedAttributeSize member
/// variable below will have a value.
Optional<FixedSizeInfo> FixedAttributeSize;
};
}

View File

@ -47,7 +47,10 @@ public:
/// Extracts a debug info entry, which is a child of a given unit,
/// starting at a given offset. If DIE can't be extracted, returns false and
/// doesn't change OffsetPtr.
bool extractFast(const DWARFUnit *U, uint32_t *OffsetPtr);
bool extractFast(const DWARFUnit &U, uint32_t *OffsetPtr);
/// High performance extraction should use this call.
bool extractFast(const DWARFUnit &U, uint32_t *OffsetPtr,
const DataExtractor &DebugInfoData, uint32_t UEndOffset);
uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; }
bool isNULL() const { return AbbrevDecl == nullptr; }

View File

@ -56,6 +56,7 @@ private:
public:
DWARFFormValue(dwarf::Form F = dwarf::Form(0)) : Form(F), U(nullptr) {}
dwarf::Form getForm() const { return Form; }
void setForm(dwarf::Form F) { Form = F; }
bool isFormClass(FormClass FC) const;
const DWARFUnit *getUnit() const { return U; }
void dump(raw_ostream &OS) const;

View File

@ -8,6 +8,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@ -17,8 +19,10 @@ using namespace dwarf;
/// Return this declaration to its empty state: no attribute specs, no fixed
/// size information, and zeroed code, tag and code byte size.
void DWARFAbbreviationDeclaration::clear() {
  // Drop the per-attribute data first, then the scalar header fields.
  AttributeSpecs.clear();
  FixedAttributeSize.reset();
  HasChildren = false;
  CodeByteSize = 0;
  Tag = DW_TAG_null;
  Code = 0;
}
DWARFAbbreviationDeclaration::DWARFAbbreviationDeclaration() {
@ -29,10 +33,12 @@ bool
DWARFAbbreviationDeclaration::extract(DataExtractor Data,
uint32_t* OffsetPtr) {
clear();
const uint32_t Offset = *OffsetPtr;
Code = Data.getULEB128(OffsetPtr);
if (Code == 0) {
return false;
}
CodeByteSize = *OffsetPtr - Offset;
Tag = static_cast<llvm::dwarf::Tag>(Data.getULEB128(OffsetPtr));
if (Tag == DW_TAG_null) {
clear();
@ -40,12 +46,52 @@ DWARFAbbreviationDeclaration::extract(DataExtractor Data,
}
uint8_t ChildrenByte = Data.getU8(OffsetPtr);
HasChildren = (ChildrenByte == DW_CHILDREN_yes);
// Assign a value to our optional FixedAttributeSize member variable. If
// this member variable still has a value after the while loop below, then
// all attribute data in this abbreviation declaration has a fixed byte size.
FixedAttributeSize = FixedSizeInfo();
// Read all of the abbreviation attributes and forms.
while (true) {
auto A = static_cast<Attribute>(Data.getULEB128(OffsetPtr));
auto F = static_cast<Form>(Data.getULEB128(OffsetPtr));
if (A && F) {
AttributeSpecs.push_back(AttributeSpec(A, F));
auto FixedFormByteSize = DWARFFormValue::getFixedByteSize(F);
AttributeSpecs.push_back(AttributeSpec(A, F, FixedFormByteSize));
// If this abbreviation still has a fixed byte size, then update the
// FixedAttributeSize as needed.
if (FixedAttributeSize) {
if (FixedFormByteSize)
FixedAttributeSize->NumBytes += *FixedFormByteSize;
else {
switch (F) {
case DW_FORM_addr:
++FixedAttributeSize->NumAddrs;
break;
case DW_FORM_ref_addr:
++FixedAttributeSize->NumRefAddrs;
break;
case DW_FORM_strp:
case DW_FORM_GNU_ref_alt:
case DW_FORM_GNU_strp_alt:
case DW_FORM_line_strp:
case DW_FORM_sec_offset:
case DW_FORM_strp_sup:
case DW_FORM_ref_sup:
++FixedAttributeSize->NumDwarfOffsets;
break;
default:
// Indicate we no longer have a fixed byte size for this
// abbreviation by clearing the FixedAttributeSize optional value
// so it doesn't have a value.
FixedAttributeSize.reset();
break;
}
}
}
} else if (A == 0 && F == 0) {
// We successfully reached the end of this abbreviation declaration
// since both attribute and form are zero.
@ -88,11 +134,64 @@ void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const {
OS << '\n';
}
uint32_t
DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute attr) const {
Optional<uint32_t>
DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute Attr) const {
for (uint32_t i = 0, e = AttributeSpecs.size(); i != e; ++i) {
if (AttributeSpecs[i].Attr == attr)
if (AttributeSpecs[i].Attr == Attr)
return i;
}
return -1U;
return None;
}
/// Extract the value of attribute \p Attr from the DIE at \p DIEOffset.
///
/// The values of all attributes that precede \p Attr in this abbreviation are
/// stepped over (by their fixed size when one is available, otherwise by
/// decoding them), and the requested value is then extracted into
/// \p FormValue.
///
/// \param DIEOffset offset of the DIE's ULEB128 abbreviation code.
/// \param Attr the attribute whose value is wanted.
/// \param U the unit used to size and decode the attribute values.
/// \param FormValue receives the form and the extracted value.
/// \returns false if this abbreviation has no \p Attr, if a preceding value
/// could not be skipped, or if extracting the value itself failed.
bool DWARFAbbreviationDeclaration::getAttributeValue(
    const uint32_t DIEOffset, const dwarf::Attribute Attr, const DWARFUnit &U,
    DWARFFormValue &FormValue) const {
  Optional<uint32_t> MatchAttrIndex = findAttributeIndex(Attr);
  if (!MatchAttrIndex)
    return false;

  auto DebugInfoData = U.getDebugInfoExtractor();

  // Add the byte size of the ULEB128 abbreviation code so that Offset points
  // at the first attribute value.
  uint32_t Offset = DIEOffset + CodeByteSize;
  uint32_t AttrIndex = 0;
  for (const auto &Spec : AttributeSpecs) {
    if (*MatchAttrIndex == AttrIndex) {
      // We have arrived at the attribute to extract, extract it from Offset.
      FormValue.setForm(Spec.Form);
      return FormValue.extractValue(DebugInfoData, &Offset, &U);
    }
    // March Offset along until we get to the attribute we want.
    if (Optional<uint8_t> FixedSize = Spec.getByteSize(U)) {
      Offset += *FixedSize;
    } else if (!DWARFFormValue::skipValue(Spec.Form, DebugInfoData, &Offset,
                                          &U)) {
      // The value could not be skipped, so Offset no longer reliably points
      // at the next attribute. Report failure instead of extracting from an
      // untrustworthy position.
      return false;
    }
    ++AttrIndex;
  }
  return false;
}
/// Total the attribute data size for unit \p U: the unit-independent bytes
/// plus each unit-dependent form count multiplied by the size that \p U
/// reports for that kind of form.
size_t DWARFAbbreviationDeclaration::FixedSizeInfo::getByteSize(
    const DWARFUnit &U) const {
  // Each unit query is made only when the matching count is non-zero.
  const size_t AddrBytes = NumAddrs ? NumAddrs * U.getAddressByteSize() : 0;
  const size_t RefAddrBytes =
      NumRefAddrs ? NumRefAddrs * U.getRefAddrByteSize() : 0;
  const size_t OffsetBytes =
      NumDwarfOffsets ? NumDwarfOffsets * U.getDwarfOffsetByteSize() : 0;
  return NumBytes + AddrBytes + RefAddrBytes + OffsetBytes;
}
/// Return the stored ByteSize when it has a value; otherwise ask
/// DWARFFormValue for the fixed size of Form in the context of \p U.
Optional<uint8_t> DWARFAbbreviationDeclaration::AttributeSpec::getByteSize(
    const DWARFUnit &U) const {
  if (ByteSize)
    return ByteSize;
  return DWARFFormValue::getFixedByteSize(Form, &U);
}
/// Return the byte size of all attribute data for unit \p U, or None when
/// FixedAttributeSize has no value.
Optional<size_t> DWARFAbbreviationDeclaration::getFixedAttributesByteSize(
    const DWARFUnit &U) const {
  if (!FixedAttributeSize)
    return None;
  return FixedAttributeSize->getByteSize(U);
}

View File

@ -30,7 +30,7 @@ static const DWARFUnit *findUnitAndExtractFast(DWARFDebugInfoEntryMinimal &DIE,
const DWARFUnit *Unit,
uint32_t *Offset) {
Unit = Unit->getUnitSection().getUnitForOffset(*Offset);
return (Unit && DIE.extractFast(Unit, Offset)) ? Unit : nullptr;
return (Unit && DIE.extractFast(*Unit, Offset)) ? Unit : nullptr;
}
void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, DWARFUnit *u,
@ -183,11 +183,17 @@ void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS,
OS << ")\n";
}
bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit *U,
bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit &U,
uint32_t *OffsetPtr) {
DataExtractor DebugInfoData = U.getDebugInfoExtractor();
const uint32_t UEndOffset = U.getNextUnitOffset();
return extractFast(U, OffsetPtr, DebugInfoData, UEndOffset);
}
bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit &U,
uint32_t *OffsetPtr,
const DataExtractor &DebugInfoData,
uint32_t UEndOffset) {
Offset = *OffsetPtr;
DataExtractor DebugInfoData = U->getDebugInfoExtractor();
uint32_t UEndOffset = U->getNextUnitOffset();
if (Offset >= UEndOffset || !DebugInfoData.isValidOffset(Offset))
return false;
uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
@ -196,21 +202,29 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit *U,
AbbrevDecl = nullptr;
return true;
}
AbbrevDecl = U->getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
AbbrevDecl = U.getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
if (nullptr == AbbrevDecl) {
// Restore the original offset.
*OffsetPtr = Offset;
return false;
}
// See if all attributes in this DIE have fixed byte sizes. If so, we can
// just add this size to the offset to skip to the next DIE.
if (Optional<size_t> FixedSize = AbbrevDecl->getFixedAttributesByteSize(U)) {
*OffsetPtr += *FixedSize;
return true;
}
// Skip all data in the .debug_info for the attributes
for (const auto &AttrSpec : AbbrevDecl->attributes()) {
auto Form = AttrSpec.Form;
if (Optional<uint8_t> FixedSize = DWARFFormValue::getFixedByteSize(Form, U))
// Check if this attribute has a fixed byte size.
if (Optional<uint8_t> FixedSize = AttrSpec.getByteSize(U)) {
// Attribute byte size is fixed, just add the size to the offset.
*OffsetPtr += *FixedSize;
else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, U)) {
// Restore the original offset.
} else if (!DWARFFormValue::skipValue(AttrSpec.Form, DebugInfoData,
OffsetPtr, &U)) {
// We failed to skip this attribute's value, restore the original offset
// and return the failure status.
*OffsetPtr = Offset;
return false;
}
@ -230,27 +244,9 @@ bool DWARFDebugInfoEntryMinimal::isSubroutineDIE() const {
bool DWARFDebugInfoEntryMinimal::getAttributeValue(const DWARFUnit *U,
dwarf::Attribute Attr, DWARFFormValue &FormValue) const {
if (!AbbrevDecl)
if (!AbbrevDecl || !U)
return false;
uint32_t AttrIdx = AbbrevDecl->findAttributeIndex(Attr);
if (AttrIdx == -1U)
return false;
DataExtractor DebugInfoData = U->getDebugInfoExtractor();
uint32_t DebugInfoOffset = getOffset();
// Skip the abbreviation code so we are at the data for the attributes
DebugInfoData.getULEB128(&DebugInfoOffset);
// Skip preceding attribute values.
for (uint32_t i = 0; i < AttrIdx; ++i) {
DWARFFormValue::skipValue(AbbrevDecl->getFormByIndex(i),
DebugInfoData, &DebugInfoOffset, U);
}
FormValue = DWARFFormValue(AbbrevDecl->getFormByIndex(AttrIdx));
return FormValue.extractValue(DebugInfoData, &DebugInfoOffset, U);
return AbbrevDecl->getAttributeValue(Offset, Attr, *U, FormValue);
}
const char *DWARFDebugInfoEntryMinimal::getAttributeValueAsString(

View File

@ -196,10 +196,11 @@ void DWARFUnit::extractDIEsToVector(
uint32_t DIEOffset = Offset + getHeaderSize();
uint32_t NextCUOffset = getNextUnitOffset();
DWARFDebugInfoEntryMinimal DIE;
DataExtractor DebugInfoData = getDebugInfoExtractor();
uint32_t Depth = 0;
bool IsCUDie = true;
while (DIEOffset < NextCUOffset && DIE.extractFast(this, &DIEOffset)) {
while (DIE.extractFast(*this, &DIEOffset, DebugInfoData, NextCUOffset)) {
if (IsCUDie) {
if (AppendCUDie)
Dies.push_back(DIE);

View File

@ -2082,20 +2082,21 @@ unsigned DwarfLinker::shouldKeepVariableDIE(RelocationManager &RelocMgr,
// Global variables with constant value can always be kept.
if (!(Flags & TF_InFunctionScope) &&
Abbrev->findAttributeIndex(dwarf::DW_AT_const_value) != -1U) {
Abbrev->findAttributeIndex(dwarf::DW_AT_const_value)) {
MyInfo.InDebugMap = true;
return Flags | TF_Keep;
}
uint32_t LocationIdx = Abbrev->findAttributeIndex(dwarf::DW_AT_location);
if (LocationIdx == -1U)
Optional<uint32_t> LocationIdx =
Abbrev->findAttributeIndex(dwarf::DW_AT_location);
if (!LocationIdx)
return Flags;
uint32_t Offset = DIE.getOffset() + getULEB128Size(Abbrev->getCode());
const DWARFUnit &OrigUnit = Unit.getOrigUnit();
uint32_t LocationOffset, LocationEndOffset;
std::tie(LocationOffset, LocationEndOffset) =
getAttributeOffsets(Abbrev, LocationIdx, Offset, OrigUnit);
getAttributeOffsets(Abbrev, *LocationIdx, Offset, OrigUnit);
// See if there is a relocation to a valid debug map entry inside
// this variable's location. The order is important here. We want to
@ -2122,15 +2123,15 @@ unsigned DwarfLinker::shouldKeepSubprogramDIE(
Flags |= TF_InFunctionScope;
uint32_t LowPcIdx = Abbrev->findAttributeIndex(dwarf::DW_AT_low_pc);
if (LowPcIdx == -1U)
Optional<uint32_t> LowPcIdx = Abbrev->findAttributeIndex(dwarf::DW_AT_low_pc);
if (!LowPcIdx)
return Flags;
uint32_t Offset = DIE.getOffset() + getULEB128Size(Abbrev->getCode());
const DWARFUnit &OrigUnit = Unit.getOrigUnit();
uint32_t LowPcOffset, LowPcEndOffset;
std::tie(LowPcOffset, LowPcEndOffset) =
getAttributeOffsets(Abbrev, LowPcIdx, Offset, OrigUnit);
getAttributeOffsets(Abbrev, *LowPcIdx, Offset, OrigUnit);
uint64_t LowPc =
DIE.getAttributeValueAsAddress(&OrigUnit, dwarf::DW_AT_low_pc, -1ULL);