[BOLT][DWARF] Don't create extra .debug_str_offsets contributions

With ThinLTO multiple CUs can share the same .debug_str_offsets contribution. We
were creating a new one for each CU. This led to a binary size increase.

Reviewed By: maksfb

Differential Revision: https://reviews.llvm.org/D139214
This commit is contained in:
Alexander Yermolovich 2022-12-07 12:22:58 -08:00
parent a5bd76a6e3
commit f7a2131766
4 changed files with 335 additions and 12 deletions

View File

@ -431,7 +431,7 @@ public:
/// Replaces the address stored for \p Index. Asserts that \p Index is
/// already present in the index-to-address map.
void updateAddressMap(uint32_t Index, uint32_t Address);
/// Writes out the current section's entry into .debug_str_offsets.
void finalizeSection();
void finalizeSection(DWARFUnit &Unit);
/// Returns false while StrOffsetsBuffer is still empty.
bool isFinalized() const { return !StrOffsetsBuffer->empty(); }
@ -445,8 +445,10 @@ private:
std::unique_ptr<DebugStrOffsetsBufferVector> StrOffsetsBuffer;
std::unique_ptr<raw_svector_ostream> StrOffsetsStream;
std::map<uint32_t, uint32_t> IndexToAddressMap;
// DW_AT_str_offsets_base values of the units already handed to
// finalizeSection; used to detect units that share a contribution.
DenseSet<uint64_t> ProcessedBaseOffsets;
// Section size not including header.
uint32_t CurrentSectionSize{0};
// Set by updateAddressMap and cleared at the end of finalizeSection.
bool StrOffsetSectionWasModified = false;
};
using DebugStrBufferVector = SmallVector<char, 16>;

View File

@ -1083,20 +1083,37 @@ void DebugStrOffsetsWriter::initialize(
// Overwrites the address recorded for an existing Index and flags the
// current contribution as modified.
void DebugStrOffsetsWriter::updateAddressMap(uint32_t Index, uint32_t Address) {
// Only indices that are already in the map may be updated.
assert(IndexToAddressMap.count(Index) > 0 && "Index is not found.");
IndexToAddressMap[Index] = Address;
// Remembered so that finalizeSection can warn if it skips this contribution.
StrOffsetSectionWasModified = true;
}
void DebugStrOffsetsWriter::finalizeSection() {
void DebugStrOffsetsWriter::finalizeSection(DWARFUnit &Unit) {
// Nothing was collected for this unit, so there is nothing to emit.
if (IndexToAddressMap.empty())
return;
// Writing out the header for each section.
support::endian::write(*StrOffsetsStream, CurrentSectionSize + 4,
support::little);
support::endian::write(*StrOffsetsStream, static_cast<uint16_t>(5),
support::little);
support::endian::write(*StrOffsetsStream, static_cast<uint16_t>(0),
support::little);
for (const auto &Entry : IndexToAddressMap)
support::endian::write(*StrOffsetsStream, Entry.second, support::little);
// The unit's DW_AT_str_offsets_base value is the key that identifies which
// contribution the unit refers to.
std::optional<AttrInfo> AttrVal =
findAttributeInfo(Unit.getUnitDIE(), dwarf::DW_AT_str_offsets_base);
assert(AttrVal && "DW_AT_str_offsets_base not present.");
std::optional<uint64_t> Val = AttrVal->V.getAsSectionOffset();
assert(Val && "DW_AT_str_offsets_base Value not present.");
// RetVal.second is true only when this base offset has not been seen before.
auto RetVal = ProcessedBaseOffsets.insert(*Val);
if (RetVal.second) {
// Writing out the header for each section.
support::endian::write(*StrOffsetsStream, CurrentSectionSize + 4,
support::little);
support::endian::write(*StrOffsetsStream, static_cast<uint16_t>(5),
support::little);
support::endian::write(*StrOffsetsStream, static_cast<uint16_t>(0),
support::little);
for (const auto &Entry : IndexToAddressMap)
support::endian::write(*StrOffsetsStream, Entry.second, support::little);
}
// Print a warning if this contribution was already processed and is
// therefore skipped now, even though it was modified for this unit.
if (!RetVal.second && StrOffsetSectionWasModified)
errs() << "BOLT-WARNING: skipping string offsets section for CU at offset "
<< Twine::utohexstr(Unit.getOffset()) << ", but it was modified\n";
// Reset the per-unit state before the next unit is processed.
StrOffsetSectionWasModified = false;
IndexToAddressMap.clear();
}

View File

@ -317,7 +317,7 @@ void DWARFRewriter::updateDebugInfo() {
RangesBase = RangesSectionWriter->getSectionOffset() +
getDWARF5RngListLocListHeaderSize();
RangesSectionWriter->initSection(*Unit);
StrOffstsWriter->finalizeSection();
StrOffstsWriter->finalizeSection(*Unit);
}
DebugInfoPatcher->addUnitBaseOffsetLabel(Unit->getOffset());

View File

@ -0,0 +1,304 @@
# REQUIRES: system-linux
# RUN: llvm-mc --filetype=obj --triple x86_64 %s -o %tmain.o --defsym MAIN=0
# RUN: llvm-mc --filetype=obj --triple x86_64 %s -o %thelper.o
# RUN: %clang %cflags %tmain.o %thelper.o -o %tmain.exe
# RUN: llvm-bolt %tmain.exe -o %tmain.exe.bolt --update-debug-sections
# RUN: llvm-dwarfdump --debug-info %tmain.exe.bolt > %tout.text
# RUN: llvm-dwarfdump --show-section-sizes %tmain.exe >> %tout.text
# RUN: llvm-dwarfdump --show-section-sizes %tmain.exe.bolt >> %tout.text
# RUN: cat %tout.text | FileCheck %s
# This test checks that with DWARF5 when two CUs share the same .debug_str_offsets
# entry BOLT does not create a duplicate.
# CHECK: DW_AT_str_offsets_base (0x[[#%.8x,ADDR:]]
# CHECK: DW_AT_str_offsets_base (0x[[#ADDR]]
# CHECK: .debug_str_offsets [[#ADDR2:]]
# CHECK: .debug_str_offsets [[#ADDR2]]
# main.cpp
# int main(){
# return 0;
# }
# helper.cpp
# void foo(){}
## This file assembles into one of two CUs: main.cpp when MAIN is defined, helper.cpp otherwise.
.ifdef MAIN
.text
.file "main.cpp"
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
main: # @main
.Lfunc_begin0:
.file 0 "." "main.cpp" md5 0x32c197b0a8b855eb3d7573c993ada862
.loc 0 1 0 # main.cpp:1:0
.cfi_startproc
# %bb.0:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
movl $0, -4(%rbp)
.Ltmp0:
.loc 0 2 1 prologue_end # main.cpp:2:1
xorl %eax, %eax
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Ltmp1:
.Lfunc_end0:
.size main, .Lfunc_end0-main
.cfi_endproc
# -- End function
.section .debug_abbrev,"",@progbits
.byte 1 # Abbreviation Code
.byte 17 # DW_TAG_compile_unit
.byte 1 # DW_CHILDREN_yes
.byte 37 # DW_AT_producer
.byte 37 # DW_FORM_strx1
.byte 19 # DW_AT_language
.byte 5 # DW_FORM_data2
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 114 # DW_AT_str_offsets_base
.byte 23 # DW_FORM_sec_offset
.byte 16 # DW_AT_stmt_list
.byte 23 # DW_FORM_sec_offset
.byte 27 # DW_AT_comp_dir
.byte 37 # DW_FORM_strx1
.byte 17 # DW_AT_low_pc
.byte 27 # DW_FORM_addrx
.byte 18 # DW_AT_high_pc
.byte 6 # DW_FORM_data4
.byte 115 # DW_AT_addr_base
.byte 23 # DW_FORM_sec_offset
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 2 # Abbreviation Code
.byte 46 # DW_TAG_subprogram
.byte 0 # DW_CHILDREN_no
.byte 17 # DW_AT_low_pc
.byte 27 # DW_FORM_addrx
.byte 18 # DW_AT_high_pc
.byte 6 # DW_FORM_data4
.byte 64 # DW_AT_frame_base
.byte 24 # DW_FORM_exprloc
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 58 # DW_AT_decl_file
.byte 11 # DW_FORM_data1
.byte 59 # DW_AT_decl_line
.byte 11 # DW_FORM_data1
.byte 73 # DW_AT_type
.byte 19 # DW_FORM_ref4
.byte 63 # DW_AT_external
.byte 25 # DW_FORM_flag_present
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 3 # Abbreviation Code
.byte 36 # DW_TAG_base_type
.byte 0 # DW_CHILDREN_no
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 62 # DW_AT_encoding
.byte 11 # DW_FORM_data1
.byte 11 # DW_AT_byte_size
.byte 11 # DW_FORM_data1
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 0 # EOM(3)
.section .debug_info,"",@progbits
.Lcu_begin0:
.long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
.Ldebug_info_start0:
.short 5 # DWARF version number
.byte 1 # DWARF Unit Type
.byte 8 # Address Size (in bytes)
.long .debug_abbrev # Offset Into Abbrev. Section
.byte 1 # Abbrev [1] 0xc:0x2b DW_TAG_compile_unit
.byte 0 # DW_AT_producer
.short 33 # DW_AT_language
.byte 1 # DW_AT_name
.long .Lstr_offsets_base0 # DW_AT_str_offsets_base
.long .Lline_table_start0 # DW_AT_stmt_list
.byte 2 # DW_AT_comp_dir
.byte 0 # DW_AT_low_pc
.long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
.long .Laddr_table_base0 # DW_AT_addr_base
.byte 2 # Abbrev [2] 0x23:0xf DW_TAG_subprogram
.byte 0 # DW_AT_low_pc
.long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
.byte 1 # DW_AT_frame_base
.byte 86
.byte 3 # DW_AT_name
.byte 0 # DW_AT_decl_file
.byte 1 # DW_AT_decl_line
.long 50 # DW_AT_type
# DW_AT_external
.byte 3 # Abbrev [3] 0x32:0x4 DW_TAG_base_type
.byte 4 # DW_AT_name
.byte 5 # DW_AT_encoding
.byte 4 # DW_AT_byte_size
.byte 0 # End Of Children Mark
.Ldebug_info_end0:
.section .debug_str_offsets,"",@progbits
.long 24 # Length of String Offsets Set
.short 5
.short 0
.Lstr_offsets_base0:
.section .debug_str,"MS",@progbits,1
.Linfo_string0:
.asciz "clang version 15.0.0" # string offset=0
.Linfo_string1:
.asciz "main.cpp" # string offset=21
.Linfo_string2:
.asciz "." # string offset=30
.Linfo_string3:
.asciz "main" # string offset=32
.Linfo_string4:
.asciz "int" # string offset=37
.section .debug_str_offsets,"",@progbits
.long .Linfo_string0
.long .Linfo_string1
.long .Linfo_string2
.long .Linfo_string3
.long .Linfo_string4
.section .debug_addr,"",@progbits
.long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
.Ldebug_addr_start0:
.short 5 # DWARF version number
.byte 8 # Address size
.byte 0 # Segment selector size
.Laddr_table_base0:
.quad .Lfunc_begin0
.Ldebug_addr_end0:
.ident "clang version 15.0.0"
.section ".note.GNU-stack","",@progbits
.addrsig
.section .debug_line,"",@progbits
.Lline_table_start0:
.else
.text
.file "helper.cpp"
.globl _Z3foov # -- Begin function _Z3foov
.p2align 4, 0x90
.type _Z3foov,@function
_Z3foov: # @_Z3foov
.Lfunc_begin0:
.file 0 "." "helper.cpp" md5 0x5f98e4807e4f8781c26a82faf819f8a7
.loc 0 1 0 # helper.cpp:1:0
.cfi_startproc
# %bb.0:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
.Ltmp0:
.loc 0 1 12 prologue_end # helper.cpp:1:12
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Ltmp1:
.Lfunc_end0:
.size _Z3foov, .Lfunc_end0-_Z3foov
.cfi_endproc
# -- End function
.section .debug_abbrev,"",@progbits
.byte 1 # Abbreviation Code
.byte 17 # DW_TAG_compile_unit
.byte 1 # DW_CHILDREN_yes
.byte 37 # DW_AT_producer
.byte 37 # DW_FORM_strx1
.byte 19 # DW_AT_language
.byte 5 # DW_FORM_data2
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 114 # DW_AT_str_offsets_base
.byte 23 # DW_FORM_sec_offset
.byte 16 # DW_AT_stmt_list
.byte 23 # DW_FORM_sec_offset
.byte 27 # DW_AT_comp_dir
.byte 37 # DW_FORM_strx1
.byte 17 # DW_AT_low_pc
.byte 27 # DW_FORM_addrx
.byte 18 # DW_AT_high_pc
.byte 6 # DW_FORM_data4
.byte 115 # DW_AT_addr_base
.byte 23 # DW_FORM_sec_offset
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 2 # Abbreviation Code
.byte 46 # DW_TAG_subprogram
.byte 0 # DW_CHILDREN_no
.byte 17 # DW_AT_low_pc
.byte 27 # DW_FORM_addrx
.byte 18 # DW_AT_high_pc
.byte 6 # DW_FORM_data4
.byte 64 # DW_AT_frame_base
.byte 24 # DW_FORM_exprloc
.byte 110 # DW_AT_linkage_name
.byte 37 # DW_FORM_strx1
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 58 # DW_AT_decl_file
.byte 11 # DW_FORM_data1
.byte 59 # DW_AT_decl_line
.byte 11 # DW_FORM_data1
.byte 63 # DW_AT_external
.byte 25 # DW_FORM_flag_present
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 0 # EOM(3)
.section .debug_info,"",@progbits
.Lcu_begin0:
.long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
.Ldebug_info_start0:
.short 5 # DWARF version number
.byte 1 # DWARF Unit Type
.byte 8 # Address Size (in bytes)
.long .debug_abbrev # Offset Into Abbrev. Section
.byte 1 # Abbrev [1] 0xc:0x24 DW_TAG_compile_unit
.byte 0 # DW_AT_producer
.short 33 # DW_AT_language
.byte 1 # DW_AT_name
.long 0x8 # DW_AT_str_offsets_base Manually modified to be the same as first CU
.long .Lline_table_start0 # DW_AT_stmt_list
.byte 2 # DW_AT_comp_dir
.byte 0 # DW_AT_low_pc
.long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
.long .Laddr_table_base0 # DW_AT_addr_base
.byte 2 # Abbrev [2] 0x23:0xc DW_TAG_subprogram
.byte 0 # DW_AT_low_pc
.long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
.byte 1 # DW_AT_frame_base
.byte 86
.byte 3 # DW_AT_linkage_name
.byte 4 # DW_AT_name
.byte 0 # DW_AT_decl_file
.byte 1 # DW_AT_decl_line
# DW_AT_external
.byte 0 # End Of Children Mark
.Ldebug_info_end0:
# Manually removed .debug_str_offsets and .debug_str
.section .debug_addr,"",@progbits
.long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
.Ldebug_addr_start0:
.short 5 # DWARF version number
.byte 8 # Address size
.byte 0 # Segment selector size
.Laddr_table_base0:
.quad .Lfunc_begin0
.Ldebug_addr_end0:
.ident "clang version 15.0.0"
.section ".note.GNU-stack","",@progbits
.addrsig
.section .debug_line,"",@progbits
.Lline_table_start0:
.endif