[BOLT][DWARF] Fix for .debug_line with DWARF5

There was a bug in a code that pre-populated line string for a case where parts
of .debug_line are not processed by BOLT, but copied as raw data. We were not
switching sections. This resulted in parts of the binary being over-written with
debug data.

Reviewed By: maksfb

Differential Revision: https://reviews.llvm.org/D154544
This commit is contained in:
Alexander Yermolovich 2023-07-06 11:34:18 -07:00
parent ade8db573e
commit 66e943b1a9
7 changed files with 501 additions and 7 deletions

View File

@ -1655,25 +1655,27 @@ void DwarfLineTable::emitCU(MCStreamer *MCOS, MCDwarfLineTableParams Params,
// Helper function to parse .debug_line_str, and populate one we are using.
// For functions that we do not modify we output them as raw data.
// Re-constructing .debug_line_str so that offsets are correct for those
// debut line tables.
// debug line tables.
// Bonus is that when we output a final binary we can re-use .debug_line_str
// section. So we don't have to do the SHF_ALLOC trick we did with
// .debug_line.
static void parseAndPopulateDebugLineStr(BinarySection &LineStrSection,
MCDwarfLineStr &LineStr,
BinaryContext &BC,
MCStreamer &Streamer) {
BinaryContext &BC) {
DataExtractor StrData(LineStrSection.getContents(),
BC.DwCtx->isLittleEndian(), 0);
uint64_t Offset = 0;
while (StrData.isValidOffset(Offset)) {
const uint64_t StrOffset = Offset;
Error Err = Error::success();
const char *CStr = StrData.getCStr(&Offset, &Err);
if (Err) {
errs() << "BOLT-ERROR: could not extract string from .debug_line_str";
continue;
}
LineStr.emitRef(&Streamer, CStr);
const size_t NewOffset = LineStr.addString(CStr);
assert(StrOffset == NewOffset &&
"New offset in .debug_line_str doesn't match original offset");
}
}
@ -1693,11 +1695,12 @@ void DwarfLineTable::emit(BinaryContext &BC, MCStreamer &Streamer) {
std::optional<MCDwarfLineStr> LineStr;
ErrorOr<BinarySection &> LineStrSection =
BC.getUniqueSectionByName(".debug_line_str");
// Some versions of GCC output DWARF5 .debug_info, but DWARF4 or lower
// .debug_line
// .debug_line, so need to check if section exists.
if (LineStrSection) {
LineStr.emplace(*BC.Ctx);
parseAndPopulateDebugLineStr(*LineStrSection, *LineStr, BC, Streamer);
parseAndPopulateDebugLineStr(*LineStrSection, *LineStr, BC);
}
// Switch to the section where the table will be emitted into.

View File

@ -0,0 +1,129 @@
# int Foo = 0;
.text
.file "helperVariable.cpp"
.file 0 "/test" "helperVariable.cpp" md5 0xb6e6130198b21a44b5db4247fccb359d
.type Foo,@object # @Foo
.bss
.globl Foo
.p2align 2, 0x0
Foo:
.long 0 # 0x0
.size Foo, 4
.section .debug_abbrev,"",@progbits
.byte 1 # Abbreviation Code
.byte 17 # DW_TAG_compile_unit
.byte 1 # DW_CHILDREN_yes
.byte 37 # DW_AT_producer
.byte 37 # DW_FORM_strx1
.byte 19 # DW_AT_language
.byte 5 # DW_FORM_data2
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 114 # DW_AT_str_offsets_base
.byte 23 # DW_FORM_sec_offset
.byte 16 # DW_AT_stmt_list
.byte 23 # DW_FORM_sec_offset
.byte 27 # DW_AT_comp_dir
.byte 37 # DW_FORM_strx1
.byte 115 # DW_AT_addr_base
.byte 23 # DW_FORM_sec_offset
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 2 # Abbreviation Code
.byte 52 # DW_TAG_variable
.byte 0 # DW_CHILDREN_no
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 73 # DW_AT_type
.byte 19 # DW_FORM_ref4
.byte 63 # DW_AT_external
.byte 25 # DW_FORM_flag_present
.byte 58 # DW_AT_decl_file
.byte 11 # DW_FORM_data1
.byte 59 # DW_AT_decl_line
.byte 11 # DW_FORM_data1
.byte 2 # DW_AT_location
.byte 24 # DW_FORM_exprloc
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 3 # Abbreviation Code
.byte 36 # DW_TAG_base_type
.byte 0 # DW_CHILDREN_no
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 62 # DW_AT_encoding
.byte 11 # DW_FORM_data1
.byte 11 # DW_AT_byte_size
.byte 11 # DW_FORM_data1
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 0 # EOM(3)
.section .debug_info,"",@progbits
.Lcu_begin0:
.long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
.Ldebug_info_start0:
.short 5 # DWARF version number
.byte 1 # DWARF Unit Type
.byte 8 # Address Size (in bytes)
.long .debug_abbrev # Offset Into Abbrev. Section
.byte 1 # Abbrev [1] 0xc:0x22 DW_TAG_compile_unit
.byte 0 # DW_AT_producer
.short 33 # DW_AT_language
.byte 1 # DW_AT_name
.long .Lstr_offsets_base0 # DW_AT_str_offsets_base
.long .Lline_table_start0 # DW_AT_stmt_list
.byte 2 # DW_AT_comp_dir
.long .Laddr_table_base0 # DW_AT_addr_base
.byte 2 # Abbrev [2] 0x1e:0xb DW_TAG_variable
.byte 3 # DW_AT_name
.long 41 # DW_AT_type
# DW_AT_external
.byte 0 # DW_AT_decl_file
.byte 1 # DW_AT_decl_line
.byte 2 # DW_AT_location
.byte 161
.byte 0
.byte 3 # Abbrev [3] 0x29:0x4 DW_TAG_base_type
.byte 4 # DW_AT_name
.byte 5 # DW_AT_encoding
.byte 4 # DW_AT_byte_size
.byte 0 # End Of Children Mark
.Ldebug_info_end0:
.section .debug_str_offsets,"",@progbits
.long 24 # Length of String Offsets Set
.short 5
.short 0
.Lstr_offsets_base0:
.section .debug_str,"MS",@progbits,1
.Linfo_string0:
.asciz "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" # string offset=0
.Linfo_string1:
.asciz "helperVariable.cpp" # string offset=105
.Linfo_string2:
.asciz "/test" # string offset=124
.Linfo_string3:
.asciz "Foo" # string offset=162
.Linfo_string4:
.asciz "int" # string offset=166
.section .debug_str_offsets,"",@progbits
.long .Linfo_string0
.long .Linfo_string1
.long .Linfo_string2
.long .Linfo_string3
.long .Linfo_string4
.section .debug_addr,"",@progbits
.long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
.Ldebug_addr_start0:
.short 5 # DWARF version number
.byte 8 # Address size
.byte 0 # Segment selector size
.Laddr_table_base0:
.quad Foo
.Ldebug_addr_end0:
.ident "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)"
.section ".note.GNU-stack","",@progbits
.addrsig
.section .debug_line,"",@progbits
.Lline_table_start0:

View File

@ -0,0 +1,165 @@
# int foo() {
# return 0;
# }
.text
.file "helper.cpp"
.globl _Z3foov # -- Begin function _Z3foov
.p2align 4, 0x90
.type _Z3foov,@function
_Z3foov: # @_Z3foov
.Lfunc_begin0:
.file 0 "/test" "helper.cpp" md5 0x95a4a33e7fe9423970d3c798fdf8ed43
.loc 0 1 0 # helper.cpp:1:0
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
.Ltmp0:
.loc 0 2 3 prologue_end # helper.cpp:2:3
xorl %eax, %eax
.loc 0 2 3 epilogue_begin is_stmt 0 # helper.cpp:2:3
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Ltmp1:
.Lfunc_end0:
.size _Z3foov, .Lfunc_end0-_Z3foov
.cfi_endproc
# -- End function
.section .debug_abbrev,"",@progbits
.byte 1 # Abbreviation Code
.byte 17 # DW_TAG_compile_unit
.byte 1 # DW_CHILDREN_yes
.byte 37 # DW_AT_producer
.byte 37 # DW_FORM_strx1
.byte 19 # DW_AT_language
.byte 5 # DW_FORM_data2
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 114 # DW_AT_str_offsets_base
.byte 23 # DW_FORM_sec_offset
.byte 16 # DW_AT_stmt_list
.byte 23 # DW_FORM_sec_offset
.byte 27 # DW_AT_comp_dir
.byte 37 # DW_FORM_strx1
.byte 17 # DW_AT_low_pc
.byte 27 # DW_FORM_addrx
.byte 18 # DW_AT_high_pc
.byte 6 # DW_FORM_data4
.byte 115 # DW_AT_addr_base
.byte 23 # DW_FORM_sec_offset
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 2 # Abbreviation Code
.byte 46 # DW_TAG_subprogram
.byte 0 # DW_CHILDREN_no
.byte 17 # DW_AT_low_pc
.byte 27 # DW_FORM_addrx
.byte 18 # DW_AT_high_pc
.byte 6 # DW_FORM_data4
.byte 64 # DW_AT_frame_base
.byte 24 # DW_FORM_exprloc
.byte 110 # DW_AT_linkage_name
.byte 37 # DW_FORM_strx1
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 58 # DW_AT_decl_file
.byte 11 # DW_FORM_data1
.byte 59 # DW_AT_decl_line
.byte 11 # DW_FORM_data1
.byte 73 # DW_AT_type
.byte 19 # DW_FORM_ref4
.byte 63 # DW_AT_external
.byte 25 # DW_FORM_flag_present
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 3 # Abbreviation Code
.byte 36 # DW_TAG_base_type
.byte 0 # DW_CHILDREN_no
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 62 # DW_AT_encoding
.byte 11 # DW_FORM_data1
.byte 11 # DW_AT_byte_size
.byte 11 # DW_FORM_data1
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 0 # EOM(3)
.section .debug_info,"",@progbits
.Lcu_begin0:
.long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
.Ldebug_info_start0:
.short 5 # DWARF version number
.byte 1 # DWARF Unit Type
.byte 8 # Address Size (in bytes)
.long .debug_abbrev # Offset Into Abbrev. Section
.byte 1 # Abbrev [1] 0xc:0x2c DW_TAG_compile_unit
.byte 0 # DW_AT_producer
.short 33 # DW_AT_language
.byte 1 # DW_AT_name
.long .Lstr_offsets_base0 # DW_AT_str_offsets_base
.long .Lline_table_start0 # DW_AT_stmt_list
.byte 2 # DW_AT_comp_dir
.byte 0 # DW_AT_low_pc
.long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
.long .Laddr_table_base0 # DW_AT_addr_base
.byte 2 # Abbrev [2] 0x23:0x10 DW_TAG_subprogram
.byte 0 # DW_AT_low_pc
.long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
.byte 1 # DW_AT_frame_base
.byte 86
.byte 3 # DW_AT_linkage_name
.byte 4 # DW_AT_name
.byte 0 # DW_AT_decl_file
.byte 1 # DW_AT_decl_line
.long 51 # DW_AT_type
# DW_AT_external
.byte 3 # Abbrev [3] 0x33:0x4 DW_TAG_base_type
.byte 5 # DW_AT_name
.byte 5 # DW_AT_encoding
.byte 4 # DW_AT_byte_size
.byte 0 # End Of Children Mark
.Ldebug_info_end0:
.section .debug_str_offsets,"",@progbits
.long 28 # Length of String Offsets Set
.short 5
.short 0
.Lstr_offsets_base0:
.section .debug_str,"MS",@progbits,1
.Linfo_string0:
.asciz "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" # string offset=0
.Linfo_string1:
.asciz "helper.cpp" # string offset=105
.Linfo_string2:
.asciz "/test" # string offset=116
.Linfo_string3:
.asciz "_Z3foov" # string offset=154
.Linfo_string4:
.asciz "foo" # string offset=162
.Linfo_string5:
.asciz "int" # string offset=166
.section .debug_str_offsets,"",@progbits
.long .Linfo_string0
.long .Linfo_string1
.long .Linfo_string2
.long .Linfo_string3
.long .Linfo_string4
.long .Linfo_string5
.section .debug_addr,"",@progbits
.long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
.Ldebug_addr_start0:
.short 5 # DWARF version number
.byte 8 # Address size
.byte 0 # Segment selector size
.Laddr_table_base0:
.quad .Lfunc_begin0
.Ldebug_addr_end0:
.ident "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)"
.section ".note.GNU-stack","",@progbits
.addrsig
.section .debug_line,"",@progbits
.Lline_table_start0:

View File

@ -0,0 +1,160 @@
# int main() {
# return 0;
# }
.text
.file "main.cpp"
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
main: # @main
.Lfunc_begin0:
.file 0 "/test" "main.cpp" md5 0x7228a872dc174332f3151a7ac3344b26
.loc 0 1 0 # main.cpp:1:0
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
movl $0, -4(%rbp)
.Ltmp0:
.loc 0 2 2 prologue_end # main.cpp:2:2
xorl %eax, %eax
.loc 0 2 2 epilogue_begin is_stmt 0 # main.cpp:2:2
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Ltmp1:
.Lfunc_end0:
.size main, .Lfunc_end0-main
.cfi_endproc
# -- End function
.section .debug_abbrev,"",@progbits
.byte 1 # Abbreviation Code
.byte 17 # DW_TAG_compile_unit
.byte 1 # DW_CHILDREN_yes
.byte 37 # DW_AT_producer
.byte 37 # DW_FORM_strx1
.byte 19 # DW_AT_language
.byte 5 # DW_FORM_data2
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 114 # DW_AT_str_offsets_base
.byte 23 # DW_FORM_sec_offset
.byte 16 # DW_AT_stmt_list
.byte 23 # DW_FORM_sec_offset
.byte 27 # DW_AT_comp_dir
.byte 37 # DW_FORM_strx1
.byte 17 # DW_AT_low_pc
.byte 27 # DW_FORM_addrx
.byte 18 # DW_AT_high_pc
.byte 6 # DW_FORM_data4
.byte 115 # DW_AT_addr_base
.byte 23 # DW_FORM_sec_offset
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 2 # Abbreviation Code
.byte 46 # DW_TAG_subprogram
.byte 0 # DW_CHILDREN_no
.byte 17 # DW_AT_low_pc
.byte 27 # DW_FORM_addrx
.byte 18 # DW_AT_high_pc
.byte 6 # DW_FORM_data4
.byte 64 # DW_AT_frame_base
.byte 24 # DW_FORM_exprloc
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 58 # DW_AT_decl_file
.byte 11 # DW_FORM_data1
.byte 59 # DW_AT_decl_line
.byte 11 # DW_FORM_data1
.byte 73 # DW_AT_type
.byte 19 # DW_FORM_ref4
.byte 63 # DW_AT_external
.byte 25 # DW_FORM_flag_present
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 3 # Abbreviation Code
.byte 36 # DW_TAG_base_type
.byte 0 # DW_CHILDREN_no
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 62 # DW_AT_encoding
.byte 11 # DW_FORM_data1
.byte 11 # DW_AT_byte_size
.byte 11 # DW_FORM_data1
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 0 # EOM(3)
.section .debug_info,"",@progbits
.Lcu_begin0:
.long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
.Ldebug_info_start0:
.short 5 # DWARF version number
.byte 1 # DWARF Unit Type
.byte 8 # Address Size (in bytes)
.long .debug_abbrev # Offset Into Abbrev. Section
.byte 1 # Abbrev [1] 0xc:0x2b DW_TAG_compile_unit
.byte 0 # DW_AT_producer
.short 33 # DW_AT_language
.byte 1 # DW_AT_name
.long .Lstr_offsets_base0 # DW_AT_str_offsets_base
.long .Lline_table_start0 # DW_AT_stmt_list
.byte 2 # DW_AT_comp_dir
.byte 0 # DW_AT_low_pc
.long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
.long .Laddr_table_base0 # DW_AT_addr_base
.byte 2 # Abbrev [2] 0x23:0xf DW_TAG_subprogram
.byte 0 # DW_AT_low_pc
.long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
.byte 1 # DW_AT_frame_base
.byte 86
.byte 3 # DW_AT_name
.byte 0 # DW_AT_decl_file
.byte 1 # DW_AT_decl_line
.long 50 # DW_AT_type
# DW_AT_external
.byte 3 # Abbrev [3] 0x32:0x4 DW_TAG_base_type
.byte 4 # DW_AT_name
.byte 5 # DW_AT_encoding
.byte 4 # DW_AT_byte_size
.byte 0 # End Of Children Mark
.Ldebug_info_end0:
.section .debug_str_offsets,"",@progbits
.long 24 # Length of String Offsets Set
.short 5
.short 0
.Lstr_offsets_base0:
.section .debug_str,"MS",@progbits,1
.Linfo_string0:
.asciz "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" # string offset=0
.Linfo_string1:
.asciz "main.cpp" # string offset=105
.Linfo_string2:
.asciz "/test" # string offset=114
.Linfo_string3:
.asciz "main" # string offset=152
.Linfo_string4:
.asciz "int" # string offset=157
.section .debug_str_offsets,"",@progbits
.long .Linfo_string0
.long .Linfo_string1
.long .Linfo_string2
.long .Linfo_string3
.long .Linfo_string4
.section .debug_addr,"",@progbits
.long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
.Ldebug_addr_start0:
.short 5 # DWARF version number
.byte 8 # Address size
.byte 0 # Segment selector size
.Laddr_table_base0:
.quad .Lfunc_begin0
.Ldebug_addr_end0:
.ident "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)"
.section ".note.GNU-stack","",@progbits
.addrsig
.section .debug_line,"",@progbits
.Lline_table_start0:

View File

@ -0,0 +1,29 @@
# REQUIRES: system-linux
# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-debug-line-not-modified-main.s -o %t1.o
# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-debug-line-not-modified-helper-variable.s -o %t2.o
# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-debug-line-not-modified-helper.s -o %t3.o
# RUN: %clang %cflags -dwarf-5 %t1.o %t2.o %t3.o -o %t.exe -Wl,-q
# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections
# RUN: llvm-dwarfdump --show-form --verbose --debug-line %t.bolt | FileCheck --check-prefix=POSTCHECK %s
# This test checks that BOLT generates correct debug_line_str when one of CU contributions is not modified.
# POSTCHECK: version: 5
# POSTCHECK: include_directories[ 0] = .debug_line_str[{{.*}}] = "/test"
# POSTCHECK: file_names[ 0]:
# POSTCHECK: name: .debug_line_str[{{.*}}] = "main.cpp"
# POSTCHECK: dir_index: 0
# POSTCHECK: md5_checksum: 7228a872dc174332f3151a7ac3344b26
# POSTCHECK: version: 5
# POSTCHECK: include_directories[ 0] = .debug_line_str[{{.*}}] = "/test"
# POSTCHECK: file_names[ 0]:
# POSTCHECK: name: .debug_line_str[{{.*}}] = "helperVariable.cpp"
# POSTCHECK: dir_index: 0
# POSTCHECK: md5_checksum: b6e6130198b21a44b5db4247fccb359d
# POSTCHECK: version: 5
# POSTCHECK: include_directories[ 0] = .debug_line_str[{{.*}}] = "/test"
# POSTCHECK: file_names[ 0]:
# POSTCHECK: name: .debug_line_str[{{.*}}] = "helper.cpp"
# POSTCHECK: dir_index: 0
# POSTCHECK: md5_checksum: 95a4a33e7fe9423970d3c798fdf8ed43

View File

@ -70,6 +70,10 @@ public:
/// Returns finalized section.
SmallString<0> getFinalizedData();
/// Adds path \p Path to the line string. Returns offset in the
/// .debug_line_str section.
size_t addString(StringRef Path);
};
/// Instances of this class represent the name of the dwarf .file directive and

View File

@ -350,10 +350,14 @@ SmallString<0> MCDwarfLineStr::getFinalizedData() {
return Data;
}
size_t MCDwarfLineStr::addString(StringRef Path) {
return LineStrings.add(Path);
}
void MCDwarfLineStr::emitRef(MCStreamer *MCOS, StringRef Path) {
int RefSize =
dwarf::getDwarfOffsetByteSize(MCOS->getContext().getDwarfFormat());
size_t Offset = LineStrings.add(Path);
size_t Offset = addString(Path);
if (UseRelocs) {
MCContext &Ctx = MCOS->getContext();
MCOS->emitValue(makeStartPlusIntExpr(Ctx, *LineStrLabel, Offset), RefSize);