mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-12-06 04:35:04 +00:00
1007 lines
34 KiB
C++
1007 lines
34 KiB
C++
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
|
|
|
|
// Copyright (c) 2006, 2011, 2012 Google Inc.
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above
|
|
// copyright notice, this list of conditions and the following disclaimer
|
|
// in the documentation and/or other materials provided with the
|
|
// distribution.
|
|
// * Neither the name of Google Inc. nor the names of its
|
|
// contributors may be used to endorse or promote products derived from
|
|
// this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
// Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
|
|
|
|
// (derived from)
|
|
// dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
|
|
// Find all the debugging info in a file and dump it as a Breakpad symbol file.
|
|
//
|
|
// dump_symbols.h: Read debugging information from an ELF file, and write
|
|
// it out as a Breakpad symbol file.
|
|
|
|
// This file is derived from the following files in
|
|
// toolkit/crashreporter/google-breakpad:
|
|
// src/common/linux/dump_symbols.cc
|
|
// src/common/linux/elfutils.cc
|
|
// src/common/linux/file_id.cc
|
|
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/stat.h>
|
|
#include <unistd.h>
|
|
#include <arpa/inet.h>
|
|
|
|
#include <set>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "mozilla/Assertions.h"
|
|
|
|
#include "LulPlatformMacros.h"
|
|
#include "LulCommonExt.h"
|
|
#include "LulDwarfExt.h"
|
|
#if defined(LUL_PLAT_arm_android)
|
|
# include "LulExidxExt.h"
|
|
#endif
|
|
#include "LulElfInt.h"
|
|
#include "LulMainInt.h"
|
|
|
|
|
|
#if defined(LUL_PLAT_arm_android) && !defined(SHT_ARM_EXIDX)
|
|
// bionic and older glibsc don't define it
|
|
# define SHT_ARM_EXIDX (SHT_LOPROC + 1)
|
|
#endif
|
|
|
|
|
|
// This namespace contains helper functions.
|
|
namespace {
|
|
|
|
using lul::DwarfCFIToModule;
|
|
using lul::FindElfSectionByName;
|
|
using lul::GetOffset;
|
|
using lul::IsValidElf;
|
|
using lul::Module;
|
|
using lul::UniqueString;
|
|
using lul::scoped_ptr;
|
|
using lul::Summariser;
|
|
using std::string;
|
|
using std::vector;
|
|
using std::set;
|
|
|
|
//
|
|
// FDWrapper
|
|
//
|
|
// Wrapper class to make sure opened file is closed.
|
|
//
|
|
class FDWrapper {
|
|
public:
|
|
explicit FDWrapper(int fd) :
|
|
fd_(fd) {}
|
|
~FDWrapper() {
|
|
if (fd_ != -1)
|
|
close(fd_);
|
|
}
|
|
int get() {
|
|
return fd_;
|
|
}
|
|
int release() {
|
|
int fd = fd_;
|
|
fd_ = -1;
|
|
return fd;
|
|
}
|
|
private:
|
|
int fd_;
|
|
};
|
|
|
|
//
|
|
// MmapWrapper
|
|
//
|
|
// Wrapper class to make sure mapped regions are unmapped.
|
|
//
|
|
class MmapWrapper {
|
|
public:
|
|
MmapWrapper() : is_set_(false) {}
|
|
~MmapWrapper() {
|
|
if (is_set_ && base_ != NULL) {
|
|
MOZ_ASSERT(size_ > 0);
|
|
munmap(base_, size_);
|
|
}
|
|
}
|
|
void set(void *mapped_address, size_t mapped_size) {
|
|
is_set_ = true;
|
|
base_ = mapped_address;
|
|
size_ = mapped_size;
|
|
}
|
|
void release() {
|
|
MOZ_ASSERT(is_set_);
|
|
is_set_ = false;
|
|
base_ = NULL;
|
|
size_ = 0;
|
|
}
|
|
|
|
private:
|
|
bool is_set_;
|
|
void *base_;
|
|
size_t size_;
|
|
};
|
|
|
|
|
|
// Set NUM_DW_REGNAMES to be the number of Dwarf register names
|
|
// appropriate to the machine architecture given in HEADER. Return
|
|
// true on success, or false if HEADER's machine architecture is not
|
|
// supported.
|
|
template<typename ElfClass>
|
|
bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
|
|
unsigned int* num_dw_regnames) {
|
|
switch (elf_header->e_machine) {
|
|
case EM_386:
|
|
*num_dw_regnames = DwarfCFIToModule::RegisterNames::I386();
|
|
return true;
|
|
case EM_ARM:
|
|
*num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM();
|
|
return true;
|
|
case EM_X86_64:
|
|
*num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64();
|
|
return true;
|
|
default:
|
|
MOZ_ASSERT(0);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
template<typename ElfClass>
|
|
bool LoadDwarfCFI(const string& dwarf_filename,
|
|
const typename ElfClass::Ehdr* elf_header,
|
|
const char* section_name,
|
|
const typename ElfClass::Shdr* section,
|
|
const bool eh_frame,
|
|
const typename ElfClass::Shdr* got_section,
|
|
const typename ElfClass::Shdr* text_section,
|
|
const bool big_endian,
|
|
SecMap* smap,
|
|
uintptr_t text_bias,
|
|
void (*log)(const char*)) {
|
|
// Find the appropriate set of register names for this file's
|
|
// architecture.
|
|
unsigned int num_dw_regs = 0;
|
|
if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &num_dw_regs)) {
|
|
fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';"
|
|
" cannot convert DWARF call frame information\n",
|
|
dwarf_filename.c_str(), elf_header->e_machine);
|
|
return false;
|
|
}
|
|
|
|
const lul::Endianness endianness
|
|
= big_endian ? lul::ENDIANNESS_BIG : lul::ENDIANNESS_LITTLE;
|
|
|
|
// Find the call frame information and its size.
|
|
const char* cfi =
|
|
GetOffset<ElfClass, char>(elf_header, section->sh_offset);
|
|
size_t cfi_size = section->sh_size;
|
|
|
|
// Plug together the parser, handler, and their entourages.
|
|
|
|
// Here's a summariser, which will receive the output of the
|
|
// parser, create summaries, and add them to |smap|.
|
|
Summariser* summ = new Summariser(smap, text_bias, log);
|
|
|
|
DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name);
|
|
DwarfCFIToModule handler(num_dw_regs, &module_reporter, summ);
|
|
lul::ByteReader byte_reader(endianness);
|
|
|
|
byte_reader.SetAddressSize(ElfClass::kAddrSize);
|
|
|
|
// Provide the base addresses for .eh_frame encoded pointers, if
|
|
// possible.
|
|
byte_reader.SetCFIDataBase(section->sh_addr, cfi);
|
|
if (got_section)
|
|
byte_reader.SetDataBase(got_section->sh_addr);
|
|
if (text_section)
|
|
byte_reader.SetTextBase(text_section->sh_addr);
|
|
|
|
lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename,
|
|
section_name);
|
|
lul::CallFrameInfo parser(cfi, cfi_size,
|
|
&byte_reader, &handler, &dwarf_reporter,
|
|
eh_frame);
|
|
parser.Start();
|
|
|
|
delete summ;
|
|
return true;
|
|
}
|
|
|
|
#if defined(LUL_PLAT_arm_android)
|
|
template<typename ElfClass>
|
|
bool LoadARMexidx(const typename ElfClass::Ehdr* elf_header,
|
|
const typename ElfClass::Shdr* exidx_section,
|
|
const typename ElfClass::Shdr* extab_section,
|
|
uint32_t loading_addr,
|
|
uintptr_t text_bias,
|
|
SecMap* smap,
|
|
void (*log)(const char*)) {
|
|
// To do this properly we need to know:
|
|
// * the bounds of the .ARM.exidx section in the mapped image
|
|
// * the bounds of the .ARM.extab section in the mapped image
|
|
// * the vma of the last byte in the text section associated with the .exidx
|
|
// The first two are easy. The third is a bit tricky. If we can't
|
|
// figure out what it is, just pass in zero.
|
|
// Note that we are reading EXIDX directly out of the mapped in
|
|
// executable image. Unlike with the CFI reader, there is no
|
|
// auxiliary, temporary mapping used to read the unwind data.
|
|
//
|
|
// An .exidx section is always required, but the .extab section
|
|
// can be optionally omitted, provided that .exidx does not refer
|
|
// to it. If the .exidx is erroneous and does refer to .extab even
|
|
// though .extab is missing, the range checks done by GET_EX_U32 in
|
|
// ExceptionTableInfo::ExtabEntryExtract should prevent any invalid
|
|
// memory accesses, and cause the .extab to be rejected as invalid.
|
|
const char *exidx_img
|
|
= GetOffset<ElfClass, char>(elf_header, exidx_section->sh_offset);
|
|
size_t exidx_size = exidx_section->sh_size;
|
|
const char *extab_img
|
|
= extab_section
|
|
? GetOffset<ElfClass, char>(elf_header, extab_section->sh_offset)
|
|
: nullptr;
|
|
size_t extab_size = extab_section ? extab_section->sh_size : 0;
|
|
|
|
// The sh_link field of the exidx section gives the section number
|
|
// for the associated text section.
|
|
uint32_t exidx_text_last_svma = 0;
|
|
int exidx_text_sno = exidx_section->sh_link;
|
|
typedef typename ElfClass::Shdr Shdr;
|
|
// |sections| points to the section header table
|
|
const Shdr* sections
|
|
= GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
|
|
const int num_sections = elf_header->e_shnum;
|
|
if (exidx_text_sno >= 0 && exidx_text_sno < num_sections) {
|
|
const Shdr* exidx_text_shdr = §ions[exidx_text_sno];
|
|
if (exidx_text_shdr->sh_size > 0) {
|
|
exidx_text_last_svma
|
|
= exidx_text_shdr->sh_addr + exidx_text_shdr->sh_size - 1;
|
|
}
|
|
}
|
|
|
|
lul::ARMExToModule handler(smap, log);
|
|
lul::ExceptionTableInfo
|
|
parser(exidx_img, exidx_size, extab_img, extab_size, exidx_text_last_svma,
|
|
&handler,
|
|
reinterpret_cast<const char*>(elf_header),
|
|
loading_addr, text_bias, log);
|
|
parser.Start();
|
|
return true;
|
|
}
|
|
#endif /* defined(LUL_PLAT_arm_android) */
|
|
|
|
bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
|
|
void** elf_header) {
|
|
int obj_fd = open(obj_file.c_str(), O_RDONLY);
|
|
if (obj_fd < 0) {
|
|
fprintf(stderr, "Failed to open ELF file '%s': %s\n",
|
|
obj_file.c_str(), strerror(errno));
|
|
return false;
|
|
}
|
|
FDWrapper obj_fd_wrapper(obj_fd);
|
|
struct stat st;
|
|
if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) {
|
|
fprintf(stderr, "Unable to fstat ELF file '%s': %s\n",
|
|
obj_file.c_str(), strerror(errno));
|
|
return false;
|
|
}
|
|
// Mapping it read-only is good enough. In any case, mapping it
|
|
// read-write confuses Valgrind's debuginfo acquire/discard
|
|
// heuristics, making it hard to profile the profiler.
|
|
void *obj_base = mmap(nullptr, st.st_size,
|
|
PROT_READ, MAP_PRIVATE, obj_fd, 0);
|
|
if (obj_base == MAP_FAILED) {
|
|
fprintf(stderr, "Failed to mmap ELF file '%s': %s\n",
|
|
obj_file.c_str(), strerror(errno));
|
|
return false;
|
|
}
|
|
map_wrapper->set(obj_base, st.st_size);
|
|
*elf_header = obj_base;
|
|
if (!IsValidElf(*elf_header)) {
|
|
fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Get the endianness of ELF_HEADER. If it's invalid, return false.
|
|
template<typename ElfClass>
|
|
bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
|
|
bool* big_endian) {
|
|
if (elf_header->e_ident[EI_DATA] == ELFDATA2LSB) {
|
|
*big_endian = false;
|
|
return true;
|
|
}
|
|
if (elf_header->e_ident[EI_DATA] == ELFDATA2MSB) {
|
|
*big_endian = true;
|
|
return true;
|
|
}
|
|
|
|
fprintf(stderr, "bad data encoding in ELF header: %d\n",
|
|
elf_header->e_ident[EI_DATA]);
|
|
return false;
|
|
}
|
|
|
|
//
|
|
// LoadSymbolsInfo
|
|
//
|
|
// Holds the state between the two calls to LoadSymbols() in case it's necessary
|
|
// to follow the .gnu_debuglink section and load debug information from a
|
|
// different file.
|
|
//
|
|
template<typename ElfClass>
|
|
class LoadSymbolsInfo {
|
|
public:
|
|
typedef typename ElfClass::Addr Addr;
|
|
|
|
explicit LoadSymbolsInfo(const vector<string>& dbg_dirs) :
|
|
debug_dirs_(dbg_dirs),
|
|
has_loading_addr_(false) {}
|
|
|
|
// Keeps track of which sections have been loaded so sections don't
|
|
// accidentally get loaded twice from two different files.
|
|
void LoadedSection(const string §ion) {
|
|
if (loaded_sections_.count(section) == 0) {
|
|
loaded_sections_.insert(section);
|
|
} else {
|
|
fprintf(stderr, "Section %s has already been loaded.\n",
|
|
section.c_str());
|
|
}
|
|
}
|
|
|
|
string debuglink_file() const {
|
|
return debuglink_file_;
|
|
}
|
|
|
|
private:
|
|
const vector<string>& debug_dirs_; // Directories in which to
|
|
// search for the debug ELF file.
|
|
|
|
string debuglink_file_; // Full path to the debug ELF file.
|
|
|
|
bool has_loading_addr_; // Indicate if LOADING_ADDR_ is valid.
|
|
|
|
set<string> loaded_sections_; // Tracks the Loaded ELF sections
|
|
// between calls to LoadSymbols().
|
|
};
|
|
|
|
// Find the preferred loading address of the binary.
|
|
template<typename ElfClass>
|
|
typename ElfClass::Addr GetLoadingAddress(
|
|
const typename ElfClass::Phdr* program_headers,
|
|
int nheader) {
|
|
typedef typename ElfClass::Phdr Phdr;
|
|
|
|
// For non-PIC executables (e_type == ET_EXEC), the load address is
|
|
// the start address of the first PT_LOAD segment. (ELF requires
|
|
// the segments to be sorted by load address.) For PIC executables
|
|
// and dynamic libraries (e_type == ET_DYN), this address will
|
|
// normally be zero.
|
|
for (int i = 0; i < nheader; ++i) {
|
|
const Phdr& header = program_headers[i];
|
|
if (header.p_type == PT_LOAD)
|
|
return header.p_vaddr;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
template<typename ElfClass>
|
|
bool LoadSymbols(const string& obj_file,
|
|
const bool big_endian,
|
|
const typename ElfClass::Ehdr* elf_header,
|
|
const bool read_gnu_debug_link,
|
|
LoadSymbolsInfo<ElfClass>* info,
|
|
SecMap* smap,
|
|
void* rx_avma,
|
|
void (*log)(const char*)) {
|
|
typedef typename ElfClass::Phdr Phdr;
|
|
typedef typename ElfClass::Shdr Shdr;
|
|
|
|
char buf[500];
|
|
snprintf(buf, sizeof(buf), "LoadSymbols: BEGIN %s\n", obj_file.c_str());
|
|
buf[sizeof(buf)-1] = 0;
|
|
log(buf);
|
|
|
|
// This is how the text bias is calculated.
|
|
// BEGIN CALCULATE BIAS
|
|
uintptr_t loading_addr = GetLoadingAddress<ElfClass>(
|
|
GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
|
|
elf_header->e_phnum);
|
|
uintptr_t text_bias = ((uintptr_t)rx_avma) - loading_addr;
|
|
snprintf(buf, sizeof(buf),
|
|
"LoadSymbols: rx_avma=%llx, text_bias=%llx",
|
|
(unsigned long long int)(uintptr_t)rx_avma,
|
|
(unsigned long long int)text_bias);
|
|
buf[sizeof(buf)-1] = 0;
|
|
log(buf);
|
|
// END CALCULATE BIAS
|
|
|
|
const Shdr* sections =
|
|
GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
|
|
const Shdr* section_names = sections + elf_header->e_shstrndx;
|
|
const char* names =
|
|
GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
|
|
const char *names_end = names + section_names->sh_size;
|
|
bool found_usable_info = false;
|
|
|
|
// Dwarf Call Frame Information (CFI) is actually independent from
|
|
// the other DWARF debugging information, and can be used alone.
|
|
const Shdr* dwarf_cfi_section =
|
|
FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS,
|
|
sections, names, names_end,
|
|
elf_header->e_shnum);
|
|
if (dwarf_cfi_section) {
|
|
// Ignore the return value of this function; even without call frame
|
|
// information, the other debugging information could be perfectly
|
|
// useful.
|
|
info->LoadedSection(".debug_frame");
|
|
bool result =
|
|
LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
|
|
dwarf_cfi_section, false, 0, 0, big_endian,
|
|
smap, text_bias, log);
|
|
found_usable_info = found_usable_info || result;
|
|
if (result)
|
|
log("LoadSymbols: read CFI from .debug_frame");
|
|
}
|
|
|
|
// Linux C++ exception handling information can also provide
|
|
// unwinding data.
|
|
const Shdr* eh_frame_section =
|
|
FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS,
|
|
sections, names, names_end,
|
|
elf_header->e_shnum);
|
|
if (eh_frame_section) {
|
|
// Pointers in .eh_frame data may be relative to the base addresses of
|
|
// certain sections. Provide those sections if present.
|
|
const Shdr* got_section =
|
|
FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS,
|
|
sections, names, names_end,
|
|
elf_header->e_shnum);
|
|
const Shdr* text_section =
|
|
FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS,
|
|
sections, names, names_end,
|
|
elf_header->e_shnum);
|
|
info->LoadedSection(".eh_frame");
|
|
// As above, ignore the return value of this function.
|
|
bool result =
|
|
LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame",
|
|
eh_frame_section, true,
|
|
got_section, text_section, big_endian,
|
|
smap, text_bias, log);
|
|
found_usable_info = found_usable_info || result;
|
|
if (result)
|
|
log("LoadSymbols: read CFI from .eh_frame");
|
|
}
|
|
|
|
# if defined(LUL_PLAT_arm_android)
|
|
// ARM has special unwind tables that can be used. .exidx is
|
|
// always required, and .extab is normally required, but may
|
|
// be omitted if it is empty. See comments on LoadARMexidx()
|
|
// for more details.
|
|
const Shdr* arm_exidx_section =
|
|
FindElfSectionByName<ElfClass>(".ARM.exidx", SHT_ARM_EXIDX,
|
|
sections, names, names_end,
|
|
elf_header->e_shnum);
|
|
const Shdr* arm_extab_section =
|
|
FindElfSectionByName<ElfClass>(".ARM.extab", SHT_PROGBITS,
|
|
sections, names, names_end,
|
|
elf_header->e_shnum);
|
|
const Shdr* debug_info_section =
|
|
FindElfSectionByName<ElfClass>(".debug_info", SHT_PROGBITS,
|
|
sections, names, names_end,
|
|
elf_header->e_shnum);
|
|
// Only load information from this section if there isn't a .debug_info
|
|
// section.
|
|
if (!debug_info_section && arm_exidx_section) {
|
|
info->LoadedSection(".ARM.exidx");
|
|
if (arm_extab_section)
|
|
info->LoadedSection(".ARM.extab");
|
|
bool result = LoadARMexidx<ElfClass>(elf_header,
|
|
arm_exidx_section, arm_extab_section,
|
|
loading_addr, text_bias, smap, log);
|
|
found_usable_info = found_usable_info || result;
|
|
if (result)
|
|
log("LoadSymbols: read EXIDX from .ARM.{exidx,extab}");
|
|
}
|
|
# endif /* defined(LUL_PLAT_arm_android) */
|
|
|
|
snprintf(buf, sizeof(buf), "LoadSymbols: END %s\n", obj_file.c_str());
|
|
buf[sizeof(buf)-1] = 0;
|
|
log(buf);
|
|
|
|
return found_usable_info;
|
|
}
|
|
|
|
// Return the breakpad symbol file identifier for the architecture of
|
|
// ELF_HEADER.
|
|
template<typename ElfClass>
|
|
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
|
|
typedef typename ElfClass::Half Half;
|
|
Half arch = elf_header->e_machine;
|
|
switch (arch) {
|
|
case EM_386: return "x86";
|
|
case EM_ARM: return "arm";
|
|
case EM_MIPS: return "mips";
|
|
case EM_PPC64: return "ppc64";
|
|
case EM_PPC: return "ppc";
|
|
case EM_S390: return "s390";
|
|
case EM_SPARC: return "sparc";
|
|
case EM_SPARCV9: return "sparcv9";
|
|
case EM_X86_64: return "x86_64";
|
|
default: return NULL;
|
|
}
|
|
}
|
|
|
|
// Format the Elf file identifier in IDENTIFIER as a UUID with the
|
|
// dashes removed.
|
|
string FormatIdentifier(unsigned char identifier[16]) {
|
|
char identifier_str[40];
|
|
lul::FileID::ConvertIdentifierToString(
|
|
identifier,
|
|
identifier_str,
|
|
sizeof(identifier_str));
|
|
string id_no_dash;
|
|
for (int i = 0; identifier_str[i] != '\0'; ++i)
|
|
if (identifier_str[i] != '-')
|
|
id_no_dash += identifier_str[i];
|
|
// Add an extra "0" by the end. PDB files on Windows have an 'age'
|
|
// number appended to the end of the file identifier; this isn't
|
|
// really used or necessary on other platforms, but be consistent.
|
|
id_no_dash += '0';
|
|
return id_no_dash;
|
|
}
|
|
|
|
// Return the non-directory portion of FILENAME: the portion after the
|
|
// last slash, or the whole filename if there are no slashes.
|
|
string BaseFileName(const string &filename) {
|
|
// Lots of copies! basename's behavior is less than ideal.
|
|
char *c_filename = strdup(filename.c_str());
|
|
string base = basename(c_filename);
|
|
free(c_filename);
|
|
return base;
|
|
}
|
|
|
|
template<typename ElfClass>
|
|
bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
|
|
const string& obj_filename,
|
|
const vector<string>& debug_dirs,
|
|
SecMap* smap, void* rx_avma,
|
|
void (*log)(const char*)) {
|
|
typedef typename ElfClass::Ehdr Ehdr;
|
|
|
|
unsigned char identifier[16];
|
|
if (!lul
|
|
::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) {
|
|
fprintf(stderr, "%s: unable to generate file identifier\n",
|
|
obj_filename.c_str());
|
|
return false;
|
|
}
|
|
|
|
const char *architecture = ElfArchitecture<ElfClass>(elf_header);
|
|
if (!architecture) {
|
|
fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
|
|
obj_filename.c_str(), elf_header->e_machine);
|
|
return false;
|
|
}
|
|
|
|
// Figure out what endianness this file is.
|
|
bool big_endian;
|
|
if (!ElfEndianness<ElfClass>(elf_header, &big_endian))
|
|
return false;
|
|
|
|
string name = BaseFileName(obj_filename);
|
|
string os = "Linux";
|
|
string id = FormatIdentifier(identifier);
|
|
|
|
LoadSymbolsInfo<ElfClass> info(debug_dirs);
|
|
if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
|
|
!debug_dirs.empty(), &info,
|
|
smap, rx_avma, log)) {
|
|
const string debuglink_file = info.debuglink_file();
|
|
if (debuglink_file.empty())
|
|
return false;
|
|
|
|
// Load debuglink ELF file.
|
|
fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
|
|
MmapWrapper debug_map_wrapper;
|
|
Ehdr* debug_elf_header = NULL;
|
|
if (!LoadELF(debuglink_file, &debug_map_wrapper,
|
|
reinterpret_cast<void**>(&debug_elf_header)))
|
|
return false;
|
|
// Sanity checks to make sure everything matches up.
|
|
const char *debug_architecture =
|
|
ElfArchitecture<ElfClass>(debug_elf_header);
|
|
if (!debug_architecture) {
|
|
fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
|
|
debuglink_file.c_str(), debug_elf_header->e_machine);
|
|
return false;
|
|
}
|
|
if (strcmp(architecture, debug_architecture)) {
|
|
fprintf(stderr, "%s with ELF machine architecture %s does not match "
|
|
"%s with ELF architecture %s\n",
|
|
debuglink_file.c_str(), debug_architecture,
|
|
obj_filename.c_str(), architecture);
|
|
return false;
|
|
}
|
|
|
|
bool debug_big_endian;
|
|
if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian))
|
|
return false;
|
|
if (debug_big_endian != big_endian) {
|
|
fprintf(stderr, "%s and %s does not match in endianness\n",
|
|
obj_filename.c_str(), debuglink_file.c_str());
|
|
return false;
|
|
}
|
|
|
|
if (!LoadSymbols<ElfClass>(debuglink_file, debug_big_endian,
|
|
debug_elf_header, false, &info,
|
|
smap, rx_avma, log)) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
} // namespace (anon)
|
|
|
|
|
|
namespace lul {
|
|
|
|
bool ReadSymbolDataInternal(const uint8_t* obj_file,
|
|
const string& obj_filename,
|
|
const vector<string>& debug_dirs,
|
|
SecMap* smap, void* rx_avma,
|
|
void (*log)(const char*)) {
|
|
|
|
if (!IsValidElf(obj_file)) {
|
|
fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
|
|
return false;
|
|
}
|
|
|
|
int elfclass = ElfClass(obj_file);
|
|
if (elfclass == ELFCLASS32) {
|
|
return ReadSymbolDataElfClass<ElfClass32>(
|
|
reinterpret_cast<const Elf32_Ehdr*>(obj_file),
|
|
obj_filename, debug_dirs, smap, rx_avma, log);
|
|
}
|
|
if (elfclass == ELFCLASS64) {
|
|
return ReadSymbolDataElfClass<ElfClass64>(
|
|
reinterpret_cast<const Elf64_Ehdr*>(obj_file),
|
|
obj_filename, debug_dirs, smap, rx_avma, log);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool ReadSymbolData(const string& obj_file,
|
|
const vector<string>& debug_dirs,
|
|
SecMap* smap, void* rx_avma,
|
|
void (*log)(const char*)) {
|
|
MmapWrapper map_wrapper;
|
|
void* elf_header = NULL;
|
|
if (!LoadELF(obj_file, &map_wrapper, &elf_header))
|
|
return false;
|
|
|
|
return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header),
|
|
obj_file, debug_dirs, smap, rx_avma, log);
|
|
}
|
|
|
|
|
|
namespace {
|
|
|
|
template<typename ElfClass>
|
|
void FindElfClassSection(const char *elf_base,
|
|
const char *section_name,
|
|
typename ElfClass::Word section_type,
|
|
const void **section_start,
|
|
int *section_size) {
|
|
typedef typename ElfClass::Ehdr Ehdr;
|
|
typedef typename ElfClass::Shdr Shdr;
|
|
|
|
MOZ_ASSERT(elf_base);
|
|
MOZ_ASSERT(section_start);
|
|
MOZ_ASSERT(section_size);
|
|
|
|
MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
|
|
|
|
const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
|
|
MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass);
|
|
|
|
const Shdr* sections =
|
|
GetOffset<ElfClass,Shdr>(elf_header, elf_header->e_shoff);
|
|
const Shdr* section_names = sections + elf_header->e_shstrndx;
|
|
const char* names =
|
|
GetOffset<ElfClass,char>(elf_header, section_names->sh_offset);
|
|
const char *names_end = names + section_names->sh_size;
|
|
|
|
const Shdr* section =
|
|
FindElfSectionByName<ElfClass>(section_name, section_type,
|
|
sections, names, names_end,
|
|
elf_header->e_shnum);
|
|
|
|
if (section != NULL && section->sh_size > 0) {
|
|
*section_start = elf_base + section->sh_offset;
|
|
*section_size = section->sh_size;
|
|
}
|
|
}
|
|
|
|
template<typename ElfClass>
|
|
void FindElfClassSegment(const char *elf_base,
|
|
typename ElfClass::Word segment_type,
|
|
const void **segment_start,
|
|
int *segment_size) {
|
|
typedef typename ElfClass::Ehdr Ehdr;
|
|
typedef typename ElfClass::Phdr Phdr;
|
|
|
|
MOZ_ASSERT(elf_base);
|
|
MOZ_ASSERT(segment_start);
|
|
MOZ_ASSERT(segment_size);
|
|
|
|
MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
|
|
|
|
const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
|
|
MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass);
|
|
|
|
const Phdr* phdrs =
|
|
GetOffset<ElfClass,Phdr>(elf_header, elf_header->e_phoff);
|
|
|
|
for (int i = 0; i < elf_header->e_phnum; ++i) {
|
|
if (phdrs[i].p_type == segment_type) {
|
|
*segment_start = elf_base + phdrs[i].p_offset;
|
|
*segment_size = phdrs[i].p_filesz;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace (anon)
|
|
|
|
bool IsValidElf(const void* elf_base) {
|
|
return strncmp(reinterpret_cast<const char*>(elf_base),
|
|
ELFMAG, SELFMAG) == 0;
|
|
}
|
|
|
|
int ElfClass(const void* elf_base) {
|
|
const ElfW(Ehdr)* elf_header =
|
|
reinterpret_cast<const ElfW(Ehdr)*>(elf_base);
|
|
|
|
return elf_header->e_ident[EI_CLASS];
|
|
}
|
|
|
|
bool FindElfSection(const void *elf_mapped_base,
|
|
const char *section_name,
|
|
uint32_t section_type,
|
|
const void **section_start,
|
|
int *section_size,
|
|
int *elfclass) {
|
|
MOZ_ASSERT(elf_mapped_base);
|
|
MOZ_ASSERT(section_start);
|
|
MOZ_ASSERT(section_size);
|
|
|
|
*section_start = NULL;
|
|
*section_size = 0;
|
|
|
|
if (!IsValidElf(elf_mapped_base))
|
|
return false;
|
|
|
|
int cls = ElfClass(elf_mapped_base);
|
|
if (elfclass) {
|
|
*elfclass = cls;
|
|
}
|
|
|
|
const char* elf_base =
|
|
static_cast<const char*>(elf_mapped_base);
|
|
|
|
if (cls == ELFCLASS32) {
|
|
FindElfClassSection<ElfClass32>(elf_base, section_name, section_type,
|
|
section_start, section_size);
|
|
return *section_start != NULL;
|
|
} else if (cls == ELFCLASS64) {
|
|
FindElfClassSection<ElfClass64>(elf_base, section_name, section_type,
|
|
section_start, section_size);
|
|
return *section_start != NULL;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool FindElfSegment(const void *elf_mapped_base,
|
|
uint32_t segment_type,
|
|
const void **segment_start,
|
|
int *segment_size,
|
|
int *elfclass) {
|
|
MOZ_ASSERT(elf_mapped_base);
|
|
MOZ_ASSERT(segment_start);
|
|
MOZ_ASSERT(segment_size);
|
|
|
|
*segment_start = NULL;
|
|
*segment_size = 0;
|
|
|
|
if (!IsValidElf(elf_mapped_base))
|
|
return false;
|
|
|
|
int cls = ElfClass(elf_mapped_base);
|
|
if (elfclass) {
|
|
*elfclass = cls;
|
|
}
|
|
|
|
const char* elf_base =
|
|
static_cast<const char*>(elf_mapped_base);
|
|
|
|
if (cls == ELFCLASS32) {
|
|
FindElfClassSegment<ElfClass32>(elf_base, segment_type,
|
|
segment_start, segment_size);
|
|
return *segment_start != NULL;
|
|
} else if (cls == ELFCLASS64) {
|
|
FindElfClassSegment<ElfClass64>(elf_base, segment_type,
|
|
segment_start, segment_size);
|
|
return *segment_start != NULL;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
// (derived from)
|
|
// file_id.cc: Return a unique identifier for a file
|
|
//
|
|
// See file_id.h for documentation
|
|
//
|
|
|
|
// ELF note name and desc are 32-bits word padded.
|
|
#define NOTE_PADDING(a) ((a + 3) & ~3)
|
|
|
|
// These functions are also used inside the crashed process, so be safe
|
|
// and use the syscall/libc wrappers instead of direct syscalls or libc.
|
|
|
|
template<typename ElfClass>
|
|
static bool ElfClassBuildIDNoteIdentifier(const void *section, int length,
|
|
uint8_t identifier[kMDGUIDSize]) {
|
|
typedef typename ElfClass::Nhdr Nhdr;
|
|
|
|
const void* section_end = reinterpret_cast<const char*>(section) + length;
|
|
const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section);
|
|
while (reinterpret_cast<const void *>(note_header) < section_end) {
|
|
if (note_header->n_type == NT_GNU_BUILD_ID)
|
|
break;
|
|
note_header = reinterpret_cast<const Nhdr*>(
|
|
reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) +
|
|
NOTE_PADDING(note_header->n_namesz) +
|
|
NOTE_PADDING(note_header->n_descsz));
|
|
}
|
|
if (reinterpret_cast<const void *>(note_header) >= section_end ||
|
|
note_header->n_descsz == 0) {
|
|
return false;
|
|
}
|
|
|
|
const char* build_id = reinterpret_cast<const char*>(note_header) +
|
|
sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz);
|
|
// Copy as many bits of the build ID as will fit
|
|
// into the GUID space.
|
|
memset(identifier, 0, kMDGUIDSize);
|
|
memcpy(identifier, build_id,
|
|
std::min(kMDGUIDSize, (size_t)note_header->n_descsz));
|
|
|
|
return true;
|
|
}
|
|
|
|
// Attempt to locate a .note.gnu.build-id section in an ELF binary
|
|
// and copy as many bytes of it as will fit into |identifier|.
|
|
static bool FindElfBuildIDNote(const void *elf_mapped_base,
|
|
uint8_t identifier[kMDGUIDSize]) {
|
|
void* note_section;
|
|
int note_size, elfclass;
|
|
if ((!FindElfSegment(elf_mapped_base, PT_NOTE,
|
|
(const void**)¬e_section, ¬e_size, &elfclass) ||
|
|
note_size == 0) &&
|
|
(!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
|
|
(const void**)¬e_section, ¬e_size, &elfclass) ||
|
|
note_size == 0)) {
|
|
return false;
|
|
}
|
|
|
|
if (elfclass == ELFCLASS32) {
|
|
return ElfClassBuildIDNoteIdentifier<ElfClass32>(note_section, note_size,
|
|
identifier);
|
|
} else if (elfclass == ELFCLASS64) {
|
|
return ElfClassBuildIDNoteIdentifier<ElfClass64>(note_section, note_size,
|
|
identifier);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
// Attempt to locate the .text section of an ELF binary and generate
|
|
// a simple hash by XORing the first page worth of bytes into |identifier|.
|
|
static bool HashElfTextSection(const void *elf_mapped_base,
|
|
uint8_t identifier[kMDGUIDSize]) {
|
|
void* text_section;
|
|
int text_size;
|
|
if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
|
|
(const void**)&text_section, &text_size, NULL) ||
|
|
text_size == 0) {
|
|
return false;
|
|
}
|
|
|
|
memset(identifier, 0, kMDGUIDSize);
|
|
const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section);
|
|
const uint8_t* ptr_end = ptr + std::min(text_size, 4096);
|
|
while (ptr < ptr_end) {
|
|
for (unsigned i = 0; i < kMDGUIDSize; i++)
|
|
identifier[i] ^= ptr[i];
|
|
ptr += kMDGUIDSize;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// static
|
|
bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
|
|
uint8_t identifier[kMDGUIDSize]) {
|
|
// Look for a build id note first.
|
|
if (FindElfBuildIDNote(base, identifier))
|
|
return true;
|
|
|
|
// Fall back on hashing the first page of the text section.
|
|
return HashElfTextSection(base, identifier);
|
|
}
|
|
|
|
// static
|
|
void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],
|
|
char* buffer, int buffer_length) {
|
|
uint8_t identifier_swapped[kMDGUIDSize];
|
|
|
|
// Endian-ness swap to match dump processor expectation.
|
|
memcpy(identifier_swapped, identifier, kMDGUIDSize);
|
|
uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped);
|
|
*data1 = htonl(*data1);
|
|
uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4);
|
|
*data2 = htons(*data2);
|
|
uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6);
|
|
*data3 = htons(*data3);
|
|
|
|
int buffer_idx = 0;
|
|
for (unsigned int idx = 0;
|
|
(buffer_idx < buffer_length) && (idx < kMDGUIDSize);
|
|
++idx) {
|
|
int hi = (identifier_swapped[idx] >> 4) & 0x0F;
|
|
int lo = (identifier_swapped[idx]) & 0x0F;
|
|
|
|
if (idx == 4 || idx == 6 || idx == 8 || idx == 10)
|
|
buffer[buffer_idx++] = '-';
|
|
|
|
buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi;
|
|
buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo;
|
|
}
|
|
|
|
// NULL terminate
|
|
buffer[(buffer_idx < buffer_length) ? buffer_idx : buffer_idx - 1] = 0;
|
|
}
|
|
|
|
} // namespace lul
|