/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ // Copyright (c) 2006, 2011, 2012 Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Restructured in 2009 by: Jim Blandy // (derived from) // dump_symbols.cc: implement google_breakpad::WriteSymbolFile: // Find all the debugging info in a file and dump it as a Breakpad symbol file. // // dump_symbols.h: Read debugging information from an ELF file, and write // it out as a Breakpad symbol file. // This file is derived from the following files in // toolkit/crashreporter/google-breakpad: // src/common/linux/dump_symbols.cc // src/common/linux/elfutils.cc // src/common/linux/file_id.cc #include #include #include #include #include #include #include #include #include #include #include #include "mozilla/Assertions.h" #include "LulPlatformMacros.h" #include "LulCommonExt.h" #include "LulDwarfExt.h" #if defined(LUL_PLAT_arm_android) # include "LulExidxExt.h" #endif #include "LulElfInt.h" #include "LulMainInt.h" #if defined(LUL_PLAT_arm_android) && !defined(SHT_ARM_EXIDX) // bionic and older glibsc don't define it # define SHT_ARM_EXIDX (SHT_LOPROC + 1) #endif // This namespace contains helper functions. namespace { using lul::DwarfCFIToModule; using lul::FindElfSectionByName; using lul::GetOffset; using lul::IsValidElf; using lul::Module; using lul::UniqueString; using lul::scoped_ptr; using lul::Summariser; using std::string; using std::vector; using std::set; // // FDWrapper // // Wrapper class to make sure opened file is closed. // class FDWrapper { public: explicit FDWrapper(int fd) : fd_(fd) {} ~FDWrapper() { if (fd_ != -1) close(fd_); } int get() { return fd_; } int release() { int fd = fd_; fd_ = -1; return fd; } private: int fd_; }; // // MmapWrapper // // Wrapper class to make sure mapped regions are unmapped. // class MmapWrapper { public: MmapWrapper() : is_set_(false) {} ~MmapWrapper() { if (is_set_ && base_ != NULL) { MOZ_ASSERT(size_ > 0); munmap(base_, size_); } } void set(void *mapped_address, size_t mapped_size) { is_set_ = true; base_ = mapped_address; size_ = mapped_size; } void release() { MOZ_ASSERT(is_set_); is_set_ = false; base_ = NULL; size_ = 0; } private: bool is_set_; void *base_; size_t size_; }; // Set NUM_DW_REGNAMES to be the number of Dwarf register names // appropriate to the machine architecture given in HEADER. Return // true on success, or false if HEADER's machine architecture is not // supported. template bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header, unsigned int* num_dw_regnames) { switch (elf_header->e_machine) { case EM_386: *num_dw_regnames = DwarfCFIToModule::RegisterNames::I386(); return true; case EM_ARM: *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM(); return true; case EM_X86_64: *num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64(); return true; default: MOZ_ASSERT(0); return false; } } template bool LoadDwarfCFI(const string& dwarf_filename, const typename ElfClass::Ehdr* elf_header, const char* section_name, const typename ElfClass::Shdr* section, const bool eh_frame, const typename ElfClass::Shdr* got_section, const typename ElfClass::Shdr* text_section, const bool big_endian, SecMap* smap, uintptr_t text_bias, void (*log)(const char*)) { // Find the appropriate set of register names for this file's // architecture. unsigned int num_dw_regs = 0; if (!DwarfCFIRegisterNames(elf_header, &num_dw_regs)) { fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';" " cannot convert DWARF call frame information\n", dwarf_filename.c_str(), elf_header->e_machine); return false; } const lul::Endianness endianness = big_endian ? lul::ENDIANNESS_BIG : lul::ENDIANNESS_LITTLE; // Find the call frame information and its size. const char* cfi = GetOffset(elf_header, section->sh_offset); size_t cfi_size = section->sh_size; // Plug together the parser, handler, and their entourages. // Here's a summariser, which will receive the output of the // parser, create summaries, and add them to |smap|. Summariser* summ = new Summariser(smap, text_bias, log); DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name); DwarfCFIToModule handler(num_dw_regs, &module_reporter, summ); lul::ByteReader byte_reader(endianness); byte_reader.SetAddressSize(ElfClass::kAddrSize); // Provide the base addresses for .eh_frame encoded pointers, if // possible. byte_reader.SetCFIDataBase(section->sh_addr, cfi); if (got_section) byte_reader.SetDataBase(got_section->sh_addr); if (text_section) byte_reader.SetTextBase(text_section->sh_addr); lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename, section_name); lul::CallFrameInfo parser(cfi, cfi_size, &byte_reader, &handler, &dwarf_reporter, eh_frame); parser.Start(); delete summ; return true; } #if defined(LUL_PLAT_arm_android) template bool LoadARMexidx(const typename ElfClass::Ehdr* elf_header, const typename ElfClass::Shdr* exidx_section, const typename ElfClass::Shdr* extab_section, uintptr_t text_bias, uintptr_t rx_avma, size_t rx_size, SecMap* smap, void (*log)(const char*)) { // To do this properly we need to know: // * the bounds of the .ARM.exidx section in the process image // * the bounds of the .ARM.extab section in the process image // * the vma of the last byte in the text section associated with the .exidx // The first two are easy. The third is a bit tricky. If we can't // figure out what it is, just pass in zero. // Note that we are reading EXIDX directly out of the mapped in // executable image. Unlike with the CFI reader, there is no // auxiliary, temporary mapping used to read the unwind data. // // An .exidx section is always required, but the .extab section // can be optionally omitted, provided that .exidx does not refer // to it. If the .exidx is erroneous and does refer to .extab even // though .extab is missing, the range checks done by GET_EX_U32 in // ExceptionTableInfo::ExtabEntryExtract should prevent any invalid // memory accesses, and cause the .extab to be rejected as invalid. uintptr_t exidx_svma = exidx_section->sh_addr; uintptr_t exidx_avma = exidx_svma + text_bias; size_t exidx_size = exidx_section->sh_size; uintptr_t extab_svma = 0; uintptr_t extab_avma = 0; size_t extab_size = 0; if (extab_section) { extab_svma = extab_section->sh_addr; extab_avma = extab_svma + text_bias; extab_size = extab_section->sh_size; } // Because we are reading EXIDX directly out of the executing image, // we need to be careful to check that the relevant sections have // really been mapped with r permissions, so as to guarantee that // reading them won't segfault. Do this by checking that rx mapped // area covers the exidx and extab as mapped in. if (rx_size == 0) // This seems sufficiently bogus that we shouldn't proceed further. return false; if (exidx_size == 0) // There's no EXIDX data. No point in continuing. return false; if (!(exidx_avma >= rx_avma && exidx_avma + exidx_size <= rx_avma + rx_size)) // The mapped .exidx isn't entirely inside the rx area. return false; if (extab_section && !(extab_avma >= rx_avma && extab_avma + extab_size <= rx_avma + rx_size)) // There an .extab section, but it isn't entirely inside the rx area. return false; // The sh_link field of the exidx section gives the section number // for the associated text section. uint32_t exidx_text_last_avma = 0; int exidx_text_sno = exidx_section->sh_link; typedef typename ElfClass::Shdr Shdr; // |sections| points to the section header table const Shdr* sections = GetOffset(elf_header, elf_header->e_shoff); const int num_sections = elf_header->e_shnum; if (exidx_text_sno >= 0 && exidx_text_sno < num_sections) { const Shdr* exidx_text_shdr = §ions[exidx_text_sno]; if (exidx_text_shdr->sh_size > 0) { uint32_t exidx_text_last_svma = exidx_text_shdr->sh_addr + exidx_text_shdr->sh_size - 1; exidx_text_last_avma = exidx_text_last_svma + text_bias; } } lul::ARMExToModule handler(smap, log); lul::ExceptionTableInfo parser(reinterpret_cast(exidx_avma), exidx_size, reinterpret_cast(extab_avma), extab_size, exidx_text_last_avma, &handler, log); parser.Start(); return true; } #endif /* defined(LUL_PLAT_arm_android) */ bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper, void** elf_header) { int obj_fd = open(obj_file.c_str(), O_RDONLY); if (obj_fd < 0) { fprintf(stderr, "Failed to open ELF file '%s': %s\n", obj_file.c_str(), strerror(errno)); return false; } FDWrapper obj_fd_wrapper(obj_fd); struct stat st; if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) { fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", obj_file.c_str(), strerror(errno)); return false; } // Mapping it read-only is good enough. In any case, mapping it // read-write confuses Valgrind's debuginfo acquire/discard // heuristics, making it hard to profile the profiler. void *obj_base = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, obj_fd, 0); if (obj_base == MAP_FAILED) { fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", obj_file.c_str(), strerror(errno)); return false; } map_wrapper->set(obj_base, st.st_size); *elf_header = obj_base; if (!IsValidElf(*elf_header)) { fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str()); return false; } return true; } // Get the endianness of ELF_HEADER. If it's invalid, return false. template bool ElfEndianness(const typename ElfClass::Ehdr* elf_header, bool* big_endian) { if (elf_header->e_ident[EI_DATA] == ELFDATA2LSB) { *big_endian = false; return true; } if (elf_header->e_ident[EI_DATA] == ELFDATA2MSB) { *big_endian = true; return true; } fprintf(stderr, "bad data encoding in ELF header: %d\n", elf_header->e_ident[EI_DATA]); return false; } // // LoadSymbolsInfo // // Holds the state between the two calls to LoadSymbols() in case it's necessary // to follow the .gnu_debuglink section and load debug information from a // different file. // template class LoadSymbolsInfo { public: typedef typename ElfClass::Addr Addr; explicit LoadSymbolsInfo(const vector& dbg_dirs) : debug_dirs_(dbg_dirs), has_loading_addr_(false) {} // Keeps track of which sections have been loaded so sections don't // accidentally get loaded twice from two different files. void LoadedSection(const string §ion) { if (loaded_sections_.count(section) == 0) { loaded_sections_.insert(section); } else { fprintf(stderr, "Section %s has already been loaded.\n", section.c_str()); } } string debuglink_file() const { return debuglink_file_; } private: const vector& debug_dirs_; // Directories in which to // search for the debug ELF file. string debuglink_file_; // Full path to the debug ELF file. bool has_loading_addr_; // Indicate if LOADING_ADDR_ is valid. set loaded_sections_; // Tracks the Loaded ELF sections // between calls to LoadSymbols(). }; // Find the preferred loading address of the binary. template typename ElfClass::Addr GetLoadingAddress( const typename ElfClass::Phdr* program_headers, int nheader) { typedef typename ElfClass::Phdr Phdr; // For non-PIC executables (e_type == ET_EXEC), the load address is // the start address of the first PT_LOAD segment. (ELF requires // the segments to be sorted by load address.) For PIC executables // and dynamic libraries (e_type == ET_DYN), this address will // normally be zero. for (int i = 0; i < nheader; ++i) { const Phdr& header = program_headers[i]; if (header.p_type == PT_LOAD) return header.p_vaddr; } return 0; } template bool LoadSymbols(const string& obj_file, const bool big_endian, const typename ElfClass::Ehdr* elf_header, const bool read_gnu_debug_link, LoadSymbolsInfo* info, SecMap* smap, void* rx_avma, size_t rx_size, void (*log)(const char*)) { typedef typename ElfClass::Phdr Phdr; typedef typename ElfClass::Shdr Shdr; char buf[500]; snprintf(buf, sizeof(buf), "LoadSymbols: BEGIN %s\n", obj_file.c_str()); buf[sizeof(buf)-1] = 0; log(buf); // This is how the text bias is calculated. // BEGIN CALCULATE BIAS uintptr_t loading_addr = GetLoadingAddress( GetOffset(elf_header, elf_header->e_phoff), elf_header->e_phnum); uintptr_t text_bias = ((uintptr_t)rx_avma) - loading_addr; snprintf(buf, sizeof(buf), "LoadSymbols: rx_avma=%llx, text_bias=%llx", (unsigned long long int)(uintptr_t)rx_avma, (unsigned long long int)text_bias); buf[sizeof(buf)-1] = 0; log(buf); // END CALCULATE BIAS const Shdr* sections = GetOffset(elf_header, elf_header->e_shoff); const Shdr* section_names = sections + elf_header->e_shstrndx; const char* names = GetOffset(elf_header, section_names->sh_offset); const char *names_end = names + section_names->sh_size; bool found_usable_info = false; // Dwarf Call Frame Information (CFI) is actually independent from // the other DWARF debugging information, and can be used alone. const Shdr* dwarf_cfi_section = FindElfSectionByName(".debug_frame", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum); if (dwarf_cfi_section) { // Ignore the return value of this function; even without call frame // information, the other debugging information could be perfectly // useful. info->LoadedSection(".debug_frame"); bool result = LoadDwarfCFI(obj_file, elf_header, ".debug_frame", dwarf_cfi_section, false, 0, 0, big_endian, smap, text_bias, log); found_usable_info = found_usable_info || result; if (result) log("LoadSymbols: read CFI from .debug_frame"); } // Linux C++ exception handling information can also provide // unwinding data. const Shdr* eh_frame_section = FindElfSectionByName(".eh_frame", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum); if (eh_frame_section) { // Pointers in .eh_frame data may be relative to the base addresses of // certain sections. Provide those sections if present. const Shdr* got_section = FindElfSectionByName(".got", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum); const Shdr* text_section = FindElfSectionByName(".text", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum); info->LoadedSection(".eh_frame"); // As above, ignore the return value of this function. bool result = LoadDwarfCFI(obj_file, elf_header, ".eh_frame", eh_frame_section, true, got_section, text_section, big_endian, smap, text_bias, log); found_usable_info = found_usable_info || result; if (result) log("LoadSymbols: read CFI from .eh_frame"); } # if defined(LUL_PLAT_arm_android) // ARM has special unwind tables that can be used. .exidx is // always required, and .extab is normally required, but may // be omitted if it is empty. See comments on LoadARMexidx() // for more details. const Shdr* arm_exidx_section = FindElfSectionByName(".ARM.exidx", SHT_ARM_EXIDX, sections, names, names_end, elf_header->e_shnum); const Shdr* arm_extab_section = FindElfSectionByName(".ARM.extab", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum); const Shdr* debug_info_section = FindElfSectionByName(".debug_info", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum); // Only load information from this section if there isn't a .debug_info // section. if (!debug_info_section && arm_exidx_section) { info->LoadedSection(".ARM.exidx"); if (arm_extab_section) info->LoadedSection(".ARM.extab"); bool result = LoadARMexidx(elf_header, arm_exidx_section, arm_extab_section, text_bias, reinterpret_cast(rx_avma), rx_size, smap, log); found_usable_info = found_usable_info || result; if (result) log("LoadSymbols: read EXIDX from .ARM.{exidx,extab}"); } # endif /* defined(LUL_PLAT_arm_android) */ snprintf(buf, sizeof(buf), "LoadSymbols: END %s\n", obj_file.c_str()); buf[sizeof(buf)-1] = 0; log(buf); return found_usable_info; } // Return the breakpad symbol file identifier for the architecture of // ELF_HEADER. template const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) { typedef typename ElfClass::Half Half; Half arch = elf_header->e_machine; switch (arch) { case EM_386: return "x86"; case EM_ARM: return "arm"; case EM_MIPS: return "mips"; case EM_PPC64: return "ppc64"; case EM_PPC: return "ppc"; case EM_S390: return "s390"; case EM_SPARC: return "sparc"; case EM_SPARCV9: return "sparcv9"; case EM_X86_64: return "x86_64"; default: return NULL; } } // Format the Elf file identifier in IDENTIFIER as a UUID with the // dashes removed. string FormatIdentifier(unsigned char identifier[16]) { char identifier_str[40]; lul::FileID::ConvertIdentifierToString( identifier, identifier_str, sizeof(identifier_str)); string id_no_dash; for (int i = 0; identifier_str[i] != '\0'; ++i) if (identifier_str[i] != '-') id_no_dash += identifier_str[i]; // Add an extra "0" by the end. PDB files on Windows have an 'age' // number appended to the end of the file identifier; this isn't // really used or necessary on other platforms, but be consistent. id_no_dash += '0'; return id_no_dash; } // Return the non-directory portion of FILENAME: the portion after the // last slash, or the whole filename if there are no slashes. string BaseFileName(const string &filename) { // Lots of copies! basename's behavior is less than ideal. char *c_filename = strdup(filename.c_str()); string base = basename(c_filename); free(c_filename); return base; } template bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header, const string& obj_filename, const vector& debug_dirs, SecMap* smap, void* rx_avma, size_t rx_size, void (*log)(const char*)) { typedef typename ElfClass::Ehdr Ehdr; unsigned char identifier[16]; if (!lul ::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) { fprintf(stderr, "%s: unable to generate file identifier\n", obj_filename.c_str()); return false; } const char *architecture = ElfArchitecture(elf_header); if (!architecture) { fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", obj_filename.c_str(), elf_header->e_machine); return false; } // Figure out what endianness this file is. bool big_endian; if (!ElfEndianness(elf_header, &big_endian)) return false; string name = BaseFileName(obj_filename); string os = "Linux"; string id = FormatIdentifier(identifier); LoadSymbolsInfo info(debug_dirs); if (!LoadSymbols(obj_filename, big_endian, elf_header, !debug_dirs.empty(), &info, smap, rx_avma, rx_size, log)) { const string debuglink_file = info.debuglink_file(); if (debuglink_file.empty()) return false; // Load debuglink ELF file. fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str()); MmapWrapper debug_map_wrapper; Ehdr* debug_elf_header = NULL; if (!LoadELF(debuglink_file, &debug_map_wrapper, reinterpret_cast(&debug_elf_header))) return false; // Sanity checks to make sure everything matches up. const char *debug_architecture = ElfArchitecture(debug_elf_header); if (!debug_architecture) { fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", debuglink_file.c_str(), debug_elf_header->e_machine); return false; } if (strcmp(architecture, debug_architecture)) { fprintf(stderr, "%s with ELF machine architecture %s does not match " "%s with ELF architecture %s\n", debuglink_file.c_str(), debug_architecture, obj_filename.c_str(), architecture); return false; } bool debug_big_endian; if (!ElfEndianness(debug_elf_header, &debug_big_endian)) return false; if (debug_big_endian != big_endian) { fprintf(stderr, "%s and %s does not match in endianness\n", obj_filename.c_str(), debuglink_file.c_str()); return false; } if (!LoadSymbols(debuglink_file, debug_big_endian, debug_elf_header, false, &info, smap, rx_avma, rx_size, log)) { return false; } } return true; } } // namespace (anon) namespace lul { bool ReadSymbolDataInternal(const uint8_t* obj_file, const string& obj_filename, const vector& debug_dirs, SecMap* smap, void* rx_avma, size_t rx_size, void (*log)(const char*)) { if (!IsValidElf(obj_file)) { fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str()); return false; } int elfclass = ElfClass(obj_file); if (elfclass == ELFCLASS32) { return ReadSymbolDataElfClass( reinterpret_cast(obj_file), obj_filename, debug_dirs, smap, rx_avma, rx_size, log); } if (elfclass == ELFCLASS64) { return ReadSymbolDataElfClass( reinterpret_cast(obj_file), obj_filename, debug_dirs, smap, rx_avma, rx_size, log); } return false; } bool ReadSymbolData(const string& obj_file, const vector& debug_dirs, SecMap* smap, void* rx_avma, size_t rx_size, void (*log)(const char*)) { MmapWrapper map_wrapper; void* elf_header = NULL; if (!LoadELF(obj_file, &map_wrapper, &elf_header)) return false; return ReadSymbolDataInternal(reinterpret_cast(elf_header), obj_file, debug_dirs, smap, rx_avma, rx_size, log); } namespace { template void FindElfClassSection(const char *elf_base, const char *section_name, typename ElfClass::Word section_type, const void **section_start, int *section_size) { typedef typename ElfClass::Ehdr Ehdr; typedef typename ElfClass::Shdr Shdr; MOZ_ASSERT(elf_base); MOZ_ASSERT(section_start); MOZ_ASSERT(section_size); MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0); const Ehdr* elf_header = reinterpret_cast(elf_base); MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); const Shdr* sections = GetOffset(elf_header, elf_header->e_shoff); const Shdr* section_names = sections + elf_header->e_shstrndx; const char* names = GetOffset(elf_header, section_names->sh_offset); const char *names_end = names + section_names->sh_size; const Shdr* section = FindElfSectionByName(section_name, section_type, sections, names, names_end, elf_header->e_shnum); if (section != NULL && section->sh_size > 0) { *section_start = elf_base + section->sh_offset; *section_size = section->sh_size; } } template void FindElfClassSegment(const char *elf_base, typename ElfClass::Word segment_type, const void **segment_start, int *segment_size) { typedef typename ElfClass::Ehdr Ehdr; typedef typename ElfClass::Phdr Phdr; MOZ_ASSERT(elf_base); MOZ_ASSERT(segment_start); MOZ_ASSERT(segment_size); MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0); const Ehdr* elf_header = reinterpret_cast(elf_base); MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); const Phdr* phdrs = GetOffset(elf_header, elf_header->e_phoff); for (int i = 0; i < elf_header->e_phnum; ++i) { if (phdrs[i].p_type == segment_type) { *segment_start = elf_base + phdrs[i].p_offset; *segment_size = phdrs[i].p_filesz; return; } } } } // namespace (anon) bool IsValidElf(const void* elf_base) { return strncmp(reinterpret_cast(elf_base), ELFMAG, SELFMAG) == 0; } int ElfClass(const void* elf_base) { const ElfW(Ehdr)* elf_header = reinterpret_cast(elf_base); return elf_header->e_ident[EI_CLASS]; } bool FindElfSection(const void *elf_mapped_base, const char *section_name, uint32_t section_type, const void **section_start, int *section_size, int *elfclass) { MOZ_ASSERT(elf_mapped_base); MOZ_ASSERT(section_start); MOZ_ASSERT(section_size); *section_start = NULL; *section_size = 0; if (!IsValidElf(elf_mapped_base)) return false; int cls = ElfClass(elf_mapped_base); if (elfclass) { *elfclass = cls; } const char* elf_base = static_cast(elf_mapped_base); if (cls == ELFCLASS32) { FindElfClassSection(elf_base, section_name, section_type, section_start, section_size); return *section_start != NULL; } else if (cls == ELFCLASS64) { FindElfClassSection(elf_base, section_name, section_type, section_start, section_size); return *section_start != NULL; } return false; } bool FindElfSegment(const void *elf_mapped_base, uint32_t segment_type, const void **segment_start, int *segment_size, int *elfclass) { MOZ_ASSERT(elf_mapped_base); MOZ_ASSERT(segment_start); MOZ_ASSERT(segment_size); *segment_start = NULL; *segment_size = 0; if (!IsValidElf(elf_mapped_base)) return false; int cls = ElfClass(elf_mapped_base); if (elfclass) { *elfclass = cls; } const char* elf_base = static_cast(elf_mapped_base); if (cls == ELFCLASS32) { FindElfClassSegment(elf_base, segment_type, segment_start, segment_size); return *segment_start != NULL; } else if (cls == ELFCLASS64) { FindElfClassSegment(elf_base, segment_type, segment_start, segment_size); return *segment_start != NULL; } return false; } // (derived from) // file_id.cc: Return a unique identifier for a file // // See file_id.h for documentation // // ELF note name and desc are 32-bits word padded. #define NOTE_PADDING(a) ((a + 3) & ~3) // These functions are also used inside the crashed process, so be safe // and use the syscall/libc wrappers instead of direct syscalls or libc. template static bool ElfClassBuildIDNoteIdentifier(const void *section, int length, uint8_t identifier[kMDGUIDSize]) { typedef typename ElfClass::Nhdr Nhdr; const void* section_end = reinterpret_cast(section) + length; const Nhdr* note_header = reinterpret_cast(section); while (reinterpret_cast(note_header) < section_end) { if (note_header->n_type == NT_GNU_BUILD_ID) break; note_header = reinterpret_cast( reinterpret_cast(note_header) + sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz) + NOTE_PADDING(note_header->n_descsz)); } if (reinterpret_cast(note_header) >= section_end || note_header->n_descsz == 0) { return false; } const char* build_id = reinterpret_cast(note_header) + sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz); // Copy as many bits of the build ID as will fit // into the GUID space. memset(identifier, 0, kMDGUIDSize); memcpy(identifier, build_id, std::min(kMDGUIDSize, (size_t)note_header->n_descsz)); return true; } // Attempt to locate a .note.gnu.build-id section in an ELF binary // and copy as many bytes of it as will fit into |identifier|. static bool FindElfBuildIDNote(const void *elf_mapped_base, uint8_t identifier[kMDGUIDSize]) { void* note_section; int note_size, elfclass; if ((!FindElfSegment(elf_mapped_base, PT_NOTE, (const void**)¬e_section, ¬e_size, &elfclass) || note_size == 0) && (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE, (const void**)¬e_section, ¬e_size, &elfclass) || note_size == 0)) { return false; } if (elfclass == ELFCLASS32) { return ElfClassBuildIDNoteIdentifier(note_section, note_size, identifier); } else if (elfclass == ELFCLASS64) { return ElfClassBuildIDNoteIdentifier(note_section, note_size, identifier); } return false; } // Attempt to locate the .text section of an ELF binary and generate // a simple hash by XORing the first page worth of bytes into |identifier|. static bool HashElfTextSection(const void *elf_mapped_base, uint8_t identifier[kMDGUIDSize]) { void* text_section; int text_size; if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS, (const void**)&text_section, &text_size, NULL) || text_size == 0) { return false; } memset(identifier, 0, kMDGUIDSize); const uint8_t* ptr = reinterpret_cast(text_section); const uint8_t* ptr_end = ptr + std::min(text_size, 4096); while (ptr < ptr_end) { for (unsigned i = 0; i < kMDGUIDSize; i++) identifier[i] ^= ptr[i]; ptr += kMDGUIDSize; } return true; } // static bool FileID::ElfFileIdentifierFromMappedFile(const void* base, uint8_t identifier[kMDGUIDSize]) { // Look for a build id note first. if (FindElfBuildIDNote(base, identifier)) return true; // Fall back on hashing the first page of the text section. return HashElfTextSection(base, identifier); } // static void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize], char* buffer, int buffer_length) { uint8_t identifier_swapped[kMDGUIDSize]; // Endian-ness swap to match dump processor expectation. memcpy(identifier_swapped, identifier, kMDGUIDSize); uint32_t* data1 = reinterpret_cast(identifier_swapped); *data1 = htonl(*data1); uint16_t* data2 = reinterpret_cast(identifier_swapped + 4); *data2 = htons(*data2); uint16_t* data3 = reinterpret_cast(identifier_swapped + 6); *data3 = htons(*data3); int buffer_idx = 0; for (unsigned int idx = 0; (buffer_idx < buffer_length) && (idx < kMDGUIDSize); ++idx) { int hi = (identifier_swapped[idx] >> 4) & 0x0F; int lo = (identifier_swapped[idx]) & 0x0F; if (idx == 4 || idx == 6 || idx == 8 || idx == 10) buffer[buffer_idx++] = '-'; buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi; buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo; } // NULL terminate buffer[(buffer_idx < buffer_length) ? buffer_idx : buffer_idx - 1] = 0; } } // namespace lul