2000-11-28 19:53:59 +00:00
|
|
|
/* -*- Mode: C++; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
|
|
|
*
|
2012-05-21 11:12:37 +00:00
|
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
2004-04-25 21:07:34 +00:00
|
|
|
|
|
|
|
/* This program reads an ELF file and computes information about
|
|
|
|
* redundancies.
|
2000-11-28 19:53:59 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <fstream>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
#include <map>
|
|
|
|
#include <elf.h>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <getopt.h>
|
|
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
|
2001-10-16 15:45:41 +00:00
|
|
|
char* opt_type = "func";
|
|
|
|
char* opt_section = ".text";
|
2000-11-28 19:53:59 +00:00
|
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
|
|
|
|
static void
|
|
|
|
hexdump(ostream& out, const char* bytes, size_t count)
|
|
|
|
{
|
|
|
|
hex(out);
|
|
|
|
|
|
|
|
size_t off = 0;
|
|
|
|
while (off < count) {
|
|
|
|
out.form("%08lx: ", off);
|
|
|
|
|
|
|
|
const char* p = bytes + off;
|
|
|
|
|
|
|
|
int j = 0;
|
|
|
|
while (j < 16) {
|
|
|
|
out.form("%02x", p[j++] & 0xff);
|
|
|
|
if (j + off >= count)
|
|
|
|
break;
|
|
|
|
|
|
|
|
out.form("%02x ", p[j++] & 0xff);
|
|
|
|
if (j + off >= count)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Pad
|
|
|
|
for (; j < 16; ++j)
|
|
|
|
out << ((j%2) ? " " : " ");
|
|
|
|
|
|
|
|
for (j = 0; j < 16; ++j) {
|
|
|
|
if (j + off < count)
|
|
|
|
out.put(isprint(p[j]) ? p[j] : '.');
|
|
|
|
}
|
|
|
|
|
|
|
|
out << endl;
|
|
|
|
off += 16;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
|
|
|
|
int
|
|
|
|
verify_elf_header(const Elf32_Ehdr* hdr)
|
|
|
|
{
|
|
|
|
if (hdr->e_ident[EI_MAG0] != ELFMAG0
|
|
|
|
|| hdr->e_ident[EI_MAG1] != ELFMAG1
|
|
|
|
|| hdr->e_ident[EI_MAG2] != ELFMAG2
|
|
|
|
|| hdr->e_ident[EI_MAG3] != ELFMAG3) {
|
|
|
|
cerr << "not an elf file" << endl;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (hdr->e_ident[EI_CLASS] != ELFCLASS32) {
|
|
|
|
cerr << "not a 32-bit elf file" << endl;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (hdr->e_ident[EI_DATA] != ELFDATA2LSB) {
|
|
|
|
cerr << "not a little endian elf file" << endl;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (hdr->e_ident[EI_VERSION] != EV_CURRENT) {
|
|
|
|
cerr << "incompatible version" << endl;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
|
|
|
|
class elf_symbol : public Elf32_Sym
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
elf_symbol(const Elf32_Sym& sym)
|
|
|
|
{ ::memcpy(static_cast<Elf32_Sym*>(this), &sym, sizeof(Elf32_Sym)); }
|
|
|
|
|
|
|
|
friend bool operator==(const elf_symbol& lhs, const elf_symbol& rhs) {
|
|
|
|
return 0 == ::memcmp(static_cast<const Elf32_Sym*>(&lhs),
|
|
|
|
static_cast<const Elf32_Sym*>(&rhs),
|
|
|
|
sizeof(Elf32_Sym)); }
|
|
|
|
};
|
|
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
|
|
|
|
static const char*
|
|
|
|
st_bind(unsigned char info)
|
|
|
|
{
|
|
|
|
switch (ELF32_ST_BIND(info)) {
|
|
|
|
case STB_LOCAL: return "local";
|
|
|
|
case STB_GLOBAL: return "global";
|
|
|
|
case STB_WEAK: return "weak";
|
|
|
|
default: return "unknown";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char*
|
|
|
|
st_type(unsigned char info)
|
|
|
|
{
|
|
|
|
switch (ELF32_ST_TYPE(info)) {
|
|
|
|
case STT_NOTYPE: return "none";
|
|
|
|
case STT_OBJECT: return "object";
|
|
|
|
case STT_FUNC: return "func";
|
|
|
|
case STT_SECTION: return "section";
|
|
|
|
case STT_FILE: return "file";
|
|
|
|
default: return "unknown";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static unsigned char
|
|
|
|
st_type(const char* type)
|
|
|
|
{
|
|
|
|
if (strcmp(type, "none") == 0) {
|
|
|
|
return STT_NOTYPE;
|
|
|
|
}
|
|
|
|
else if (strcmp(type, "object") == 0) {
|
|
|
|
return STT_OBJECT;
|
|
|
|
}
|
|
|
|
else if (strcmp(type, "func") == 0) {
|
|
|
|
return STT_FUNC;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
|
|
|
|
typedef vector<elf_symbol> elf_symbol_table;
|
|
|
|
typedef map< basic_string<char>, elf_symbol_table > elf_text_map;
|
|
|
|
|
|
|
|
void
|
|
|
|
process_mapping(char* mapping, size_t size)
|
|
|
|
{
|
|
|
|
const Elf32_Ehdr* ehdr = reinterpret_cast<Elf32_Ehdr*>(mapping);
|
|
|
|
if (verify_elf_header(ehdr) < 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// find the section headers
|
|
|
|
const Elf32_Shdr* shdrs = reinterpret_cast<Elf32_Shdr*>(mapping + ehdr->e_shoff);
|
|
|
|
|
|
|
|
// find the section header string table, .shstrtab
|
|
|
|
const Elf32_Shdr* shstrtabsh = shdrs + ehdr->e_shstrndx;
|
|
|
|
const char* shstrtab = mapping + shstrtabsh->sh_offset;
|
|
|
|
|
|
|
|
// find the sections we care about
|
|
|
|
const Elf32_Shdr *symtabsh, *strtabsh, *textsh;
|
|
|
|
int textndx;
|
|
|
|
|
|
|
|
for (int i = 0; i < ehdr->e_shnum; ++i) {
|
|
|
|
basic_string<char> name(shstrtab + shdrs[i].sh_name);
|
|
|
|
if (name == opt_section) {
|
|
|
|
textsh = shdrs + i;
|
|
|
|
textndx = i;
|
|
|
|
}
|
|
|
|
else if (name == ".symtab") {
|
|
|
|
symtabsh = shdrs + i;
|
|
|
|
}
|
|
|
|
else if (name == ".strtab") {
|
|
|
|
strtabsh = shdrs + i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// find the .strtab
|
|
|
|
char* strtab = mapping + strtabsh->sh_offset;
|
|
|
|
|
|
|
|
// find the .text
|
|
|
|
char* text = mapping + textsh->sh_offset;
|
|
|
|
int textaddr = textsh->sh_addr;
|
|
|
|
|
|
|
|
// find the symbol table
|
|
|
|
int nentries = symtabsh->sh_size / sizeof(Elf32_Sym);
|
|
|
|
Elf32_Sym* symtab = reinterpret_cast<Elf32_Sym*>(mapping + symtabsh->sh_offset);
|
|
|
|
|
|
|
|
// look for symbols in the .text section
|
|
|
|
elf_text_map textmap;
|
|
|
|
|
|
|
|
for (int i = 0; i < nentries; ++i) {
|
|
|
|
const Elf32_Sym* sym = symtab + i;
|
|
|
|
if (sym->st_shndx == textndx &&
|
|
|
|
ELF32_ST_TYPE(sym->st_info) == st_type(opt_type) &&
|
|
|
|
sym->st_size) {
|
|
|
|
basic_string<char> functext(text + sym->st_value - textaddr, sym->st_size);
|
|
|
|
|
|
|
|
elf_symbol_table& syms = textmap[functext];
|
|
|
|
if (syms.end() == find(syms.begin(), syms.end(), elf_symbol(*sym)))
|
|
|
|
syms.insert(syms.end(), *sym);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int uniquebytes = 0, totalbytes = 0;
|
|
|
|
int uniquecount = 0, totalcount = 0;
|
|
|
|
|
|
|
|
for (elf_text_map::const_iterator entry = textmap.begin();
|
|
|
|
entry != textmap.end();
|
|
|
|
++entry) {
|
|
|
|
const elf_symbol_table& syms = entry->second;
|
|
|
|
|
|
|
|
if (syms.size() <= 1)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
int sz = syms.begin()->st_size;
|
|
|
|
uniquebytes += sz;
|
|
|
|
totalbytes += sz * syms.size();
|
|
|
|
uniquecount += 1;
|
|
|
|
totalcount += syms.size();
|
|
|
|
|
|
|
|
for (elf_symbol_table::const_iterator sym = syms.begin(); sym != syms.end(); ++sym)
|
|
|
|
cout << strtab + sym->st_name << endl;
|
|
|
|
|
|
|
|
dec(cout);
|
|
|
|
cout << syms.size() << " copies of " << sz << " bytes";
|
|
|
|
cout << " (" << ((syms.size() - 1) * sz) << " redundant bytes)" << endl;
|
|
|
|
|
|
|
|
hexdump(cout, entry->first.data(), entry->first.size());
|
|
|
|
cout << endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
dec(cout);
|
|
|
|
cout << "bytes unique=" << uniquebytes << ", total=" << totalbytes << endl;
|
|
|
|
cout << "entries unique=" << uniquecount << ", total=" << totalcount << endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
process_file(const char* name)
|
|
|
|
{
|
|
|
|
int fd = open(name, O_RDWR);
|
|
|
|
if (fd >= 0) {
|
|
|
|
struct stat statbuf;
|
|
|
|
if (fstat(fd, &statbuf) >= 0) {
|
|
|
|
size_t size = statbuf.st_size;
|
|
|
|
|
|
|
|
void* mapping = mmap(0, size, PROT_READ, MAP_SHARED, fd, 0);
|
|
|
|
if (mapping != MAP_FAILED) {
|
|
|
|
process_mapping(static_cast<char*>(mapping), size);
|
|
|
|
munmap(mapping, size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
close(fd);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
usage()
|
|
|
|
{
|
|
|
|
cerr << "foldelf [--section=<section>] [--type=<type>] [file ...]\n\
|
|
|
|
--section, -s the section of the ELF file to scan; defaults\n\
|
|
|
|
to ``.text''. Valid values include any section\n\
|
|
|
|
of the ELF file.\n\
|
|
|
|
--type, -t the type of object to examine in the section;\n\
|
|
|
|
defaults to ``func''. Valid values include\n\
|
|
|
|
``none'', ``func'', or ``object''.\n";
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct option opts[] = {
|
|
|
|
{ "type", required_argument, 0, 't' },
|
|
|
|
{ "section", required_argument, 0, 's' },
|
|
|
|
{ "help", no_argument, 0, '?' },
|
|
|
|
{ 0, 0, 0, 0 }
|
|
|
|
};
|
|
|
|
|
|
|
|
int
|
|
|
|
main(int argc, char* argv[])
|
|
|
|
{
|
|
|
|
while (1) {
|
|
|
|
int option_index = 0;
|
|
|
|
int c = getopt_long(argc, argv, "t:s:", opts, &option_index);
|
|
|
|
|
|
|
|
if (c < 0) break;
|
|
|
|
|
|
|
|
switch (c) {
|
|
|
|
case 't':
|
|
|
|
opt_type = optarg;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 's':
|
|
|
|
opt_section = optarg;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '?':
|
|
|
|
usage();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (int i = optind; i < argc; ++i)
|
|
|
|
process_file(argv[i]);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|