radare2/libr/anal/dwarf_process.c
pancake 529b25c1f0 Fix oobread in dwarf (done by wargio at rizin's repo)
* Rizin's Commit b35d3ea195b1a7a85688ea02a16935802a65f773
* Author: wargio <wargio@libero.it>
2022-09-29 17:38:37 +02:00

1918 lines
52 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* radare - LGPL - Copyright 2012-2022 - houndthe */
#include <r_anal.h>
#include <r_bin_dwarf.h>
#include "base_types.h"
/* State shared by the parse_* routines in this file. */
typedef struct dwarf_parse_context_t {
const RAnal *anal; // passed to r_anal_save_base_type () and read for arch/bits configuration
const RBinDwarfDie *all_dies; // DIE array addressed by the idx arguments of the parse_* routines
const ut64 count; // bound used whenever all_dies is iterated
Sdb *sdb;
HtUP/*<ut64 offset, DwarfDie *die>*/ *die_map; // looked up by DIE offset to resolve references
HtUP/*<offset, RBinDwarfLocList*>*/ *locations; // looked up by location list offset
const char *lang; // for demangling; also consulted by prefer_linkage_name ()
} Context;
/* Properties collected for a single function described by DWARF.
 * NOTE(review): the code that fills and consumes this struct is outside
 * the visible part of the file - confirm field ownership there. */
typedef struct dwarf_function_t {
ut64 addr;
const char *name;
const char *signature;
bool is_external;
bool is_method;
bool is_virtual;
bool is_trampoline; // intermediary in making call to another func
ut8 access; // public = 1, protected = 2, private = 3, if not set assume private
ut64 vtable_addr; // location description
ut64 call_conv; // normal || program || nocall
} Function;
/* Classification of where a variable lives.
 * The map_dwarf_reg_to_*_reg () helpers report LOCATION_BP / LOCATION_SP for
 * the registers they single out (e.g. rbp / rsp on x86-64), LOCATION_REGISTER
 * for every other register they know and LOCATION_UNKNOWN otherwise. */
typedef enum dwarf_location_kind {
LOCATION_UNKNOWN = 0,
LOCATION_GLOBAL = 1, // presumably a fixed address - TODO confirm against the users of this value
LOCATION_BP = 2,
LOCATION_SP = 3,
LOCATION_REGISTER = 4,
} VariableLocationKind;
/* Decoded location of a variable, as produced by parse_dwarf_location (). */
typedef struct dwarf_var_location_t {
VariableLocationKind kind;
ut64 address;
ut64 reg_num;
st64 offset; // the DW_OP_fbreg operand is added on top of the frame base location's offset
const char *reg_name; /* string literal */
} VariableLocation;
/* A parsed variable. All three pointers are released by variable_free (),
 * so they must be heap allocations (or NULL). */
typedef struct dwarf_variable_t {
VariableLocation *location;
char *name;
char *type;
} Variable;
/* Releases a Variable together with its name, location and type.
 * Passing NULL is allowed and does nothing. */
static void variable_free(Variable *var) {
	if (!var) {
		return;
	}
	free (var->name);
	free (var->location);
	free (var->type);
	free (var);
}
/* Scans the attributes of die in order and yields the index of the first one
 * whose attr_name equals attr_name; yields -1 when there is no such attribute
 * (or when die is NULL). */
static inline st32 find_attr_idx(const RBinDwarfDie *die, st32 attr_name) {
	st32 pos = 0;
	r_return_val_if_fail (die, -1);
	while (pos < die->count) {
		if (die->attr_values[pos].attr_name == attr_name) {
			return pos;
		}
		pos++;
	}
	return -1;
}
/* Same lookup as find_attr_idx (), but hands back a pointer to the matching
 * attribute value; NULL when nothing matches (or when die is NULL). */
static RBinDwarfAttrValue *find_attr(const RBinDwarfDie *die, st32 attr_name) {
	st32 pos;
	r_return_val_if_fail (die, NULL);
	for (pos = 0; pos < die->count; pos++) {
		RBinDwarfAttrValue *candidate = &die->attr_values[pos];
		if (candidate->attr_name == attr_name) {
			return candidate;
		}
	}
	return NULL;
}
/**
 * @brief Pasted from r_strbuf_*
 * Inserts string s right before the last occurrence of character c
 * Used to replicate proper C declaration for function pointers
 *
 * @param sb buffer to modify
 * @param s string to insert
 * @param c character searched from the end of the buffer
 * @return true if s is empty or the buffer was updated, false if c does not
 *         occur in the buffer, on allocation failure, or if updating sb fails
 */
static bool strbuf_rev_prepend_char(RStrBuf *sb, const char *s, int c) {
	r_return_val_if_fail (sb && s, false);
	const size_t l = strlen (s);
	// fast path if no chars to insert
	if (l == 0) {
		return true;
	}
	char *sb_str = sb->ptr ? sb->ptr : sb->buf;
	// locate the pivot before allocating, so the "not found" path owns
	// nothing and cannot free the scratch buffer twice
	char *pivot = strrchr (sb_str, c);
	if (!pivot) {
		return false;
	}
	const size_t idx = pivot - sb_str;
	const size_t newlen = l + sb->len;
	char *ns = malloc (newlen + 1);
	if (!ns) {
		return false;
	}
	// head of the buffer, then the inserted text, then the tail starting at the pivot
	memcpy (ns, sb_str, idx);
	memcpy (ns + idx, s, l);
	memcpy (ns + idx + l, sb_str + idx, sb->len - idx);
	ns[newlen] = 0;
	const bool ret = r_strbuf_set (sb, ns);
	free (ns);
	return ret;
}
/**
 * @brief Pasted from r_strbuf_*
 * Inserts string s right after the first occurrence of the substring needle
 * Used to replicate proper C declaration for function pointers
 *
 * @param sb buffer to modify
 * @param s string to insert
 * @param needle substring searched from the start of the buffer
 * @return true if s is empty or the buffer was updated, false if needle does
 *         not occur in the buffer, on allocation failure, or if updating sb fails
 */
static bool strbuf_rev_append_char(RStrBuf *sb, const char *s, const char *needle) {
	// needle is handed to strstr () and strlen (), so it must not be NULL either
	r_return_val_if_fail (sb && s && needle, false);
	const size_t l = strlen (s);
	// fast path if no chars to insert
	if (l == 0) {
		return true;
	}
	char *sb_str = sb->ptr ? sb->ptr : sb->buf;
	char *pivot = strstr (sb_str, needle);
	if (!pivot) {
		return false;
	}
	// insertion point is just past the needle
	pivot += strlen (needle);
	const size_t idx = pivot - sb_str;
	const size_t newlen = l + sb->len;
	char *ns = malloc (newlen + 1);
	if (!ns) {
		return false;
	}
	memcpy (ns, sb_str, idx);
	memcpy (ns + idx, s, l);
	memcpy (ns + idx + l, sb_str + idx, sb->len - idx);
	ns[newlen] = 0;
	const bool ret = r_strbuf_set (sb, ns);
	free (ns);
	return ret;
}
/* Builds a placeholder type name of the form "type_0x<hex offset>".
 * The string comes from r_str_newf (); callers in this file free () it. */
static inline char *create_type_name_from_offset(ut64 offset) {
	char *generated = r_str_newf ("type_0x%" PFMT64x, offset);
	return generated;
}
/**
 * @brief Get a heap-allocated name for the DIE
 *
 * A DIE without a DW_AT_name attribute yields NULL. When the attribute is
 * present but is not a string (or carries no content), a unique name derived
 * from the DIE offset is generated instead.
 *
 * @param die
 * @return char* copy of the name, generated name, or NULL; caller frees
 */
static char *get_die_name(const RBinDwarfDie *die) {
	const st32 pos = find_attr_idx (die, DW_AT_name);
	if (pos < 0 || pos >= die->count) {
		return NULL;
	}
	const RBinDwarfAttrValue *attr = &die->attr_values[pos];
	const bool has_text = attr->kind == DW_AT_KIND_STRING && attr->string.content;
	if (!has_text) {
		return create_type_name_from_offset (die->offset);
	}
	return strdup (attr->string.content);
}
/**
 * @brief Get the DIE size in bits
 *
 * DW_AT_byte_size takes precedence (scaled by CHAR_BIT); DW_AT_bit_size is
 * only consulted when no byte size is present.
 *
 * @param die
 * @return ut64 size in bits or 0 if neither attribute is found
 */
static ut64 get_die_size(const RBinDwarfDie *die) {
	const st32 bytes_pos = find_attr_idx (die, DW_AT_byte_size);
	if (bytes_pos != -1) {
		return die->attr_values[bytes_pos].uconstant * CHAR_BIT;
	}
	const st32 bits_pos = find_attr_idx (die, DW_AT_bit_size);
	if (bits_pos != -1) {
		return die->attr_values[bits_pos].uconstant;
	}
	return 0;
}
/**
 * @brief Appends the dimensions of an array type entry to strbuf
 *
 * Walks the children of the array DIE and emits one "[N]" per bound
 * attribute found on its direct DW_TAG_subrange_type children.
 * Does nothing when idx is out of range or the DIE has no children.
 *
 * @param ctx
 * @param idx index of the current entry
 * @param strbuf strbuf to store the type into
 */
static void parse_array_type(Context *ctx, int idx, RStrBuf *strbuf) {
	if (idx < 0 || idx >= ctx->count) {
		return;
	}
	const RBinDwarfDie *die = &ctx->all_dies[idx];
	if (!die->has_children) {
		return;
	}
	int child_depth = 1;
	size_t j;
	// children start right after the parent entry
	for (j = (size_t)idx + 1; child_depth > 0 && j < ctx->count; j++) {
		const RBinDwarfDie *child_die = &ctx->all_dies[j];
		// right now we skip non direct descendants of the array entry
		if (child_depth == 1 && child_die->tag == DW_TAG_subrange_type) {
			size_t i;
			for (i = 0; i < child_die->count; i++) {
				const RBinDwarfAttrValue *value = &child_die->attr_values[i];
				switch (value->attr_name) {
				case DW_AT_upper_bound:
					// upper bound is the last valid index, hence one more element
					r_strbuf_appendf (strbuf, "[%" PFMT64d "]", value->uconstant + 1);
					break;
				case DW_AT_count:
					// count already is the number of elements
					r_strbuf_appendf (strbuf, "[%" PFMT64d "]", value->uconstant);
					break;
				default:
					break;
				}
			}
		}
		if (child_die->has_children) {
			child_depth++;
		}
		// sibling list is terminated by null entry
		if (child_die->abbrev_code == 0) {
			child_depth--;
		}
	}
}
/**
 * @brief Recursively parses type entry of a certain offset into strbuf
 *        saves type size into *size
 *
 * @param ctx
 * @param offset offset of the type entry
 * @param strbuf string to store the type into
 * @param size ptr to size of a type to fill up (can be NULL if unwanted);
 *        only written while it still holds 0, so the first DIE in the chain
 *        that reports a size wins
 * @param visited set of offsets already seen in this chain, used to stop
 *        reference cycles; external callers pass NULL and the outermost call
 *        creates and destroys the set itself
 * @return st32 -1 if error (unknown offset, allocation failure or reference
 *         cycle) else DW_TAG of the entry
 *
 * TODO make cache for type entries, one type is usually referenced
 * multiple times which means it's parsed multiple times instead of once
 */
static st32 parse_type(Context *ctx, const ut64 offset, RStrBuf *strbuf, ut64 *size, HtUP **visited) {
	r_return_val_if_fail (strbuf, -1);
	RBinDwarfDie *die = ht_up_find (ctx->die_map, offset, NULL);
	if (!die) {
		return -1;
	}
	// The outermost call owns the visited set. It lives in a local of this
	// frame, which outlasts every nested call, so no heap cell is needed to
	// hold the pointer.
	SetU *own_set = NULL;
	if (!visited) {
		own_set = set_u_new ();
		if (!own_set) {
			return -1;
		}
		visited = &own_set;
	}
	// a freshly created set is empty, so only nested calls can hit a cycle
	if (!own_set && set_u_contains (*visited, offset)) {
		R_LOG_WARN ("anal.dwarf.parse_type: infinite recursion detected");
		return -1;
	}
	set_u_add (*visited, offset);
	st32 type_idx;
	st32 tag;
	char *name = NULL;
	// get size of first type DIE that has size
	if (size && *size == 0) {
		*size = get_die_size (die);
	}
	switch (die->tag) {
	// this should be recursive search for the type until you find base/user defined type
	case DW_TAG_pointer_type:
		type_idx = find_attr_idx (die, DW_AT_type);
		if (type_idx == -1) {
			r_strbuf_append (strbuf, "void");
			r_strbuf_append (strbuf, " *");
		} else {
			tag = parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size, visited);
			if (tag == DW_TAG_subroutine_type) {
				// pointer to function: "ret (*)(...)"
				strbuf_rev_prepend_char (strbuf, "(*)", '(');
			} else if (tag == DW_TAG_pointer_type) {
				// pointer to pointer: extend an existing "(*" or the last '*'
				if (!strbuf_rev_append_char (strbuf, "*", "(*")) {
					strbuf_rev_prepend_char (strbuf, "*", '*');
				}
			} else {
				r_strbuf_append (strbuf, " *");
			}
		}
		break;
	// We won't parse them as a complete type, because that will already be done
	// so just a name now
	case DW_TAG_typedef:
	case DW_TAG_base_type:
	case DW_TAG_structure_type:
	case DW_TAG_enumeration_type:
	case DW_TAG_union_type:
	case DW_TAG_class_type:
		name = get_die_name (die);
		if (name) {
			r_strbuf_append (strbuf, name);
			free (name);
		}
		break;
	case DW_TAG_subroutine_type:
		type_idx = find_attr_idx (die, DW_AT_type);
		if (type_idx == -1) {
			r_strbuf_append (strbuf, "void");
		} else {
			parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size, visited);
		}
		// parameters are not emitted yet, only an empty parameter list
		r_strbuf_append (strbuf, " (");
		r_strbuf_append (strbuf, ")");
		break;
	case DW_TAG_array_type:
		type_idx = find_attr_idx (die, DW_AT_type);
		if (type_idx != -1) {
			parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size, visited);
		}
		// the DIE's position in all_dies is what parse_array_type () indexes by
		parse_array_type (ctx, die - ctx->all_dies, strbuf);
		break;
	case DW_TAG_const_type:
		type_idx = find_attr_idx (die, DW_AT_type);
		if (type_idx != -1) {
			parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size, visited);
		}
		r_strbuf_append (strbuf, " const");
		break;
	case DW_TAG_volatile_type:
		type_idx = find_attr_idx (die, DW_AT_type);
		if (type_idx != -1) {
			parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size, visited);
		}
		r_strbuf_append (strbuf, " volatile");
		break;
	case DW_TAG_restrict_type:
		type_idx = find_attr_idx (die, DW_AT_type);
		if (type_idx != -1) {
			parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size, visited);
		}
		r_strbuf_append (strbuf, " restrict");
		break;
	case DW_TAG_rvalue_reference_type:
		type_idx = find_attr_idx (die, DW_AT_type);
		if (type_idx != -1) {
			parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size, visited);
		}
		r_strbuf_append (strbuf, " &&");
		break;
	case DW_TAG_reference_type:
		type_idx = find_attr_idx (die, DW_AT_type);
		if (type_idx != -1) {
			parse_type (ctx, die->attr_values[type_idx].reference, strbuf, size, visited);
		}
		r_strbuf_append (strbuf, " &");
		break;
	default:
		break;
	}
	if (own_set) {
		set_u_free (own_set);
	}
	return (st32)die->tag;
}
/**
 * @brief Parses structured entry into *result RAnalStructMember
 * http://www.dwarfstd.org/doc/DWARF4.pdf#page=102&zoom=100,0,0
 *
 * On success the name and type strings stored in *result are heap
 * allocated; on failure *result is left untouched.
 *
 * @param ctx
 * @param idx index of the current entry
 * @param result ptr to result member to fill up
 * @return RAnalStructMember* result, or NULL when the name cannot be
 *         obtained or the type string comes back missing/empty
 */
static RAnalStructMember *parse_struct_member(Context *ctx, ut64 idx, RAnalStructMember *result) {
	r_return_val_if_fail (result, NULL);
	const RBinDwarfDie *die = &ctx->all_dies[idx];
	char *member_name = NULL;
	char *member_type = NULL;
	ut64 member_offset = 0;
	ut64 member_bits = 0;
	RStrBuf type_buf;
	r_strbuf_init (&type_buf);
	size_t n;
	for (n = 0; n < die->count; n++) {
		RBinDwarfAttrValue *attr = &die->attr_values[n];
		switch (attr->attr_name) {
		case DW_AT_name:
			free (member_name);
			member_name = get_die_name (die);
			if (!member_name) {
				goto fail;
			}
			break;
		case DW_AT_type:
			parse_type (ctx, attr->reference, &type_buf, &member_bits, NULL);
			free (member_type);
			member_type = r_strbuf_drain_nofree (&type_buf);
			if (!member_type || !*member_type) {
				goto fail;
			}
			break;
		case DW_AT_data_member_location:
			// Either an integer byte offset from the start of the
			// containing entity, or a location description; only the
			// integer form is handled here.
			// http://www.dwarfstd.org/doc/DWARF4.pdf#page=39&zoom=100,0,0
			member_offset = attr->uconstant;
			break;
		case DW_AT_byte_size:
			// the member's own size overrides the size of its type
			member_bits = attr->uconstant * CHAR_BIT;
			break;
		case DW_AT_bit_size:
			member_bits = attr->uconstant;
			break;
		default:
			// DW_AT_accessibility, DW_AT_mutable, DW_AT_data_bit_offset,
			// DW_AT_containing_type and everything else are ignored
			break;
		}
	}
	result->name = member_name;
	result->type = member_type;
	result->offset = member_offset;
	result->size = member_bits;
	return result;
fail:
	free (member_name);
	free (member_type);
	return NULL;
}
/**
 * @brief Parses enum entry into *result RAnalEnumCase
 * http://www.dwarfstd.org/doc/DWARF4.pdf#page=110&zoom=100,0,0
 *
 * On success result->name is heap allocated (or NULL when the entry has no
 * DW_AT_name); on failure *result is left untouched.
 *
 * @param ctx
 * @param idx index of the current entry
 * @param result ptr to result case to fill up
 * @return RAnalEnumCase* result, or NULL if the name cannot be obtained
 */
static RAnalEnumCase *parse_enumerator(Context *ctx, ut64 idx, RAnalEnumCase *result) {
	r_return_val_if_fail (result, NULL);
	const RBinDwarfDie *die = &ctx->all_dies[idx];
	char *name = NULL;
	int val = 0;
	size_t i;
	// Enumerator has DW_AT_name and DW_AT_const_value
	for (i = 0; i < die->count; i++) {
		RBinDwarfAttrValue *value = &die->attr_values[i];
		switch (value->attr_name) {
		case DW_AT_name:
			// a repeated name attribute must not leak the previous copy
			free (name);
			name = get_die_name (die);
			if (!name) {
				goto cleanup;
			}
			break;
		case DW_AT_const_value:
			// ?? can be block, sdata, data, string w/e
			// TODO solve the encoding, it is unclear which union member holds the value
			val = (int)value->uconstant;
			break;
		default:
			break;
		}
	}
	result->name = name;
	result->val = val;
	return result;
cleanup:
	free (name);
	return NULL;
}
/**
 * @brief Parses a structured entry (structs, classes, unions) into
 *        RAnalBaseType and saves it using r_anal_save_base_type ()
 *
 * Nothing is saved when the entry has no usable name, when a referenced
 * declaration cannot be resolved, or when any direct member fails to parse.
 *
 * http://www.dwarfstd.org/doc/DWARF4.pdf#page=102&zoom=100,0,0
 *
 * @param ctx
 * @param idx index of the current entry
 */
static void parse_structure_type(Context *ctx, ut64 idx) {
	const RBinDwarfDie *die = &ctx->all_dies[idx];
	const RAnalBaseTypeKind kind = (die->tag == DW_TAG_union_type)
		? R_ANAL_BASE_TYPE_KIND_UNION
		: R_ANAL_BASE_TYPE_KIND_STRUCT;
	RAnalBaseType *base_type = r_anal_base_type_new (kind);
	if (!base_type) {
		return;
	}
	base_type->name = get_die_name (die);
	if (!base_type->name) {
		goto cleanup;
	}
	// if it is definition of previous declaration (TODO Fix, big ugly hotfix addition)
	st32 spec_attr_idx = find_attr_idx (die, DW_AT_specification);
	if (spec_attr_idx != -1) {
		RBinDwarfDie *decl_die = ht_up_find (ctx->die_map, die->attr_values[spec_attr_idx].reference, NULL);
		if (!decl_die) {
			goto cleanup;
		}
		st32 name_attr_idx = find_attr_idx (decl_die, DW_AT_name);
		if (name_attr_idx != -1) {
			// prefer the name carried by the declaration
			free (base_type->name);
			base_type->name = get_die_name (decl_die);
			if (!base_type->name) {
				// never save a type without a name
				goto cleanup;
			}
		}
	}
	base_type->size = get_die_size (die);
	RAnalStructMember member = {0};
	// Parse out all members, can this in someway be extracted to a function?
	if (die->has_children) {
		int child_depth = 1; // Direct children of the node
		size_t j;
		idx++; // Move to the first children node
		for (j = idx; child_depth > 0 && j < ctx->count; j++) {
			const RBinDwarfDie *child_die = &ctx->all_dies[j];
			// we take only direct descendants of the structure
			// can be also DW_TAG_subprogram for class methods or tag for templates
			if (child_depth == 1 && child_die->tag == DW_TAG_member) {
				RAnalStructMember *result = parse_struct_member (ctx, j, &member);
				if (!result) {
					goto cleanup;
				}
				void *element = r_vector_push (&base_type->struct_data.members, &member);
				if (!element) {
					// the vector did not take the member, so its strings are still ours
					free (result->name);
					free (result->type);
					goto cleanup;
				}
			}
			if (child_die->has_children) {
				child_depth++;
			}
			if (child_die->abbrev_code == 0) { // siblings terminator
				child_depth--;
			}
		}
	}
	r_anal_save_base_type (ctx->anal, base_type);
cleanup:
	r_anal_base_type_free (base_type);
}
/**
 * @brief Parses an enum entry into RAnalBaseType and saves it
 *        using r_anal_save_base_type ()
 *
 * Nothing is saved when the entry has no usable name or when one of its
 * direct DW_TAG_enumerator children cannot be parsed or stored.
 *
 * @param ctx
 * @param idx index of the current entry
 */
static void parse_enum_type(Context *ctx, ut64 idx) {
	const RBinDwarfDie *die = &ctx->all_dies[idx];
	RAnalBaseType *enum_type = r_anal_base_type_new (R_ANAL_BASE_TYPE_KIND_ENUM);
	if (!enum_type) {
		return;
	}
	enum_type->name = get_die_name (die);
	if (!enum_type->name) {
		goto done;
	}
	enum_type->size = get_die_size (die);
	st32 underlying_pos = find_attr_idx (die, DW_AT_type);
	if (underlying_pos != -1) {
		// textual form of the underlying type; may also fill in the size
		RStrBuf type_buf;
		r_strbuf_init (&type_buf);
		parse_type (ctx, die->attr_values[underlying_pos].reference, &type_buf, &enum_type->size, NULL);
		enum_type->type = r_strbuf_drain_nofree (&type_buf);
	}
	RAnalEnumCase entry;
	if (die->has_children) {
		int depth = 1; // 1 means direct children of the enum
		size_t pos = idx + 1; // first child follows its parent
		while (depth > 0 && pos < ctx->count) {
			const RBinDwarfDie *child = &ctx->all_dies[pos];
			// only direct descendants are enum cases
			if (depth == 1 && child->tag == DW_TAG_enumerator) {
				RAnalEnumCase *parsed = parse_enumerator (ctx, pos, &entry);
				if (!parsed) {
					goto done;
				}
				if (!r_vector_push (&enum_type->enum_data.cases, &entry)) {
					enum_type_case_free (parsed, NULL);
					goto done;
				}
			}
			if (child->has_children) {
				depth++;
			}
			// a null entry terminates the sibling list
			if (child->abbrev_code == 0) {
				depth--;
			}
			pos++;
		}
	}
	r_anal_save_base_type (ctx->anal, enum_type);
done:
	r_anal_base_type_free (enum_type);
}
/**
 * @brief Parses a typedef entry into RAnalBaseType and saves it
 *        using r_anal_save_base_type ()
 *
 * Entries without a name are skipped.
 *
 * http://www.dwarfstd.org/doc/DWARF4.pdf#page=96&zoom=100,0,0
 *
 * @param ctx
 * @param idx index of the current entry
 */
static void parse_typedef(Context *ctx, ut64 idx) {
	const RBinDwarfDie *die = &ctx->all_dies[idx];
	char *alias = NULL;
	char *target = NULL;
	ut64 bits = 0;
	RStrBuf type_buf;
	r_strbuf_init (&type_buf);
	size_t n;
	for (n = 0; n < die->count; n++) {
		RBinDwarfAttrValue *attr = &die->attr_values[n];
		if (attr->attr_name == DW_AT_name) {
			free (alias);
			alias = get_die_name (die);
			if (!alias) {
				goto out;
			}
		} else if (attr->attr_name == DW_AT_type) {
			parse_type (ctx, attr->reference, &type_buf, &bits, NULL);
			free (target);
			target = r_strbuf_drain_nofree (&type_buf);
			if (!target) {
				goto out;
			}
		}
	}
	if (alias) { // type has to have a name for now
		RAnalBaseType *td = r_anal_base_type_new (R_ANAL_BASE_TYPE_KIND_TYPEDEF);
		if (td) {
			td->name = alias;
			td->type = target;
			r_anal_save_base_type (ctx->anal, td);
			r_anal_base_type_free (td);
			// both strings now belonged to td, nothing left to release below
			alias = NULL;
			target = NULL;
		}
	}
out:
	free (alias);
	free (target);
	r_strbuf_fini (&type_buf);
}
/**
 * @brief Parses a base type entry into an atomic RAnalBaseType and saves it
 *        using r_anal_save_base_type ()
 *
 * The entry is skipped when it carries no name or when its DW_AT_name
 * attribute is not a string.
 *
 * @param ctx
 * @param idx index of the current entry
 */
static void parse_atomic_type(Context *ctx, ut64 idx) {
	const RBinDwarfDie *die = &ctx->all_dies[idx];
	char *type_name = NULL;
	ut64 bits = 0;
	size_t n;
	// TODO support endianness and encoding in future?
	for (n = 0; n < die->count; n++) {
		RBinDwarfAttrValue *attr = &die->attr_values[n];
		switch (attr->attr_name) {
		case DW_AT_name:
			R_FREE (type_name);
			if (attr->kind == DW_AT_KIND_STRING) {
				type_name = attr->string.content
					? strdup (attr->string.content)
					: create_type_name_from_offset (die->offset);
			}
			if (!type_name) {
				return;
			}
			break;
		case DW_AT_byte_size:
			bits = attr->uconstant * CHAR_BIT;
			break;
		case DW_AT_bit_size:
			bits = attr->uconstant;
			break;
		default:
			// DW_AT_encoding and anything else is ignored
			break;
		}
	}
	if (!type_name) { // type has to have a name for now
		return;
	}
	RAnalBaseType *atomic = r_anal_base_type_new (R_ANAL_BASE_TYPE_KIND_ATOMIC);
	if (!atomic) {
		free (type_name);
		return;
	}
	atomic->name = type_name;
	atomic->size = bits;
	r_anal_save_base_type (ctx->anal, atomic);
	r_anal_base_type_free (atomic);
}
/* Returns the string stored in the DIE's DW_AT_linkage_name attribute or,
 * failing that, in its DW_AT_name attribute; NULL when neither provides one.
 * The pointer refers to the attribute's own storage and is not a copy. */
static const char *get_specification_die_name(const RBinDwarfDie *die) {
	const st32 wanted[] = { DW_AT_linkage_name, DW_AT_name };
	size_t n;
	for (n = 0; n < sizeof (wanted) / sizeof (wanted[0]); n++) {
		const st32 pos = find_attr_idx (die, wanted[n]);
		if (pos == -1) {
			continue;
		}
		const char *text = die->attr_values[pos].string.content;
		if (text) {
			return text;
		}
	}
	return NULL;
}
/* Appends the textual form of the DIE's DW_AT_type to ret_type;
 * leaves ret_type alone when the DIE has no such attribute. */
static void get_spec_die_type(Context *ctx, RBinDwarfDie *die, RStrBuf *ret_type) {
	const st32 type_pos = find_attr_idx (die, DW_AT_type);
	if (type_pos == -1) {
		return;
	}
	ut64 ignored_size = 0;
	parse_type (ctx, die->attr_values[type_pos].reference, ret_type, &ignored_size, NULL);
}
/* Tells whether the linkage name should win over the plain name.
 * It is the more informative one for languages like C++, but for Rust
 * (and Ada) it is rubbish and the plain name is fine. An unknown (NULL)
 * language also keeps the plain name. */
static bool prefer_linkage_name(const char *lang) {
	if (!lang) {
		return false;
	}
	const bool plain_name_lang = strcmp (lang, "rust") == 0 || strcmp (lang, "ada") == 0;
	return !plain_name_lang;
}
/* Resolves the DIE at offset and pulls the function's name and return type
 * out of it: the type text is appended to type, *name is pointed at the
 * attribute's string (not a copy). Does nothing when offset is unknown.
 * NOTE(review): a linkage name attribute always overwrites *name, even for
 * languages where prefer_linkage_name () says otherwise - confirm intent. */
static void parse_abstract_origin(Context *ctx, ut64 offset, RStrBuf *type, const char **name) {
	RBinDwarfDie *origin = ht_up_find (ctx->die_map, offset, NULL);
	if (!origin) {
		return;
	}
	ut64 ignored_size = 0;
	bool seen_linkage = false;
	const bool want_linkage = prefer_linkage_name (ctx->lang);
	size_t n;
	for (n = 0; n < origin->count; n++) {
		const RBinDwarfAttrValue *attr = &origin->attr_values[n];
		switch (attr->attr_name) {
		case DW_AT_name:
			// keep an already found linkage name when it is the preferred one
			if (!(want_linkage && seen_linkage)) {
				*name = attr->string.content;
			}
			break;
		case DW_AT_linkage_name:
		case DW_AT_MIPS_linkage_name:
			*name = attr->string.content;
			seen_linkage = true;
			break;
		case DW_AT_type:
			parse_type (ctx, attr->reference, type, &ignored_size, NULL);
			break;
		default:
			break;
		}
	}
}
/* x86_64 https://software.intel.com/sites/default/files/article/402129/mpx-linux64-abi.pdf */
// https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf
/* Maps a DWARF register number to the x86-64 register name (string literal).
 * *kind becomes LOCATION_BP for rbp, LOCATION_SP for rsp, LOCATION_REGISTER
 * for every other known number and LOCATION_UNKNOWN (with the name
 * "unsupported_reg") for numbers missing from the table. */
static const char *map_dwarf_reg_to_x86_64_reg(ut64 reg_num, VariableLocationKind *kind) {
	*kind = LOCATION_REGISTER;
	switch (reg_num) {
	case 0: return "rax";
	case 1: return "rdx";
	case 2: return "rcx";
	case 3: return "rbx";
	case 4: return "rsi";
	case 5: return "rdi";
	case 6:
		*kind = LOCATION_BP;
		return "rbp";
	case 7:
		*kind = LOCATION_SP;
		return "rsp";
	case 8: return "r8";
	case 9: return "r9";
	case 10: return "r10";
	case 11: return "r11";
	case 12: return "r12";
	case 13: return "r13";
	case 14: return "r14";
	case 15: return "r15";
	case 16: return "reserved"; // return address
	case 17: return "xmm0";
	case 18: return "xmm1";
	case 19: return "xmm2";
	case 20: return "xmm3";
	case 21: return "xmm4";
	case 22: return "xmm5";
	case 23: return "xmm6";
	case 24: return "xmm7";
	case 25: return "xmm8";
	case 26: return "xmm9";
	case 27: return "xmm10";
	case 28: return "xmm11";
	case 29: return "xmm12";
	case 30: return "xmm13";
	case 31: return "xmm14";
	case 32: return "xmm15";
	case 33: return "st0";
	case 34: return "st1";
	case 35: return "st2";
	case 36: return "st3";
	case 37: return "st4";
	case 38: return "st5";
	case 39: return "st6";
	case 40: return "st7";
	// the psABI assigns 41-48 to the eight MMX registers mm0-mm7
	case 41: return "mm0";
	case 42: return "mm1";
	case 43: return "mm2";
	case 44: return "mm3";
	case 45: return "mm4";
	case 46: return "mm5";
	case 47: return "mm6";
	case 48: return "mm7";
	case 49: return "rflags";
	case 50: return "es";
	case 51: return "cs";
	case 52: return "ss";
	case 53: return "ds";
	case 54: return "fs";
	case 55: return "gs";
	// 56-57 are not mapped
	case 58: return "fs.base";
	case 59: return "gs.base";
	// 60-61 are not mapped
	case 62: return "tr";
	case 63: return "ldtr";
	case 64: return "mxcsr";
	case 65: return "fcw";
	case 66: return "fsw";
	case 67: return "xmm16";
	case 68: return "xmm17";
	case 69: return "xmm18";
	case 70: return "xmm19";
	case 71: return "xmm20";
	case 72: return "xmm21";
	case 73: return "xmm22";
	case 74: return "xmm23";
	case 75: return "xmm24";
	case 76: return "xmm25";
	case 77: return "xmm26";
	case 78: return "xmm27";
	case 79: return "xmm28";
	case 80: return "xmm29";
	case 81: return "xmm30";
	case 82: return "xmm31";
	// 83-117 are not mapped
	case 118: return "k0";
	case 119: return "k1";
	case 120: return "k2";
	case 121: return "k3";
	case 122: return "k4";
	case 123: return "k5";
	case 124: return "k6";
	case 125: return "k7";
	default:
		*kind = LOCATION_UNKNOWN;
		return "unsupported_reg";
	}
}
/* Maps a DWARF register number to the AArch64 register name (string literal).
 * Numbers 0-39 are known and reported as LOCATION_REGISTER; anything above
 * yields "unk" with LOCATION_UNKNOWN.
 *
 * ARM64 - dwarf register mapping
 *   0-30   X0-X30         64-bit general registers (Note 1)
 *   31     SP             64-bit stack pointer
 *   32     PC             64-bit program counter (Note 9)
 *   33     ELR_mode       The current mode exception link register
 *   34     RA_SIGN_STATE  Return address signed state pseudo-register (Note 8)
 *   35     TPIDRRO_ELO    EL0 Read-Only Software Thread ID register
 *   36     TPIDR_ELO      EL0 Read/Write Software Thread ID register
 *   37     TPIDR_EL1      EL1 Software Thread ID register
 *   38     TPIDR_EL2      EL2 Software Thread ID register
 *   39     TPIDR_EL3      EL3 Software Thread ID register
 *   40-45  Reserved       -
 *   46     VG (Beta)      64-bit SVE vector granule pseudo-register (Note 2, Note 3)
 *   47     FFR (Beta)     VG x 8-bit SVE first fault register (Note 4)
 *   48-63  P0-P15 (Beta)  VG x 8-bit SVE predicate registers (Note 4)
 *   64-95  V0-V31         128-bit FP/Advanced SIMD registers (Note 5, Note 7)
 *   96-127 Z0-Z31 (Beta)  VG x 64-bit SVE vector registers (Note 6, Note 7)
 */
static const char *map_dwarf_reg_to_arm64_reg(ut64 reg_num, VariableLocationKind *kind) {
	static const char *const names[] = {
		"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
		"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
		"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
		"x24", "x25", "x26", "x27", "x28", "x29", "x30",
		"sp", "pc", "elr_mode", "rasign_state",
		"tpidrr0_el0", "tpidr_el0", "tpidr_el1", "tpidr_el2", "tpidr_el3",
	};
	if (reg_num < sizeof (names) / sizeof (names[0])) {
		*kind = LOCATION_REGISTER;
		return names[reg_num];
	}
	*kind = LOCATION_UNKNOWN;
	return "unk"; // please complete the list :___
}
/* Maps a DWARF register number to the V850 register name (string literal).
 * 0-31 are r0-r31 and 32 is pc, all reported as LOCATION_REGISTER; any other
 * number is logged and yields "unsupported_reg" with LOCATION_UNKNOWN. */
static const char *map_dwarf_reg_to_v850_reg(ut64 reg_num, VariableLocationKind *kind) {
	static const char *const names[] = {
		"r0", // wired to ground so it may shift
		"r1", "r2", "r3", "r4", "r5", "r6", "r7",
		"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
		"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
		"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
		"pc", // uhm
	};
	if (reg_num < sizeof (names) / sizeof (names[0])) {
		*kind = LOCATION_REGISTER;
		return names[reg_num];
	}
	R_LOG_WARN ("Unhandled dwarf register reference number %d", (int)reg_num);
	*kind = LOCATION_UNKNOWN;
	return "unsupported_reg";
}
/* x86 https://01.org/sites/default/files/file_attach/intel386-psabi-1.0.pdf */
/* Maps a DWARF register number to the 32-bit x86 register name (string
 * literal). *kind becomes LOCATION_SP for esp, LOCATION_BP for ebp and
 * LOCATION_REGISTER for every other known number; unknown numbers are logged
 * and yield "unsupported_reg" with LOCATION_UNKNOWN. */
static const char *map_dwarf_reg_to_x86_reg(ut64 reg_num, VariableLocationKind *kind) {
	// gaps in the table (37-39, 46-47) stay NULL and count as unknown
	static const char *const names[] = {
		[0] = "eax", [1] = "edx", [2] = "ecx", [3] = "ebx",
		[4] = "esp", [5] = "ebp", [6] = "esi", [7] = "edi",
		[8] = "eip", // return address register, should not be used
		[9] = "eflags",
		[10] = "reserved",
		[11] = "st0", [12] = "st1", [13] = "st2", [14] = "st3",
		[15] = "st4", [16] = "st5", [17] = "st6", [18] = "st7",
		[19] = "reserved", [20] = "reserved",
		[21] = "xmm0", [22] = "xmm1", [23] = "xmm2", [24] = "xmm3",
		[25] = "xmm4", [26] = "xmm5", [27] = "xmm6", [28] = "xmm7",
		[29] = "mm0", [30] = "mm1", [31] = "mm2", [32] = "mm3",
		[33] = "mm4", [34] = "mm5", [35] = "mm6", [36] = "mm7",
		[40] = "es", [41] = "cs", [42] = "ss",
		[43] = "ds", [44] = "fs", [45] = "gs",
		[48] = "tr", [49] = "ldtr",
	};
	const char *reg = NULL;
	if (reg_num < sizeof (names) / sizeof (names[0])) {
		reg = names[reg_num];
	}
	if (!reg) {
		R_LOG_WARN ("Unhandled dwarf register reference number %d", (int)reg_num);
		*kind = LOCATION_UNKNOWN;
		return "unsupported_reg";
	}
	if (reg_num == 4) {
		*kind = LOCATION_SP;
	} else if (reg_num == 5) {
		*kind = LOCATION_BP;
	} else {
		*kind = LOCATION_REGISTER;
	}
	return reg;
}
/* https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html#DW-REG */
/* Maps a DWARF register number to the PPC64 register name (string literal).
 * Only r0-r31 are known; r1 is reported as LOCATION_SP, the rest as
 * LOCATION_REGISTER. Any other number trips r_warn_if_reached () and yields
 * "unsupported_reg" with LOCATION_UNKNOWN. */
static const char *map_dwarf_reg_to_ppc64_reg(ut64 reg_num, VariableLocationKind *kind) {
	static const char *const names[] = {
		"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
		"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
		"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
		"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
	};
	if (reg_num >= sizeof (names) / sizeof (names[0])) {
		r_warn_if_reached ();
		*kind = LOCATION_UNKNOWN;
		return "unsupported_reg";
	}
	*kind = (reg_num == 1) ? LOCATION_SP : LOCATION_REGISTER;
	return names[reg_num];
}
/* returns string literal register name!
 * Dispatches on arch/bits to the matching map_dwarf_reg_to_*_reg () helper:
 * x86 (64 or any other width), arm (64 only), v850, ppc (64 only).
 * For anything else, including a NULL arch, a warning is logged, *kind is
 * set to LOCATION_UNKNOWN and "unsupported_reg" is returned.
 * TODO add more arches */
static const char *get_dwarf_reg_name(const char *arch, int reg_num, VariableLocationKind *kind, int bits) {
	// never hand a NULL pointer to a %s conversion
	const char *arch_label = arch ? arch : "(null)";
	R_LOG_DEBUG ("get_dwarf_reg_name %s %d", arch_label, bits);
	if (arch) {
		if (!strcmp (arch, "x86")) {
			if (bits == 64) {
				return map_dwarf_reg_to_x86_64_reg (reg_num, kind);
			}
			return map_dwarf_reg_to_x86_reg (reg_num, kind);
		}
		if (!strcmp (arch, "arm") && bits == 64) {
			return map_dwarf_reg_to_arm64_reg (reg_num, kind);
		}
		if (!strcmp (arch, "v850")) {
			return map_dwarf_reg_to_v850_reg (reg_num, kind);
		}
		if (!strcmp (arch, "ppc") && bits == 64) {
			return map_dwarf_reg_to_ppc64_reg (reg_num, kind);
		}
	}
	// this can be very annoying as it is printed over 9000 times
	R_LOG_WARN ("get_dwarf_reg_name: unsupported arch: '%s' with %d bits", arch_label, bits);
	*kind = LOCATION_UNKNOWN;
	return "unsupported_reg";
}
/* Picks the entry of loc_list covering the widest span (end - start).
 * The first entry wins among equally wide ones; entries with a span of zero
 * are never picked, so NULL is returned when no entry has a non-zero span. */
static RBinDwarfLocRange *find_largest_loc_range(RList *loc_list) {
	RBinDwarfLocRange *best = NULL;
	ut64 best_span = 0;
	RListIter *it;
	RBinDwarfLocRange *cur;
	r_list_foreach (loc_list, it, cur) {
		const ut64 span = cur->end - cur->start;
		if (span <= best_span) {
			continue;
		}
		best_span = span;
		best = cur;
	}
	return best;
}
/* TODO move a lot of the parsing here into dwarf.c and do only processing here */
/**
 * @brief Reduces a DWARF location attribute to one simplified VariableLocation
 *
 * Recognized operations:
 *   regN                 - value is in register N
 *   fbreg <sleb>         - offset from the frame base of the function
 *   bregN <sleb>         - offset from register N
 *   bregx <uleb> <sleb>  - offset from an arbitrary register
 *   addr <addr>          - value is at a fixed address
 *   call_frame_cfa       - treated as a BP based location (see note below)
 * Any other opcode is ignored. The expression is walked linearly and a later
 * recognized operation overwrites what an earlier one stored.
 *
 * @param ctx        Parsing context (location lists, arch, bits, endianness)
 * @param loc        Location attribute; either an inline block or an offset
 *                   into the location lists
 * @param frame_base Frame base attribute used to resolve DW_OP_fbreg, may be NULL
 * @return Newly allocated location owned by the caller, or NULL when the
 *         expression is malformed, unsupported or memory is exhausted
 */
static VariableLocation *parse_dwarf_location(Context *ctx, const RBinDwarfAttrValue *loc, const RBinDwarfAttrValue *frame_base) {
	/* Loclist offset is usually CONSTANT or REFERENCE at older DWARF versions, new one has LocListPtr for that */
	const bool is_list_offset = loc->kind == DW_AT_KIND_LOCLISTPTR
		|| loc->kind == DW_AT_KIND_REFERENCE
		|| loc->kind == DW_AT_KIND_CONSTANT;
	if (!is_list_offset && loc->kind != DW_AT_KIND_BLOCK) {
		return NULL;
	}
	RBinDwarfBlock block;
	if (is_list_offset) {
		RBinDwarfLocList *range_list = ht_up_find (ctx->locations, loc->reference, NULL);
		if (!range_list) { /* for some reason offset isn't there, wrong parsing or malformed dwarf */
			return NULL;
		}
		/* use the largest range as a variable */
		RBinDwarfLocRange *range = find_largest_loc_range (range_list->list);
		if (!range || !range->expression) {
			return NULL;
		}
		/* Very rough and sloppy, refactor this hacked up stuff */
		block = *range->expression;
	} else {
		block = loc->block;
	}
	if (!block.data) {
		return NULL;
	}
	/* every operand read below is bounded by the block that is really being parsed */
	const ut8 *block_end = block.data + block.length;

	VariableLocationKind kind = LOCATION_UNKNOWN;
	st64 offset = 0;
	ut64 address = 0;
	ut64 reg_num = -1;
	const char *reg_name = NULL; /* literal */
	const char *arch = ctx->anal->config->arch;
	const int bits = ctx->anal->config->bits;
	const bool be = ctx->anal->config->big_endian;
	size_t i;
	for (i = 0; i < block.length; i++) {
		const ut8 op = block.data[i];
		if (op == DW_OP_fbreg) {
			/* TODO sometimes CFA is referenced, but we don't parse that yet
			just an offset involving framebase of a function*/
			if (i == block.length - 1) {
				return NULL;
			}
			const ut8 *operand = block.data + i + 1;
			offset = r_sleb128 (&operand, block_end);
			if (!frame_base) {
				/* Might happen if frame_base has a frame_base reference? I don't think it can tho */
				return NULL;
			}
			/* recursive parsing, but frame_base should be only one, but someone
			could make malicious resource exhaustion attack, so a depth counter might be cool? */
			VariableLocation *location = parse_dwarf_location (ctx, frame_base, NULL);
			if (location) {
				location->offset += offset;
			}
			return location;
		} else if (op >= DW_OP_reg0 && op <= DW_OP_reg31) {
			/* Will mostly be used for SP based arguments */
			/* TODO I need to find binaries that uses this so I can test it out*/
			reg_num = op - DW_OP_reg0; // get the reg number
			reg_name = get_dwarf_reg_name (arch, reg_num, &kind, bits);
		} else if (op >= DW_OP_breg0 && op <= DW_OP_breg31) {
			if (i == block.length - 1) {
				return NULL;
			}
			/* The single operand of the DW_OP_bregn operations provides
			signed LEB128 offset from the specified register. */
			reg_num = op - DW_OP_breg0; // get the reg number
			const ut8 *operand = block.data + i + 1;
			offset = r_sleb128 (&operand, block_end);
			/* leave i on the last operand byte, the loop increment moves to the next opcode */
			i = (size_t)(operand - block.data) - 1;
			reg_name = get_dwarf_reg_name (arch, reg_num, &kind, bits);
		} else if (op == DW_OP_bregx) {
			if (i == block.length - 1) {
				return NULL;
			}
			/* 2 operands, reg_number, offset*/
			/* I need to find binaries that uses this so I can test it out*/
			const ut8 *operand = block.data + i + 1;
			operand = r_uleb128 (operand, block_end - operand, &reg_num, NULL);
			if (!operand || operand >= block_end) {
				return NULL; /* no room left for the offset operand */
			}
			offset = r_sleb128 (&operand, block_end);
			/* skip both operands so their bytes are not taken for opcodes */
			i = (size_t)(operand - block.data) - 1;
			reg_name = get_dwarf_reg_name (arch, reg_num, &kind, bits);
		} else if (op == DW_OP_addr) {
			/* The DW_OP_addr operation has a single operand that encodes a machine address and whose
			size is the size of an address on the target machine. */
			const int addr_size = bits / 8;
			const ut8 *operand = &block.data[++i];
			/* malformed, not enough bytes to represent address */
			if (block.length - i < addr_size) {
				return NULL;
			}
			switch (addr_size) {
			case 1:
				address = r_read_ble8 (operand);
				break;
			case 2:
				address = r_read_ble16 (operand, be);
				break;
			case 4:
				address = r_read_ble32 (operand, be);
				break;
			case 8:
				address = r_read_ble64 (operand, be);
				break;
			default:
				r_warn_if_reached (); /* weird addr_size */
				return NULL;
			}
			kind = LOCATION_GLOBAL; // address
			/* step over the address bytes; an address byte such as 0x50 is
			also a valid DW_OP_reg0 opcode and must not be decoded as one */
			i += addr_size - 1;
		} else if (op == DW_OP_call_frame_cfa) {
			// REMOVE XXX
			/* NOTE(review): hard-coded BP+16 approximation of the CFA, kept as is */
			kind = LOCATION_BP;
			offset += 16;
		}
	}
	if (kind == LOCATION_UNKNOWN) {
		return NULL;
	}
	VariableLocation *location = R_NEW0 (VariableLocation);
	if (location) {
		location->reg_name = reg_name;
		location->reg_num = reg_num;
		location->kind = kind;
		location->offset = offset;
		location->address = address;
	}
	return location;
}
/**
 * @brief Collects the parameters and local variables of a function DIE
 *
 * Walks the children of the DIE at idx until its sibling list is closed.
 * Direct formal parameters are appended to args as "type name," pairs (the
 * trailing comma is removed at the end), and every parameter or variable that
 * has enough information is appended to variables.
 *
 * @param ctx       Parsing context
 * @param idx       Index of the function DIE inside ctx->all_dies
 * @param args      Output buffer receiving the comma separated argument list
 * @param variables Output list receiving heap allocated Variable entries,
 *                  which the caller is expected to free
 * @return Always 0
 */
static st32 parse_function_args_and_vars(Context *ctx, ut64 idx, RStrBuf *args, RList/*<Variable*>*/ *variables) {
	const RBinDwarfDie *die = &ctx->all_dies[idx++];
	if (!die->has_children) {
		return 0;
	}
	const bool get_linkage_name = prefer_linkage_name (ctx->lang);
	int child_depth = 1;
	int argNumber = 1;
	size_t j;
	for (j = idx; child_depth > 0 && j < ctx->count; j++) {
		const RBinDwarfDie *child_die = &ctx->all_dies[j];
		const bool is_param = child_die->tag == DW_TAG_formal_parameter;
		if (is_param || child_die->tag == DW_TAG_variable) {
			Variable *var = R_NEW0 (Variable);
			if (!var) {
				break; /* out of memory, keep what was collected so far */
			}
			RStrBuf type;
			r_strbuf_init (&type);
			const char *name = NULL;
			/* tracked per DIE: a linkage name seen on an earlier sibling
			must not hide the DW_AT_name of this one */
			bool has_linkage_name = false;
			size_t i;
			for (i = 0; i < child_die->count; i++) {
				const RBinDwarfAttrValue *val = &child_die->attr_values[i];
				switch (val->attr_name) {
				case DW_AT_name:
					if ((!get_linkage_name || !has_linkage_name) && val->kind == DW_AT_KIND_STRING) {
						name = val->string.content;
					}
					break;
				case DW_AT_linkage_name:
				case DW_AT_MIPS_linkage_name:
					if (val->kind == DW_AT_KIND_STRING) {
						name = val->string.content;
						has_linkage_name = true;
					}
					break;
				case DW_AT_type:
					parse_type (ctx, val->reference, &type, NULL, NULL);
					break;
				// abstract origin is supposed to have omitted information
				case DW_AT_abstract_origin:
					parse_abstract_origin (ctx, val->reference, &type, &name);
					break;
				case DW_AT_location:
					/* drop a previous result if the attribute is repeated */
					free (var->location);
					var->location = parse_dwarf_location (ctx, val, find_attr (die, DW_AT_frame_base));
					break;
				default:
					break;
				}
			}
			if (is_param && child_depth == 1) {
				/* arguments sometimes have only type, create generic argX */
				if (type.len) {
					var->name = name ? strdup (name) : r_str_newf ("arg%d", argNumber);
					r_strbuf_appendf (args, "%s %s,", r_strbuf_get (&type), var->name);
					var->type = strdup (r_strbuf_get (&type));
					r_list_append (variables, var);
				} else {
					variable_free (var);
				}
				argNumber++;
			} else { /* DW_TAG_variable, or a parameter of a nested DIE */
				if (name && type.len) {
					var->name = strdup (name);
					var->type = strdup (r_strbuf_get (&type));
					r_list_append (variables, var);
				} else {
					variable_free (var);
				}
			}
			r_strbuf_fini (&type);
		} else if (child_depth == 1 && child_die->tag == DW_TAG_unspecified_parameters) {
			r_strbuf_append (args, "va_args ...,");
		}
		if (child_die->has_children) {
			child_depth++;
		}
		if (child_die->abbrev_code == 0) { /* sibling list is terminated by null entry */
			child_depth--;
		}
	}
	if (args->len > 0) { /* remove the trailing comma */
		r_strbuf_slice (args, 0, args->len - 1);
	}
	return 0;
}
/**
 * @brief Serializes a parsed function and its variables into the Sdb
 *
 * Keys written, with <s> being the sanitized function name:
 *   <s>              = "fcn"
 *   fcn.<s>.addr     = hexadecimal address
 *   fcn.<s>.name     = unsanitized name
 *   fcn.<s>.sig      = signature
 *   fcn.<s>.var.<v>  = "kind,info,type" for every variable with a known location
 *   fcn.<s>.vars     = comma separated names of the stored variables
 *
 * @param dwarf_fcn Function description to store
 * @param variables List of Variable entries belonging to the function
 * @param sdb       Destination database
 */
static void sdb_save_dwarf_function(Function *dwarf_fcn, RList/*<Variable*>*/ *variables, Sdb *sdb) {
	char *sname = r_str_sanitize_sdb_key (dwarf_fcn->name);
	sdb_set (sdb, sname, "fcn", 0);

	char *entry_key = r_str_newf ("fcn.%s.addr", sname);
	char *entry_val = r_str_newf ("0x%" PFMT64x, dwarf_fcn->addr);
	sdb_set (sdb, entry_key, entry_val, 0);
	free (entry_key);
	free (entry_val);

	/* so we can have name without sanitization */
	entry_key = r_str_newf ("fcn.%s.name", sname);
	entry_val = r_str_newf ("%s", dwarf_fcn->name);
	sdb_set (sdb, entry_key, entry_val, 0);
	free (entry_key);
	free (entry_val);

	entry_key = r_str_newf ("fcn.%s.sig", sname);
	sdb_set (sdb, entry_key, dwarf_fcn->signature, 0);
	free (entry_key);

	RStrBuf stored_names;
	r_strbuf_init (&stored_names);
	RListIter *iter;
	Variable *var;
	r_list_foreach (variables, iter, var) {
		const VariableLocation *where = var->location;
		if (!where) {
			/* NULL location probably means optimized out, maybe put a comment there */
			continue;
		}
		/* value = "storage, additional info based on storage, type" */
		char *record = NULL;
		bool storable = true;
		switch (where->kind) {
		case LOCATION_BP: /* info is the offset */
			record = r_str_newf ("%s,%" PFMT64d ",%s", "b", where->offset, var->type);
			break;
		case LOCATION_SP: /* info is the offset */
			record = r_str_newf ("%s,%" PFMT64d ",%s", "s", where->offset, var->type);
			break;
		case LOCATION_GLOBAL: /* info is the address */
			record = r_str_newf ("%s,%" PFMT64u ",%s", "g", where->address, var->type);
			break;
		case LOCATION_REGISTER: /* info is the register name */
			record = r_str_newf ("%s,%s,%s", "r", where->reg_name, var->type);
			break;
		default:
			/* else location is unknown (optimized out), skip the var */
			R_LOG_DEBUG ("dwarf: unknown variable type");
			storable = false;
			break;
		}
		if (storable) {
			r_strbuf_appendf (&stored_names, "%s,", var->name);
			char *record_key = r_str_newf ("fcn.%s.var.%s", sname, var->name);
			sdb_set (sdb, record_key, record, 0);
			free (record_key);
		}
		free (record);
	}
	if (stored_names.len > 0) { /* remove the extra , */
		r_strbuf_slice (&stored_names, 0, stored_names.len - 1); /* leaks? */
	}
	entry_key = r_str_newf ("fcn.%s.vars", sname);
	entry_val = r_str_newf ("%s", r_strbuf_get (&stored_names));
	sdb_set (sdb, entry_key, entry_val, 0);
	free (entry_key);
	free (entry_val);
	r_strbuf_fini (&stored_names);
	free (sname);
}
/**
 * @brief Parse a function, its arguments and variables, and
 *        save the information into the Sdb
 *
 * Pure declarations (DW_AT_declaration) and functions without both a name
 * and a non-zero address are skipped. The name is passed through the
 * demangler of the bin plugin when one is available.
 *
 * @param ctx
 * @param idx Current entry index
 */
static void parse_function(Context *ctx, ut64 idx) {
	const RBinDwarfDie *die = &ctx->all_dies[idx];
	if (find_attr_idx (die, DW_AT_declaration) != -1) {
		return; /* just declaration skip */
	}
	Function fcn = {0};
	bool has_linkage_name = false;
	const bool get_linkage_name = prefer_linkage_name (ctx->lang);
	RStrBuf ret_type;
	r_strbuf_init (&ret_type);
	size_t i;
	/* For rust binaries prefer regular name not linkage TODO */
	for (i = 0; i < die->count; i++) {
		const RBinDwarfAttrValue *val = &die->attr_values[i];
		switch (val->attr_name) {
		case DW_AT_name:
			/* only string kinds carry a valid string.content */
			if (val->kind == DW_AT_KIND_STRING && (!get_linkage_name || !has_linkage_name)) {
				fcn.name = val->string.content;
			}
			break;
		case DW_AT_linkage_name:
		case DW_AT_MIPS_linkage_name:
			if (val->kind == DW_AT_KIND_STRING) {
				fcn.name = val->string.content;
				has_linkage_name = true;
			}
			break;
		case DW_AT_low_pc:
		case DW_AT_entry_pc:
			fcn.addr = val->address;
			break;
		case DW_AT_specification: /* reference to declaration DIE with more info */
		{
			RBinDwarfDie *spec_die = ht_up_find (ctx->die_map, val->reference, NULL);
			if (spec_die) {
				fcn.name = get_specification_die_name (spec_die); /* I assume that if specification has a name, this DIE hasn't */
				get_spec_die_type (ctx, spec_die, &ret_type);
			}
			break;
		}
		case DW_AT_type:
			parse_type (ctx, val->reference, &ret_type, NULL, NULL);
			break;
		case DW_AT_virtuality:
			fcn.is_method = true; /* method specific attr */
			fcn.is_virtual = true;
			break;
		case DW_AT_object_pointer:
			fcn.is_method = true;
			break;
		case DW_AT_vtable_elem_location:
			fcn.is_method = true;
			fcn.vtable_addr = 0; /* TODO we might use this information */
			break;
		case DW_AT_accessibility:
			fcn.is_method = true;
			fcn.access = (ut8)val->uconstant;
			break;
		case DW_AT_external:
			fcn.is_external = true;
			break;
		case DW_AT_trampoline:
			fcn.is_trampoline = true;
			break;
		case DW_AT_ranges:
		case DW_AT_high_pc:
		default:
			break;
		}
	}
	if (fcn.name && fcn.addr) { /* we need a name, faddr */
		RStrBuf args;
		r_strbuf_init (&args);
		/* TODO do the same for arguments in future so we can use their location */
		RList/*<Variable*>*/ *variables = r_list_new ();
		if (variables) {
			parse_function_args_and_vars (ctx, idx, &args, variables);
			if (ret_type.len == 0) { /* DW_AT_type is omitted in case of `void` ret type */
				r_strbuf_append (&ret_type, "void");
			}
			r_warn_if_fail (ctx->lang);
			/* owned copy of the name: either the demangled one or a duplicate */
			char *name = ctx->anal->binb.demangle
				? ctx->anal->binb.demangle (NULL, ctx->lang, fcn.name, fcn.addr, false): NULL;
			if (!name) {
				name = strdup (fcn.name);
			}
			if (name) {
				char *signature = r_str_newf ("%s %s(%s);", r_strbuf_get (&ret_type), name, r_strbuf_get (&args));
				fcn.name = name;
				fcn.signature = signature;
				sdb_save_dwarf_function (&fcn, variables, ctx->sdb);
				free (signature);
				free (name);
			}
			/* the list has no free callback, release the entries by hand */
			RListIter *iter;
			Variable *var;
			r_list_foreach (variables, iter, var) {
				variable_free (var);
			}
			r_list_free (variables);
		}
		r_strbuf_fini (&args);
	}
	r_strbuf_fini (&ret_type);
}
/**
 * @brief Gets the language of a compilation unit for demangling
 *
 * @param die Compilation unit DIE
 * @return char* string literal language representation for demangling BinDemangle;
 *         "cxx" when the attribute is missing or the language has no
 *         dedicated demangler
 */
static const char *parse_comp_unit_lang(const RBinDwarfDie *die) {
	r_return_val_if_fail (die, NULL);
	const char *fallback = "cxx"; // default fallback
	const int attr_idx = find_attr_idx (die, DW_AT_language);
	if (attr_idx == -1) {
		/* What to do now, it should have one?, just assume C++ */
		return fallback;
	}
	const RBinDwarfAttrValue *attr = &die->attr_values[attr_idx];
	r_warn_if_fail (attr->kind == DW_AT_KIND_CONSTANT);
	const char *lang;
	switch (attr->uconstant) {
	case DW_LANG_Java:
		lang = "java";
		break;
	/* subideal, TODO research if dwarf gives me enough info to properly separate C++ and ObjC mangling */
	case DW_LANG_ObjC:
	case DW_LANG_ObjC_plus_plus:
		lang = "objc";
		break;
	case DW_LANG_D:
		lang = "dlang";
		break;
	case DW_LANG_Rust:
		lang = "rust";
		break;
	default:
		/* C++ itself, plus every language without demangling available:
		Ada, Cobol, Fortran, Pascal, Modula2, PLI, Python, Swift, Julia,
		Dylan, UPC and the C dialects */
		lang = fallback;
		break;
	}
	return lang;
}
/**
 * @brief Delegates a DIE to its proper parsing method
 *
 * Tags without a dedicated parser are ignored.
 *
 * @param ctx
 * @param idx index of the current entry
 */
static void parse_type_entry(Context *ctx, ut64 idx) {
	r_return_if_fail (ctx);
	const RBinDwarfDie *entry = &ctx->all_dies[idx];
	switch (entry->tag) {
	case DW_TAG_compile_unit:
		/* used for name demangling */
		ctx->lang = parse_comp_unit_lang (entry);
		return;
	case DW_TAG_subprogram:
		parse_function (ctx, idx);
		return;
	case DW_TAG_base_type:
		parse_atomic_type (ctx, idx);
		return;
	case DW_TAG_typedef:
		parse_typedef (ctx, idx);
		return;
	case DW_TAG_enumeration_type:
		parse_enum_type (ctx, idx);
		return;
	case DW_TAG_class_type:
	case DW_TAG_union_type:
	case DW_TAG_structure_type:
		parse_structure_type (ctx, idx);
		return;
	default:
		return;
	}
}
/**
 * @brief Parses type and function information out of DWARF entries
 *        and stores them to the sdb for further use
 *
 * Every compilation unit is processed with its own parsing context, writing
 * into the "dwarf" namespace of anal->sdb. Nothing is done when ctx carries
 * no parsed debug info.
 *
 * @param anal
 * @param ctx Parsed debug info and location lists
 */
R_API void r_anal_dwarf_process_info(const RAnal *anal, RAnalDwarfContext *ctx) {
	r_return_if_fail (ctx && anal);
	const RBinDwarfDebugInfo *info = ctx->info;
	if (!info) {
		return; /* no debug info was parsed, nothing to process */
	}
	Sdb *dwarf_sdb = sdb_ns (anal->sdb, "dwarf", 1);
	size_t i, j;
	for (i = 0; i < info->count; i++) {
		RBinDwarfCompUnit *unit = &info->comp_units[i];
		Context dw_context = { // context per unit?
			.anal = anal,
			.all_dies = unit->dies,
			.count = unit->count,
			.die_map = info->lookup_table,
			.sdb = dwarf_sdb,
			.locations = ctx->loc,
			.lang = NULL
		};
		for (j = 0; j < unit->count; j++) {
			parse_type_entry (&dw_context, j);
		}
	}
}
/* Sdb filter callback: accepts only the entries whose value is exactly "fcn";
 * the user pointer and the key are not inspected */
bool filter_sdb_function_names(void *user, const char *k, const char *v) {
	(void) user;
	(void) k;
	const bool is_function_entry = strcmp (v, "fcn") == 0;
	return is_function_entry;
}
/**
* @brief Use parsed DWARF function info from Sdb in the anal functions
* XXX right now we only save parsed name and variables, we can't use signature now
* XXX refactor to be more readable
* @param anal
* @param dwarf_sdb
*/
R_API void r_anal_dwarf_integrate_functions(RAnal *anal, RFlag *flags, Sdb *dwarf_sdb) {
r_return_if_fail (anal && dwarf_sdb);
/* get all entries with value == func */
SdbList *sdb_list = sdb_foreach_list_filter (dwarf_sdb, filter_sdb_function_names, false);
SdbListIter *it;
SdbKv *kv;
/* iterate all function entries */
ls_foreach (sdb_list, it, kv) {
char *func_sname = kv->base.key;
char *addr_key = r_str_newf ("fcn.%s.addr", func_sname);
ut64 faddr = sdb_num_get (dwarf_sdb, addr_key, 0);
free (addr_key);
/* if the function is analyzed so we can edit */
RAnalFunction *fcn = r_anal_get_function_at (anal, faddr);
if (fcn) {
/* prepend dwarf debug info stuff with dbg. */
char *real_name_key = r_str_newf ("fcn.%s.name", func_sname);
char *real_name = sdb_get (dwarf_sdb, real_name_key, 0);
free (real_name_key);
char *dwf_name = r_str_newf ("dbg.%s", real_name);
free (real_name);
r_anal_function_rename (fcn, dwf_name);
free (dwf_name);
char *tmp = r_str_newf ("fcn.%s.sig", func_sname);
char *fcnstr = sdb_get (dwarf_sdb, tmp, 0);
free (tmp);
/* Apply signature as a comment at a function address */
r_meta_set_string (anal, R_META_TYPE_COMMENT, faddr, fcnstr);
free (fcnstr);
}
char *var_names_key = r_str_newf ("fcn.%s.vars", func_sname);
char *vars = sdb_get (dwarf_sdb, var_names_key, NULL);
char *var_name;
sdb_aforeach (var_name, vars) {
char *var_key = r_str_newf ("fcn.%s.var.%s", func_sname, var_name);
char *var_data = sdb_get (dwarf_sdb, var_key, NULL);
if (!var_data) {
goto loop_end;
}
char *extra = NULL;
char *kind = sdb_anext (var_data, &extra);
char *type = NULL;
extra = sdb_anext (extra, &type);
st64 offset = 0;
if (*kind != 'r') {
offset = strtol (extra, NULL, 10);
}
if (*kind == 'g') { /* global, fixed addr TODO add size to variables? */
char *global_name = r_str_newf ("global_%s", var_name);
r_flag_unset_off (flags, offset);
r_flag_set_next (flags, global_name, offset, 4);
free (global_name);
} else if (*kind == 's' && fcn) {
r_anal_function_set_var (fcn, offset - fcn->maxstack, *kind, type, 4, false, var_name);
} else if (*kind == 'r' && fcn) {
RRegItem *i = r_reg_get (anal->reg, extra, -1);
if (!i) {
goto loop_end;
}
r_anal_function_set_var (fcn, i->index, *kind, type, 4, false, var_name);
} else if (fcn) { /* kind == 'b' */
r_anal_function_set_var (fcn, offset - fcn->bp_off, *kind, type, 4, false, var_name);
}
free (var_key);
free (var_data);
loop_end:
sdb_aforeach_next (var_name);
}
free (var_names_key);
free (vars);
}
ls_free (sdb_list);
}