Improvements arm/thumb analysis ##anal

* anal: arm: emulate correctly BX PC in arm32/thumb ##anal
* anal: arm: handle BX PC in analysis to propagate bits

PC is well known without ESIL, that way it is possible to propagate the
bits to correctly create the hints

* anal: arm: modify r_anal_build_range_on_hints ##anal

This function now accepts a second parameter that specifies when to
dispose of overlapping hints.

The rationale is that if this is performed on a continuous basis, old
ranges are lost when a new hint is inserted at a later stage of the
analysis.

For example, if we have something like

0x8000 -> 16 bits
0x8200 -> 16 bits

With the previous logic this would have become

0x8000 -> 16 bits

However, during analysis a new hint like this might happen

0x8100 -> 32 bits

Therefore, 0x8200, which was 16 bits, is lost. With the second parameter
update, we postpone this until the user prints the disassembly - we wait
until the end to clean up hints to speed up the lookups. However,
during analysis we maintain all the hints.

* anal: arm: handle better anal hints to increase performance ##anal

new API r_anal_hint_get_bits_at

This saves time for example on r_anal_build_range_on_hints without the
need to use heap for RAnalHints speeding up the analysis

Added cb when calling r_anal_hint_set_bits and rbtree for anal ranges
which improves lookups

fix __anal_range_tree_find_bits_at

fix conflicts and coding style

* arm: set anal hint when BL instruction
This commit is contained in:
Álvaro Felipe Melchor 2018-11-22 21:31:54 +01:00 committed by radare
parent 9279231918
commit 23d01f869c
9 changed files with 231 additions and 58 deletions

View File

@ -55,6 +55,93 @@ static void zign_rename_for(void *user, int idx, const char *oname, const char *
r_sign_space_rename_for (anal, idx, oname, nname);
}
//not used
#if 0
// Augmentation callback: recompute rb_max_addr for `node`.
// The value starts as the node's own `from` and is raised to the largest
// rb_max_addr stored in either of its (existing) children.
static void __anal_hint_tree_calc_max_addr(RBNode *node) {
	RAnalRange *self = container_of (node, RAnalRange, rb);
	ut64 max_addr = self->from;
	int side;
	for (side = 0; side < 2; side++) {
		RBNode *kid = node->child[side];
		if (!kid) {
			continue;
		}
		RAnalRange *kid_range = container_of (kid, RAnalRange, rb);
		if (kid_range->rb_max_addr > max_addr) {
			max_addr = kid_range->rb_max_addr;
		}
	}
	self->rb_max_addr = max_addr;
}
#endif
// Ordering callback for the hint-range tree, keyed on RAnalRange.from.
// a_ is the RAnalRange being looked up/inserted, b_ is a node already in
// the tree. Returns -1 when a_ starts before b_, 1 otherwise (equal keys
// therefore compare as "greater"), and 0 only if either range is NULL.
static int __anal_hint_range_tree_cmp(const void *a_, const RBNode *b_) {
	const RAnalRange *incoming = a_;
	const RAnalRange *stored = container_of (b_, const RAnalRange, rb);
	if (!incoming || !stored) {
		return 0;
	}
	if (incoming->from < stored->from) {
		return -1;
	}
	return 1;
}
// Node destructor for the hint-range tree: releases the RAnalRange that
// embeds `node`.
static void __anal_hint_range_tree_free(RBNode *node) {
	RAnalRange *owner = container_of (node, RAnalRange, rb);
	free (owner);
}
// Exact-match lookup: returns the range whose `from` equals `addr`,
// or NULL when no such range is reachable from `node`.
static RAnalRange *__anal_range_hint_tree_find_at(RBNode *node, ut64 addr) {
	RBNode *cur;
	for (cur = node; cur; ) {
		RAnalRange *candidate = container_of (cur, RAnalRange, rb);
		if (candidate->from == addr) {
			return candidate;
		}
		// smaller keys live in child[0], larger ones in child[1]
		cur = (candidate->from < addr)? cur->child[1]: cur->child[0];
	}
	return NULL;
}
//not used
#if 0
// Removes `data` from the tree rooted at *root via r_rbtree_aug_delete.
// Returns false when `data` is NULL, otherwise the truth value of the
// r_rbtree_aug_delete result.
static bool __anal_range_hint_tree_delete(RBNode **root, RAnalRange *data) {
	if (!data) {
		return false;
	}
	if (r_rbtree_aug_delete (root, data, __anal_hint_range_tree_cmp,
			__anal_hint_range_tree_free,
			__anal_hint_tree_calc_max_addr)) {
		return true;
	}
	return false;
}
#endif
// Links `range` into the tree rooted at *root, ordered by
// __anal_hint_range_tree_cmp. No augmentation callback is supplied (NULL).
static void __anal_range_hint_tree_insert(RBNode **root, RAnalRange *range) {
	RBNode *link = &range->rb;
	r_rbtree_aug_insert (root, range, link, __anal_hint_range_tree_cmp, NULL);
}
// Records that the code starting at `addr` uses `bits`.
// If a range already starts exactly at `addr` only its bits are updated;
// otherwise a new zero-initialised RAnalRange is allocated and inserted
// into a->rb_hints_ranges. An allocation failure is silently ignored.
static void __anal_add_range_on_hints(RAnal *a, ut64 addr, int bits) {
	r_return_if_fail (a);
	RAnalRange *existing = __anal_range_hint_tree_find_at (a->rb_hints_ranges, addr);
	if (existing) {
		existing->bits = bits;
		return;
	}
	RAnalRange *fresh = R_NEW0 (RAnalRange);
	if (!fresh) {
		return;
	}
	fresh->from = addr;
	fresh->bits = bits;
	__anal_range_hint_tree_insert (&a->rb_hints_ranges, fresh);
}
// Hint callback for bits changes. Only "set" notifications are acted on
// (the range is added or updated); when `set` is false nothing is done
// and the tree is left untouched.
static void __anal_hint_on_bits(RAnal *a, ut64 addr, int bits, bool set) {
	if (!set) {
		return;
	}
	__anal_add_range_on_hints (a, addr, bits);
}
R_API RAnal *r_anal_new() {
int i;
RAnal *anal = R_NEW0 (RAnal);
@ -78,6 +165,7 @@ R_API RAnal *r_anal_new() {
anal->sdb_fcns = sdb_ns (anal->sdb, "fcns", 1);
anal->sdb_meta = sdb_ns (anal->sdb, "meta", 1);
anal->sdb_hints = sdb_ns (anal->sdb, "hints", 1);
anal->hint_cbs.on_bits = __anal_hint_on_bits;
anal->sdb_types = sdb_ns (anal->sdb, "types", 1);
anal->sdb_fmts = sdb_ns (anal->sdb, "spec", 1);
anal->sdb_cc = sdb_ns (anal->sdb, "cc", 1);
@ -94,7 +182,7 @@ R_API RAnal *r_anal_new() {
anal->reg = r_reg_new ();
anal->last_disasm_reg = NULL;
anal->stackptr = 0;
anal->bits_ranges = r_list_newf (free);
anal->rb_hints_ranges = NULL;
anal->lineswidth = 0;
anal->fcns = r_anal_fcn_list_new ();
anal->fcn_tree = NULL;
@ -109,6 +197,7 @@ R_API RAnal *r_anal_new() {
return anal;
}
R_API void r_anal_plugin_free (RAnalPlugin *p) {
if (p && p->fini) {
p->fini (NULL);
@ -133,7 +222,7 @@ R_API RAnal *r_anal_free(RAnal *a) {
r_syscall_free (a->syscall);
r_reg_free (a->reg);
r_anal_op_free (a->queued);
r_list_free (a->bits_ranges);
r_rbtree_free (a->rb_hints_ranges, __anal_hint_range_tree_free);
ht_up_free (a->dict_refs);
ht_up_free (a->dict_xrefs);
a->sdb = NULL;
@ -618,45 +707,70 @@ R_API bool r_anal_noreturn_at(RAnal *anal, ut64 addr) {
return false;
}
// based on anal hint we construct a list of RAnalRange to handle
// better arm/thumb though maybe handy in other contexts
R_API void r_anal_build_range_on_hints(RAnal *a) {
if (a->bits_hints_changed) {
// Returns the bits of the range that governs `addr`, i.e. the range on the
// search path for `addr` with the greatest `from` that is still <= addr.
// Returns 0 when the tree is empty or when no visited range starts at or
// before `addr`.
//
// The tree is descended once, following the same child selection as an
// ordinary key search, while remembering the closest preceding range seen
// so far; no scratch path array is needed.
//
// `from` and `addr` are compared as unsigned values, so a range lying
// 2^63 or more bytes below `addr` is still a valid candidate, and the
// outcome for an address that precedes every range is always 0 instead of
// depending on whether the root happens to have a child on that side.
R_API int r_anal_range_tree_find_bits_at(RBNode *root, ut64 addr) {
	RBNode *node = root;
	ut64 min_diff = UT64_MAX;
	int bits = 0;
	while (node) {
		RAnalRange *range = container_of (node, RAnalRange, rb);
		if (range->from <= addr) {
			ut64 diff = addr - range->from;
			// <= so that a distance of exactly UT64_MAX is accepted and,
			// on a tie, the deeper node wins
			if (diff <= min_diff) {
				min_diff = diff;
				bits = range->bits;
			}
		}
		node = node->child[range->from < addr];
	}
	return bits;
}
R_API void r_anal_merge_hint_ranges(RAnal *a) {
if (a->merge_hints) {
SdbListIter *iter;
RListIter *it;
SdbKv *kv;
RAnalRange *range;
int range_bits = 0;
// construct again the range from hint to handle properly arm/thumb
r_list_free (a->bits_ranges);
a->bits_ranges = r_list_newf ((RListFree)free);
SdbList *sdb_range = sdb_foreach_list (a->sdb_hints, true);
//just grab when hint->bit changes with the previous one
int range_bits = 0;
r_rbtree_free (a->rb_hints_ranges, __anal_hint_range_tree_free);
a->rb_hints_ranges = NULL;
ls_foreach (sdb_range, iter, kv) {
RAnalHint *hint = r_anal_hint_from_string (a, sdb_atoi (sdbkv_key (kv) + 5), sdbkv_value (kv));
if (hint->bits && range_bits != hint->bits) {
RAnalRange *range = R_NEW0 (RAnalRange);
if (range) {
range->bits = hint->bits;
range->from = hint->addr;
range->to = UT64_MAX;
r_list_append (a->bits_ranges, range);
}
ut64 addr = sdb_atoi (sdbkv_key (kv) + 5);
int bits = r_anal_hint_get_bits_at (a, addr, sdbkv_value (kv));
if (bits && range_bits == bits) {
r_anal_hint_unset_bits (a, addr);
} else {
//remove this hint is not needed
r_anal_hint_unset_bits (a, hint->addr);
RAnalRange *range = R_NEW0 (RAnalRange);
range->bits = bits;
range->from = addr;
__anal_range_hint_tree_insert (&a->rb_hints_ranges, range);
}
range_bits = hint->bits;
r_anal_hint_free (hint);
range_bits = bits;
}
//close ranges addr
r_list_foreach (a->bits_ranges, it, range) {
if (it->n && it->n->data) {
range->to = ((RAnalRange *)(it->n->data))->from;
}
}
ls_free (sdb_range);
a->bits_hints_changed = false;
a->merge_hints = false;
}
}

View File

@ -970,7 +970,8 @@ repeat:
gotoBeach (R_ANAL_RET_END);
}
if (op.hint.new_bits) {
r_anal_hint_set_bits (anal, op.jump, op.hint.new_bits);
r_anal_hint_set_bits (anal, op.jump,
op.hint.new_bits);
}
if (idx > 0 && !overlapped) {
bbg = bbget (fcn, addr + idx, anal->opt.jmpmid && x86);

View File

@ -16,7 +16,6 @@ R_API void r_anal_hint_del(RAnal *a, ut64 addr, int size) {
} else {
setf (key, "hint.0x%08"PFMT64x, addr);
sdb_unset (a->sdb_hints, key, 0);
a->bits_hints_changed = true;
}
}
@ -64,7 +63,6 @@ R_API void r_anal_hint_set_jump(RAnal *a, ut64 addr, ut64 ptr) {
}
R_API void r_anal_hint_set_newbits(RAnal *a, ut64 addr, int bits) {
a->bits_hints_changed = true;
setHint (a, "Bits:", addr, NULL, bits);
}
@ -111,8 +109,11 @@ R_API void r_anal_hint_set_esil(RAnal *a, ut64 addr, const char *esil) {
}
R_API void r_anal_hint_set_bits(RAnal *a, ut64 addr, int bits) {
a->bits_hints_changed = true;
setHint (a, "bits:", addr, NULL, bits);
if (a && a->hint_cbs.on_bits) {
a->hint_cbs.on_bits (a, addr, bits, true);
}
a->merge_hints = true;
}
R_API void r_anal_hint_set_size(RAnal *a, ut64 addr, int size) {
@ -124,8 +125,11 @@ R_API void r_anal_hint_unset_size(RAnal *a, ut64 addr) {
}
R_API void r_anal_hint_unset_bits(RAnal *a, ut64 addr) {
a->bits_hints_changed = true;
unsetHint(a, "bits:", addr);
if (a && a->hint_cbs.on_bits) {
a->hint_cbs.on_bits (a, addr, 0, false);
}
a->merge_hints = true;
}
R_API void r_anal_hint_unset_esil(RAnal *a, ut64 addr) {
@ -179,6 +183,36 @@ R_API void r_anal_hint_free(RAnalHint *h) {
}
}
// Extracts only the bits value from a serialized hint string without
// building a full RAnalHint.
//
// `str` is tokenized with sdb_anext as alternating key/value items; for
// every key whose first character is 'b' the following value is parsed
// with sdb_atoi, and the last such value is returned.
// `a` and `addr` are not used by this function.
//
// Returns 0 when `str` is NULL, when the working copy cannot be allocated,
// or when no matching key/value pair is present.
R_API int r_anal_hint_get_bits_at(RAnal *a, ut64 addr, const char *str) {
	char *key, *val = NULL, *next_key = NULL;
	char *s;
	int bits = 0;
	if (!str) {
		return 0;
	}
	// sdb_anext writes terminators into the buffer, so work on a copy
	s = strdup (str);
	if (!s) {
		return 0;
	}
	for (key = s; key; key = next_key) {
		sdb_anext (key, &val);
		if (!val) {
			// key without a value: nothing more to read
			break;
		}
		sdb_anext (val, &next_key); // tokenize value
		if (*key == 'b') {
			bits = sdb_atoi (val);
		}
	}
	free (s);
	return bits;
}
R_API RAnalHint *r_anal_hint_from_string(RAnal *a, ut64 addr, const char *str) {
char *r, *nxt, *nxt2;
int token = 0;

View File

@ -1477,8 +1477,15 @@ static int analop_esil(RAnal *a, RAnalOp *op, ut64 addr, const ut8 *buf, int len
break;
case ARM_INS_BX:
case ARM_INS_BXJ:
r_strbuf_setf (&op->esil, "%s,pc,=", ARG(0));
{
const char *op1 = ARG (0);
if (!strcmp (op1, "pc")) {
r_strbuf_setf (&op->esil, "%d,$$,+,pc,=", pcdelta);
} else {
r_strbuf_setf (&op->esil, "%s,pc,=", ARG (0));
}
break;
}
case ARM_INS_UDF:
r_strbuf_setf (&op->esil, "%s,TRAP", ARG(0));
break;
@ -2507,6 +2514,7 @@ static ut64 lookahead(csh handle, const ut64 addr, const ut8 *buf, int len, int
static void anop32(RAnal *a, csh handle, RAnalOp *op, cs_insn *insn, bool thumb, const ut8 *buf, int len) {
const ut64 addr = op->addr;
const int pcdelta = thumb? 4 : 8;
int i;
op->cond = cond_cs2r2 (insn->detail->arm.cc);
if (op->cond == R_ANAL_COND_NV) {
@ -2763,6 +2771,7 @@ jmp $$ + 4 + ( [delta] * 2 )
op->type = R_ANAL_OP_TYPE_CALL;
op->jump = IMM(0) & UT32_MAX;
op->fail = addr + op->size;
op->hint.new_bits = a->bits;
}
break;
case ARM_INS_CBZ:
@ -2789,6 +2798,8 @@ jmp $$ + 4 + ( [delta] * 2 )
op->fail = addr+op->size;
}
op->jump = IMM(0) & UT32_MAX;
// propagate bits to create correctly hints ranges
op->hint.new_bits = a->bits;
break;
case ARM_INS_BX:
case ARM_INS_BXJ:
@ -2802,6 +2813,12 @@ jmp $$ + 4 + ( [delta] * 2 )
case ARM_REG_IP:
op->type = R_ANAL_OP_TYPE_UJMP;
break;
case ARM_REG_PC:
// bx pc is well known without ESIL
op->type = R_ANAL_OP_TYPE_UJMP;
op->jump = op->addr + pcdelta;
op->hint.new_bits = (a->bits == 32)? 16 : 32;
break;
default:
op->type = R_ANAL_OP_TYPE_UJMP;
op->eob = true;

View File

@ -1575,7 +1575,6 @@ R_API int r_core_anal_fcn(RCore *core, ut64 at, ut64 from, int reftype, int dept
//update bits based on the core->offset otherwise we could have the
//last value set and blow everything up
r_anal_build_range_on_hints (core->anal);
r_core_seek_archbits (core, at);
if (core->io->va) {

View File

@ -262,14 +262,12 @@ beach:
return ret;
}
static void choose_bits_anal_hints(RCore *core, ut64 addr, int *bits) {
RAnalRange *range;
RListIter *iter;
r_list_foreach (core->anal->bits_ranges, iter, range) {
if (addr >= range->from && addr < range->to) {
*bits = range->bits;
return;
static void __choose_bits_anal_hints(RCore *core, ut64 addr, int *bits) {
if (core->anal) {
int ret = r_anal_range_tree_find_bits_at (core->anal->rb_hints_ranges,
addr);
if (ret) {
*bits = ret;
}
}
}
@ -279,7 +277,7 @@ R_API void r_core_seek_archbits(RCore *core, ut64 addr) {
const char *arch = r_io_section_get_archbits (core->io, addr, &bits);
if (!bits && !core->fixedbits) {
//if we found bits related with anal hints pick it up
choose_bits_anal_hints (core, addr, &bits);
__choose_bits_anal_hints (core, addr, &bits);
}
if (bits && !core->fixedbits) {
r_config_set_i (core->config, "asm.bits", bits);

View File

@ -246,7 +246,6 @@ static char *getNameDelta(RCore *core, ut64 addr) {
}
static void archbits(RCore *core, ut64 addr) {
r_anal_build_range_on_hints (core->anal);
r_core_seek_archbits (core, addr);
}

View File

@ -4765,9 +4765,8 @@ toro:
if (!ds->l) {
ds->l = core->blocksize;
}
r_anal_merge_hint_ranges (core->anal);
r_cons_break_push (NULL, NULL);
r_anal_build_range_on_hints (core->anal);
for (i = idx = ret = 0; addrbytes * idx < len && ds->lines < ds->l; idx += inc, i++, ds->index += inc, ds->lines++) {
ds->at = ds->addr + idx;
ds->vat = r_core_pava (core, ds->at);
@ -5224,10 +5223,9 @@ R_API int r_core_print_disasm_instructions(RCore *core, int nb_bytes, int nb_opc
if (!ds->l) {
ds->l = ds->len;
}
r_anal_merge_hint_ranges (core->anal);
r_cons_break_push (NULL, NULL);
//build ranges to map addr with bits
r_anal_build_range_on_hints (core->anal);
#define isNotTheEnd (nb_opcodes ? j < nb_opcodes: addrbytes * i < nb_bytes)
for (i = j = 0; isNotTheEnd; i += ret, j++) {
ds->at = core->offset + i;

View File

@ -64,6 +64,8 @@ typedef struct r_anal_range_t {
ut64 from;
ut64 to;
int bits;
ut64 rb_max_addr;
RBNode rb;
} RAnalRange;
#define R_ANAL_UNMASK_TYPE(x) (x&R_ANAL_VAR_TYPE_SIZE_MASK)
@ -616,6 +618,11 @@ typedef enum {
R_ANAL_CPP_ABI_MSVC
} RAnalCPPABI;
typedef struct r_anal_hint_cb_t {
//add more cbs as needed
void (*on_bits) (struct r_anal_t *a, ut64 addr, int bits, bool set);
} RHintCb;
typedef struct r_anal_t {
char *cpu;
char *os;
@ -674,7 +681,7 @@ typedef struct r_anal_t {
Sdb *sdb_vars; // globals?
#endif
Sdb *sdb_hints; // OK
bool bits_hints_changed;
RHintCb hint_cbs;
Sdb *sdb_fcnsign; // OK
Sdb *sdb_cc; // calling conventions
//RList *hints; // XXX use better data structure here (slist?)
@ -683,7 +690,8 @@ typedef struct r_anal_t {
RList *reflines;
RList *reflines2;
//RList *noreturn;
RList /*RAnalRange*/ *bits_ranges;
RBNode *rb_hints_ranges; // <RAnalRange>
bool merge_hints;
RListComparator columnSort;
int stackptr;
bool fillval;
@ -1604,7 +1612,8 @@ R_API void r_meta_print(RAnal *a, RAnalMetaItem *d, int rad, bool show_full);
/* hints */
R_API void r_anal_build_range_on_hints (RAnal *a);
R_API void r_anal_build_range_on_hints (RAnal *a, ut64 addr, int bits);
R_API void r_anal_merge_hint_ranges(RAnal *a);
//R_API void r_anal_hint_list (RAnal *anal, int mode);
R_API RAnalHint *r_anal_hint_from_string(RAnal *a, ut64 addr, const char *str);
R_API void r_anal_hint_del (RAnal *anal, ut64 addr, int size);
@ -1639,6 +1648,10 @@ R_API void r_anal_hint_unset_ret(RAnal *a, ut64 addr);
R_API void r_anal_hint_unset_offset(RAnal *a, ut64 addr);
R_API void r_anal_hint_unset_jump(RAnal *a, ut64 addr);
R_API void r_anal_hint_unset_fail(RAnal *a, ut64 addr);
R_API int r_anal_hint_get_bits_at(RAnal *a, ut64 addr, const char *str);
R_API int r_anal_range_tree_find_bits_at(RBNode *root, ut64 addr);
R_API int r_anal_esil_eval(RAnal *anal, const char *str);
/* switch.c APIs */