/* radare - LGPL - Copyright 2009-2015 - nibble, pancake */ #include #include #include #define IFDBG if (0) static RCoreAsmHit * find_addr(RList *hits, ut64 addr); static int prune_hits_in_hit_range(RList *hits, RCoreAsmHit *hit); static int is_hit_inrange(RCoreAsmHit *hit, ut64 start_range, ut64 end_range); static int is_addr_in_range(ut64 start, ut64 end, ut64 start_range, ut64 end_range); static void add_hit_to_sorted_hits(RList* hits, ut64 addr, int len, ut8 is_valid); static int prune_hits_in_addr_range(RList *hits, ut64 addr, ut64 len, ut8 is_valid); static int rcoreasm_address_comparator(RCoreAsmHit *a, RCoreAsmHit *b){ if (a->addr == b->addr) return 0; else if (a->addr < b->addr) return -1; // a->addr > b->addr return 1; } R_API RCoreAsmHit *r_core_asm_hit_new() { RCoreAsmHit *hit = R_NEW (RCoreAsmHit); if (!hit) return NULL; hit->code = NULL; hit->len = 0; hit->addr = -1; hit->valid = R_FALSE; return hit; } R_API RList *r_core_asm_hit_list_new() { RList *list = r_list_new (); list->free = &r_core_asm_hit_free; return list; } R_API void r_core_asm_hit_free(void *_hit) { RCoreAsmHit *hit = _hit; if (hit) { if (hit->code) free (hit->code); free (hit); } } R_API char* r_core_asm_search(RCore *core, const char *input, ut64 from, ut64 to) { RAsmCode *acode; char *ret; if (!(acode = r_asm_massemble (core->assembler, input))) return NULL; ret = strdup (acode->buf_hex); r_asm_code_free (acode); return ret; } #define OPSZ 8 // TODO: add support for byte-per-byte opcode search R_API RList *r_core_asm_strsearch(RCore *core, const char *input, ut64 from, ut64 to, int maxhits, int regexp) { RCoreAsmHit *hit; RAsmOp op; RList *hits; ut64 at, toff = core->offset; ut8 *buf; int align = core->search->align; RRegex* rx = NULL; char *tok, *tokens[1024], *code = NULL, *ptr; int idx, tidx = 0, ret, len; int tokcount, matchcount, count = 0; int matches = 0; if (!*input) return NULL; if (core->blocksize<=OPSZ) { eprintf ("error: block size too small\n"); return NULL; } if (!(buf = (ut8 *)calloc (core->blocksize, 1))) return NULL; if (!(ptr = strdup (input))) { free (buf); return NULL; } if (!(hits = r_core_asm_hit_list_new ())) { free (buf); free (ptr); return NULL; } tokens[0] = NULL; for (tokcount=0; tokcount<(sizeof (tokens) / sizeof (char*)) - 1; tokcount++) { tok = strtok (tokcount? NULL: ptr, ";"); if (tok == NULL) break; tokens[tokcount] = r_str_trim_head_tail (tok); } tokens[tokcount] = NULL; r_cons_break (NULL, NULL); for (at = from, matchcount = 0; at < to; at += core->blocksize-OPSZ) { matches = 0; if (r_cons_singleton ()->breaked) break; ret = r_io_read_at (core->io, at, buf, core->blocksize); if (ret != core->blocksize) break; idx = 0, matchcount = 0; while (idxblocksize) { ut64 addr = at + idx; r_asm_set_pc (core->assembler, addr); op.buf_asm[0] = 0; op.buf_hex[0] = 0; if (!(len = r_asm_disassemble (core->assembler, &op, buf+idx, core->blocksize-idx))) { idx = (matchcount)? tidx+1: idx+1; matchcount = 0; continue; } if (tokens[matchcount]) { if (!regexp) matches = strstr(op.buf_asm, tokens[matchcount]) != NULL; else { rx = r_regex_new (tokens[matchcount], ""); matches = r_regex_exec (rx, op.buf_asm, 0, 0, 0) == 0; r_regex_free (rx); } } if (align) { if (addr % align) { matches = R_FALSE; eprintf ("NOT VALID %llx\n", addr); } } if (matches) { code = r_str_concatf (code, "%s; ", op.buf_asm); if (matchcount == tokcount-1) { if (tokcount == 1) tidx = idx; if (!(hit = r_core_asm_hit_new ())) { r_list_purge (hits); free (hits); hits = NULL; goto beach; } hit->addr = addr; hit->len = idx+len-tidx; if (hit->len == -1) { r_core_asm_hit_free (hit); goto beach; } code[strlen (code)-2] = 0; hit->code = strdup (code); r_list_append (hits, hit); R_FREE (code); matchcount = 0; idx = tidx+1; if (maxhits) { count ++; if (count >= maxhits) { //eprintf ("Error: search.maxhits reached\n"); goto beach; } } } else if (matchcount == 0) { tidx = idx; matchcount++; idx += len; } else { matchcount++; idx += len; } } else { idx = matchcount? tidx+1: idx+1; R_FREE (code); matchcount = 0; } } } r_asm_set_pc (core->assembler, toff); beach: free (buf); free (ptr); free (code); return hits; } static void add_hit_to_sorted_hits(RList* hits, ut64 addr, int len, ut8 is_valid) { RCoreAsmHit *hit = r_core_asm_hit_new(); if (hit) { IFDBG eprintf("*** Inserting instruction (valid?: %d): instr_addr: 0x%"PFMT64x" instr_len: %d\n", is_valid, addr, len ); hit->addr = addr; hit->len = len; hit->valid = is_valid; hit->code = NULL; r_list_add_sorted (hits, hit, ((RListComparator)rcoreasm_address_comparator)); } } static void add_hit_to_hits(RList* hits, ut64 addr, int len, ut8 is_valid) { RCoreAsmHit *hit = r_core_asm_hit_new(); if (hit) { IFDBG eprintf("*** Inserting instruction (valid?: %d): instr_addr: 0x%"PFMT64x" instr_len: %d\n", is_valid, addr, len); hit->addr = addr; hit->len = len; hit->valid = is_valid; hit->code = NULL; if (!r_list_append (hits, hit)){ free (hit); IFDBG eprintf ("hit couldn't be added to the list in %s at %d\n", __FILE__, __LINE__); } } } static int prune_hits_in_addr_range(RList *hits, ut64 addr, ut64 len, ut8 is_valid) { RCoreAsmHit hit; hit.addr = addr; hit.len = len; hit.code = NULL; hit.valid = is_valid; return prune_hits_in_hit_range(hits, &hit); } static int prune_hits_in_hit_range(RList *hits, RCoreAsmHit *hit){ RListIter *iter, *iter_tmp; RCoreAsmHit *to_check_hit; int result = 0; ut64 start_range, end_range; if (hit == NULL || hits == NULL) return 0; start_range = hit->addr; end_range = hit->addr + hit->len; r_list_foreach_safe (hits, iter, iter_tmp, to_check_hit){ if (to_check_hit && is_hit_inrange(to_check_hit, start_range, end_range)) { IFDBG eprintf ("Found hit that clashed (start: 0x%"PFMT64x " - end: 0x%"PFMT64x" ), 0x%"PFMT64x" len: %d (valid: %d 0x%"PFMT64x " - 0x%"PFMT64x")\n", start_range, end_range, to_check_hit->addr, to_check_hit->len, to_check_hit->valid, to_check_hit->addr, to_check_hit->addr+to_check_hit->len); // XXX - could this be a valid decode instruction we are deleting? r_list_delete (hits, iter); //iter->data = NULL; to_check_hit = NULL; result ++; } } return result; } static RCoreAsmHit * find_addr(RList *hits, ut64 addr) { // Find an address in the list of hits RListIter *addr_iter = NULL; RCoreAsmHit dummy_value; dummy_value.addr = addr; addr_iter = r_list_find (hits, &dummy_value, ((RListComparator)rcoreasm_address_comparator)); return r_list_iter_get_data(addr_iter); } static int handle_forward_disassemble(RCore* core, RList *hits, ut8* buf, ut64 len, ut64 current_buf_pos, ut64 current_instr_addr, ut64 end_addr){ // forward disassemble from the current instruction up to the end address ut64 temp_instr_len = 0, temp_instr_addr = current_instr_addr, tmp_current_buf_pos = current_buf_pos, start = 0, end = 0, start_range = current_instr_addr, end_range = end_addr; RAsmOp op; RCoreAsmHit *hit = NULL, *found_addr = NULL; ut8 is_valid = R_FALSE; if (end_addr < current_instr_addr) return end_addr; r_asm_set_pc (core->assembler, current_instr_addr); while ( tmp_current_buf_pos < len && temp_instr_addr < end_addr) { temp_instr_len = len - tmp_current_buf_pos; IFDBG eprintf("Current position: %"PFMT64d" instr_addr: 0x%"PFMT64x"\n", tmp_current_buf_pos, temp_instr_addr); temp_instr_len = r_asm_disassemble (core->assembler, &op, buf+tmp_current_buf_pos, temp_instr_len); if (temp_instr_len == 0){ is_valid = R_FALSE; temp_instr_len = 1; } else is_valid = R_TRUE; // check to see if addr exits found_addr = find_addr(hits, temp_instr_addr); start = temp_instr_addr; end = temp_instr_addr + temp_instr_len; if (!found_addr) { add_hit_to_sorted_hits(hits, temp_instr_addr, temp_instr_len, is_valid); } else if (is_valid && !found_addr->valid && is_addr_in_range(start, end, start_range, end_range )) { ut32 prune_results = 0; prune_results = prune_hits_in_addr_range(hits, temp_instr_addr, temp_instr_len, is_valid); add_hit_to_sorted_hits(hits, temp_instr_addr, temp_instr_len, is_valid); if (prune_results ) { r_list_add_sorted (hits, hit, ((RListComparator)rcoreasm_address_comparator)); IFDBG eprintf("Pruned %u hits from list in fwd sweep.\n", prune_results); } else { free (hit); hit = NULL; } } temp_instr_addr += temp_instr_len; tmp_current_buf_pos += temp_instr_len; } return temp_instr_addr; } #if 0 static int handle_disassembly_overlap(RCore* core, RList *hits, ut8* buf, int len, ut64 current_buf_pos, ut64 current_instr_addr ) { // disassemble over lap means the current instruction decoded using the bytes in a previously decoded instruction ut64 next_buf_pos = current_buf_pos, end_addr = current_instr_addr + ( len - current_buf_pos - 1); /* Sub optimal method (e.g. easy) */ handle_forward_disassemble (core, hits, buf, len, current_buf_pos, current_instr_addr, end_addr ); next_buf_pos = current_buf_pos; return next_buf_pos; } #endif static int is_addr_in_range(ut64 start, ut64 end, ut64 start_range, ut64 end_range){ int result = R_FALSE; if (start == start_range) { return R_TRUE; } else if (start < end && start_range < end_range) { // ez cases if ( start_range <= start && start < end_range ) result = R_TRUE; else if (start_range < end && end < end_range ) result = R_TRUE; else if ( start <= start_range && end_range < end ) result = R_TRUE; // XXX - these cases need to be tested // (long long) start_range < 0 < end_range } else if (start_range > end_range) { if (start < end) { if (start < end_range) result = R_TRUE; else if (end <= end_range) result = R_TRUE; else if ( start_range <= start ) result = R_TRUE; else if ( start_range < end ) result = R_TRUE; // (long long) start < 0 < end } else { if (end < end_range) result = R_TRUE; else if (end <= end_range) result = R_TRUE; else if ( start_range <= start ) result = R_TRUE; } // XXX - these cases need to be tested // (long long) start < 0 < end } else if (start_range < end_range) { if ( start < end_range) result = R_TRUE; else if ( start <= start_range ) result = R_TRUE; else if ( start_range < end) result = R_TRUE; } return result; } static int is_hit_inrange(RCoreAsmHit *hit, ut64 start_range, ut64 end_range){ int result = R_FALSE; if (hit) { result = is_addr_in_range (hit->addr, hit->addr + hit->len, start_range, end_range); } return result; } R_API RList *r_core_asm_bwdisassemble (RCore *core, ut64 addr, int n, int len) { RList *hits = r_core_asm_hit_list_new(); RAsmOp op; // len = n * 32; // if (n > core->blocksize) n = core->blocksize; ut8 *buf = (ut8 *)malloc(len); ut64 instrlen = 0, at = 0; ut32 idx = 0, hit_count = 0; int numinstr, asmlen, ii; RAsmCode *c; if (hits == NULL || buf == NULL ){ if (hits) { r_list_free (hits); } free (buf); return NULL; } if (r_io_read_at (core->io, addr-len, buf, len) != len) { if (hits) { r_list_free (hits); } free (buf); return NULL; } for (idx = 1; idx < len; ++idx) { if (r_cons_singleton ()->breaked) break; at = addr - idx; hit_count = 0; c = r_asm_mdisassemble (core->assembler, buf+(len-idx), idx); if (strstr(c->buf_asm, "invalid") || strstr(c->buf_asm, ".byte")) { r_asm_code_free(c); continue; } numinstr = 0; asmlen = strlen(c->buf_asm); for(ii = 0; ii < asmlen; ++ii) { if (c->buf_asm[ii] == '\n') ++numinstr; } r_asm_code_free(c); if (numinstr >= n || idx > 32 * n) { break; } } at = addr - idx; hit_count = 0; r_asm_set_pc (core->assembler, at); at = addr-idx; for ( hit_count = 0; hit_count < n; hit_count++) { instrlen = r_asm_disassemble (core->assembler, &op, buf+(len-(addr-at)), addr-at); add_hit_to_hits(hits, at, instrlen, R_TRUE); at += instrlen; } free (buf); return hits; } static RList * r_core_asm_back_disassemble_all(RCore *core, ut64 addr, ut64 len, ut64 max_hit_count, ut32 extra_padding){ RList *hits = r_core_asm_hit_list_new (); RCoreAsmHit dummy_value; RCoreAsmHit *hit = NULL; RAsmOp op; ut8 *buf = (ut8 *)malloc (len + extra_padding); int current_instr_len = 0; ut64 current_instr_addr = addr, current_buf_pos = len - 1, hit_count = 0; memset (&dummy_value, 0, sizeof (RCoreAsmHit)); if (hits == NULL || buf == NULL ){ if (hits) { r_list_purge (hits); free (hits); } free (buf); return NULL; } if (r_io_read_at (core->io, addr-(len+extra_padding), buf, len+extra_padding) != len+extra_padding) { r_list_purge (hits); free (hits); free (buf); return NULL; } if (len == 0){ return hits; } do { if (r_cons_singleton ()->breaked) break; // reset assembler r_asm_set_pc (core->assembler, current_instr_addr); current_instr_len = len - current_buf_pos + extra_padding; IFDBG eprintf("current_buf_pos: 0x%"PFMT64x", current_instr_len: %d\n", current_buf_pos, current_instr_len); current_instr_len = r_asm_disassemble (core->assembler, &op, buf+current_buf_pos, current_instr_len); hit = r_core_asm_hit_new (); hit->addr = current_instr_addr; hit->len = current_instr_len; hit->code = NULL; r_list_add_sorted (hits, hit, ((RListComparator)rcoreasm_address_comparator)); current_buf_pos--; current_instr_addr--; hit_count++; } while ( ((int) current_buf_pos >= 0) && (int)(len - current_buf_pos) >= 0 && hit_count <= max_hit_count); free(buf); return hits; } static RList *r_core_asm_back_disassemble (RCore *core, ut64 addr, int len, ut64 max_hit_count, ut8 disassmble_each_addr, ut32 extra_padding) { RList *hits;; RAsmOp op; ut8 *buf = NULL; ut8 max_invalid_b4_exit = 4, last_num_invalid = 0; int current_instr_len = 0; ut64 current_instr_addr = addr, current_buf_pos = 0, next_buf_pos = len; RCoreAsmHit dummy_value; ut32 hit_count = 0; if (disassmble_each_addr){ return r_core_asm_back_disassemble_all(core, addr, len, max_hit_count, extra_padding+1); } hits = r_core_asm_hit_list_new (); buf = malloc (len + extra_padding); if (hits == NULL || buf == NULL ){ if (hits) { r_list_purge (hits); free (hits); } free (buf); return NULL; } if (r_io_read_at (core->io, (addr + extra_padding)-len, buf, len+extra_padding) != len+extra_padding) { r_list_purge (hits); free (hits); free (buf); return NULL; } // // XXX - This is a heavy handed approach without a // an appropriate btree or hash table for storing // hits, because are using: // 1) Sorted RList with many inserts and searches // 2) Pruning hits to find the most optimal disassembly // greedy approach // 1) Consume previous bytes // 1a) Instruction is invalid (incr current_instr_addr) // 1b) Disasm is perfect // 1c) Disasm is underlap (disasm(current_instr_addr, next_instr_addr - current_instr_addr) short some bytes) // 1d) Disasm is overlap (disasm(current_instr_addr, next_instr_addr - current_instr_addr) over some bytes) memset (&dummy_value, 0, sizeof (RCoreAsmHit)); // disassemble instructions previous to current address, extra_padding can move the location of addr // so we need to account for that with current_buf_pos current_buf_pos = len - extra_padding - 1; next_buf_pos = len + extra_padding - 1; current_instr_addr = addr-1; do { if (r_cons_singleton ()->breaked) break; // reset assembler r_asm_set_pc (core->assembler, current_instr_addr); current_instr_len = next_buf_pos - current_buf_pos; current_instr_len = r_asm_disassemble (core->assembler, &op, buf+current_buf_pos, current_instr_len); IFDBG { ut32 byte_cnt = current_instr_len ? current_instr_len : 1; eprintf("current_instr_addr: 0x%"PFMT64x", current_buf_pos: 0x%"PFMT64x", current_instr_len: %d \n", current_instr_addr, current_buf_pos, current_instr_len); ut8 *hex_str = (ut8*)r_hex_bin2strdup(buf+current_buf_pos, byte_cnt); eprintf("==== current_instr_bytes: %s ",hex_str); if (current_instr_len > 0) eprintf("op.buf_asm: %s\n", op.buf_asm); else eprintf("op.buf_asm: \n"); if (hex_str) free(hex_str); } // disassembly invalid if (current_instr_len == 0 || strstr (op.buf_asm, "invalid")) { if (current_instr_len == 0) current_instr_len = 1; add_hit_to_sorted_hits(hits, current_instr_addr, current_instr_len, /* is_valid */ R_FALSE); hit_count ++; last_num_invalid ++; // disassembly perfect } else if (current_buf_pos + current_instr_len == next_buf_pos) { // i think this may be the only case where an invalid instruction will be // added because handle_forward_disassemble and handle_disassembly_overlap // are only called in cases where a valid instruction has been found. // and they are lazy, since they purge the hit list ut32 purge_results = 0; ut8 is_valid = R_TRUE; IFDBG eprintf(" handling underlap case: current_instr_addr: 0x%"PFMT64x".\n", current_instr_addr); purge_results = prune_hits_in_addr_range(hits, current_instr_addr, current_instr_len, /* is_valid */ R_TRUE); if (purge_results) { handle_forward_disassemble(core, hits, buf, len, current_buf_pos+current_instr_len, current_instr_addr+current_instr_len, addr); hit_count = r_list_length(hits); } add_hit_to_sorted_hits(hits, current_instr_addr, current_instr_len, is_valid); //handle_forward_disassemble(core, hits, buf, len, current_buf_pos+current_instr_len, current_instr_addr+current_instr_len, addr/*end_addr*/); hit_count ++; next_buf_pos = current_buf_pos; last_num_invalid = 0; // disassembly underlap } else if (current_buf_pos + current_instr_len < next_buf_pos) { ut32 purge_results = 0; ut8 is_valid = R_TRUE; purge_results = prune_hits_in_addr_range(hits, current_instr_addr, current_instr_len, /* is_valid */ R_TRUE); add_hit_to_sorted_hits(hits, current_instr_addr, current_instr_len, is_valid); if (hit_count < purge_results ) hit_count = 0; // WTF?? else hit_count -= purge_results; next_buf_pos = current_buf_pos; handle_forward_disassemble(core, hits, buf, len - extra_padding, current_buf_pos+current_instr_len, current_instr_addr+current_instr_len, addr); hit_count = r_list_length(hits); last_num_invalid = 0; // disassembly overlap } else if (current_buf_pos + current_instr_len > next_buf_pos) { //ut64 value = handle_disassembly_overlap(core, hits, buf, len, current_buf_pos, current_instr_addr); next_buf_pos = current_buf_pos; hit_count = r_list_length (hits); last_num_invalid = 0; } // walk backwards by one instruction IFDBG eprintf(" current_instr_addr: 0x%"PFMT64x" current_instr_len: %d next_instr_addr: 0x%04"PFMT64x"\n", current_instr_addr, current_instr_len, next_buf_pos); IFDBG eprintf(" hit count: %d \n", hit_count ); current_instr_addr -= 1; current_buf_pos -= 1; if ( hit_count >= max_hit_count && (last_num_invalid >= max_invalid_b4_exit || last_num_invalid == 0)) break; } while (((int) current_buf_pos >= 0) && (int)(len - current_buf_pos) >= 0); r_asm_set_pc (core->assembler, addr); free (buf); return hits; } R_API RList *r_core_asm_back_disassemble_instr (RCore *core, ut64 addr, int len, ut32 hit_count, ut32 extra_padding){ // extra padding to allow for additional disassembly on border buffer cases ut8 disassmble_each_addr = R_FALSE; return r_core_asm_back_disassemble (core, addr, len, hit_count, disassmble_each_addr, extra_padding); } R_API RList *r_core_asm_back_disassemble_byte (RCore *core, ut64 addr, int len, ut32 hit_count, ut32 extra_padding){ // extra padding to allow for additional disassembly on border buffer cases ut8 disassmble_each_addr = R_TRUE; return r_core_asm_back_disassemble (core, addr, len, hit_count, disassmble_each_addr, extra_padding); } /* Compute the len and the starting address * when disassembling `nb` opcodes backward. */ R_API ut32 r_core_asm_bwdis_len (RCore* core, int* instr_len, ut64* start_addr, ut32 nb) { ut32 instr_run = 0; RCoreAsmHit *hit; RListIter *iter = NULL; RList* hits = r_core_asm_bwdisassemble (core, core->offset, nb, core->blocksize); if (instr_len) *instr_len = 0; if (hits && r_list_length (hits) > 0) { hit = r_list_get_bottom (hits); if (start_addr) *start_addr = hit->addr; r_list_foreach (hits, iter, hit) instr_run += hit->len; if (instr_len) *instr_len = instr_run; } r_list_free (hits); return instr_run; }