capstone/cs.c

845 lines
19 KiB
C

/* Capstone Disassembly Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2014 */
#if defined (WIN32) || defined (WIN64) || defined (_WIN32) || defined (_WIN64)
#pragma warning(disable:4996)
#endif
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <capstone.h>
#include "utils.h"
#include "MCRegisterInfo.h"
#ifdef CAPSTONE_USE_SYS_DYN_MEM
#define INSN_CACHE_SIZE 32
#else
// reduce stack variable size for kernel/firmware
#define INSN_CACHE_SIZE 8
#endif
// default SKIPDATA mnemonic
#define SKIPDATA_MNEM ".byte"
cs_err (*arch_init[MAX_ARCH])(cs_struct *) = { NULL };
cs_err (*arch_option[MAX_ARCH]) (cs_struct *, cs_opt_type, size_t value) = { NULL };
void (*arch_destroy[MAX_ARCH]) (cs_struct *) = { NULL };
extern void ARM_enable(void);
extern void AArch64_enable(void);
extern void Mips_enable(void);
extern void X86_enable(void);
extern void PPC_enable(void);
extern void Sparc_enable(void);
extern void SystemZ_enable(void);
static void archs_enable(void)
{
static bool initialized = false;
if (initialized)
return;
#ifdef CAPSTONE_HAS_ARM
ARM_enable();
#endif
#ifdef CAPSTONE_HAS_ARM64
AArch64_enable();
#endif
#ifdef CAPSTONE_HAS_MIPS
Mips_enable();
#endif
#ifdef CAPSTONE_HAS_POWERPC
PPC_enable();
#endif
#ifdef CAPSTONE_HAS_SPARC
Sparc_enable();
#endif
#ifdef CAPSTONE_HAS_SYSZ
SystemZ_enable();
#endif
#ifdef CAPSTONE_HAS_X86
X86_enable();
#endif
initialized = true;
}
unsigned int all_arch = 0;
#ifdef CAPSTONE_USE_SYS_DYN_MEM
cs_malloc_t cs_mem_malloc = malloc;
cs_calloc_t cs_mem_calloc = calloc;
cs_realloc_t cs_mem_realloc = realloc;
cs_free_t cs_mem_free = free;
cs_vsnprintf_t cs_vsnprintf = vsnprintf;
#else
cs_malloc_t cs_mem_malloc = NULL;
cs_calloc_t cs_mem_calloc = NULL;
cs_realloc_t cs_mem_realloc = NULL;
cs_free_t cs_mem_free = NULL;
cs_vsnprintf_t cs_vsnprintf = NULL;
#endif
unsigned int cs_version(int *major, int *minor)
{
archs_enable();
if (major != NULL && minor != NULL) {
*major = CS_API_MAJOR;
*minor = CS_API_MINOR;
}
return (CS_API_MAJOR << 8) + CS_API_MINOR;
}
bool cs_support(int query)
{
archs_enable();
if (query == CS_ARCH_ALL)
return all_arch == ((1 << CS_ARCH_ARM) | (1 << CS_ARCH_ARM64) |
(1 << CS_ARCH_MIPS) | (1 << CS_ARCH_X86) |
(1 << CS_ARCH_PPC) | (1 << CS_ARCH_SPARC) |
(1 << CS_ARCH_SYSZ));
if ((unsigned int)query < CS_ARCH_MAX)
return all_arch & (1 << query);
if (query == CS_SUPPORT_DIET) {
#ifdef CAPSTONE_DIET
return true;
#else
return false;
#endif
}
if (query == CS_SUPPORT_X86_REDUCE) {
#if defined(CAPSTONE_HAS_X86) && defined(CAPSTONE_X86_REDUCE)
return true;
#else
return false;
#endif
}
// unsupported query
return false;
}
cs_err cs_errno(csh handle)
{
struct cs_struct *ud = NULL;
if (!handle)
return CS_ERR_CSH;
ud = (struct cs_struct *)(uintptr_t)handle;
return ud->errnum;
}
const char *cs_strerror(cs_err code)
{
switch(code) {
default:
return "Unknown error code";
case CS_ERR_OK:
return "OK (CS_ERR_OK)";
case CS_ERR_MEM:
return "Out of memory (CS_ERR_MEM)";
case CS_ERR_ARCH:
return "Invalid architecture (CS_ERR_ARCH)";
case CS_ERR_HANDLE:
return "Invalid handle (CS_ERR_HANDLE)";
case CS_ERR_CSH:
return "Invalid csh (CS_ERR_CSH)";
case CS_ERR_MODE:
return "Invalid mode (CS_ERR_MODE)";
case CS_ERR_OPTION:
return "Invalid option (CS_ERR_OPTION)";
case CS_ERR_DETAIL:
return "Details are unavailable (CS_ERR_DETAIL)";
case CS_ERR_MEMSETUP:
return "Dynamic memory management uninitialized (CS_ERR_MEMSETUP)";
case CS_ERR_VERSION:
return "Different API version between core & binding (CS_ERR_VERSION)";
case CS_ERR_DIET:
return "Information irrelevant in diet engine (CS_ERR_DIET)";
case CS_ERR_SKIPDATA:
return "Information irrelevant for 'data' instruction in SKIPDATA mode (CS_ERR_SKIPDATA)";
}
}
cs_err cs_open(cs_arch arch, cs_mode mode, csh *handle)
{
cs_err err;
struct cs_struct *ud = NULL;
if (!cs_mem_malloc || !cs_mem_calloc || !cs_mem_realloc || !cs_mem_free || !cs_vsnprintf)
// Error: before cs_open(), dynamic memory management must be initialized
// with cs_option(CS_OPT_MEM)
return CS_ERR_MEMSETUP;
archs_enable();
if (arch < CS_ARCH_MAX && arch_init[arch]) {
ud = cs_mem_calloc(1, sizeof(*ud));
if (!ud) {
// memory insufficient
return CS_ERR_MEM;
}
ud->errnum = CS_ERR_OK;
ud->arch = arch;
ud->mode = mode;
ud->big_endian = mode & CS_MODE_BIG_ENDIAN;
// by default, do not break instruction into details
ud->detail = CS_OPT_OFF;
// default skipdata setup
ud->skipdata_setup.mnemonic = SKIPDATA_MNEM;
err = arch_init[ud->arch](ud);
if (err) {
cs_mem_free(ud);
*handle = 0;
return err;
}
*handle = (uintptr_t)ud;
return CS_ERR_OK;
} else {
*handle = 0;
return CS_ERR_ARCH;
}
}
cs_err cs_close(csh *handle)
{
struct cs_struct *ud = NULL;
if (*handle == 0)
// invalid handle
return CS_ERR_CSH;
ud = (struct cs_struct *)(*handle);
if (ud->printer_info)
cs_mem_free(ud->printer_info);
// arch_destroy[ud->arch](ud);
cs_mem_free(ud->insn_cache);
memset(ud, 0, sizeof(*ud));
cs_mem_free(ud);
// invalidate this handle by ZERO out its value.
// this is to make sure it is unusable after cs_close()
*handle = 0;
return CS_ERR_OK;
}
// fill insn with mnemonic & operands info
static void fill_insn(struct cs_struct *handle, cs_insn *insn, char *buffer, MCInst *mci,
PostPrinter_t postprinter, const uint8_t *code)
{
char *sp = NULL;
if (handle->detail) {
// avoiding copy insn->detail
memcpy(insn, &mci->flat_insn, sizeof(*insn) - sizeof(insn->detail));
// NOTE: copy details in 2 chunks, since union is always put at address divisible by 8
// copy from @regs_read until @arm
memcpy(insn->detail, (void *)((uintptr_t)(&(mci->flat_insn)) + offsetof(cs_insn_flat, regs_read)),
offsetof(cs_detail, arm) - offsetof(cs_detail, regs_read));
// then copy from @arm until end
memcpy((void *)((uintptr_t)(insn->detail) + offsetof(cs_detail, arm)),
(void *)((uintptr_t)(&(mci->flat_insn)) + offsetof(cs_insn_flat, arm)),
sizeof(cs_detail) - offsetof(cs_detail, arm));
} else {
insn->address = mci->address;
insn->size = (uint16_t)mci->insn_size;
}
// fill the instruction bytes
memcpy(insn->bytes, code, MIN(sizeof(insn->bytes), insn->size));
// map internal instruction opcode to public insn ID
if (handle->insn_id)
handle->insn_id(handle, insn, MCInst_getOpcode(mci));
// alias instruction might have ID saved in OpcodePub
if (MCInst_getOpcodePub(mci))
insn->id = MCInst_getOpcodePub(mci);
// post printer handles some corner cases (hacky)
if (postprinter)
postprinter((csh)handle, insn, buffer);
#ifndef CAPSTONE_DIET
// fill in mnemonic & operands
// find first space or tab
sp = buffer;
for (sp = buffer; *sp; sp++) {
if (*sp == ' '||*sp == '\t')
break;
if (*sp == '|')
*sp = ' ';
}
if (*sp) {
*sp = '\0';
// find the next non-space char
sp++;
for (; ((*sp == ' ') || (*sp == '\t')); sp++);
strncpy(insn->op_str, sp, sizeof(insn->op_str) - 1);
insn->op_str[sizeof(insn->op_str) - 1] = '\0';
} else
insn->op_str[0] = '\0';
strncpy(insn->mnemonic, buffer, sizeof(insn->mnemonic) - 1);
insn->mnemonic[sizeof(insn->mnemonic) - 1] = '\0';
#endif
}
// how many bytes will we skip when encountering data (CS_OPT_SKIPDATA)?
// this very much depends on instruction alignment requirement of each arch.
static uint8_t skipdata_size(cs_struct *handle)
{
switch(handle->arch) {
default:
// should never reach
return -1;
case CS_ARCH_ARM:
// skip 2 bytes on Thumb mode.
if (handle->mode & CS_MODE_THUMB)
return 2;
// otherwise, skip 4 bytes
return 4;
case CS_ARCH_ARM64:
case CS_ARCH_MIPS:
case CS_ARCH_PPC:
case CS_ARCH_SPARC:
// skip 4 bytes
return 4;
case CS_ARCH_SYSZ:
// SystemZ instruction's length can be 2, 4 or 6 bytes,
// so we just skip 2 bytes
return 2;
case CS_ARCH_X86:
// X86 has no restriction on instruction alignment
return 1;
}
}
cs_err cs_option(csh ud, cs_opt_type type, size_t value)
{
struct cs_struct *handle = NULL;
archs_enable();
// cs_option() can be called with NULL handle just for CS_OPT_MEM
// This is supposed to be executed before all other APIs (even cs_open())
if (type == CS_OPT_MEM) {
cs_opt_mem *mem = (cs_opt_mem *)value;
cs_mem_malloc = mem->malloc;
cs_mem_calloc = mem->calloc;
cs_mem_realloc = mem->realloc;
cs_mem_free = mem->free;
cs_vsnprintf = mem->vsnprintf;
return CS_ERR_OK;
}
handle = (struct cs_struct *)(uintptr_t)ud;
if (!handle)
return CS_ERR_CSH;
switch(type) {
default:
break;
case CS_OPT_DETAIL:
handle->detail = value;
return CS_ERR_OK;
case CS_OPT_SKIPDATA:
handle->skipdata = (value == CS_OPT_ON);
if (handle->skipdata) {
if (handle->skipdata_size == 0) {
// set the default skipdata size
handle->skipdata_size = skipdata_size(handle);
}
}
return CS_ERR_OK;
case CS_OPT_SKIPDATA_SETUP:
if (value)
handle->skipdata_setup = *((cs_opt_skipdata *)value);
return CS_ERR_OK;
}
return arch_option[handle->arch](handle, type, value);
}
// generate @op_str for data instruction of SKIPDATA
static void skipdata_opstr(char *opstr, const uint8_t *buffer, size_t size)
{
char *p = opstr;
int len;
size_t i;
if (!size) {
opstr[0] = '\0';
return;
}
len = sprintf(p, "0x%02x", buffer[0]);
p+= len;
for(i = 1; i < size; i++) {
len = sprintf(p, ", 0x%02x", buffer[i]);
p+= len;
}
}
// dynamicly allocate memory to contain disasm insn
// NOTE: caller must free() the allocated memory itself to avoid memory leaking
size_t cs_disasm_ex(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, size_t count, cs_insn **insn)
{
struct cs_struct *handle = (struct cs_struct *)(uintptr_t)ud;
MCInst mci;
uint16_t insn_size;
size_t c = 0;
unsigned int f = 0;
cs_insn insn_cache[INSN_CACHE_SIZE];
void *total = NULL;
size_t total_size = 0;
bool r;
void *tmp;
size_t skipdata_bytes;
uint64_t offset_org;
if (!handle) {
// FIXME: how to handle this case:
// handle->errnum = CS_ERR_HANDLE;
return 0;
}
handle->errnum = CS_ERR_OK;
memset(insn_cache, 0, sizeof(insn_cache));
// save the original offset for SKIPDATA
offset_org = offset;
while (size > 0) {
MCInst_Init(&mci);
mci.csh = handle;
r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info);
if (r) {
SStream ss;
SStream_Init(&ss);
// relative branches need to know the address & size of current insn
mci.insn_size = insn_size;
mci.address = offset;
if (handle->detail) {
// save all the information for non-detailed mode
mci.flat_insn.address = offset;
mci.flat_insn.size = insn_size;
// allocate memory for @detail pointer
insn_cache[f].detail = cs_mem_calloc(1, sizeof(cs_detail));
}
handle->printer(&mci, &ss, handle->printer_info);
fill_insn(handle, &insn_cache[f], ss.buffer, &mci, handle->post_printer, buffer);
f++;
if (f == ARR_SIZE(insn_cache)) {
// resize total to contain newly disasm insns
total_size += (sizeof(cs_insn) * INSN_CACHE_SIZE);
tmp = cs_mem_realloc(total, total_size);
if (tmp == NULL) { // insufficient memory
cs_mem_free(total);
handle->errnum = CS_ERR_MEM;
return 0;
}
total = tmp;
memcpy((void*)((uintptr_t)total + total_size - sizeof(insn_cache)), insn_cache, sizeof(insn_cache));
// reset f back to 0
f = 0;
}
c++;
buffer += insn_size;
size -= insn_size;
offset += insn_size;
if (count > 0 && c == count)
break;
} else {
// encounter a broken instruction
// if there is no request to skip data, or remaining data is too small,
// then bail out
if (!handle->skipdata || handle->skipdata_size > size)
break;
if (handle->skipdata_setup.callback) {
skipdata_bytes = handle->skipdata_setup.callback(buffer, offset - offset_org,
handle->skipdata_setup.user_data);
if (skipdata_bytes > size)
// remaining data is not enough
break;
if (!skipdata_bytes)
// user requested not to skip data, so bail out
break;
} else
skipdata_bytes = handle->skipdata_size;
// we have to skip some amount of data, depending on arch & mode
insn_cache[f].id = 0; // invalid ID for this "data" instruction
insn_cache[f].address = offset;
insn_cache[f].size = skipdata_bytes;
memcpy(insn_cache[f].bytes, buffer, skipdata_bytes);
strncpy(insn_cache[f].mnemonic, handle->skipdata_setup.mnemonic,
sizeof(insn_cache[f].mnemonic) - 1);
skipdata_opstr(insn_cache[f].op_str, buffer, skipdata_bytes);
insn_cache[f].detail = NULL;
f++;
if (f == ARR_SIZE(insn_cache)) {
// resize total to contain newly disasm insns
total_size += (sizeof(cs_insn) * INSN_CACHE_SIZE);
tmp = cs_mem_realloc(total, total_size);
if (tmp == NULL) { // insufficient memory
cs_mem_free(total);
handle->errnum = CS_ERR_MEM;
return 0;
}
total = tmp;
memcpy((void*)((uintptr_t)total + total_size - sizeof(insn_cache)), insn_cache, sizeof(insn_cache));
// reset f back to 0
f = 0;
}
buffer += skipdata_bytes;
size -= skipdata_bytes;
offset += skipdata_bytes;
c++;
}
}
if (f) {
// resize total to contain newly disasm insns
void *tmp = cs_mem_realloc(total, total_size + f * sizeof(insn_cache[0]));
if (tmp == NULL) { // insufficient memory
cs_mem_free(total);
handle->errnum = CS_ERR_MEM;
return 0;
}
total = tmp;
memcpy((void*)((uintptr_t)total + total_size), insn_cache, f * sizeof(insn_cache[0]));
}
*insn = total;
return c;
}
void cs_free(cs_insn *insn, size_t count)
{
size_t i;
// free all detail pointers
for (i = 0; i < count; i++)
cs_mem_free(insn[i].detail);
// then free pointer to cs_insn array
cs_mem_free(insn);
}
// return friendly name of regiser in a string
const char *cs_reg_name(csh ud, unsigned int reg)
{
struct cs_struct *handle = (struct cs_struct *)(uintptr_t)ud;
if (!handle || handle->reg_name == NULL) {
return NULL;
}
return handle->reg_name(ud, reg);
}
const char *cs_insn_name(csh ud, unsigned int insn)
{
struct cs_struct *handle = (struct cs_struct *)(uintptr_t)ud;
if (!handle || handle->insn_name == NULL) {
return NULL;
}
return handle->insn_name(ud, insn);
}
static bool arr_exist(unsigned char *arr, unsigned char max, unsigned int id)
{
int i;
for (i = 0; i < max; i++) {
if (arr[i] == id)
return true;
}
return false;
}
bool cs_insn_group(csh ud, cs_insn *insn, unsigned int group_id)
{
struct cs_struct *handle = NULL;
if (!ud)
return false;
handle = (struct cs_struct *)(uintptr_t)ud;
if (!handle->detail) {
handle->errnum = CS_ERR_DETAIL;
return false;
}
if(!insn->id) {
handle->errnum = CS_ERR_SKIPDATA;
return false;
}
if(!insn->detail) {
handle->errnum = CS_ERR_DETAIL;
return false;
}
return arr_exist(insn->detail->groups, insn->detail->groups_count, group_id);
}
bool cs_reg_read(csh ud, cs_insn *insn, unsigned int reg_id)
{
struct cs_struct *handle = NULL;
if (!ud)
return false;
handle = (struct cs_struct *)(uintptr_t)ud;
if (!handle->detail) {
handle->errnum = CS_ERR_DETAIL;
return false;
}
if(!insn->id) {
handle->errnum = CS_ERR_SKIPDATA;
return false;
}
if(!insn->detail) {
handle->errnum = CS_ERR_DETAIL;
return false;
}
return arr_exist(insn->detail->regs_read, insn->detail->regs_read_count, reg_id);
}
bool cs_reg_write(csh ud, cs_insn *insn, unsigned int reg_id)
{
struct cs_struct *handle = NULL;
if (!ud)
return false;
handle = (struct cs_struct *)(uintptr_t)ud;
if (!handle->detail) {
handle->errnum = CS_ERR_DETAIL;
return false;
}
if(!insn->id) {
handle->errnum = CS_ERR_SKIPDATA;
return false;
}
if(!insn->detail) {
handle->errnum = CS_ERR_DETAIL;
return false;
}
return arr_exist(insn->detail->regs_write, insn->detail->regs_write_count, reg_id);
}
int cs_op_count(csh ud, cs_insn *insn, unsigned int op_type)
{
struct cs_struct *handle = NULL;
unsigned int count = 0, i = 0;
if (!ud)
return -1;
handle = (struct cs_struct *)(uintptr_t)ud;
if (!handle->detail) {
handle->errnum = CS_ERR_DETAIL;
return -1;
}
if(!insn->id) {
handle->errnum = CS_ERR_SKIPDATA;
return -1;
}
if(!insn->detail) {
handle->errnum = CS_ERR_DETAIL;
return -1;
}
handle->errnum = CS_ERR_OK;
switch (handle->arch) {
default:
handle->errnum = CS_ERR_HANDLE;
return -1;
case CS_ARCH_ARM:
for (i = 0; i < insn->detail->arm.op_count; i++)
if (insn->detail->arm.operands[i].type == (arm_op_type)op_type)
count++;
break;
case CS_ARCH_ARM64:
for (i = 0; i < insn->detail->arm64.op_count; i++)
if (insn->detail->arm64.operands[i].type == (arm64_op_type)op_type)
count++;
break;
case CS_ARCH_X86:
for (i = 0; i < insn->detail->x86.op_count; i++)
if (insn->detail->x86.operands[i].type == (x86_op_type)op_type)
count++;
break;
case CS_ARCH_MIPS:
for (i = 0; i < insn->detail->mips.op_count; i++)
if (insn->detail->mips.operands[i].type == (mips_op_type)op_type)
count++;
break;
case CS_ARCH_PPC:
for (i = 0; i < insn->detail->ppc.op_count; i++)
if (insn->detail->ppc.operands[i].type == (ppc_op_type)op_type)
count++;
break;
case CS_ARCH_SPARC:
for (i = 0; i < insn->detail->sparc.op_count; i++)
if (insn->detail->sparc.operands[i].type == (sparc_op_type)op_type)
count++;
break;
case CS_ARCH_SYSZ:
for (i = 0; i < insn->detail->sysz.op_count; i++)
if (insn->detail->sysz.operands[i].type == (sysz_op_type)op_type)
count++;
break;
}
return count;
}
int cs_op_index(csh ud, cs_insn *insn, unsigned int op_type,
unsigned int post)
{
struct cs_struct *handle = NULL;
unsigned int count = 0, i = 0;
if (!ud)
return -1;
handle = (struct cs_struct *)(uintptr_t)ud;
if (!handle->detail) {
handle->errnum = CS_ERR_DETAIL;
return -1;
}
if(!insn->id) {
handle->errnum = CS_ERR_SKIPDATA;
return -1;
}
if(!insn->detail) {
handle->errnum = CS_ERR_DETAIL;
return -1;
}
handle->errnum = CS_ERR_OK;
switch (handle->arch) {
default:
handle->errnum = CS_ERR_HANDLE;
return -1;
case CS_ARCH_ARM:
for (i = 0; i < insn->detail->arm.op_count; i++) {
if (insn->detail->arm.operands[i].type == (arm_op_type)op_type)
count++;
if (count == post)
return i;
}
break;
case CS_ARCH_ARM64:
for (i = 0; i < insn->detail->arm64.op_count; i++) {
if (insn->detail->arm64.operands[i].type == (arm64_op_type)op_type)
count++;
if (count == post)
return i;
}
break;
case CS_ARCH_X86:
for (i = 0; i < insn->detail->x86.op_count; i++) {
if (insn->detail->x86.operands[i].type == (x86_op_type)op_type)
count++;
if (count == post)
return i;
}
break;
case CS_ARCH_MIPS:
for (i = 0; i < insn->detail->mips.op_count; i++) {
if (insn->detail->mips.operands[i].type == (mips_op_type)op_type)
count++;
if (count == post)
return i;
}
break;
case CS_ARCH_PPC:
for (i = 0; i < insn->detail->ppc.op_count; i++) {
if (insn->detail->ppc.operands[i].type == (ppc_op_type)op_type)
count++;
if (count == post)
return i;
}
break;
case CS_ARCH_SPARC:
for (i = 0; i < insn->detail->sparc.op_count; i++) {
if (insn->detail->sparc.operands[i].type == (sparc_op_type)op_type)
count++;
if (count == post)
return i;
}
break;
case CS_ARCH_SYSZ:
for (i = 0; i < insn->detail->sysz.op_count; i++) {
if (insn->detail->sysz.operands[i].type == (sysz_op_type)op_type)
count++;
if (count == post)
return i;
}
break;
}
return -1;
}