From cfdb0ebea17c3d6ecfefddba8454bb25afa6e1e1 Mon Sep 17 00:00:00 2001 From: condret Date: Sun, 2 Feb 2014 02:42:21 +0100 Subject: [PATCH] Add basic whitespace support #592 --- doc/fortunes | 1 + libr/anal/p/Makefile | 2 +- libr/anal/p/anal_malbolge.c | 4 +- libr/anal/p/anal_ws.c | 119 +++++++++++ libr/anal/p/ws.mk | 12 ++ libr/asm/arch/whitespace/wsdis.c | 342 +++++++++++++++++++++++++++++++ libr/asm/p/Makefile | 2 +- libr/asm/p/asm_malbolge.c | 4 +- libr/asm/p/asm_ws.c | 31 +++ libr/asm/p/ws.mk | 9 + libr/include/r_anal.h | 2 +- libr/include/r_asm.h | 1 + plugins.def.cfg | 2 + 13 files changed, 524 insertions(+), 7 deletions(-) create mode 100644 libr/anal/p/anal_ws.c create mode 100644 libr/anal/p/ws.mk create mode 100644 libr/asm/arch/whitespace/wsdis.c create mode 100644 libr/asm/p/asm_ws.c create mode 100644 libr/asm/p/ws.mk diff --git a/doc/fortunes b/doc/fortunes index 532d4d326e..6bd335be01 100644 --- a/doc/fortunes +++ b/doc/fortunes @@ -204,3 +204,4 @@ Wait a minute! I found a bug, self-fixing... OK Hold on, this should never happen! Well this is embarrasing... THIS IS NOT FUNNY +This code was intentionally left blank, try 'e asm.arch = ws' diff --git a/libr/anal/p/Makefile b/libr/anal/p/Makefile index 21d61b891e..6c349f450f 100644 --- a/libr/anal/p/Makefile +++ b/libr/anal/p/Makefile @@ -10,7 +10,7 @@ all: ${ALL_TARGETS} ; ALL_TARGETS= # TODO: rename to enabled plugins -ARCHS=x86_udis.mk ppc.mk arm.mk avr.mk csr.mk dalvik.mk sh.mk ebc.mk gb.mk malbolge.mk +ARCHS=x86_udis.mk ppc.mk arm.mk avr.mk csr.mk dalvik.mk sh.mk ebc.mk gb.mk malbolge.mk ws.mk include $(ARCHS) clean: diff --git a/libr/anal/p/anal_malbolge.c b/libr/anal/p/anal_malbolge.c index 07b28e966a..808a254ef9 100644 --- a/libr/anal/p/anal_malbolge.c +++ b/libr/anal/p/anal_malbolge.c @@ -16,14 +16,14 @@ static int mal_anal(RAnal *anal, RAnalOp *op, ut64 addr, const ut8 *data, int le break; case 39: op->type = R_ANAL_OP_TYPE_ROR; -// op->type2 = R_ANAL_OP_TYPE_LOAD; + op->type2 = R_ANAL_OP_TYPE_LOAD; break; case 40: op->type = R_ANAL_OP_TYPE_LOAD; break; case 62: op->type = R_ANAL_OP_TYPE_XOR; -// op->type2 = R_ANAL_OP_TYPE_LOAD; + op->type2 = R_ANAL_OP_TYPE_LOAD; break; case 81: op->type = R_ANAL_OP_TYPE_TRAP; diff --git a/libr/anal/p/anal_ws.c b/libr/anal/p/anal_ws.c new file mode 100644 index 0000000000..1e90cbf2e7 --- /dev/null +++ b/libr/anal/p/anal_ws.c @@ -0,0 +1,119 @@ +/* radare - LGPL - Copyright 2014 Condret */ + +#include +#include +#include +#include +#include +#include "../../asm/arch/whitespace/wsdis.c" + + +static int ws_anal(RAnal *anal, RAnalOp *op, ut64 addr, const ut8 *data, int len) +{ + memset(op, '\0', sizeof(RAnalOp)); + op->addr = addr; + op->type = R_ANAL_OP_TYPE_UNK; + RAsmOp *aop; + aop = R_NEW0(RAsmOp); + op->size = wsdis(aop, data, len); + if(op->size) { + switch(aop->buf_asm[0]) { + case 'n': + op->type = R_ANAL_OP_TYPE_NOP; + break; + case 'e': + op->type = R_ANAL_OP_TYPE_TRAP; + break; + case 'd': + if(aop->buf_asm[1] == 'u') + op->type = R_ANAL_OP_TYPE_UPUSH; + else + op->type = R_ANAL_OP_TYPE_DIV; + break; + case 'i': + op->type = R_ANAL_OP_TYPE_ILL; + break; + case 'a': + op->type = R_ANAL_OP_TYPE_ADD; + break; + case 'm': + if(aop->buf_asm[1] == 'o') + op->type = R_ANAL_OP_TYPE_MOD; + else + op->type = R_ANAL_OP_TYPE_MUL; + break; + case 'r': + op->type = R_ANAL_OP_TYPE_RET; + break; + case 'l': + op->type = R_ANAL_OP_TYPE_LOAD; + break; + case 'c': + if(aop->buf_asm[1] == 'a') + op->type = R_ANAL_OP_TYPE_CALL; + else + op->type = R_ANAL_OP_TYPE_UPUSH; + break; + case 'j': + if(aop->buf_asm[1] == 'm') + op->type = R_ANAL_OP_TYPE_JMP; + else + op->type = R_ANAL_OP_TYPE_CJMP; + break; + case 'g': + op->type = R_ANAL_OP_TYPE_IO; + break; + case 'p': + if(aop->buf_asm[1] == 'o') { + op->type = R_ANAL_OP_TYPE_POP; + } else { + if(aop->buf_asm[2] == 's') + op->type = R_ANAL_OP_TYPE_PUSH; + else + op->type = R_ANAL_OP_TYPE_IO; + } + break; + case 's': + switch (aop->buf_asm[1]) { + case 'u': + op->type = R_ANAL_OP_TYPE_SUB; + break; + case 't': + op->type = R_ANAL_OP_TYPE_STORE; + break; + case 'l': + op->type = R_ANAL_OP_TYPE_LOAD; // XXX + break; + case 'w': + op->type = R_ANAL_OP_TYPE_ROR; + } + break; + } + } + r_asm_op_free(aop); + return op->size; +} + +struct r_anal_plugin_t r_anal_plugin_ws = { + .name = "ws", + .desc = "Space, tab and linefeed analysis plugin", + .license = "LGPL3", + .arch = R_SYS_ARCH_BF, + .bits = 32, + .init = NULL, + .fini = NULL, + .op = &ws_anal, + .set_reg_profile = NULL, + .fingerprint_bb = NULL, + .fingerprint_fcn = NULL, + .diff_bb = NULL, + .diff_fcn = NULL, + .diff_eval = NULL +}; + +#ifndef CORELIB +struct r_lib_struct_t radare_plugin = { + .type = R_LIB_TYPE_ANAL, + .data = &r_anal_plugin_ws +}; +#endif diff --git a/libr/anal/p/ws.mk b/libr/anal/p/ws.mk new file mode 100644 index 0000000000..33485b1457 --- /dev/null +++ b/libr/anal/p/ws.mk @@ -0,0 +1,12 @@ +OBJ_WS=anal_ws.o + +STATIC_OBJ+=${OBJ_WS} +TARGET_WS=anal_ws.${EXT_SO} + +ALL_TARGETS+=${TARGET_WS} +#LDFLAGS+=-L../../lib -lr_lib +#LDFLAGS+=-L../../syscall -lr_syscall +#LDFLAGS+=-L../../diff -lr_diff + +${TARGET_WS}: ${OBJ_WS} + ${CC} $(call libname,anal_ws) ${LDFLAGS} ${CFLAGS} -o anal_ws.${EXT_SO} ${OBJ_WS} diff --git a/libr/asm/arch/whitespace/wsdis.c b/libr/asm/arch/whitespace/wsdis.c new file mode 100644 index 0000000000..a5dcdb1c67 --- /dev/null +++ b/libr/asm/arch/whitespace/wsdis.c @@ -0,0 +1,342 @@ +#include +#include +#include + +enum { + WS_OP_UNK = 0, + WS_OP_NOP, + WS_OP_PREF, + WS_OP_STACK, + WS_OP_ARITH, + WS_OP_HEAP, + WS_OP_FLOW, + WS_OP_IO +}; + +static int get_ws_pref_optype(ut8 *buf, int len) +{ + if(len) { + switch(buf[0]) { + case ' ': + return WS_OP_STACK; + case '\t': + return WS_OP_PREF; + case 10: + return WS_OP_FLOW; + default: + return WS_OP_NOP; + } + } + return WS_OP_UNK; +} + +static int get_ws_suf_optype(ut8 *buf, int len) +{ + if(len) { + switch(buf[0]) { + case ' ': + return WS_OP_ARITH; + case '\t': + return WS_OP_HEAP; + case 10: + return WS_OP_IO; + default: + return WS_OP_NOP; + } + } + return WS_OP_UNK; +} + +int get_ws_optype(ut8 *buf, int len) +{ + ut8 *ptr; + if(get_ws_pref_optype(buf, len) == WS_OP_PREF) { + ptr = buf+1; + while(get_ws_suf_optype(ptr, len - ( ptr - buf )) == WS_OP_NOP) + ptr++; + return get_ws_suf_optype(ptr, len - ( ptr - buf)); + } + return get_ws_pref_optype(buf, len); +} + +ut8 *get_ws_next_token(ut8 *buf, int len) +{ + ut8 *ret; + ret = buf; + while(len - (ret - buf)) { + switch(*ret) { + case ' ': + case '\t': + case 10: + return ret; + } + ret++; + } + return NULL; +} + +static st32 get_ws_val(ut8 *buf, int len) +{ + ut8 sig; + ut8 *tok; + int i, ret; + ret = 0; + tok = get_ws_next_token(buf, len); + sig = (*tok == '\t'); + len = len - (tok - buf) -1; + for(i=0; i<30; i++) { //XXX : conceptually wrong + tok++; + tok = get_ws_next_token(tok, len); + if(!tok || *tok == 10) { + if(sig) + return ret * (-1); + return ret; + } + ret = (ret << 1); + ret = ret + (*tok == '\t'); + len = len - (tok - buf) -1; + } + if(sig) + return ret * (-1); + return ret; +} + +int test_ws_token_exist(ut8 *buf, ut8 token, int len) +{ + ut8 *ptr; + ptr = get_ws_next_token(buf, len); + while(ptr && *ptr!=token) { + len = len - (ptr - buf); + ptr = get_ws_next_token(ptr + 1, len - 1); + } + if(ptr) + return (ptr-buf); + return -1; +} + +int wsdis(RAsmOp *op, ut8 *buf, int len) +{ + ut8 *ptr; + ptr = buf; + switch(get_ws_optype(buf, len)) { + case WS_OP_UNK: + return op->size = 0; + case WS_OP_NOP: + sprintf(op->buf_asm, "nop"); + return op->size = 1; + case WS_OP_STACK: + ptr++; + if(!get_ws_next_token(ptr, len -1)) + return op->size = 0; + switch(*get_ws_next_token(ptr, len - 1)) { + case ' ': + if(-1 == test_ws_token_exist(get_ws_next_token(ptr, len - 1), 10, len - 1)) + return op->size = 0; + sprintf(op->buf_asm, "push"); + sprintf(&op->buf_asm[4], " %d", get_ws_val(ptr + 1, len - 1)); + return op->size = test_ws_token_exist(ptr - 1, 10, len) + 1; + case 10: + ptr = get_ws_next_token(ptr, len -1) + 1; + ptr = get_ws_next_token(ptr, len - (ptr - buf)); + if(!ptr) + return op->size = 0; + switch(*ptr) { + case ' ': + sprintf(op->buf_asm, "dup"); + break; + case '\t': + sprintf(op->buf_asm, "swap"); + break; + case 10: + sprintf(op->buf_asm, "pop"); + break; + } + return op->size = ptr - buf + 1; + case '\t': + ptr = get_ws_next_token(ptr, len -1) + 1; + ptr = get_ws_next_token(ptr, len - (ptr - buf)); + if(!ptr) + return op->size = 0; + switch(*ptr) { + case ' ': + sprintf(op->buf_asm, "copy"); + break; + case 10: + sprintf(op->buf_asm, "slide"); + break; + case '\t': + sprintf(op->buf_asm, "illegal_stack_t"); + return op->size = ptr - buf + 1; + } + ptr ++; + if(-1 == test_ws_token_exist(ptr, 10, len - (ptr - buf) -1)) { + op->buf_asm[0] = '\0'; //XXX + return op->size = 0; + } + if(strlen(op->buf_asm) < 6) + sprintf(&op->buf_asm[strlen(op->buf_asm)], " %d", get_ws_val(ptr, len - (ptr - buf) - 1)); + return op->size = test_ws_token_exist(ptr, 10, len - (ptr - buf) -1) + ptr - buf + 1; //+1? + } + case WS_OP_HEAP: + ptr = get_ws_next_token(ptr + 1, len - 1) + 1; + ptr = get_ws_next_token(ptr, len - (ptr - buf)); + if(!ptr) + return op->size = 0; + switch(*ptr) { + case ' ': + sprintf(op->buf_asm, "store"); + break; + case '\t': + sprintf(op->buf_asm, "load"); + break; + case 10: + sprintf(op->buf_asm, "illegal_heap"); + break; + } + return op->size = ptr - buf + 1; + case WS_OP_IO: + ptr = get_ws_next_token(ptr + 1, len - 1) + 1; + ptr = get_ws_next_token(ptr, len - (ptr - buf)); + if(!ptr) + return op->size = 0; + switch(*ptr) { + case ' ': + ptr++; + ptr = get_ws_next_token(ptr, len - (ptr - buf)); + if(!ptr) + return op->size = 0; + switch(*ptr) { + case ' ': + sprintf(op->buf_asm, "putc"); + return op->size = ptr - buf + 1; + case '\t': + sprintf(op->buf_asm, "puti"); + return op->size = ptr - buf + 1; + } + break; + case '\t': + ptr++; + ptr = get_ws_next_token(ptr, len - (ptr - buf)); + if(!ptr) + return op->size = 0; + switch(*ptr) { + case ' ': + sprintf(op->buf_asm, "getc"); + return op->size = ptr - buf + 1; + case '\t': + sprintf(op->buf_asm, "geti"); + return op->size = ptr - buf + 1; + } + } + sprintf(op->buf_asm, "illegal_io"); + return op->size = ptr - buf + 1; + case WS_OP_ARITH: + ptr = get_ws_next_token(ptr + 1, len - 1) + 1; + ptr = get_ws_next_token(ptr, len - (ptr - buf)); + if(!ptr) + return op->size = 0; + switch(*ptr) { + case ' ': + ptr++; + ptr = get_ws_next_token(ptr, len - (ptr - buf)); + if(!ptr) + return op->size = 0; + switch(*ptr) { + case ' ': + sprintf(op->buf_asm, "add"); + break; + case '\t': + sprintf(op->buf_asm, "sub"); + break; + case 10: + sprintf(op->buf_asm, "mul"); + break; + } + return op->size = ptr - buf + 1; + case '\t': + ptr++; + ptr = get_ws_next_token(ptr, len - (ptr -buf)); + if(!ptr) + return op->size = 0; + switch(*ptr) { + case ' ': + sprintf(op->buf_asm, "div"); + break; + case '\t': + sprintf(op->buf_asm, "mod"); + break; + case 10: + sprintf(op->buf_asm, "illegal_ar_t"); + } + break; + case 10: + sprintf(op->buf_asm, "illegal_ar"); + } + return op->size = ptr - buf + 1; + case WS_OP_FLOW: + ptr = get_ws_next_token(ptr + 1, len -1); + if(!ptr) //evil + return op->size = 0; + switch(*ptr) { + case 10: + ptr++; + ptr = get_ws_next_token(ptr, len - (ptr - buf)); + if(!ptr) + return op->size = 0; + if(*ptr == 10) + sprintf(op->buf_asm, "exit"); + else + sprintf(op->buf_asm, "illegal_fl_lf"); + return op->size = ptr - buf + 1; + case '\t': + ptr++; + ptr = get_ws_next_token(ptr, len - (ptr - buf)); + if(!ptr) + return op->size = 0; + switch(*ptr) { + case 10: + sprintf(op->buf_asm, "ret"); + return op->size = ptr - buf + 1; + case '\t': + sprintf(op->buf_asm, "jn"); + break; + case ' ': + sprintf(op->buf_asm, "jz"); + break; + } + ptr++; + if(-1 == test_ws_token_exist(ptr, 10, len - (ptr - buf))) { + op->buf_asm[0] = '\0'; + return op->size = 0; + } + if(strlen(op->buf_asm) == 2) + sprintf(&op->buf_asm[2], " %d", get_ws_val(ptr, len - (ptr- buf) -1)); + return op->size = ptr - buf + test_ws_token_exist(ptr, 10, len - (ptr - buf)) + 1; + case ' ': + ptr++; + ptr = get_ws_next_token(ptr, len - (ptr - buf)); + if(!ptr) + return op->size = 0; + switch(*ptr) { + case 10: + sprintf(op->buf_asm, "jmp"); + break; + case '\t': + sprintf(op->buf_asm, "call"); + break; + case ' ': + sprintf(op->buf_asm, "mark"); + break; + } + ptr++; + if(-1 == test_ws_token_exist(ptr, 10, len - (ptr - buf))) { + op->buf_asm[0] = '\0'; + return op->size = 0; + } + sprintf(&op->buf_asm[strlen(op->buf_asm)], " %d", get_ws_val(ptr, len - (ptr - buf) -1)); + return op->size = ptr - buf + test_ws_token_exist(ptr, 10, len - (ptr - buf)) + 1; + } + } + sprintf(op->buf_asm,"wtf"); + return op->size = 0; +} diff --git a/libr/asm/p/Makefile b/libr/asm/p/Makefile index ad98a2abbe..1deacb8f71 100644 --- a/libr/asm/p/Makefile +++ b/libr/asm/p/Makefile @@ -13,7 +13,7 @@ ALL_TARGETS= # TODO: rename to enabled plugins ARCHS=mips.mk sparc.mk java.mk bf.mk arm.mk dalvik.mk x86_as.mk x86_nz.mk ARCHS+=ppc.mk x86_olly.mk x86.mk csr.mk x86_nasm.mk psosvm.mk avr.mk -ARCHS+=msil.mk sh.mk arm_winedbg.mk c55plus.mk gb.mk snes.mk ebc.mk malbolge.mk +ARCHS+=msil.mk sh.mk arm_winedbg.mk c55plus.mk gb.mk snes.mk ebc.mk malbolge.mk ws.mk include $(ARCHS) all: ${ALL_TARGETS} diff --git a/libr/asm/p/asm_malbolge.c b/libr/asm/p/asm_malbolge.c index bbbfc614db..82e6060766 100644 --- a/libr/asm/p/asm_malbolge.c +++ b/libr/asm/p/asm_malbolge.c @@ -17,13 +17,13 @@ static int mal_dis(RAsmOp *op, ut64 c, ut8 *buf, ut64 len) sprintf(op->buf_asm, "in a"); break; case 39: - sprintf(op->buf_asm, "rotr [d],\tmov a, [d]"); + sprintf(op->buf_asm, "rotr [d], mov a, [d]"); break; case 40: sprintf(op->buf_asm, "mov d, [d]"); break; case 62: - sprintf(op->buf_asm, "crz [d], a,\tmov a, [d]"); + sprintf(op->buf_asm, "crz [d], a, mov a, [d]"); break; case 81: sprintf(op->buf_asm, "end"); diff --git a/libr/asm/p/asm_ws.c b/libr/asm/p/asm_ws.c new file mode 100644 index 0000000000..de91cc55c8 --- /dev/null +++ b/libr/asm/p/asm_ws.c @@ -0,0 +1,31 @@ +/* radare - LGPL - Copyright 2014 - condret */ + +#include +#include +#include +#include +#include "../arch/whitespace/wsdis.c" + +int disassemble(RAsm *a, RAsmOp *op, const ut8 *buf, int len) { + return wsdis(op, buf, len); +} + +RAsmPlugin r_asm_plugin_ws = { + .name = "ws", + .desc = "Whitespace disassembly plugin", + .arch = "whitespace", + .license = "LGPL3", + .bits = 32, + .init = NULL, + .fini = NULL, + .disassemble = &disassemble, + .modify = NULL, + .assemble = NULL +}; + +#ifndef CORELIB +struct r_lib_struct_t radare_plugin = { + .type = R_LIB_TYPE_ASM, + .data = &r_asm_plugin_ws +}; +#endif diff --git a/libr/asm/p/ws.mk b/libr/asm/p/ws.mk new file mode 100644 index 0000000000..c372ca00ee --- /dev/null +++ b/libr/asm/p/ws.mk @@ -0,0 +1,9 @@ +OBJ_WS=asm_ws.o + +STATIC_OBJ+=${OBJ_WS} +TARGET_WS=asm_ws.${EXT_SO} + +ALL_TARGETS+=${TARGET_WS} + +${TARGET_WS}: ${OBJ_WS} + ${CC} ${call libname,asm_ws} ${CFLAGS} -o ${TARGET_WS} ${OBJ_WS} diff --git a/libr/include/r_anal.h b/libr/include/r_anal.h index 3264d976d4..ebfccaa7de 100644 --- a/libr/include/r_anal.h +++ b/libr/include/r_anal.h @@ -1116,7 +1116,7 @@ extern RAnalPlugin r_anal_plugin_ebc; extern RAnalPlugin r_anal_plugin_gb; extern RAnalPlugin r_anal_plugin_nios2; extern RAnalPlugin r_anal_plugin_malbolge; - +extern RAnalPlugin r_anal_plugin_ws; #ifdef __cplusplus } #endif diff --git a/libr/include/r_asm.h b/libr/include/r_asm.h index 475a6290fe..68f9c66406 100644 --- a/libr/include/r_asm.h +++ b/libr/include/r_asm.h @@ -187,6 +187,7 @@ extern RAsmPlugin r_asm_plugin_snes; extern RAsmPlugin r_asm_plugin_ebc; extern RAsmPlugin r_asm_plugin_nios2; extern RAsmPlugin r_asm_plugin_malbolge; +extern RAsmPlugin r_asm_plugin_ws; #endif #ifdef __cplusplus diff --git a/plugins.def.cfg b/plugins.def.cfg index 8b901f06a6..8089cc47f5 100644 --- a/plugins.def.cfg +++ b/plugins.def.cfg @@ -34,6 +34,7 @@ asm.gb asm.snes asm.ebc asm.malbolge +asm.ws anal.sh anal.x86_udis anal.z80 @@ -55,6 +56,7 @@ anal.sparc anal.ebc anal.gb anal.malbolge +anal.ws bin.any bin.bios bin.bf