Improve brainfuck VM using ESIL and arch restrictions ##arch

* Maximum instruction size must be 0xff instead of 32, to handle long jumps
* Registers are 32-bit, so the internal stack must use 32-bit words
* Fix loop logic using simplified conditionals
This commit is contained in:
pancake 2023-04-01 10:27:17 +02:00
parent 58cc69f0dd
commit d8420d4dca
11 changed files with 107 additions and 72 deletions

View File

@ -331,7 +331,8 @@ R_API bool r_anal_set_triplet(RAnal *anal, R_NULLABLE const char *os, R_NULLABLE
if (bits != anal->config->bits) { if (bits != anal->config->bits) {
r_anal_set_bits (anal, bits); r_anal_set_bits (anal, bits);
} }
return r_anal_use (anal, arch); return true;
// return r_anal_use (anal, arch);
} }
// copypasta from core/cbin.c // copypasta from core/cbin.c
@ -402,6 +403,7 @@ R_API ut8 *r_anal_mask(RAnal *anal, int size, const ut8 *data, ut64 at) {
memset (ret, 0xff, size); memset (ret, 0xff, size);
// TODO: use the bitfliping thing to guess the mask in here
while (idx < size) { while (idx < size) {
if ((oplen = r_anal_op (anal, op, at, data + idx, size - idx, R_ARCH_OP_MASK_BASIC)) < 1) { if ((oplen = r_anal_op (anal, op, at, data + idx, size - idx, R_ARCH_OP_MASK_BASIC)) < 1) {
break; break;

View File

@ -156,11 +156,19 @@ static bool decode(RArchSession *as, RAnalOp *op, RArchDecodeMask mask) {
switch (buf[0]) { switch (buf[0]) {
case '[': case '[':
op->type = R_ANAL_OP_TYPE_CJMP; op->type = R_ANAL_OP_TYPE_CJMP;
// read ahead to find the ] bracket
op->jump = dst;
op->fail = addr + 1; op->fail = addr + 1;
buf = r_mem_dup ((void *)buf, len); RArch *a = as->arch;
if (!buf) { RIOReadAt read_at = NULL;
break; RBin *bin = R_UNWRAP2 (a, binb.bin);
if (bin && bin->iob.read_at) {
RIOReadAt read_at = bin->iob.read_at;
buf = malloc (0xff);
read_at (bin->iob.io, op->addr, buf, 0xff);
} }
r_strbuf_set (&op->esil, "1,pc,-,brk,=[4],4,brk,+=");
#if 1
{ {
const ut8 *p = buf + 1; const ut8 *p = buf + 1;
int lev = 0, i = 1; int lev = 0, i = 1;
@ -171,12 +179,10 @@ static bool decode(RArchSession *as, RAnalOp *op, RArchDecodeMask mask) {
} }
if (*p == ']') { if (*p == ']') {
lev--; lev--;
if (lev==-1) { if (lev == -1) {
dst = addr + (size_t)(p - buf) + 1; dst = addr + (size_t)(p - buf) + 1;
op->jump = dst; op->jump = dst;
r_strbuf_setf (&op->esil, r_strbuf_set (&op->esil, "1,pc,-,brk,=[4],4,brk,+=,");
"0x%"PFMT64x",brk,=[1],brk,++=,"
"ptr,[1],!,?{,0x%"PFMT64x",pc,=,brk,--=,}", addr, dst);
goto beach; goto beach;
} }
} }
@ -184,13 +190,16 @@ static bool decode(RArchSession *as, RAnalOp *op, RArchDecodeMask mask) {
op->type = R_ANAL_OP_TYPE_ILL; op->type = R_ANAL_OP_TYPE_ILL;
goto beach; goto beach;
} }
if (i == len - 1) { if (read_at && i == len - 1) {
break;
// XXX unnecessary just break
int new_buf_len = len + 1 + BUFSIZE_INC; int new_buf_len = len + 1 + BUFSIZE_INC;
ut8 *new_buf = calloc (new_buf_len, 1); ut8 *new_buf = calloc (new_buf_len, 1);
if (new_buf) { if (new_buf) {
free (buf); free (buf);
memcpy (new_buf, op->bytes, new_buf_len); memcpy (new_buf, op->bytes, new_buf_len);
buf = new_buf; buf = new_buf;
read_at (bin->iob.io, op->addr + i, buf + i, 0xff);
p = buf + i; p = buf + i;
len += BUFSIZE_INC; len += BUFSIZE_INC;
} }
@ -201,11 +210,11 @@ static bool decode(RArchSession *as, RAnalOp *op, RArchDecodeMask mask) {
} }
beach: beach:
free (buf); free (buf);
#endif
break; break;
case ']': case ']':
op->type = R_ANAL_OP_TYPE_UJMP; op->type = R_ANAL_OP_TYPE_UJMP;
// XXX This is wrong esil r_strbuf_set (&op->esil, "4,brk,-=,ptr,[1],?{,brk,[4],pc,=,}");
r_strbuf_set (&op->esil, "brk,--=,brk,[1],pc,=");
break; break;
case '>': case '>':
op->type = R_ANAL_OP_TYPE_ADD; op->type = R_ANAL_OP_TYPE_ADD;
@ -230,11 +239,11 @@ beach:
case '.': case '.':
// print element in stack to screen // print element in stack to screen
op->type = R_ANAL_OP_TYPE_STORE; op->type = R_ANAL_OP_TYPE_STORE;
r_strbuf_set (&op->esil, "ptr,[1],scr,=[1],scr,++="); r_strbuf_set (&op->esil, "ptr,[1],scr,=[1],1,scr,+=");
break; break;
case ',': case ',':
op->type = R_ANAL_OP_TYPE_LOAD; op->type = R_ANAL_OP_TYPE_LOAD;
r_strbuf_set (&op->esil, "kbd,[1],ptr,=[1],kbd,++="); r_strbuf_set (&op->esil, "kbd,[1],ptr,=[1],1,kbd,+=");
break; break;
case 0x00: case 0x00:
case 0xff: case 0xff:
@ -248,15 +257,32 @@ beach:
return op->size; return op->size;
} }
static char *get_reg_profile(RArchSession *as) { static char *regs(RArchSession *as) {
if (as->config->bits == 8) {
return strdup (
"=PC pc\n"
"=BP brk\n"
"=SP ptr\n"
"=A0 tmp\n"
"=A1 tmp\n"
"=A2 tmp\n"
"=A3 tmp\n"
"gpr ptr .8 0 0\n" // data pointer
"gpr pc .8 4 0\n" // program counter
"gpr brk .8 8 0\n" // brackets
"gpr scr .32 12 0\n" // screen
"gpr kbd .32 16 0\n" // keyboard
"gpr tmp .32 20 0\n" // keyboard
);
}
return strdup ( return strdup (
"=PC pc\n" "=PC pc\n"
"=BP brk\n" "=BP brk\n"
"=SP ptr\n" "=SP ptr\n"
"=A0 rax\n" "=A0 ptr\n"
"=A1 rbx\n" "=A1 ptr\n"
"=A2 rcx\n" "=A2 ptr\n"
"=A3 rdx\n" "=A3 ptr\n"
"gpr ptr .32 0 0\n" // data pointer "gpr ptr .32 0 0\n" // data pointer
"gpr pc .32 4 0\n" // program counter "gpr pc .32 4 0\n" // program counter
"gpr brk .32 8 0\n" // brackets "gpr brk .32 8 0\n" // brackets
@ -277,7 +303,8 @@ static bool encode(RArchSession *as, RAnalOp *op, RArchEncodeMask mask) {
static int archinfo(RArchSession *as, ut32 q) { static int archinfo(RArchSession *as, ut32 q) {
switch (q) { switch (q) {
case R_ANAL_ARCHINFO_MAX_OP_SIZE: case R_ANAL_ARCHINFO_MAX_OP_SIZE:
return 32; return 0xff;
// return 32;
} }
return 1; return 1;
} }
@ -287,11 +314,11 @@ RArchPlugin r_arch_plugin_bf = {
.desc = "brainfuck code analysis plugin", .desc = "brainfuck code analysis plugin",
.license = "LGPL3", .license = "LGPL3",
.arch = "bf", .arch = "bf",
.bits = R_SYS_BITS_PACK2 (8, 32), .bits = R_SYS_BITS_PACK (32),
.endian = R_SYS_ENDIAN_NONE, .endian = R_SYS_ENDIAN_NONE,
.decode = &decode, .decode = &decode,
.encode = &encode, .encode = &encode,
.regs = get_reg_profile, .regs = regs,
.info = &archinfo .info = &archinfo
}; };

View File

@ -3,6 +3,7 @@
#include <r_arch.h> #include <r_arch.h>
#include <r_util.h> #include <r_util.h>
#if 0
static char* regs(RArchSession *as) { static char* regs(RArchSession *as) {
const char* profile = const char* profile =
"=PC null0\n" "=PC null0\n"
@ -13,6 +14,7 @@ static char* regs(RArchSession *as) {
"gpr null1 .32 ?1 0\n"; "gpr null1 .32 ?1 0\n";
return strdup (profile); return strdup (profile);
} }
#endif
RArchPlugin r_arch_plugin_null = { RArchPlugin r_arch_plugin_null = {
.name = "null", .name = "null",
@ -20,7 +22,7 @@ RArchPlugin r_arch_plugin_null = {
.arch = "none", .arch = "none",
.license = "LGPL3", .license = "LGPL3",
.bits = R_SYS_BITS_PACK4 (8, 16, 32, 64), .bits = R_SYS_BITS_PACK4 (8, 16, 32, 64),
.regs = regs // .regs = regs
}; };
#ifndef R2_PLUGIN_INCORE #ifndef R2_PLUGIN_INCORE

View File

@ -51,7 +51,8 @@ static RBinInfo *info(RBinFile *bf) {
eprintf ("\"e cmd.vprompt=pxa 32@stack;pxa 32@screen;pxa 32@data\"\n"); eprintf ("\"e cmd.vprompt=pxa 32@stack;pxa 32@screen;pxa 32@data\"\n");
eprintf ("s 0\n"); eprintf ("s 0\n");
eprintf ("e asm.bits=32\n"); eprintf ("e asm.bits=32\n");
eprintf ("dL bf\n"); eprintf ("e cmd.vprompt=pxa 32@stack;pxa 32@screen;pxa 32@data\n");
// eprintf ("dL bf\n");
return ret; return ret;
} }

View File

@ -989,7 +989,7 @@ R_API void r_cons_echo(const char *msg) {
static void optimize(void) { static void optimize(void) {
char *buf = C->buffer; char *buf = C->buffer;
int len = C->buffer_len; int len = C->buffer_len;
int i, codes = 0; int i;
int escape_n = 0; int escape_n = 0;
char escape[32]; char escape[32];
bool onescape = false; bool onescape = false;
@ -1022,7 +1022,6 @@ static void optimize(void) {
onescape = true; onescape = true;
escape[escape_n++] = buf[i]; escape[escape_n++] = buf[i];
escape[escape_n] = 0; escape[escape_n] = 0;
codes++;
} }
} }
// eprintf ("FROM %d TO %d (%d)%c", C->buffer_len, len, codes, 10); // eprintf ("FROM %d TO %d (%d)%c", C->buffer_len, len, codes, 10);

View File

@ -611,9 +611,6 @@ static bool linkcb(void *user, void *data, ut32 id) {
R_API bool r_core_bin_load(RCore *r, const char *filenameuri, ut64 baddr) { R_API bool r_core_bin_load(RCore *r, const char *filenameuri, ut64 baddr) {
r_return_val_if_fail (r && r->io, false); r_return_val_if_fail (r && r->io, false);
if (r_str_startswith (filenameuri, "malloc://")) {
return true;
}
R_CRITICAL_ENTER (r); R_CRITICAL_ENTER (r);
ut64 laddr = r_config_get_i (r->config, "bin.laddr"); ut64 laddr = r_config_get_i (r->config, "bin.laddr");
RBinFile *binfile = NULL; RBinFile *binfile = NULL;

View File

@ -153,6 +153,8 @@ static char *r_debug_bf_reg_profile(RDebug *dbg) {
"gpr inpi .32 24 0\n" "gpr inpi .32 24 0\n"
"gpr mem .32 28 0\n" "gpr mem .32 28 0\n"
"gpr memi .32 32 0\n" "gpr memi .32 32 0\n"
"gpr brk .32 36 0\n"
"gpr kbd .32 40 0\n"
); );
} }

View File

@ -1,4 +1,5 @@
NAME=bf decode NAME=bf decode
BROKEN=1
FILE=bins/bf/hello-ok.bf FILE=bins/bf/hello-ok.bf
CMDS=<<EOF CMDS=<<EOF
e asm.arch=bf e asm.arch=bf
@ -10,18 +11,18 @@ EOF
EXPECT=<<EOF EXPECT=<<EOF
address: 0xa address: 0xa
opcode: while [ptr] opcode: while [ptr]
esilcost: 4 esilcost: 12
disasm: while [ptr] disasm: while [ptr]
pseudo: while [ptr] pseudo: while [ptr]
mnemonic: while mnemonic: while
mask: 00 mask: ff
id: 1 id: 1
bytes: 5b bytes: 5b
size: 1 size: 1
sign: false sign: false
type: cjmp type: cjmp
cycles: 0 cycles: 0
esil: 0xa,brk,=[1],brk,++=,ptr,[1],!,?{,0x2a,pc,=,brk,--=,} esil: 1,pc,-,brk,=[4],4,brk,+=,
jump: 0x0000002a jump: 0x0000002a
fail: 0x0000000b fail: 0x0000000b
cond: al cond: al

View File

@ -1281,42 +1281,6 @@ EXPECT=<<EOF
EOF EOF
RUN RUN
NAME=delete memory format with Cf-
FILE=bins/dmg/src/Hello
CMDS=<<EOF
e scr.interactive=false
P-temp_proj
e prj.vc=false
e asm.arch=x86
e asm.bits=64
e asm.flags.inline=true
pd 1
Cf 1 x
pd 1
e scr.interactive=false
P+temp_proj
Cf-
pd 1
f jeje=0
e scr.interactive=false
Ps temp_proj
e scr.interactive=false
P temp_proj
pd 1
EOF
EXPECT=<<EOF
0x00000000 57 push rdi
0x00000000 pf x # size=1
0x00000000 = 0x6c726f57
0x00000000 57 push rdi
;-- jeje, rax, rbx, rcx, rdx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15, rip, rbp, rflags, rsp:
0x00000000 57 push rdi
EOF
RUN
NAME=reflines offset 2 (ascii) NAME=reflines offset 2 (ascii)
FILE=bins/elf/analysis/ls-alxchk FILE=bins/elf/analysis/ls-alxchk
CMDS=<<EOF CMDS=<<EOF

View File

@ -808,6 +808,8 @@ RUN
NAME=Rename NAME=Rename
FILE=bins/elf/analysis/main FILE=bins/elf/analysis/main
CMDS=<<EOF CMDS=<<EOF
P-hello
P-world
e prj.vc = false e prj.vc = false
Ps hello Ps hello
Ps world Ps world
@ -817,3 +819,37 @@ EOF
EXPECT=<<EOF EXPECT=<<EOF
EOF EOF
RUN RUN
NAME=delete memory format with Cf-
FILE=bins/dmg/src/Hello
CMDS=<<EOF
e scr.interactive=false
P-temp_proj
e prj.vc=false
e asm.arch=x86
e asm.bits=64
e asm.flags.inline=true
pd 1
Cf 1 x
pd 1
e scr.interactive=false
P+temp_proj
Cf-
pd 1
f jeje=0
e scr.interactive=false
Ps temp_proj
e scr.interactive=false
P temp_proj
f-orax
pd 1
EOF
EXPECT=<<EOF
0x00000000 57 push rdi
0x00000000 pf x # size=1
0x00000000 = 0x6c726f57
0x00000000 57 push rdi
;-- jeje, rax, rbx, rcx, rdx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15, rip, rbp, rflags, rsp:
0x00000000 57 push rdi
EOF
RUN

View File

@ -1,8 +1,8 @@
NAME=hello world NAME=hello world
FILE=bins/bf/hello-ok.bf FILE=bins/bf/hello-ok.bf
ARGS=-b32 ARGS=-a bf
CMDS=<<EOF CMDS=<<EOF
e asm.arch = bf -b32
f input 128 0x3000 f input 128 0x3000
o malloc://128 0x3000~z o malloc://128 0x3000~z
f screen 80*25 0x4000 f screen 80*25 0x4000
@ -17,6 +17,8 @@ ar scr=screen
ar kbd=input ar kbd=input
ar ptr=data ar ptr=data
s 0 s 0
ar PC=0
sr PC
150aes 150aes
ps 12 @ screen ps 12 @ screen
pd 3 @ 0xa pd 3 @ 0xa
@ -48,6 +50,7 @@ ar scr=screen
ar kbd=input ar kbd=input
ar ptr=data ar ptr=data
s 0 s 0
150aes
pd 3 @ 0xa pd 3 @ 0xa
EOF EOF
EXPECT=<<EOF EXPECT=<<EOF
@ -59,9 +62,8 @@ RUN
NAME=loopy hello world NAME=loopy hello world
FILE=bins/bf/hello-loops.bf FILE=bins/bf/hello-loops.bf
ARGS=-b32 ARGS=-a bf -b32
CMDS=<<EOF CMDS=<<EOF
e asm.arch = bf
f input 128 0x3000 f input 128 0x3000
o malloc://128 0x3000~z o malloc://128 0x3000~z
f screen 80*25 0x4000 f screen 80*25 0x4000
@ -75,8 +77,10 @@ ar brk=stack
ar scr=screen ar scr=screen
ar kbd=input ar kbd=input
ar ptr=data ar ptr=data
s 0 ar PC=0
sr PC
700aes 700aes
# aesu 0x0000006a
ps 12 @ screen ps 12 @ screen
EOF EOF
EXPECT=<<EOF EXPECT=<<EOF