From 1ffdedcd07982be902bbba8153d11499dfb04c0a Mon Sep 17 00:00:00 2001 From: FXTi Date: Wed, 13 May 2020 10:37:09 +0800 Subject: [PATCH] Add Python bytecode plugins (#16771) --- libr/anal/meson.build | 34 +- libr/anal/p/anal_pyc.c | 158 ++++ libr/anal/p/anal_rsp.c | 2 +- libr/anal/p/pyc.mk | 41 + libr/asm/arch/pyc/opcode.c | 298 +++++++ libr/asm/arch/pyc/opcode.h | 161 ++++ libr/asm/arch/pyc/opcode_10.c | 19 + libr/asm/arch/pyc/opcode_11.c | 15 + libr/asm/arch/pyc/opcode_12.c | 15 + libr/asm/arch/pyc/opcode_13.c | 19 + libr/asm/arch/pyc/opcode_14.c | 26 + libr/asm/arch/pyc/opcode_15.c | 125 +++ libr/asm/arch/pyc/opcode_16.c | 20 + libr/asm/arch/pyc/opcode_20.c | 22 + libr/asm/arch/pyc/opcode_21.c | 24 + libr/asm/arch/pyc/opcode_22.c | 19 + libr/asm/arch/pyc/opcode_23.c | 15 + libr/asm/arch/pyc/opcode_24.c | 21 + libr/asm/arch/pyc/opcode_25.c | 18 + libr/asm/arch/pyc/opcode_26.c | 18 + libr/asm/arch/pyc/opcode_27.c | 48 ++ libr/asm/arch/pyc/opcode_2x.c | 152 ++++ libr/asm/arch/pyc/opcode_30.c | 36 + libr/asm/arch/pyc/opcode_31.c | 28 + libr/asm/arch/pyc/opcode_32.c | 19 + libr/asm/arch/pyc/opcode_33.c | 21 + libr/asm/arch/pyc/opcode_34.c | 23 + libr/asm/arch/pyc/opcode_35.c | 43 + libr/asm/arch/pyc/opcode_36.c | 34 + libr/asm/arch/pyc/opcode_37.c | 27 + libr/asm/arch/pyc/opcode_38.c | 36 + libr/asm/arch/pyc/opcode_39.c | 33 + libr/asm/arch/pyc/opcode_3x.c | 165 ++++ libr/asm/arch/pyc/opcode_anal.c | 907 +++++++++++++++++++++ libr/asm/arch/pyc/opcode_arg_fmt.c | 69 ++ libr/asm/arch/pyc/pyc_dis.c | 208 +++++ libr/asm/arch/pyc/pyc_dis.h | 79 ++ libr/asm/meson.build | 32 + libr/asm/p/asm_pyc.c | 61 ++ libr/asm/p/pyc.mk | 43 + libr/bin/format/pyc/marshal.c | 1208 ++++++++++++++++++++++++++++ libr/bin/format/pyc/marshal.h | 76 ++ libr/bin/format/pyc/pyc.c | 23 + libr/bin/format/pyc/pyc.h | 16 + libr/bin/format/pyc/pyc_magic.c | 274 +++++++ libr/bin/format/pyc/pyc_magic.h | 20 + libr/bin/meson.build | 4 + libr/bin/p/bin_pyc.c | 142 ++++ libr/bin/p/pyc.mk | 14 + 
libr/include/r_anal.h | 1 + libr/include/r_asm.h | 1 + libr/include/r_bin.h | 1 + plugins.def.cfg | 3 + sys/clang-format-diff.py | 5 +- 54 files changed, 4918 insertions(+), 4 deletions(-) create mode 100644 libr/anal/p/anal_pyc.c create mode 100644 libr/anal/p/pyc.mk create mode 100644 libr/asm/arch/pyc/opcode.c create mode 100644 libr/asm/arch/pyc/opcode.h create mode 100644 libr/asm/arch/pyc/opcode_10.c create mode 100644 libr/asm/arch/pyc/opcode_11.c create mode 100644 libr/asm/arch/pyc/opcode_12.c create mode 100644 libr/asm/arch/pyc/opcode_13.c create mode 100644 libr/asm/arch/pyc/opcode_14.c create mode 100644 libr/asm/arch/pyc/opcode_15.c create mode 100644 libr/asm/arch/pyc/opcode_16.c create mode 100644 libr/asm/arch/pyc/opcode_20.c create mode 100644 libr/asm/arch/pyc/opcode_21.c create mode 100644 libr/asm/arch/pyc/opcode_22.c create mode 100644 libr/asm/arch/pyc/opcode_23.c create mode 100644 libr/asm/arch/pyc/opcode_24.c create mode 100644 libr/asm/arch/pyc/opcode_25.c create mode 100644 libr/asm/arch/pyc/opcode_26.c create mode 100644 libr/asm/arch/pyc/opcode_27.c create mode 100644 libr/asm/arch/pyc/opcode_2x.c create mode 100644 libr/asm/arch/pyc/opcode_30.c create mode 100644 libr/asm/arch/pyc/opcode_31.c create mode 100644 libr/asm/arch/pyc/opcode_32.c create mode 100644 libr/asm/arch/pyc/opcode_33.c create mode 100644 libr/asm/arch/pyc/opcode_34.c create mode 100644 libr/asm/arch/pyc/opcode_35.c create mode 100644 libr/asm/arch/pyc/opcode_36.c create mode 100644 libr/asm/arch/pyc/opcode_37.c create mode 100644 libr/asm/arch/pyc/opcode_38.c create mode 100644 libr/asm/arch/pyc/opcode_39.c create mode 100644 libr/asm/arch/pyc/opcode_3x.c create mode 100644 libr/asm/arch/pyc/opcode_anal.c create mode 100644 libr/asm/arch/pyc/opcode_arg_fmt.c create mode 100644 libr/asm/arch/pyc/pyc_dis.c create mode 100644 libr/asm/arch/pyc/pyc_dis.h create mode 100644 libr/asm/p/asm_pyc.c create mode 100644 libr/asm/p/pyc.mk create mode 100644 
libr/bin/format/pyc/marshal.c create mode 100644 libr/bin/format/pyc/marshal.h create mode 100644 libr/bin/format/pyc/pyc.c create mode 100644 libr/bin/format/pyc/pyc.h create mode 100644 libr/bin/format/pyc/pyc_magic.c create mode 100644 libr/bin/format/pyc/pyc_magic.h create mode 100644 libr/bin/p/bin_pyc.c create mode 100644 libr/bin/p/pyc.mk diff --git a/libr/anal/meson.build b/libr/anal/meson.build index d7c0bfdb08..199725be0f 100644 --- a/libr/anal/meson.build +++ b/libr/anal/meson.build @@ -71,6 +71,7 @@ r_anal_sources = [ 'p/anal_ppc_cs.c', 'p/anal_ppc_gnu.c', 'p/anal_propeller.c', + 'p/anal_pyc.c', 'p/anal_riscv.c', 'p/anal_rsp.c', 'p/anal_sh.c', @@ -121,7 +122,38 @@ r_anal_sources = [ '../asm/arch/xtensa/gnu/xtensa-dis.c', '../asm/arch/xtensa/gnu/elf32-xtensa.c', '../asm/arch/xtensa/gnu/xtensa-isa.c', - '../asm/arch/xtensa/gnu/xtensa-modules.c' + '../asm/arch/xtensa/gnu/xtensa-modules.c', + '../asm/arch/pyc/opcode_10.c', + '../asm/arch/pyc/opcode_11.c', + '../asm/arch/pyc/opcode_12.c', + '../asm/arch/pyc/opcode_13.c', + '../asm/arch/pyc/opcode_14.c', + '../asm/arch/pyc/opcode_15.c', + '../asm/arch/pyc/opcode_16.c', + '../asm/arch/pyc/opcode_20.c', + '../asm/arch/pyc/opcode_21.c', + '../asm/arch/pyc/opcode_22.c', + '../asm/arch/pyc/opcode_23.c', + '../asm/arch/pyc/opcode_24.c', + '../asm/arch/pyc/opcode_25.c', + '../asm/arch/pyc/opcode_26.c', + '../asm/arch/pyc/opcode_27.c', + '../asm/arch/pyc/opcode_2x.c', + '../asm/arch/pyc/opcode_30.c', + '../asm/arch/pyc/opcode_31.c', + '../asm/arch/pyc/opcode_32.c', + '../asm/arch/pyc/opcode_33.c', + '../asm/arch/pyc/opcode_34.c', + '../asm/arch/pyc/opcode_35.c', + '../asm/arch/pyc/opcode_36.c', + '../asm/arch/pyc/opcode_37.c', + '../asm/arch/pyc/opcode_38.c', + '../asm/arch/pyc/opcode_39.c', + '../asm/arch/pyc/opcode_3x.c', + '../asm/arch/pyc/opcode_anal.c', + '../asm/arch/pyc/opcode_arg_fmt.c', + '../asm/arch/pyc/opcode.c', + '../asm/arch/pyc/pyc_dis.c' ] r_anal_inc = [ diff --git a/libr/anal/p/anal_pyc.c 
b/libr/anal/p/anal_pyc.c new file mode 100644 index 0000000000..9aece55a77 --- /dev/null +++ b/libr/anal/p/anal_pyc.c @@ -0,0 +1,158 @@ +/* radare - LGPL3 - Copyright 2016-2020 - FXTi */ + +#include +#include +#include +#include + +#include "../../asm/arch/pyc/pyc_dis.h" + +static pyc_opcodes *ops = NULL; + +static int archinfo(RAnal *anal, int query) { + if (!strcmp (anal->cpu, "x86")) { + return -1; + } + + switch (query) { + case R_ANAL_ARCHINFO_MIN_OP_SIZE: + return (anal->bits == 16)? 1: 2; + case R_ANAL_ARCHINFO_MAX_OP_SIZE: + return (anal->bits == 16)? 3: 2; + default: + return -1; + } +} + +static char *get_reg_profile(RAnal *anal) { + return strdup ( + "=PC pc\n" + "=BP bp\n" + "=SP sp\n" + "gpr sp .32 0 0\n" // stack pointer + "gpr pc .32 4 0\n" // program counter + "gpr bp .32 8 0\n" // base pointer // unused + ); +} + +static RList *get_pyc_code_obj(RAnal *anal) { + RBin *b = anal->binb.bin; + RBinPlugin *plugin = b->cur && b->cur->o? b->cur->o->plugin: NULL; + bool is_pyc = (plugin && strcmp (plugin->name, "pyc") == 0); + return is_pyc? 
b->cur->o->bin_obj: NULL; +} + +static int pyc_op(RAnal *a, RAnalOp *op, ut64 addr, const ut8 *data, int len, RAnalOpMask mask) { + RList *cobjs = r_list_get_n (get_pyc_code_obj (a), 0); + RListIter *iter = NULL; + pyc_code_object *func = NULL, *t = NULL; + r_list_foreach (cobjs, iter, t) { + if (R_BETWEEN (t->start_offset, addr, t->end_offset - 1)) { // addr in [start_offset, end_offset) + func = t; + break; + } + } + if (!func) { + return -1; + } + + ut64 func_base = func->start_offset; + ut32 extended_arg = 0, oparg; + ut8 op_code = data[0]; + op->jump = UT64_MAX; + op->fail = UT64_MAX; + op->ptr = op->val = UT64_MAX; + op->addr = addr; + op->sign = true; + op->type = R_ANAL_OP_TYPE_ILL; + op->id = op_code; + + if (!ops || !pyc_opcodes_equal (ops, a->cpu)) { + if (!(ops = get_opcode_by_version (a->cpu))) { + return -1; + } + } + bool is_python36 = a->bits == 8; + pyc_opcode_object *op_obj = &ops->opcodes[op_code]; + if (!op_obj->op_name) { + op->type = R_ANAL_OP_TYPE_ILL; + op->size = 1; + goto anal_end; + } + + op->size = is_python36? 2: ((op_code >= ops->have_argument)? 3: 1); + + if (op_code >= ops->have_argument) { + if (!is_python36) { + oparg = data[1] + data[2] * 256 + extended_arg; + } else { + oparg = data[1] + extended_arg; + } + extended_arg = 0; + if (op_code == ops->extended_arg) { + extended_arg = is_python36? (oparg << 8): (oparg * 65536); + } + } + + if (op_obj->type & HASJABS) { + op->type = R_ANAL_OP_TYPE_JMP; + op->jump = func_base + oparg; + + if (op_obj->type & HASCONDITION) { + op->type = R_ANAL_OP_TYPE_CJMP; + op->fail = addr + ((is_python36)? 2: 3); + } + goto anal_end; + } + if (op_obj->type & HASJREL) { + op->type = R_ANAL_OP_TYPE_JMP; + op->jump = addr + oparg + ((is_python36)? 2: 3); + op->fail = addr + ((is_python36)? 2: 3); + + if (op_obj->type & HASCONDITION) { + op->type = R_ANAL_OP_TYPE_CJMP; + //op->fail = addr + ((is_python36)? 
2: 3); + } + //goto anal_end; + } + + if (op_obj->type & HASCOMPARE) { + op->type = R_ANAL_OP_TYPE_CMP; + goto anal_end; + } + + anal_pyc_op (op, op_obj, oparg); + +anal_end: + //free_opcode (ops); + return op->size; +} + +static int finish(void *user) { + if (ops) { + free_opcode (ops); + ops = NULL; + } + return 0; +} + +RAnalPlugin r_anal_plugin_pyc = { + .name = "pyc", + .desc = "Python bytecode analysis plugin", + .license = "LGPL3", + .arch = "pyc", + .bits = 16 | 8, // Partially agree with this + .archinfo = archinfo, + .get_reg_profile = get_reg_profile, + .op = &pyc_op, + .esil = false, + .fini = &finish, +}; + +#ifndef R2_PLUGIN_INCORE +R_API RLibStruct radare_plugin = { + .type = R_LIB_TYPE_ANAL, + .data = &r_anal_plugin_pyc, + .version = R2_VERSION +}; +#endif diff --git a/libr/anal/p/anal_rsp.c b/libr/anal/p/anal_rsp.c index b30ae323f8..6736e4afef 100644 --- a/libr/anal/p/anal_rsp.c +++ b/libr/anal/p/anal_rsp.c @@ -11,7 +11,7 @@ #include #include #include -#include "rsp_idec.h" +#include "../../asm/arch/rsp/rsp_idec.h" static int rsp_op(RAnal *anal, RAnalOp *op, ut64 addr, const ut8 *b, int len, RAnalOpMask mask) { int i; diff --git a/libr/anal/p/pyc.mk b/libr/anal/p/pyc.mk new file mode 100644 index 0000000000..88a64d35a0 --- /dev/null +++ b/libr/anal/p/pyc.mk @@ -0,0 +1,41 @@ +PYC_ASM_ROOT=../../asm/arch/pyc/ +OBJ_PYC=anal_pyc.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_10.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_11.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_12.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_13.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_14.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_15.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_16.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_20.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_21.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_22.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_23.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_24.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_25.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_26.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_27.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_2x.o 
+OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_30.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_31.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_32.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_33.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_34.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_35.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_36.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_37.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_38.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_39.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_3x.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_anal.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode_arg_fmt.o +OBJ_PYC+=$(PYC_ASM_ROOT)/opcode.o + +STATIC_OBJ+=${OBJ_PYC} +TARGET_PYC=anal_pyc.$(EXT_SO) + +ALL_TARGETS+=${TARGET_PYC} +CFLAGS+=-I$(PYC_ROOT) + +${TARGET_PYC}: ${OBJ_PYC} + ${CC} $(call libname,anal_pyc) ${CFLAGS} $(LDFLAGS) -o ${TARGET_PYC} ${OBJ_PYC} -lr_util diff --git a/libr/asm/arch/pyc/opcode.c b/libr/asm/arch/pyc/opcode.c new file mode 100644 index 0000000000..249cf311de --- /dev/null +++ b/libr/asm/arch/pyc/opcode.c @@ -0,0 +1,298 @@ +#include "opcode.h" + +static version_opcode version_op[] = { + { "1.0.1", opcode_10 }, + { "1.1", opcode_11 }, + { "1.2", opcode_12 }, + { "1.3b1", opcode_13 }, + { "1.4", opcode_14 }, + { "1.4b1", opcode_14 }, + { "1.5a1", opcode_15 }, + { "1.6a2", opcode_16 }, + { "2.0b1", opcode_20 }, + { "2.1a1", opcode_21 }, + { "2.1a2", opcode_21 }, + { "2.2a0", opcode_22 }, + { "2.2a1", opcode_22 }, + { "2.3a0", opcode_23 }, + { "2.4a0", opcode_24 }, + { "2.4a2", opcode_24 }, + { "2.4a3", opcode_24 }, + { "2.5a0", opcode_25 }, + { "2.5b2", opcode_25 }, + { "2.5c3", opcode_25 }, + { "2.6a0", opcode_26 }, + { "2.6a1+", opcode_26 }, + { "2.7a0", opcode_27 }, + { "2.7a2+", opcode_27 }, + { "3.0a1", opcode_30 }, + { "3.0a1+", opcode_30 }, + { "3.0a2", opcode_30 }, + { "3.0a2+", opcode_30 }, + { "3.0a3+", opcode_30 }, + { "3.0a5+", opcode_30 }, + { "3.0x", opcode_30 }, + { "3.1a0", opcode_31 }, + { "3.2a0", opcode_32 }, + { "3.2a1+", opcode_32 }, + { "3.2a2+", opcode_33 }, + { "3.3.0a0", opcode_33 }, + { "3.3.0a1+", opcode_33 }, + { "3.3.0a3+", 
opcode_33 }, + { "3.3a0", opcode_33 }, + { "3.4.0a0", opcode_34 }, + { "3.4.0a3+", opcode_34 }, + { "3.4.0rc1+", opcode_34 }, + { "3.5.0a0", opcode_35 }, + { "3.5.0a4+", opcode_35 }, + { "3.5.0b1+", opcode_35 }, + { "3.5.0b2+", opcode_35 }, + { "3.6.0a0", opcode_36 }, + { "v3.6.0", opcode_36 }, + { "v3.6.0a2", opcode_36 }, + { "v3.6.0a3", opcode_36 }, + { "v3.6.0a4", opcode_36 }, + { "v3.6.0b1", opcode_36 }, + { "v3.6.0b2", opcode_36 }, + { "v3.6.0b3", opcode_36 }, + { "v3.6.0b4", opcode_36 }, + { "v3.6.0rc1", opcode_36 }, + { "v3.6.0rc2", opcode_36 }, + { "v3.6.1", opcode_36 }, + { "v3.6.10", opcode_36 }, + { "v3.6.10rc", opcode_36 }, + { "v3.6.1rc1", opcode_36 }, + { "v3.6.2", opcode_36 }, + { "v3.6.2rc1", opcode_36 }, + { "v3.6.2rc2", opcode_36 }, + { "v3.6.3", opcode_36 }, + { "v3.6.3rc1", opcode_36 }, + { "v3.6.4", opcode_36 }, + { "v3.6.4rc1", opcode_36 }, + { "v3.6.5", opcode_36 }, + { "v3.6.5rc1", opcode_36 }, + { "v3.6.6", opcode_36 }, + { "v3.6.6rc1", opcode_36 }, + { "v3.6.7", opcode_36 }, + { "v3.6.7rc1", opcode_36 }, + { "v3.6.7rc2", opcode_36 }, + { "v3.6.8", opcode_36 }, + { "v3.6.8rc1", opcode_36 }, + { "v3.6.9", opcode_36 }, + { "v3.6.9rc1", opcode_36 }, + { "v3.7.0", opcode_37 }, + { "v3.7.0a1", opcode_37 }, + { "v3.7.0a2", opcode_37 }, + { "v3.7.0a3", opcode_37 }, + { "v3.7.0a4", opcode_37 }, + { "v3.7.0b1", opcode_37 }, + { "v3.7.0b2", opcode_37 }, + { "v3.7.0b3", opcode_37 }, + { "v3.7.0b4", opcode_37 }, + { "v3.7.0b5", opcode_37 }, + { "v3.7.0rc1", opcode_37 }, + { "v3.7.1", opcode_37 }, + { "v3.7.1rc1", opcode_37 }, + { "v3.7.1rc2", opcode_37 }, + { "v3.7.2", opcode_37 }, + { "v3.7.2rc1", opcode_37 }, + { "v3.7.3", opcode_37 }, + { "v3.7.3rc1", opcode_37 }, + { "v3.7.4", opcode_37 }, + { "v3.7.4rc1", opcode_37 }, + { "v3.7.4rc2", opcode_37 }, + { "v3.7.5", opcode_37 }, + { "v3.7.5rc1", opcode_37 }, + { "v3.7.6", opcode_37 }, + { "v3.7.6rc1", opcode_37 }, + { "v3.8.0", opcode_38 }, + { "v3.8.0a1", opcode_38 }, + { "v3.8.0a2", opcode_38 }, + { 
"v3.8.0a3", opcode_38 }, + { "v3.8.0a4", opcode_38 }, + { "v3.8.0b1", opcode_38 }, + { "v3.8.0b2", opcode_38 }, + { "v3.8.0b3", opcode_38 }, + { "v3.8.0b4", opcode_38 }, + { "v3.8.0rc1", opcode_38 }, + { "v3.8.1", opcode_38 }, + { "v3.8.1rc1", opcode_38 }, + { "v3.9.0a1", opcode_39 }, + { "v3.9.0a2", opcode_39 }, + { "v3.9.0a3", opcode_39 }, + { NULL, NULL }, +}; + +bool pyc_opcodes_equal(pyc_opcodes *op, const char *version) { + version_opcode *vop = version_op; + + while (vop->version) { + if (!strcmp (vop->version, version)) { + if (vop->opcode_func == (pyc_opcodes * (*)()) (op->version_sig)) { + return true; + } + } + vop++; + } + + return false; +} + +pyc_opcodes *get_opcode_by_version(char *version) { + version_opcode *vop = version_op; + + while (vop->version) { + if (!strcmp (vop->version, version)) { + return vop->opcode_func (); + } + vop++; + } + + return NULL; // No match version +} + +pyc_opcodes *new_pyc_opcodes() { + size_t i, j; + pyc_opcodes *ret = R_NEW0 (pyc_opcodes); + if (!ret) { + return NULL; + } + ret->have_argument = 90; + ret->opcodes = malloc (sizeof (pyc_opcode_object) * 256); + if (!ret->opcodes) { + free (ret); + return NULL; + } + for (i = 0; i < 256; i++) { + ret->opcodes[i].op_name = r_str_newf ("<%u>", i); + if (!ret->opcodes[i].op_name) { + for (j = 0; j < i; j++) { + free (ret->opcodes[j].op_name); + } + free (ret->opcodes); + R_FREE (ret); + return NULL; + } + ret->opcodes[i].type = 0; + ret->opcodes[i].op_code = i; + ret->opcodes[i].op_push = 0; + ret->opcodes[i].op_pop = 0; + } + + ret->opcode_arg_fmt = r_list_new (); + return ret; +} + +void free_opcode(pyc_opcodes *opcodes) { + size_t i; + for (i = 0; i < 256; i++) { + free (opcodes->opcodes[i].op_name); + } + free (opcodes->opcodes); + r_list_free (opcodes->opcode_arg_fmt); + free (opcodes); +} + +void add_arg_fmt(pyc_opcodes *ret, char *op_name, const char *(*formatter) (ut32 oparg)) { + pyc_arg_fmt *fmt = R_NEW0 (pyc_arg_fmt); + if (!fmt) { + return; + } + fmt->op_name = 
op_name; + fmt->formatter = formatter; + r_list_append (ret->opcode_arg_fmt, fmt); +} + +void (def_op)(struct op_parameter par) { + free (par.op_obj[par.op_code].op_name); + par.op_obj[par.op_code].op_name = strdup (par.op_name); + par.op_obj[par.op_code].op_code = par.op_code; + par.op_obj[par.op_code].op_pop = par.pop; + par.op_obj[par.op_code].op_push = par.push; + if (!par.fallthrough) { + par.op_obj[par.op_code].type |= NOFOLLOW; + } +} + +void (name_op)(struct op_parameter par) { + def_op (.op_obj = par.op_obj, .op_name = par.op_name, .op_code = par.op_code, .pop = par.pop, .push = par.push); + par.op_obj[par.op_code].type |= HASNAME; +} + +void (local_op)(struct op_parameter par) { + def_op (.op_obj = par.op_obj, .op_name = par.op_name, .op_code = par.op_code, .pop = par.pop, .push = par.push); + par.op_obj[par.op_code].type |= HASLOCAL; +} + +void (free_op)(struct op_parameter par) { + def_op (.op_obj = par.op_obj, .op_name = par.op_name, .op_code = par.op_code, .pop = par.pop, .push = par.push); + par.op_obj[par.op_code].type |= HASFREE; +} + +void (store_op)(struct op_parameter par) { + switch (par.func) { + case NAME_OP: + name_op (.op_obj = par.op_obj, .op_name = par.op_name, .op_code = par.op_code, .pop = par.pop, .push = par.push); + break; + case LOCAL_OP: + local_op (.op_obj = par.op_obj, .op_name = par.op_name, .op_code = par.op_code, .pop = par.pop, .push = par.push); + break; + case FREE_OP: + free_op (.op_obj = par.op_obj, .op_name = par.op_name, .op_code = par.op_code, .pop = par.pop, .push = par.push); + break; + case DEF_OP: + def_op (.op_obj = par.op_obj, .op_name = par.op_name, .op_code = par.op_code, .pop = par.pop, .push = par.push); + break; + default: + eprintf ("Error in store_op in opcode.c, call function %u.\n", par.func); + return; + } + par.op_obj[par.op_code].type |= HASSTORE; +} + +void (varargs_op)(struct op_parameter par) { + def_op (.op_obj = par.op_obj, .op_name = par.op_name, .op_code = par.op_code, .pop = par.pop, .push = 
par.push); + par.op_obj[par.op_code].type |= HASVARGS; +} + +void (const_op)(struct op_parameter par) { + def_op (.op_obj = par.op_obj, .op_name = par.op_name, .op_code = par.op_code, .pop = par.pop, .push = par.push); + par.op_obj[par.op_code].type |= HASCONST; +} + +void (compare_op)(struct op_parameter par) { + def_op (.op_obj = par.op_obj, .op_name = par.op_name, .op_code = par.op_code, .pop = par.pop, .push = par.push); + par.op_obj[par.op_code].type |= HASCOMPARE; +} + +void (jabs_op)(struct op_parameter par) { + def_op (.op_obj = par.op_obj, .op_name = par.op_name, .op_code = par.op_code, .pop = par.pop, .push = par.push, .fallthrough = par.fallthrough); + par.op_obj[par.op_code].type |= HASJABS; + if (par.conditional) { + par.op_obj[par.op_code].type |= HASCONDITION; + } +} + +void (jrel_op)(struct op_parameter par) { + def_op (.op_obj = par.op_obj, .op_name = par.op_name, .op_code = par.op_code, .pop = par.pop, .push = par.push, .fallthrough = par.fallthrough); + par.op_obj[par.op_code].type |= HASJREL; + if (par.conditional) { + par.op_obj[par.op_code].type |= HASCONDITION; + } +} + +void (nargs_op)(struct op_parameter par) { + def_op (.op_obj = par.op_obj, .op_name = par.op_name, .op_code = par.op_code, .pop = par.pop, .push = par.push); + par.op_obj[par.op_code].type |= HASNARGS; +} + +void (rm_op)(struct op_parameter par) { + pyc_opcode_object *op_obj = &par.op_obj[par.op_code]; + if (op_obj->op_code == par.op_code && !strcmp (op_obj->op_name, par.op_name)) { + free (op_obj->op_name); + op_obj->op_name = r_str_newf ("<%u>", par.op_code); + op_obj->type = op_obj->op_pop = op_obj->op_push = 0; + } else { + eprintf ("Error in rm_op() while constructing opcodes for .pyc file: \n .op_code = %u, .op_name = %s", par.op_code, par.op_name); + } +} diff --git a/libr/asm/arch/pyc/opcode.h b/libr/asm/arch/pyc/opcode.h new file mode 100644 index 0000000000..7aac461c20 --- /dev/null +++ b/libr/asm/arch/pyc/opcode.h @@ -0,0 +1,161 @@ +/* radare - LGPL3 - Copyright 
2016-2020 - c0riolis, x0urc3 */ + +#ifndef OPCODE_H +#define OPCODE_H + +#include +#include +#include +#include + +#define OBJECT_SIZE_ON_STACK 1 + +typedef enum { + HASCOMPARE = 0x1, + HASCONDITION = 0x2, // conditional operator; has jump offset + HASCONST = 0x4, + HASFREE = 0x8, + HASJABS = 0x10, // Will appear with HASCONDITION sometimes + HASJREL = 0x20, // Will appear with HASCONDITION sometimes + HASLOCAL = 0x40, + HASNAME = 0x80, + HASNARGS = 0x100, // For function-like calls + HASSTORE = 0x200, // Some sort of store operation + HASVARGS = 0x400, // Similar but for operators BUILD_xxx + NOFOLLOW = 0x800, // Instruction doesn't fall to the next opcode +} pyc_opcode_type; + +typedef enum { + NAME_OP = 0x1, + LOCAL_OP = 0x2, + FREE_OP = 0x4, + DEF_OP = 0x8, +} pyc_store_op_func; + +typedef struct { + char *op_name; + ut16 type; + ut8 op_code; + st8 op_push; + st8 op_pop; +} pyc_opcode_object; + +typedef struct { + ut8 extended_arg; + ut8 have_argument; + ut8 bits; + void *(*version_sig)(); + RList *opcode_arg_fmt; + pyc_opcode_object *opcodes; +} pyc_opcodes; + +typedef struct { + char *op_name; + const char *(*formatter)(ut32 oparg); +} pyc_arg_fmt; + +typedef struct { + char *version; + pyc_opcodes *(*opcode_func)(); +} version_opcode; + +typedef struct { + char *op_name; + void (*func)(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg); +} op_anal_func; + +void anal_pyc_op(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg); + +pyc_opcodes *opcode_2x(void); +pyc_opcodes *opcode_3x(void); +pyc_opcodes *opcode_10(void); +pyc_opcodes *opcode_11(void); +pyc_opcodes *opcode_12(void); +pyc_opcodes *opcode_13(void); +pyc_opcodes *opcode_14(void); +pyc_opcodes *opcode_15(void); +pyc_opcodes *opcode_16(void); +pyc_opcodes *opcode_20(void); +pyc_opcodes *opcode_21(void); +pyc_opcodes *opcode_22(void); +pyc_opcodes *opcode_23(void); +pyc_opcodes *opcode_24(void); +pyc_opcodes *opcode_25(void); +pyc_opcodes *opcode_26(void); +pyc_opcodes *opcode_27(void); +pyc_opcodes 
*opcode_30(void); +pyc_opcodes *opcode_31(void); +pyc_opcodes *opcode_32(void); +pyc_opcodes *opcode_33(void); +pyc_opcodes *opcode_34(void); +pyc_opcodes *opcode_35(void); +pyc_opcodes *opcode_36(void); +pyc_opcodes *opcode_37(void); +pyc_opcodes *opcode_38(void); +pyc_opcodes *opcode_39(void); + +pyc_opcodes *get_opcode_by_version(char *version); + +pyc_opcodes *new_pyc_opcodes(); +void free_opcode(pyc_opcodes *opcodes); +bool pyc_opcodes_equal(pyc_opcodes *op, const char *version); + +void add_arg_fmt(pyc_opcodes *ret, char *op_name, const char *(*formatter) (ut32 oparg)); + +const char *format_MAKE_FUNCTION_arg_3x(ut32 oparg); +const char *format_extended_arg(ut32 oparg); +const char *format_CALL_FUNCTION_pos_name_encoded(ut32 oparg); +const char *format_CALL_FUNCTION_KW_36(ut32 oparg); +const char *format_CALL_FUNCTION_EX_36(ut32 oparg); +const char *format_MAKE_FUNCTION_arg_36(ut32 oparg); +const char *format_value_flags_36(ut32 oparg); +const char *format_extended_arg_36(ut32 oparg); + +struct op_parameter { + pyc_opcode_object *op_obj; + const char *op_name; + ut8 op_code; + st8 pop; + st8 push; + pyc_store_op_func func; + bool conditional; + bool fallthrough; +}; + +#define def_op(...) def_op((struct op_parameter){ .pop = -2, .push = -2, .fallthrough = true, __VA_ARGS__ }) +void (def_op)(struct op_parameter par); + +#define name_op(...) name_op((struct op_parameter){ .pop = -2, .push = -2, __VA_ARGS__ }) +void (name_op)(struct op_parameter par); + +#define local_op(...) local_op((struct op_parameter){ .pop = 0, .push = 1, __VA_ARGS__ }) +void (local_op)(struct op_parameter par); + +#define free_op(...) free_op((struct op_parameter){ .pop = 0, .push = 1, __VA_ARGS__ }) +void (free_op)(struct op_parameter par); + +#define store_op(...) store_op((struct op_parameter){ .pop = 0, .push = 1, .func = DEF_OP, __VA_ARGS__ }) +void (store_op)(struct op_parameter par); + +#define varargs_op(...) 
varargs_op((struct op_parameter){ .pop = -1, .push = 1, __VA_ARGS__ }) +void (varargs_op)(struct op_parameter par); + +#define const_op(...) const_op((struct op_parameter){ .pop = 0, .push = 1, __VA_ARGS__ }) +void (const_op)(struct op_parameter par); + +#define compare_op(...) compare_op((struct op_parameter){ .pop = 2, .push = 1, __VA_ARGS__ }) +void (compare_op)(struct op_parameter par); + +#define jabs_op(...) jabs_op((struct op_parameter){ .pop = 0, .push = 0, .conditional = false, .fallthrough = true, __VA_ARGS__ }) +void (jabs_op)(struct op_parameter par); + +#define jrel_op(...) jrel_op((struct op_parameter){ .pop = 0, .push = 0, .conditional = false, .fallthrough = true, __VA_ARGS__ }) +void (jrel_op)(struct op_parameter par); + +#define nargs_op(...) nargs_op((struct op_parameter){ .pop = -2, .push = -2, __VA_ARGS__ }) +void (nargs_op)(struct op_parameter par); + +#define rm_op(...) rm_op((struct op_parameter){ __VA_ARGS__ }) +void (rm_op)(struct op_parameter par); + +#endif diff --git a/libr/asm/arch/pyc/opcode_10.c b/libr/asm/arch/pyc/opcode_10.c new file mode 100644 index 0000000000..846e49c8e3 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_10.c @@ -0,0 +1,19 @@ +#include "opcode.h" + +pyc_opcodes *opcode_10(void) { + pyc_opcodes *ret = opcode_11 (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_10; + + // 1.0 - 1.1 bytecodes differences + rm_op (.op_obj = ret->opcodes, .op_name = "LOAD_GLOBALS", .op_code = 84); + rm_op (.op_obj = ret->opcodes, .op_name = "EXEC_STMT", .op_code = 85); + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_11.c b/libr/asm/arch/pyc/opcode_11.c new file mode 100644 index 0000000000..d42ed26ae6 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_11.c @@ -0,0 +1,15 @@ +#include "opcode.h" + +pyc_opcodes *opcode_11(void) { + pyc_opcodes *ret = opcode_13 (); + if (!ret) { + return NULL; + } + + 
ret->version_sig = (void *(*)())opcode_11; + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_12.c b/libr/asm/arch/pyc/opcode_12.c new file mode 100644 index 0000000000..da8ceeda14 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_12.c @@ -0,0 +1,15 @@ +#include "opcode.h" + +pyc_opcodes *opcode_12(void) { + pyc_opcodes *ret = opcode_13 (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_12; + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_13.c b/libr/asm/arch/pyc/opcode_13.c new file mode 100644 index 0000000000..b57bc1c09f --- /dev/null +++ b/libr/asm/arch/pyc/opcode_13.c @@ -0,0 +1,19 @@ +#include "opcode.h" + +pyc_opcodes *opcode_13(void) { + pyc_opcodes *ret = opcode_14 (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_13; + + // 1.3 - 1.4 bytecodes differences + rm_op (.op_obj = ret->opcodes, .op_name = "BINARY_POWER", .op_code = 19); + def_op (.op_obj = ret->opcodes, .op_name = "LOAD_GLOBALS", .op_code = 84); + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_14.c b/libr/asm/arch/pyc/opcode_14.c new file mode 100644 index 0000000000..427322c3a3 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_14.c @@ -0,0 +1,26 @@ +#include "opcode.h" + +pyc_opcodes *opcode_14(void) { + pyc_opcodes *ret = opcode_15 (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_14; + + // 1.4 Bytecodes not in 1.5 + def_op (.op_obj = ret->opcodes, .op_name = "UNARY_CALL", .op_code = 14); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_CALL", .op_code = 26); + def_op (.op_obj = ret->opcodes, .op_name = "RAISE_EXCEPTION", .op_code = 81); + def_op (.op_obj = ret->opcodes, .op_name = "BUILD_FUNCTION", 
.op_code = 86); + varargs_op (.op_obj = ret->opcodes, .op_name = "UNPACK_ARG", .op_code = 94); // Number of arguments expected + varargs_op (.op_obj = ret->opcodes, .op_name = "UNPACK_VARARG", .op_code = 99); // Minimal number of arguments + name_op (.op_obj = ret->opcodes, .op_name = "LOAD_LOCAL", .op_code = 115); + varargs_op (.op_obj = ret->opcodes, .op_name = "SET_FUNC_ARGS", .op_code = 117); // Argcount + varargs_op (.op_obj = ret->opcodes, .op_name = "RESERVE_FAST", .op_code = 123); // Number of local variables + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_15.c b/libr/asm/arch/pyc/opcode_15.c new file mode 100644 index 0000000000..800dbf8f5d --- /dev/null +++ b/libr/asm/arch/pyc/opcode_15.c @@ -0,0 +1,125 @@ +#include "opcode.h" + +pyc_opcodes *opcode_15(void) { + pyc_opcodes *ret = new_pyc_opcodes (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_15; + + def_op (.op_obj = ret->opcodes, .op_name = "STOP_CODE", .op_code = 0, .pop = 0, .push = 0, .fallthrough = false); + def_op (.op_obj = ret->opcodes, .op_name = "POP_TOP", .op_code = 1); + def_op (.op_obj = ret->opcodes, .op_name = "ROT_TWO", .op_code = 2); + def_op (.op_obj = ret->opcodes, .op_name = "ROT_THREE", .op_code = 3); + def_op (.op_obj = ret->opcodes, .op_name = "DUP_TOP", .op_code = 4); + + def_op (.op_obj = ret->opcodes, .op_name = "UNARY_POSITIVE", .op_code = 10, .pop = 1, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "UNARY_NEGATIVE", .op_code = 11, .pop = 1, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "UNARY_NOT", .op_code = 12, .pop = 1, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "UNARY_CONVERT", .op_code = 13, .pop = 1, .push = 1); + + def_op (.op_obj = ret->opcodes, .op_name = "UNARY_INVERT", .op_code = 15, .pop = 1, .push = 1); + + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_POWER", .op_code = 19, .pop = 1, 
.push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_MULTIPLY", .op_code = 20, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_DIVIDE", .op_code = 21, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_MODULO", .op_code = 22, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_ADD", .op_code = 23, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_SUBTRACT", .op_code = 24, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_SUBSCR", .op_code = 25, .pop = 2, .push = 1); + + def_op (.op_obj = ret->opcodes, .op_name = "SLICE_0", .op_code = 30, .pop = 1, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "SLICE_1", .op_code = 31, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "SLICE_2", .op_code = 32, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "SLICE_3", .op_code = 33, .pop = 3, .push = 1); + + store_op (.op_obj = ret->opcodes, .op_name = "STORE_SLICE_0", .op_code = 40, .pop = 2, .push = 0); + store_op (.op_obj = ret->opcodes, .op_name = "STORE_SLICE_1", .op_code = 41, .pop = 3, .push = 0); + store_op (.op_obj = ret->opcodes, .op_name = "STORE_SLICE_2", .op_code = 42, .pop = 3, .push = 0); + store_op (.op_obj = ret->opcodes, .op_name = "STORE_SLICE_3", .op_code = 43, .pop = 4, .push = 0); + + def_op (.op_obj = ret->opcodes, .op_name = "DELETE_SLICE_0", .op_code = 50, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "DELETE_SLICE_1", .op_code = 51, .pop = 2, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "DELETE_SLICE_2", .op_code = 52, .pop = 2, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "DELETE_SLICE_3", .op_code = 53, .pop = 3, .push = 0); + + store_op (.op_obj = ret->opcodes, .op_name = "STORE_SUBSCR", .op_code = 60, .pop = 3, .push = 0); // Implements TOS1[TOS] = TOS2. 
+ def_op (.op_obj = ret->opcodes, .op_name = "DELETE_SUBSCR", .op_code = 61, .pop = 2, .push = 0); // Implements del TOS1[TOS]. + + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_LSHIFT", .op_code = 62, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_RSHIFT", .op_code = 63, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_AND", .op_code = 64, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_XOR", .op_code = 65, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_OR", .op_code = 66, .pop = 2, .push = 1); + + def_op (.op_obj = ret->opcodes, .op_name = "PRINT_EXPR", .op_code = 70, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "PRINT_ITEM", .op_code = 71, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "PRINT_NEWLINE", .op_code = 72, .pop = 1, .push = 0); + + def_op (.op_obj = ret->opcodes, .op_name = "BREAK_LOOP", .op_code = 80, .pop = 0, .push = 0); + + def_op (.op_obj = ret->opcodes, .op_name = "LOAD_LOCALS", .op_code = 82, .pop = 0, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "RETURN_VALUE", .op_code = 83, .pop = 1, .push = 0, .fallthrough = false); + + def_op (.op_obj = ret->opcodes, .op_name = "EXEC_STMT", .op_code = 85, .pop = 3, .push = 0); + + def_op (.op_obj = ret->opcodes, .op_name = "POP_BLOCK", .op_code = 87, .pop = 0, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "END_FINALLY", .op_code = 88, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "BUILD_CLASS", .op_code = 89, .pop = 3, .push = 0); + + ret->have_argument = 90; // Opcodes from here have an argument: + + store_op (.op_obj = ret->opcodes, .op_name = "STORE_NAME", .op_code = 90, .pop = 1, .push = 0, .func = NAME_OP); // Operand is in name list + name_op (.op_obj = ret->opcodes, .op_name = "DELETE_NAME", .op_code = 91, .pop = 0, .push = 0); // "" + varargs_op (.op_obj = ret->opcodes, .op_name = "UNPACK_TUPLE", 
.op_code = 92); // Number of tuple items + def_op (.op_obj = ret->opcodes, .op_name = "UNPACK_LIST", .op_code = 93); // Number of list items + store_op (.op_obj = ret->opcodes, .op_name = "STORE_ATTR", .op_code = 95, .pop = 2, .push = 0, .func = NAME_OP); // Operand is in name list + name_op (.op_obj = ret->opcodes, .op_name = "DELETE_ATTR", .op_code = 96, .pop = 1, .push = 0); // "" + store_op (.op_obj = ret->opcodes, .op_name = "STORE_GLOBAL", .op_code = 97, .pop = 1, .push = 0, .func = NAME_OP); // "" + name_op (.op_obj = ret->opcodes, .op_name = "DELETE_GLOBAL", .op_code = 98, .pop = 0, .push = 0); // "" + + const_op (.op_obj = ret->opcodes, .op_name = "LOAD_CONST", .op_code = 100, .pop = 0, .push = 1); // Operand is in const list + name_op (.op_obj = ret->opcodes, .op_name = "LOAD_NAME", .op_code = 101, .pop = 0, .push = 1); // Operand is in name list + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_TUPLE", .op_code = 102, .pop = -1, .push = 1); // Number of tuple items + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_LIST", .op_code = 103, .pop = -1, .push = 1); // Number of list items + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_MAP", .op_code = 104, .pop = -1, .push = 1); // Always zero for now + name_op (.op_obj = ret->opcodes, .op_name = "LOAD_ATTR", .op_code = 105, .pop = 1, .push = 1); // Operand is in name list + compare_op (.op_obj = ret->opcodes, .op_name = "COMPARE_OP", .op_code = 106, .pop = 2, .push = 1); // Comparison operator + + name_op (.op_obj = ret->opcodes, .op_name = "IMPORT_NAME", .op_code = 107, .pop = 2, .push = 1); // Operand is in name list + name_op (.op_obj = ret->opcodes, .op_name = "IMPORT_FROM", .op_code = 108, .pop = 0, .push = 1); // Operand is in name list + + jrel_op (.op_obj = ret->opcodes, .op_name = "JUMP_FORWARD", .op_code = 110, .pop = 0, .push = 0, .fallthrough = false); // Number of bytes to skip (unconditional: never falls through) + jrel_op (.op_obj = ret->opcodes, .op_name = "JUMP_IF_FALSE", .op_code = 111, .pop = 1, .push = 1, .conditional = true);
// "" + jrel_op (.op_obj = ret->opcodes, .op_name = "JUMP_IF_TRUE", .op_code = 112, .pop = 1, .push = 1, .conditional = true); // "" + jabs_op (.op_obj = ret->opcodes, .op_name = "JUMP_ABSOLUTE", .op_code = 113, .pop = 0, .push = 0, .fallthrough = false); // Target byte offset from beginning of code (unconditional: never falls through) + def_op (.op_obj = ret->opcodes, .op_name = "FOR_LOOP", .op_code = 114); // Number of bytes to skip + + name_op (.op_obj = ret->opcodes, .op_name = "LOAD_GLOBAL", .op_code = 116, .pop = 0, .push = 1); // Operand is in name list + + jrel_op (.op_obj = ret->opcodes, .op_name = "SETUP_LOOP", .op_code = 120, .pop = 0, .push = 0, .conditional = true); // Distance to target address + jrel_op (.op_obj = ret->opcodes, .op_name = "SETUP_EXCEPT", .op_code = 121, .pop = 0, .push = 0, .conditional = true); // "" + jrel_op (.op_obj = ret->opcodes, .op_name = "SETUP_FINALLY", .op_code = 122, .pop = 0, .push = 0, .conditional = true); // "" + + local_op (.op_obj = ret->opcodes, .op_name = "LOAD_FAST", .op_code = 124, .pop = 0, .push = 1); // Local variable number + store_op (.op_obj = ret->opcodes, .op_name = "STORE_FAST", .op_code = 125, .pop = 1, .push = 0, .func = LOCAL_OP); // Local variable number + local_op (.op_obj = ret->opcodes, .op_name = "DELETE_FAST", .op_code = 126); // Local variable number + + def_op (.op_obj = ret->opcodes, .op_name = "SET_LINENO", .op_code = 127); // Current line number + + def_op (.op_obj = ret->opcodes, .op_name = "RAISE_VARARGS", .op_code = 130, .pop = -1, .push = 0, .fallthrough = false); + // Number of raise arguments (1, 2, or 3) + nargs_op (.op_obj = ret->opcodes, .op_name = "CALL_FUNCTION", .op_code = 131, .pop = -1, .push = 1); // #args + (#kwargs << 8) + + def_op (.op_obj = ret->opcodes, .op_name = "MAKE_FUNCTION", .op_code = 132, .pop = -1, .push = 1); // Number of args with default values + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_SLICE", .op_code = 133, .pop = -1, .push = 1); // Number of items + + def_op (.op_obj = ret->opcodes, .op_name =
"EXTENDED_ARG", .op_code = 143); + ret->extended_arg = 143; + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_16.c b/libr/asm/arch/pyc/opcode_16.c new file mode 100644 index 0000000000..fb3ce804ca --- /dev/null +++ b/libr/asm/arch/pyc/opcode_16.c @@ -0,0 +1,20 @@ +#include "opcode.h" + +pyc_opcodes *opcode_16(void) { + pyc_opcodes *ret = opcode_15 (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_16; + + // 1.6 Bytecodes not in 1.5 + nargs_op (.op_obj = ret->opcodes, .op_name = "CALL_FUNCTION_VAR", .op_code = 140, .pop = -1, .push = 1); // #args + (#kwargs << 8) + nargs_op (.op_obj = ret->opcodes, .op_name = "CALL_FUNCTION_KW", .op_code = 141, .pop = -1, .push = 1); // #args + (#kwargs << 8) + nargs_op (.op_obj = ret->opcodes, .op_name = "CALL_FUNCTION_VAR_KW", .op_code = 142, .pop = -1, .push = 1); // #args + (#kwargs << 8) + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_20.c b/libr/asm/arch/pyc/opcode_20.c new file mode 100644 index 0000000000..978d200ece --- /dev/null +++ b/libr/asm/arch/pyc/opcode_20.c @@ -0,0 +1,22 @@ +#include "opcode.h" + +pyc_opcodes *opcode_20(void) { + pyc_opcodes *ret = opcode_21 (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_20; + + // 2.1 Bytecodes not in 2.0 + rm_op (.op_obj = ret->opcodes, .op_name = "CONTINUE_LOOP", .op_code = 119); + rm_op (.op_obj = ret->opcodes, .op_name = "MAKE_CLOSURE", .op_code = 134); + rm_op (.op_obj = ret->opcodes, .op_name = "LOAD_CLOSURE", .op_code = 135); + rm_op (.op_obj = ret->opcodes, .op_name = "LOAD_DEREF", .op_code = 136); + rm_op (.op_obj = ret->opcodes, .op_name = "STORE_DEREF", .op_code = 137); + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + + return ret; +} diff --git 
a/libr/asm/arch/pyc/opcode_21.c b/libr/asm/arch/pyc/opcode_21.c new file mode 100644 index 0000000000..60552c0e60 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_21.c @@ -0,0 +1,24 @@ +#include "opcode.h" + +pyc_opcodes *opcode_21(void) { + pyc_opcodes *ret = opcode_22 (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_21; + + // 2.1 bytecodes changes from 2.2 + rm_op (.op_obj = ret->opcodes, .op_name = "BINARY_FLOOR_DIVIDE", .op_code = 26); + rm_op (.op_obj = ret->opcodes, .op_name = "BINARY_TRUE_DIVIDE", .op_code = 27); + rm_op (.op_obj = ret->opcodes, .op_name = "INPLACE_FLOOR_DIVIDE", .op_code = 28); + rm_op (.op_obj = ret->opcodes, .op_name = "INPLACE_TRUE_DIVIDE", .op_code = 29); + rm_op (.op_obj = ret->opcodes, .op_name = "GET_ITER", .op_code = 68); + rm_op (.op_obj = ret->opcodes, .op_name = "YIELD_VALUE", .op_code = 86); + rm_op (.op_obj = ret->opcodes, .op_name = "FOR_ITER", .op_code = 93); + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_22.c b/libr/asm/arch/pyc/opcode_22.c new file mode 100644 index 0000000000..78a5af5d85 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_22.c @@ -0,0 +1,19 @@ +#include "opcode.h" + +pyc_opcodes *opcode_22(void) { + pyc_opcodes *ret = opcode_2x (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_22; + + // 2.2 Bytecodes not in 2.3 + def_op (.op_obj = ret->opcodes, .op_name = "FOR_LOOP", .op_code = 114); + def_op (.op_obj = ret->opcodes, .op_name = "SET_LINENO", .op_code = 127, .pop = 0, .push = 0); + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_23.c b/libr/asm/arch/pyc/opcode_23.c new file mode 100644 index 0000000000..63cee0ba84 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_23.c @@ -0,0 +1,15 @@ +#include "opcode.h" + +pyc_opcodes *opcode_23(void) { + pyc_opcodes 
*ret = opcode_2x (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_23; + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_24.c b/libr/asm/arch/pyc/opcode_24.c new file mode 100644 index 0000000000..c66eb87ef6 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_24.c @@ -0,0 +1,21 @@ +#include "opcode.h" + +pyc_opcodes *opcode_24(void) { + pyc_opcodes *ret = opcode_2x (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_24; + + // Bytecodes added since 2.3 + def_op (.op_obj = ret->opcodes, .op_name = "NOP", .op_code = 9, .pop = 0, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "LIST_APPEND", .op_code = 18, .pop = 2, .push = 1); // Calls list.append(TOS[-i], TOS). + // Used to implement list comprehensions. + def_op (.op_obj = ret->opcodes, .op_name = "YIELD_VALUE", .op_code = 86, .pop = 1, .push = 0); + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_25.c b/libr/asm/arch/pyc/opcode_25.c new file mode 100644 index 0000000000..f02b8abc82 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_25.c @@ -0,0 +1,18 @@ +#include "opcode.h" + +pyc_opcodes *opcode_25(void) { + pyc_opcodes *ret = opcode_24 (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_25; + + // Bytecodes added in 2.5 from 2.4 + def_op (.op_obj = ret->opcodes, .op_name = "WITH_CLEANUP", .op_code = 81); + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_26.c b/libr/asm/arch/pyc/opcode_26.c new file mode 100644 index 0000000000..4f7cc106c3 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_26.c @@ -0,0 +1,18 @@ +#include "opcode.h" + +pyc_opcodes *opcode_26(void) { + pyc_opcodes *ret = opcode_25 (); + if (!ret) { + return NULL; + 
} + + ret->version_sig = (void *(*)())opcode_26; + + // Below are opcode changes since Python 2.5 + store_op (.op_obj = ret->opcodes, .op_name = "STORE_MAP", .op_code = 54, .pop = 3, .push = 1); + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_27.c b/libr/asm/arch/pyc/opcode_27.c new file mode 100644 index 0000000000..42792e9702 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_27.c @@ -0,0 +1,48 @@ +#include "opcode.h" + +pyc_opcodes *opcode_27(void) { + pyc_opcodes *ret = opcode_26 (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_27; + + // Below are opcode changes since Python 2.6 + rm_op (.op_obj = ret->opcodes, .op_name = "BUILD_MAP", .op_code = 104); + rm_op (.op_obj = ret->opcodes, .op_name = "LOAD_ATTR", .op_code = 105); + rm_op (.op_obj = ret->opcodes, .op_name = "COMPARE_OP", .op_code = 106); + rm_op (.op_obj = ret->opcodes, .op_name = "IMPORT_NAME", .op_code = 107); + rm_op (.op_obj = ret->opcodes, .op_name = "IMPORT_FROM", .op_code = 108); + rm_op (.op_obj = ret->opcodes, .op_name = "JUMP_IF_FALSE", .op_code = 111); + rm_op (.op_obj = ret->opcodes, .op_name = "EXTENDED_ARG", .op_code = 143); + rm_op (.op_obj = ret->opcodes, .op_name = "JUMP_IF_TRUE", .op_code = 112); + + def_op (.op_obj = ret->opcodes, .op_name = "LIST_APPEND", .op_code = 94, .pop = 2, .push = 1); // Calls list.append(TOS[-i], TOS). + // Used to implement list comprehensions. 
+ def_op (.op_obj = ret->opcodes, .op_name = "BUILD_SET", .op_code = 104); // Number of set items + def_op (.op_obj = ret->opcodes, .op_name = "BUILD_MAP", .op_code = 105); + name_op (.op_obj = ret->opcodes, .op_name = "LOAD_ATTR", .op_code = 106); + compare_op (.op_obj = ret->opcodes, .op_name = "COMPARE_OP", .op_code = 107); + + name_op (.op_obj = ret->opcodes, .op_name = "IMPORT_NAME", .op_code = 108, .pop = 2, .push = 1); // Index in name list + name_op (.op_obj = ret->opcodes, .op_name = "IMPORT_FROM", .op_code = 109, .pop = 0, .push = 1); + + jabs_op (.op_obj = ret->opcodes, .op_name = "JUMP_IF_FALSE_OR_POP", .op_code = 111, .conditional = true); // Target byte offset from beginning of code + jabs_op (.op_obj = ret->opcodes, .op_name = "JUMP_IF_TRUE_OR_POP", .op_code = 112, .conditional = true); // "" + jabs_op (.op_obj = ret->opcodes, .op_name = "POP_JUMP_IF_FALSE", .op_code = 114, .conditional = true); // "" + jabs_op (.op_obj = ret->opcodes, .op_name = "POP_JUMP_IF_TRUE", .op_code = 115, .conditional = true); // "" + jrel_op (.op_obj = ret->opcodes, .op_name = "SETUP_WITH", .op_code = 143, .pop = 0, .push = 2); + + def_op (.op_obj = ret->opcodes, .op_name = "EXTENDED_ARG", .op_code = 145); + def_op (.op_obj = ret->opcodes, .op_name = "SET_ADD", .op_code = 146, .pop = 1, .push = 0); // Calls set.add(TOS1[-i], TOS). + // Used to implement set comprehensions. + def_op (.op_obj = ret->opcodes, .op_name = "MAP_ADD", .op_code = 147, .pop = 2, .push = 1); // Calls dict.setitem(TOS1[-i], TOS, TOS1) + // Used to implement dict comprehensions. 
+ + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + add_arg_fmt (ret, "CALL_FUNCTION", format_CALL_FUNCTION_pos_name_encoded); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_2x.c b/libr/asm/arch/pyc/opcode_2x.c new file mode 100644 index 0000000000..30526b9f43 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_2x.c @@ -0,0 +1,152 @@ +#include "opcode.h" + +pyc_opcodes *opcode_2x(void) { + pyc_opcodes *ret = new_pyc_opcodes (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_2x; + + def_op (.op_obj = ret->opcodes, .op_name = "STOP_CODE", .op_code = 0, .pop = 0, .push = 0, .fallthrough = false); + def_op (.op_obj = ret->opcodes, .op_name = "POP_TOP", .op_code = 1, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "ROT_TWO", .op_code = 2, .pop = 2, .push = 2); + def_op (.op_obj = ret->opcodes, .op_name = "ROT_THREE", .op_code = 3, .pop = 3, .push = 3); + def_op (.op_obj = ret->opcodes, .op_name = "DUP_TOP", .op_code = 4, .pop = 0, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "ROT_FOUR", .op_code = 5, .pop = 4, .push = 4); + + def_op (.op_obj = ret->opcodes, .op_name = "UNARY_POSITIVE", .op_code = 10, .pop = 1, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "UNARY_NEGATIVE", .op_code = 11, .pop = 1, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "UNARY_NOT", .op_code = 12, .pop = 1, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "UNARY_CONVERT", .op_code = 13, .pop = 1, .push = 1); + + def_op (.op_obj = ret->opcodes, .op_name = "UNARY_INVERT", .op_code = 15, .pop = 1, .push = 1); + + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_POWER", .op_code = 19, .pop = 2, .push = 1); + + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_MULTIPLY", .op_code = 20, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_DIVIDE", .op_code = 21, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = 
"BINARY_MODULO", .op_code = 22, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_ADD", .op_code = 23, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_SUBTRACT", .op_code = 24, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_SUBSCR", .op_code = 25, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_FLOOR_DIVIDE", .op_code = 26, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_TRUE_DIVIDE", .op_code = 27, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_FLOOR_DIVIDE", .op_code = 28, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_TRUE_DIVIDE", .op_code = 29, .pop = 2, .push = 1); + + def_op (.op_obj = ret->opcodes, .op_name = "SLICE_0", .op_code = 30, .pop = 1, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "SLICE_1", .op_code = 31, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "SLICE_2", .op_code = 32, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "SLICE_3", .op_code = 33, .pop = 3, .push = 1); + + store_op (.op_obj = ret->opcodes, .op_name = "STORE_SLICE_0", .op_code = 40, .pop = 2, .push = 0); + store_op (.op_obj = ret->opcodes, .op_name = "STORE_SLICE_1", .op_code = 41, .pop = 3, .push = 0); + store_op (.op_obj = ret->opcodes, .op_name = "STORE_SLICE_2", .op_code = 42, .pop = 3, .push = 0); + store_op (.op_obj = ret->opcodes, .op_name = "STORE_SLICE_3", .op_code = 43, .pop = 4, .push = 0); + + def_op (.op_obj = ret->opcodes, .op_name = "DELETE_SLICE_0", .op_code = 50, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "DELETE_SLICE_1", .op_code = 51, .pop = 2, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "DELETE_SLICE_2", .op_code = 52, .pop = 2, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "DELETE_SLICE_3", .op_code = 53, .pop = 3, .push = 0); + + def_op (.op_obj = ret->opcodes, .op_name = 
"INPLACE_ADD", .op_code = 55, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_SUBTRACT", .op_code = 56, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_MULTIPLY", .op_code = 57, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_DIVIDE", .op_code = 58, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_MODULO", .op_code = 59, .pop = 2, .push = 1); + store_op (.op_obj = ret->opcodes, .op_name = "STORE_SUBSCR", .op_code = 60, .pop = 3, .push = 0); // Implements TOS1[TOS] = TOS2. + def_op (.op_obj = ret->opcodes, .op_name = "DELETE_SUBSCR", .op_code = 61, .pop = 2, .push = 0); // Implements del TOS1[TOS]. + + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_LSHIFT", .op_code = 62, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_RSHIFT", .op_code = 63, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_AND", .op_code = 64, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_XOR", .op_code = 65, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_OR", .op_code = 66, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_POWER", .op_code = 67, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "GET_ITER", .op_code = 68, .pop = 1, .push = 1); + + def_op (.op_obj = ret->opcodes, .op_name = "PRINT_EXPR", .op_code = 70, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "PRINT_ITEM", .op_code = 71, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "PRINT_NEWLINE", .op_code = 72, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "PRINT_ITEM_TO", .op_code = 73, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "PRINT_NEWLINE_TO", .op_code = 74, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_LSHIFT", .op_code = 75, .pop = 2, .push = 1); + def_op 
(.op_obj = ret->opcodes, .op_name = "INPLACE_RSHIFT", .op_code = 76, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_AND", .op_code = 77, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_XOR", .op_code = 78, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_OR", .op_code = 79, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BREAK_LOOP", .op_code = 80, .pop = 0, .push = 0); + + def_op (.op_obj = ret->opcodes, .op_name = "LOAD_LOCALS", .op_code = 82, .pop = 0, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "RETURN_VALUE", .op_code = 83, .pop = 1, .push = 0, .fallthrough = false); + def_op (.op_obj = ret->opcodes, .op_name = "IMPORT_STAR", .op_code = 84, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "EXEC_STMT", .op_code = 85, .pop = 3, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "YIELD_VALUE", .op_code = 86, .pop = 1, .push = 1); + + def_op (.op_obj = ret->opcodes, .op_name = "POP_BLOCK", .op_code = 87, .pop = 0, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "END_FINALLY", .op_code = 88, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "BUILD_CLASS", .op_code = 89, .pop = 3, .push = 0); + + ret->have_argument = 90; // Opcodes from here have an argument: + + store_op (.op_obj = ret->opcodes, .op_name = "STORE_NAME", .op_code = 90, .pop = 1, .push = 0, .func = NAME_OP); // Operand is in name list + name_op (.op_obj = ret->opcodes, .op_name = "DELETE_NAME", .op_code = 91, .pop = 0, .push = 0); // "" + varargs_op (.op_obj = ret->opcodes, .op_name = "UNPACK_SEQUENCE", .op_code = 92, .pop = 9, .push = 1); // TOS is number of tuple items + jrel_op (.op_obj = ret->opcodes, .op_name = "FOR_ITER", .op_code = 93, .pop = 9, .push = 1); // TOS is read + + store_op (.op_obj = ret->opcodes, .op_name = "STORE_ATTR", .op_code = 95, .pop = 2, .push = 0, .func = NAME_OP); // Operand is in name list + name_op 
(.op_obj = ret->opcodes, .op_name = "DELETE_ATTR", .op_code = 96, .pop = 1, .push = 0); // "" + store_op (.op_obj = ret->opcodes, .op_name = "STORE_GLOBAL", .op_code = 97, .pop = 1, .push = 0, .func = NAME_OP); // "" + name_op (.op_obj = ret->opcodes, .op_name = "DELETE_GLOBAL", .op_code = 98, .pop = 0, .push = 0); // "" + def_op (.op_obj = ret->opcodes, .op_name = "DUP_TOPX", .op_code = 99, .pop = 1, .push = -1); // number of items to duplicate + const_op (.op_obj = ret->opcodes, .op_name = "LOAD_CONST", .op_code = 100, .pop = 0, .push = 1); // Operand is in const list + name_op (.op_obj = ret->opcodes, .op_name = "LOAD_NAME", .op_code = 101, .pop = 0, .push = 1); // Operand is in name list + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_TUPLE", .op_code = 102, .pop = 9, .push = 1); // TOS is number of tuple items + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_LIST", .op_code = 103, .pop = 9, .push = 1); // TOS is number of list items + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_MAP", .op_code = 104, .pop = 0, .push = 1); // TOS is number of kwarg items.
Always zero for now + name_op (.op_obj = ret->opcodes, .op_name = "LOAD_ATTR", .op_code = 105, .pop = 1, .push = 1); // Operand is in name list + compare_op (.op_obj = ret->opcodes, .op_name = "COMPARE_OP", .op_code = 106, .pop = 2, .push = 1); // Comparison operator + + name_op (.op_obj = ret->opcodes, .op_name = "IMPORT_NAME", .op_code = 107, .pop = 2, .push = 1); // Operand is in name list + name_op (.op_obj = ret->opcodes, .op_name = "IMPORT_FROM", .op_code = 108, .pop = 0, .push = 1); // Operand is in name list + + jrel_op (.op_obj = ret->opcodes, .op_name = "JUMP_FORWARD", .op_code = 110, .pop = 0, .push = 0, .fallthrough = false); // Number of bytes to skip + jrel_op (.op_obj = ret->opcodes, .op_name = "JUMP_IF_FALSE", .op_code = 111, .pop = 1, .push = 1, .conditional = true); // "" + + jrel_op (.op_obj = ret->opcodes, .op_name = "JUMP_IF_TRUE", .op_code = 112, .pop = 1, .push = 1, .conditional = true); // "" + jabs_op (.op_obj = ret->opcodes, .op_name = "JUMP_ABSOLUTE", .op_code = 113, .pop = 0, .push = 0, .fallthrough = false); // Target byte offset from beginning of code + + name_op (.op_obj = ret->opcodes, .op_name = "LOAD_GLOBAL", .op_code = 116, .pop = 0, .push = 1); // Operand is in name list + + jabs_op (.op_obj = ret->opcodes, .op_name = "CONTINUE_LOOP", .op_code = 119, .pop = 0, .push = 0, .fallthrough = false); // Target address + jrel_op (.op_obj = ret->opcodes, .op_name = "SETUP_LOOP", .op_code = 120, .pop = 0, .push = 0, .conditional = true); // Distance to target address + jrel_op (.op_obj = ret->opcodes, .op_name = "SETUP_EXCEPT", .op_code = 121, .pop = 0, .push = 6, .conditional = true); // "" + jrel_op (.op_obj = ret->opcodes, .op_name = "SETUP_FINALLY", .op_code = 122, .pop = 0, .push = 7, .conditional = true); // "" + + local_op (.op_obj = ret->opcodes, .op_name = "LOAD_FAST", .op_code = 124, .pop = 0, .push = 1); // Local variable number + store_op (.op_obj = ret->opcodes, .op_name = "STORE_FAST", .op_code = 125, .pop = 1, .push = 0, 
.func = LOCAL_OP); // Local variable number + local_op (.op_obj = ret->opcodes, .op_name = "DELETE_FAST", .op_code = 126); // Local variable number + + def_op (.op_obj = ret->opcodes, .op_name = "RAISE_VARARGS", .op_code = 130, .pop = 1, .push = 0, .fallthrough = false); // Number of raise arguments (1, 2, or 3) + nargs_op (.op_obj = ret->opcodes, .op_name = "CALL_FUNCTION", .op_code = 131, .pop = 9, .push = 1); // TOS is #args + (#kwargs << 8) + + def_op (.op_obj = ret->opcodes, .op_name = "MAKE_FUNCTION", .op_code = 132, .pop = 9, .push = 1); // TOS is number of args with default values + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_SLICE", .op_code = 133, .pop = 9, .push = 1); // TOS is number of items + + def_op (.op_obj = ret->opcodes, .op_name = "MAKE_CLOSURE", .op_code = 134, .pop = 9, .push = 1); + free_op (.op_obj = ret->opcodes, .op_name = "LOAD_CLOSURE", .op_code = 135, .pop = 0, .push = 1); + free_op (.op_obj = ret->opcodes, .op_name = "LOAD_DEREF", .op_code = 136, .pop = 0, .push = 1); + store_op (.op_obj = ret->opcodes, .op_name = "STORE_DEREF", .op_code = 137, .pop = 1, .push = 0, .func = FREE_OP); + + nargs_op (.op_obj = ret->opcodes, .op_name = "CALL_FUNCTION_VAR", .op_code = 140, .pop = -1, .push = 1); // #args + (#kwargs << 8) + nargs_op (.op_obj = ret->opcodes, .op_name = "CALL_FUNCTION_KW", .op_code = 141, .pop = -1, .push = 1); // #args + (#kwargs << 8) + nargs_op (.op_obj = ret->opcodes, .op_name = "CALL_FUNCTION_VAR_KW", .op_code = 142, .pop = -1, .push = 1); // #args + (#kwargs << 8) + + def_op (.op_obj = ret->opcodes, .op_name = "EXTENDED_ARG", .op_code = 143); // Same opcode value as ret->extended_arg below + ret->extended_arg = 143; + + r_list_purge (ret->opcode_arg_fmt); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_30.c b/libr/asm/arch/pyc/opcode_30.c new file mode 100644 index 0000000000..4a6524e935 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_30.c @@ -0,0 +1,36 @@ +#include "opcode.h" + +pyc_opcodes *opcode_30(void) { + pyc_opcodes *ret = opcode_31 (); + if (!ret) { +
return NULL; + } + + ret->version_sig = (void *(*)())opcode_30; + + // These are in Python 3.x but not in Python 3.0 + + rm_op (.op_obj = ret->opcodes, .op_name = "JUMP_IF_FALSE_OR_POP", .op_code = 111); + rm_op (.op_obj = ret->opcodes, .op_name = "JUMP_IF_TRUE_OR_POP", .op_code = 112); + rm_op (.op_obj = ret->opcodes, .op_name = "POP_JUMP_IF_FALSE", .op_code = 114); + rm_op (.op_obj = ret->opcodes, .op_name = "POP_JUMP_IF_TRUE", .op_code = 115); + rm_op (.op_obj = ret->opcodes, .op_name = "LIST_APPEND", .op_code = 145); + rm_op (.op_obj = ret->opcodes, .op_name = "MAP_ADD", .op_code = 147); + + // These are in 3.0 but are not in 3.1 or they have + // different opcode numbers. Note: As a result of opcode value + // changes, these have to be applied *after* removing ops (with + // the same name). + + def_op (.op_obj = ret->opcodes, .op_name = "SET_ADD", .op_code = 17, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "LIST_APPEND", .op_code = 18, .pop = 2, .push = 1); + + jrel_op (.op_obj = ret->opcodes, .op_name = "JUMP_IF_FALSE", .op_code = 111, .pop = 1, .push = 1, .conditional = true); + jrel_op (.op_obj = ret->opcodes, .op_name = "JUMP_IF_TRUE", .op_code = 112, .pop = 1, .push = 1, .conditional = true); + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + add_arg_fmt (ret, "MAKE_FUNCTION", format_MAKE_FUNCTION_arg_3x); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_31.c b/libr/asm/arch/pyc/opcode_31.c new file mode 100644 index 0000000000..ca638282b4 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_31.c @@ -0,0 +1,28 @@ +#include "opcode.h" + +pyc_opcodes *opcode_31(void) { + pyc_opcodes *ret = opcode_32 (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_31; + + // These are in Python 3.2 but not in Python 3.1 + rm_op (.op_obj = ret->opcodes, .op_name = "DUP_TOP_TWO", .op_code = 5); + rm_op (.op_obj = ret->opcodes, .op_name = "DELETE_DEREF", .op_code = 138); +
rm_op (.op_obj = ret->opcodes, .op_name = "SETUP_WITH", .op_code = 143); + + // These are in Python 3.1 but not Python 3.2 + def_op (.op_obj = ret->opcodes, .op_name = "ROT_FOUR", .op_code = 5); + def_op (.op_obj = ret->opcodes, .op_name = "DUP_TOPX", .op_code = 99); + + // This op is in 3.2 but its opcode is a 144 instead + def_op (.op_obj = ret->opcodes, .op_name = "EXTENDED_ARG", .op_code = 143); + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + add_arg_fmt (ret, "MAKE_FUNCTION", format_MAKE_FUNCTION_arg_3x); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_32.c b/libr/asm/arch/pyc/opcode_32.c new file mode 100644 index 0000000000..fe8dfdfefc --- /dev/null +++ b/libr/asm/arch/pyc/opcode_32.c @@ -0,0 +1,19 @@ +#include "opcode.h" + +pyc_opcodes *opcode_32(void) { + pyc_opcodes *ret = opcode_3x (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_32; + + // There are no opcodes to add or change. + // If there were, they'd be listed below. 
+ + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + add_arg_fmt (ret, "MAKE_FUNCTION", format_MAKE_FUNCTION_arg_3x); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_33.c b/libr/asm/arch/pyc/opcode_33.c new file mode 100644 index 0000000000..fc101250c3 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_33.c @@ -0,0 +1,21 @@ +#include "opcode.h" + +pyc_opcodes *opcode_33(void) { + pyc_opcodes *ret = opcode_3x (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_33; + + // Below are opcode changes since Python 3.2 + + rm_op (.op_obj = ret->opcodes, .op_name = "STOP_CODE", .op_code = 0); + def_op (.op_obj = ret->opcodes, .op_name = "YIELD_FROM", .op_code = 72, .pop = 1, .push = 0); + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + add_arg_fmt (ret, "MAKE_FUNCTION", format_MAKE_FUNCTION_arg_3x); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_34.c b/libr/asm/arch/pyc/opcode_34.c new file mode 100644 index 0000000000..dd51c2d940 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_34.c @@ -0,0 +1,23 @@ +#include "opcode.h" + +pyc_opcodes *opcode_34(void) { + pyc_opcodes *ret = opcode_33 (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_34; + + // These are removed since Python 3.3 + rm_op (.op_obj = ret->opcodes, .op_name = "STORE_LOCALS", .op_code = 69); + + // These are new since Python 3.3 + def_op (.op_obj = ret->opcodes, .op_name = "YIELD_FROM", .op_code = 72); + free_op (.op_obj = ret->opcodes, .op_name = "LOAD_CLASSDEREF", .op_code = 148); + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + add_arg_fmt (ret, "MAKE_FUNCTION", format_MAKE_FUNCTION_arg_3x); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_35.c b/libr/asm/arch/pyc/opcode_35.c new file mode 100644 index 0000000000..162e4a2fd3 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_35.c @@ -0,0 +1,43 @@ 
+#include "opcode.h" + +pyc_opcodes *opcode_35(void) { + pyc_opcodes *ret = opcode_34 (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_35; + + // These are removed since Python 3.5. + // Removals happen before adds since + // some opcodes are reused + rm_op (.op_obj = ret->opcodes, .op_name = "STORE_MAP", .op_code = 54); + rm_op (.op_obj = ret->opcodes, .op_name = "WITH_CLEANUP", .op_code = 81); + + // These are new since Python 3.5 + // OP NAME OPCODE POP PUSH + //--------------------------------------------------- + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_MATRIX_MULTIPLY", .op_code = 16, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_MATRIX_MULTIPLY", .op_code = 17, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "GET_AITER", .op_code = 50, .pop = 1, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "GET_ANEXT", .op_code = 51, .pop = 0, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BEFORE_ASYNC_WITH", .op_code = 52); + def_op (.op_obj = ret->opcodes, .op_name = "GET_YIELD_FROM_ITER", .op_code = 69, .pop = 0, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "GET_AWAITABLE", .op_code = 73, .pop = 0, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "WITH_CLEANUP_START", .op_code = 81, .pop = 0, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "WITH_CLEANUP_FINISH", .op_code = 82, .pop = -1, .push = 1); + + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_LIST_UNPACK", .op_code = 149, .pop = -1, .push = 1); + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_MAP_UNPACK", .op_code = 150, .pop = -1, .push = 1); + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_MAP_UNPACK_WITH_CALL", .op_code = 151, .pop = -1, .push = 1); + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_TUPLE_UNPACK", .op_code = 152, .pop = -1, .push = 1); + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_SET_UNPACK", .op_code = 153, .pop = -1, 
.push = 1); + + jrel_op (.op_obj = ret->opcodes, .op_name = "SETUP_ASYNC_WITH", .op_code = 154, .pop = 0, .push = 6); + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + add_arg_fmt (ret, "MAKE_FUNCTION", format_MAKE_FUNCTION_arg_3x); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_36.c b/libr/asm/arch/pyc/opcode_36.c new file mode 100644 index 0000000000..ce89a79fcf --- /dev/null +++ b/libr/asm/arch/pyc/opcode_36.c @@ -0,0 +1,34 @@ +#include "opcode.h" + +pyc_opcodes *opcode_36(void) { + pyc_opcodes *ret = opcode_35 (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_36; + + // These are removed since Python 3.6 + rm_op (.op_obj = ret->opcodes, .op_name = "MAKE_CLOSURE", .op_code = 134); + rm_op (.op_obj = ret->opcodes, .op_name = "CALL_FUNCTION_VAR", .op_code = 140); + rm_op (.op_obj = ret->opcodes, .op_name = "CALL_FUNCTION_VAR_KW", .op_code = 142); + + // These are new since Python 3.6 + store_op (.op_obj = ret->opcodes, .op_name = "STORE_ANNOTATION", .op_code = 127, .func = NAME_OP); // Index in name list + jrel_op (.op_obj = ret->opcodes, .op_name = "SETUP_ASYNC_WITH", .op_code = 154); + def_op (.op_obj = ret->opcodes, .op_name = "FORMAT_VALUE", .op_code = 155); + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_CONST_KEY_MAP", .op_code = 156, .pop = -1, .push = 1); // TOS is count of kwargs + nargs_op (.op_obj = ret->opcodes, .op_name = "CALL_FUNCTION_EX", .op_code = 142, .pop = -1, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "SETUP_ANNOTATIONS", .op_code = 85); + def_op (.op_obj = ret->opcodes, .op_name = "BUILD_STRING", .op_code = 157); + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_TUPLE_UNPACK_WITH_CALL", .op_code = 158); + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "CALL_FUNCTION_KW", format_CALL_FUNCTION_KW_36); + add_arg_fmt (ret, "CALL_FUNCTION_EX", format_CALL_FUNCTION_EX_36); + add_arg_fmt (ret, "MAKE_FUNCTION", 
format_MAKE_FUNCTION_arg_36); + add_arg_fmt (ret, "FORMAT_VALUE", format_value_flags_36); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg_36); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_37.c b/libr/asm/arch/pyc/opcode_37.c new file mode 100644 index 0000000000..188f541258 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_37.c @@ -0,0 +1,27 @@ +#include "opcode.h" + +pyc_opcodes *opcode_37(void) { + pyc_opcodes *ret = opcode_36 (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_37; + + // These are removed since 3.6... + // and STORE_ANNOTATION introduced in 3.6! + rm_op (.op_obj = ret->opcodes, .op_name = "STORE_ANNOTATION", .op_code = 127); + + // These are new since Python 3.7 + name_op (.op_obj = ret->opcodes, .op_name = "LOAD_METHOD", .op_code = 160); + nargs_op (.op_obj = ret->opcodes, .op_name = "CALL_METHOD", .op_code = 161); + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "CALL_FUNCTION_KW", format_CALL_FUNCTION_KW_36); + add_arg_fmt (ret, "CALL_FUNCTION_EX", format_CALL_FUNCTION_EX_36); + add_arg_fmt (ret, "MAKE_FUNCTION", format_MAKE_FUNCTION_arg_36); + add_arg_fmt (ret, "FORMAT_VALUE", format_value_flags_36); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg_36); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_38.c b/libr/asm/arch/pyc/opcode_38.c new file mode 100644 index 0000000000..69e892c7b3 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_38.c @@ -0,0 +1,36 @@ +#include "opcode.h" + +pyc_opcodes *opcode_38(void) { + pyc_opcodes *ret = opcode_37 (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_38; + + // These are removed since 3.7... 
+ rm_op (.op_obj = ret->opcodes, .op_name = "BREAK_LOOP", .op_code = 80); + rm_op (.op_obj = ret->opcodes, .op_name = "CONTINUE_LOOP", .op_code = 119); + rm_op (.op_obj = ret->opcodes, .op_name = "SETUP_LOOP", .op_code = 120); + rm_op (.op_obj = ret->opcodes, .op_name = "SETUP_EXCEPT", .op_code = 121); + + // These are new since Python 3.7 + + // OP NAME OPCODE POP PUSH + // -------------------------------------------- + def_op (.op_obj = ret->opcodes, .op_name = "ROT_FOUR", .op_code = 6, .pop = 4, .push = 4); + def_op (.op_obj = ret->opcodes, .op_name = "BEGIN_FINALLY", .op_code = 53, .pop = 0, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "END_ASYNC_FOR", .op_code = 54, .pop = 7, .push = 0); // POP is 0, when not 7 + def_op (.op_obj = ret->opcodes, .op_name = "END_FINALLY", .op_code = 88, .pop = 1, .push = 0); // POP is 6, when not 1 + jrel_op (.op_obj = ret->opcodes, .op_name = "CALL_FINALLY", .op_code = 162, .pop = 0, .push = 1); + nargs_op (.op_obj = ret->opcodes, .op_name = "POP_FINALLY", .op_code = 163, .pop = 0, .push = 0); // PUSH/POP vary + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "CALL_FUNCTION_KW", format_CALL_FUNCTION_KW_36); + add_arg_fmt (ret, "CALL_FUNCTION_EX", format_CALL_FUNCTION_EX_36); + add_arg_fmt (ret, "MAKE_FUNCTION", format_MAKE_FUNCTION_arg_36); + add_arg_fmt (ret, "FORMAT_VALUE", format_value_flags_36); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg_36); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_39.c b/libr/asm/arch/pyc/opcode_39.c new file mode 100644 index 0000000000..eae0eb43dd --- /dev/null +++ b/libr/asm/arch/pyc/opcode_39.c @@ -0,0 +1,33 @@ +#include "opcode.h" + +pyc_opcodes *opcode_39(void) { + pyc_opcodes *ret = opcode_38 (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_39; + + // These are removed since 3.8... 
+ rm_op (.op_obj = ret->opcodes, .op_name = "BEGIN_FINALLY", .op_code = 53); + rm_op (.op_obj = ret->opcodes, .op_name = "WITH_CLEANUP_START", .op_code = 81); + rm_op (.op_obj = ret->opcodes, .op_name = "WITH_CLEANUP_FINISH", .op_code = 82); + rm_op (.op_obj = ret->opcodes, .op_name = "END_FINALLY", .op_code = 88); + rm_op (.op_obj = ret->opcodes, .op_name = "CALL_FINALLY", .op_code = 162); + rm_op (.op_obj = ret->opcodes, .op_name = "POP_FINALLY", .op_code = 163); + + // These are new since Python 3.9 + // OP NAME OPCODE POP PUSH + def_op (.op_obj = ret->opcodes, .op_name = "RERAISE", .op_code = 48, .pop = 0, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "WITH_EXCEPT_START", .op_code = 49, .pop = 3, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "LOAD_ASSERTION_ERROR", .op_code = 74, .pop = 0, .push = 1); + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "CALL_FUNCTION_KW", format_CALL_FUNCTION_KW_36); + add_arg_fmt (ret, "CALL_FUNCTION_EX", format_CALL_FUNCTION_EX_36); + add_arg_fmt (ret, "MAKE_FUNCTION", format_MAKE_FUNCTION_arg_36); + add_arg_fmt (ret, "FORMAT_VALUE", format_value_flags_36); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg_36); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_3x.c b/libr/asm/arch/pyc/opcode_3x.c new file mode 100644 index 0000000000..e4133711a7 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_3x.c @@ -0,0 +1,165 @@ +#include "opcode.h" + +pyc_opcodes *opcode_3x(void) { + pyc_opcodes *ret = new_pyc_opcodes (); + if (!ret) { + return NULL; + } + + ret->version_sig = (void *(*)())opcode_3x; + + def_op (.op_obj = ret->opcodes, .op_name = "STOP_CODE", .op_code = 0, .pop = 0, .push = 0, .fallthrough = false); + def_op (.op_obj = ret->opcodes, .op_name = "POP_TOP", .op_code = 1, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "ROT_TWO", .op_code = 2, .pop = 2, .push = 2); + def_op (.op_obj = ret->opcodes, .op_name = "ROT_THREE", .op_code = 3, .pop = 3, .push = 3); + def_op 
(.op_obj = ret->opcodes, .op_name = "DUP_TOP", .op_code = 4, .pop = 0, .push = 1); + + // Python 3.2+ + def_op (.op_obj = ret->opcodes, .op_name = "DUP_TOP_TWO", .op_code = 5, .pop = 0, .push = 2); + + def_op (.op_obj = ret->opcodes, .op_name = "NOP", .op_code = 9); // designated like every other entry; a bare positional 9 relies on member order + def_op (.op_obj = ret->opcodes, .op_name = "UNARY_POSITIVE", .op_code = 10, .pop = 1, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "UNARY_NEGATIVE", .op_code = 11, .pop = 1, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "UNARY_NOT", .op_code = 12, .pop = 1, .push = 1); + + def_op (.op_obj = ret->opcodes, .op_name = "UNARY_INVERT", .op_code = 15, .pop = 1, .push = 1); + + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_POWER", .op_code = 19, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_MULTIPLY", .op_code = 20, .pop = 2, .push = 1); + + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_MODULO", .op_code = 22, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_ADD", .op_code = 23, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_SUBTRACT", .op_code = 24, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_SUBSCR", .op_code = 25, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_FLOOR_DIVIDE", .op_code = 26, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_TRUE_DIVIDE", .op_code = 27, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_FLOOR_DIVIDE", .op_code = 28, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_TRUE_DIVIDE", .op_code = 29, .pop = 2, .push = 1); + + // Gone from Python 3 are Python2's + // SLICE+0 .. SLICE+3 + // STORE_SLICE+0 .. STORE_SLICE+3 + // DELETE_SLICE+0 .. 
DELETE_SLICE+3 + + store_op (.op_obj = ret->opcodes, .op_name = "STORE_MAP", .op_code = 54, .pop = 3, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_ADD", .op_code = 55, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_SUBTRACT", .op_code = 56, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_MULTIPLY", .op_code = 57, .pop = 2, .push = 1); + + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_MODULO", .op_code = 59, .pop = 2, .push = 1); + store_op (.op_obj = ret->opcodes, .op_name = "STORE_SUBSCR", .op_code = 60, .pop = 3, .push = 0); // Implements TOS1[TOS] = TOS2. + def_op (.op_obj = ret->opcodes, .op_name = "DELETE_SUBSCR", .op_code = 61, .pop = 2, .push = 0); // Implements del TOS1[TOS]. + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_LSHIFT", .op_code = 62, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_RSHIFT", .op_code = 63, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_AND", .op_code = 64, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_XOR", .op_code = 65, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BINARY_OR", .op_code = 66, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_POWER", .op_code = 67, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "GET_ITER", .op_code = 68, .pop = 1, .push = 1); + store_op (.op_obj = ret->opcodes, .op_name = "STORE_LOCALS", .op_code = 69, .pop = 1, .push = 0); + + def_op (.op_obj = ret->opcodes, .op_name = "PRINT_EXPR", .op_code = 70, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "LOAD_BUILD_CLASS", .op_code = 71, .pop = 0, .push = 1); + + // Python3 drops/changes: + // def_op(.op_obj = ret->opcodes, .op_name = "PRINT_ITEM", 71) + // def_op(.op_obj = ret->opcodes, .op_name = "PRINT_NEWLINE", 72) + // def_op(.op_obj = ret->opcodes, .op_name = "PRINT_ITEM_TO", 73) + // 
def_op(.op_obj = ret->opcodes, .op_name = "PRINT_NEWLINE_TO", 74) + + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_LSHIFT", .op_code = 75, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_RSHIFT", .op_code = 76, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_AND", .op_code = 77, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_XOR", .op_code = 78, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "INPLACE_OR", .op_code = 79, .pop = 2, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "BREAK_LOOP", .op_code = 80, .pop = 0, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "WITH_CLEANUP", .op_code = 81, .pop = 1, .push = 0); // Cleans up the stack when a with statement + // block exits. Handle stack special + + def_op (.op_obj = ret->opcodes, .op_name = "RETURN_VALUE", .op_code = 83, .pop = 1, .push = 0, .fallthrough = false); + def_op (.op_obj = ret->opcodes, .op_name = "IMPORT_STAR", .op_code = 84, .pop = 1, .push = 0); + + def_op (.op_obj = ret->opcodes, .op_name = "YIELD_VALUE", .op_code = 86, .pop = 1, .push = 1); + def_op (.op_obj = ret->opcodes, .op_name = "POP_BLOCK", .op_code = 87, .pop = 0, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "END_FINALLY", .op_code = 88, .pop = 1, .push = 0); + def_op (.op_obj = ret->opcodes, .op_name = "POP_EXCEPT", .op_code = 89, .pop = 1, .push = -1); + + ret->have_argument = 90; // Opcodes from here have an argument: + + store_op (.op_obj = ret->opcodes, .op_name = "STORE_NAME", .op_code = 90, .pop = 1, .push = 0, .func = NAME_OP); // Operand is in name list + name_op (.op_obj = ret->opcodes, .op_name = "DELETE_NAME", .op_code = 91, .pop = 0, .push = 0); // "" + varargs_op (.op_obj = ret->opcodes, .op_name = "UNPACK_SEQUENCE", .op_code = 92, .pop = 9, .push = 1); // TOS is number of tuple items + jrel_op (.op_obj = ret->opcodes, .op_name = "FOR_ITER", .op_code = 93, .pop = 9, .push = 1); + + 
def_op (.op_obj = ret->opcodes, .op_name = "UNPACK_EX", .op_code = 94, .pop = 9, .push = 1); // assignment with a starred target; TOS is #entries + // argument has a count + store_op (.op_obj = ret->opcodes, .op_name = "STORE_ATTR", .op_code = 95, .pop = 2, .push = 0, .func = NAME_OP); // Operand is in name list + name_op (.op_obj = ret->opcodes, .op_name = "DELETE_ATTR", .op_code = 96, .pop = 1, .push = 0); // "" + store_op (.op_obj = ret->opcodes, .op_name = "STORE_GLOBAL", .op_code = 97, .pop = 1, .push = 0, .func = NAME_OP); // "" + name_op (.op_obj = ret->opcodes, .op_name = "DELETE_GLOBAL", .op_code = 98, .pop = 0, .push = 0); // "" + + // Python 2's DUP_TOPX is gone starting in Python 3.2 + + const_op (.op_obj = ret->opcodes, .op_name = "LOAD_CONST", .op_code = 100, .pop = 0, .push = 1); // Operand is in const list + name_op (.op_obj = ret->opcodes, .op_name = "LOAD_NAME", .op_code = 101, .pop = 0, .push = 1); // Operand is in name list + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_TUPLE", .op_code = 102, .pop = 9, .push = 1); // TOS is count of tuple items + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_LIST", .op_code = 103, .pop = 9, .push = 1); // TOS is count of list items + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_SET", .op_code = 104, .pop = 9, .push = 1); // TOS is count of set items + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_MAP", .op_code = 105, .pop = 0, .push = 1); // TOS is count of kwarg items + name_op (.op_obj = ret->opcodes, .op_name = "LOAD_ATTR", .op_code = 106, .pop = 1, .push = 1); // Operand is in name list + compare_op (.op_obj = ret->opcodes, .op_name = "COMPARE_OP", .op_code = 107, .pop = 2, .push = 1); // Comparison operator + name_op (.op_obj = ret->opcodes, .op_name = "IMPORT_NAME", .op_code = 108, .pop = 1, .push = 1); // Operand is in name list + name_op (.op_obj = ret->opcodes, .op_name = "IMPORT_FROM", .op_code = 109, .pop = 0, .push = 1); // Operand is in name list + + jrel_op 
(.op_obj = ret->opcodes, .op_name = "JUMP_FORWARD", .op_code = 110, .pop = 0, .push = 0); // Number of bytes to skip + jabs_op (.op_obj = ret->opcodes, .op_name = "JUMP_IF_FALSE_OR_POP", .op_code = 111, .conditional = true); // Target byte offset from beginning of code + jabs_op (.op_obj = ret->opcodes, .op_name = "JUMP_IF_TRUE_OR_POP", .op_code = 112, .conditional = true); // "" + jabs_op (.op_obj = ret->opcodes, .op_name = "JUMP_ABSOLUTE", .op_code = 113, .pop = 0, .push = 0); // Target byte offset from beginning of code + jabs_op (.op_obj = ret->opcodes, .op_name = "POP_JUMP_IF_FALSE", .op_code = 114, .pop = 9, .push = 1, .conditional = true); // "" + jabs_op (.op_obj = ret->opcodes, .op_name = "POP_JUMP_IF_TRUE", .op_code = 115, .pop = 9, .push = 1, .conditional = true); // "" + + name_op (.op_obj = ret->opcodes, .op_name = "LOAD_GLOBAL", .op_code = 116, .pop = 0, .push = 1); // Operand is in name list + + jabs_op (.op_obj = ret->opcodes, .op_name = "CONTINUE_LOOP", .op_code = 119, .pop = 0, .push = 0); // Target address + jrel_op (.op_obj = ret->opcodes, .op_name = "SETUP_LOOP", .op_code = 120, .pop = 0, .push = 0, .conditional = true); // Distance to target address + jrel_op (.op_obj = ret->opcodes, .op_name = "SETUP_EXCEPT", .op_code = 121, .pop = 0, .push = 6, .conditional = true); // "" + jrel_op (.op_obj = ret->opcodes, .op_name = "SETUP_FINALLY", .op_code = 122, .pop = 0, .push = 6, .conditional = true); // "" + + local_op (.op_obj = ret->opcodes, .op_name = "LOAD_FAST", .op_code = 124, .pop = 0, .push = 1); // Local variable number + store_op (.op_obj = ret->opcodes, .op_name = "STORE_FAST", .op_code = 125, .pop = 1, .push = 0, .func = LOCAL_OP); // Local variable number + local_op (.op_obj = ret->opcodes, .op_name = "DELETE_FAST", .op_code = 126, .pop = 0, .push = 0); // Local variable number + + def_op (.op_obj = ret->opcodes, .op_name = "RAISE_VARARGS", .op_code = 130, .pop = 9, .push = 1, .fallthrough = false); + // Number of raise arguments (1, 2, 
or 3) + nargs_op (.op_obj = ret->opcodes, .op_name = "CALL_FUNCTION", .op_code = 131, .pop = 9, .push = 1); // #args + (#kwargs << 8) + + def_op (.op_obj = ret->opcodes, .op_name = "MAKE_FUNCTION", .op_code = 132, .pop = 9, .push = 1); // TOS is number of args if < 3.6 + varargs_op (.op_obj = ret->opcodes, .op_name = "BUILD_SLICE", .op_code = 133, .pop = 9, .push = 1); // TOS is number of items to pop + + def_op (.op_obj = ret->opcodes, .op_name = "MAKE_CLOSURE", .op_code = 134, .pop = 9, .push = 1); // TOS is number of items to pop + free_op (.op_obj = ret->opcodes, .op_name = "LOAD_CLOSURE", .op_code = 135, .pop = 0, .push = 1); + free_op (.op_obj = ret->opcodes, .op_name = "LOAD_DEREF", .op_code = 136, .pop = 0, .push = 1); + store_op (.op_obj = ret->opcodes, .op_name = "STORE_DEREF", .op_code = 137, .pop = 1, .push = 0, .func = FREE_OP); + free_op (.op_obj = ret->opcodes, .op_name = "DELETE_DEREF", .op_code = 138, .pop = 0, .push = 0); + + nargs_op (.op_obj = ret->opcodes, .op_name = "CALL_FUNCTION_VAR", .op_code = 140, .pop = 9, .push = 1); // #args + (#kwargs << 8) + nargs_op (.op_obj = ret->opcodes, .op_name = "CALL_FUNCTION_KW", .op_code = 141, .pop = 9, .push = 1); // #args + (#kwargs << 8) + nargs_op (.op_obj = ret->opcodes, .op_name = "CALL_FUNCTION_VAR_KW", .op_code = 142, .pop = 9, .push = 1); // #args + (#kwargs << 8) + + jrel_op (.op_obj = ret->opcodes, .op_name = "SETUP_WITH", .op_code = 143, .pop = 0, .push = 7); + + def_op (.op_obj = ret->opcodes, .op_name = "LIST_APPEND", .op_code = 145, .pop = 2, .push = 1); // Calls list.append(TOS[-i], TOS). + // Used to implement list comprehensions. + def_op (.op_obj = ret->opcodes, .op_name = "SET_ADD", .op_code = 146, .pop = 1, .push = 0); // Calls set.add(TOS1[-i], TOS). + // Used to implement set comprehensions. + def_op (.op_obj = ret->opcodes, .op_name = "MAP_ADD", .op_code = 147, .pop = 2, .push = 1); // Calls dict.setitem(TOS1[-i], TOS, TOS1) + // Used to implement dict comprehensions. 
+ + def_op (.op_obj = ret->opcodes, .op_name = "EXTENDED_ARG", .op_code = 144); + ret->extended_arg = 144; + + r_list_purge (ret->opcode_arg_fmt); + add_arg_fmt (ret, "EXTENDED_ARG", format_extended_arg); + + return ret; +} diff --git a/libr/asm/arch/pyc/opcode_anal.c b/libr/asm/arch/pyc/opcode_anal.c new file mode 100644 index 0000000000..934ffd4a13 --- /dev/null +++ b/libr/asm/arch/pyc/opcode_anal.c @@ -0,0 +1,907 @@ +#include "opcode.h" + +// The numeric code of an opcode varies across Python versions, +// so each handler below is keyed by the opcode's name, not by its code. + +static inline void anal_push(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg, ut32 type, st32 push_cnt) { + op->type = type; + op->stackop = R_ANAL_STACK_INC; + op->stackptr = OBJECT_SIZE_ON_STACK * push_cnt; // positive delta: push_cnt slots are added +} + +static inline void anal_pop(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg, ut32 type, st32 pop_cnt) { + op->type = type; + op->stackop = R_ANAL_STACK_INC; + op->stackptr = -(OBJECT_SIZE_ON_STACK * pop_cnt); // negative delta: pop_cnt slots are removed +} + +static void anal_BEFORE_ASYNC_WITH(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_BEGIN_FINALLY(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_BINARY_ADD(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_ADD, 1); +} + +static void anal_BINARY_AND(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_AND, 1); +} + +static void anal_BINARY_CALL(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + // https://github.com/python/cpython/blob/v1.4b3/Include/opcode.h + // I can not find this opcode even in v1.4 version source code. 
+ op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_BINARY_DIVIDE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_DIV, 1); +} + +static void anal_BINARY_FLOOR_DIVIDE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_DIV, 1); +} + +static void anal_BINARY_LSHIFT(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_SHL, 1); +} + +static void anal_BINARY_MATRIX_MULTIPLY(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_BINARY_MODULO(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_MOD, 1); +} + +static void anal_BINARY_MULTIPLY(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_MUL, 1); +} + +static void anal_BINARY_OR(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_OR, 1); +} + +static void anal_BINARY_POWER(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_BINARY_RSHIFT(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_SHR, 1); +} + +static void anal_BINARY_SUBSCR(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_BINARY_SUBTRACT(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_SUB, 1); +} + +static void anal_BINARY_TRUE_DIVIDE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_DIV, 1); +} + +static void anal_BINARY_XOR(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_XOR, 1); +} + +static void anal_BREAK_LOOP(RAnalOp *op, 
pyc_opcode_object *op_obj, ut32 oparg) { + //op->type = R_ANAL_OP_TYPE_CJMP; + // This is actually a jump, but requires further analysis + op->type = R_ANAL_OP_TYPE_UNK; + op->jump = -1; + op->fail = -1; +} + +static void anal_BUILD_CLASS(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_NEW, 2); +} + +static void anal_BUILD_CONST_KEY_MAP(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_NEW, oparg); +} + +static void anal_BUILD_FUNCTION(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_NEW; +} + +static void anal_BUILD_LIST(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_NEW, oparg - 1); // oparg items consumed, one list pushed: same net effect as BUILD_TUPLE and BUILD_SET +} + +static void anal_BUILD_LIST_UNPACK(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_NEW, oparg - 1); +} + +static void anal_BUILD_MAP(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_NEW, 2 * oparg - 1); +} + +static void anal_BUILD_MAP_UNPACK(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_NEW, oparg - 1); +} + +static void anal_BUILD_MAP_UNPACK_WITH_CALL(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_NEW, oparg); +} + +static void anal_BUILD_SET(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_NEW, oparg - 1); +} + +static void anal_BUILD_SET_UNPACK(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_NEW, oparg - 1); +} + +static void anal_BUILD_SLICE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_NEW, oparg - 1); +} + +static void anal_BUILD_STRING(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, 
R_ANAL_OP_TYPE_NEW, oparg - 1); +} + +static void anal_BUILD_TUPLE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_NEW, oparg - 1); +} + +static void anal_BUILD_TUPLE_UNPACK(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_NEW, oparg - 1); +} + +static void anal_BUILD_TUPLE_UNPACK_WITH_CALL(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_NEW, oparg); +} + +static void anal_CALL_FUNCTION(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + // TODO + // Get callee function from stack + // Parse oparg by version info + op->type = R_ANAL_OP_TYPE_ICALL; + op->jump = -1; +} + +static void anal_CALL_FUNCTION_EX(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_ICALL; + op->jump = -1; +} + +static void anal_CALL_FUNCTION_KW(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_ICALL; + op->jump = -1; +} + +static void anal_CALL_FUNCTION_VAR(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_ICALL; + op->jump = -1; +} + +static void anal_CALL_FUNCTION_VAR_KW(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_ICALL; + op->jump = -1; +} + +static void anal_CALL_METHOD(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_ICALL; + op->jump = -1; +} + +static void anal_DELETE_ATTR(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_DELETE_DEREF(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_DELETE_FAST(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_DELETE_GLOBAL(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_DELETE_NAME(RAnalOp *op, 
pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_DELETE_SLICE_0(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_DELETE_SLICE_1(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_DELETE_SLICE_2(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_DELETE_SLICE_3(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_DELETE_SUBSCR(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_DUP_TOP(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UPUSH, 1); +} + +static void anal_DUP_TOPX(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UPUSH, oparg); // DUP_TOPX duplicates oparg items, so the stack grows by oparg slots +} + +static void anal_DUP_TOP_TWO(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UPUSH, 2); +} + +static void anal_END_ASYNC_FOR(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + // If TOS is StopAsyncIteration pop 7 values from the stack and restore the exception state using the second three of them. 
+ op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_END_FINALLY(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + //op->type = R_ANAL_OP_TYPE_CJMP; + // This is actually a jump, but require further analysis + op->type = R_ANAL_OP_TYPE_UNK; + op->jump = -1; + op->fail = -1; +} + +static void anal_EXEC_STMT(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_EXTENDED_ARG(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_FORMAT_VALUE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; + if ((oparg & 0x04) == 0x04) { + op->stackop = R_ANAL_STACK_INC; + op->stackptr = -OBJECT_SIZE_ON_STACK; + } +} + +static void anal_FOR_LOOP(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + // https://github.com/python/cpython/blob/b2b1ed17819ecb24a78d07d3ff1e8e6bc6137721/Python/ceval.c#L1499 + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_GET_AITER(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_GET_ANEXT(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_GET_AWAITABLE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_GET_ITER(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_GET_YIELD_FROM_ITER(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_IMPORT_FROM(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_IMPORT_NAME(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_IMPORT_STAR(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { 
+ anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_INPLACE_ADD(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_ADD; +} + +static void anal_INPLACE_AND(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_AND; +} + +static void anal_INPLACE_DIVIDE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_DIV; +} + +static void anal_INPLACE_FLOOR_DIVIDE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_DIV; +} + +static void anal_INPLACE_LSHIFT(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_SHL; +} + +static void anal_INPLACE_MATRIX_MULTIPLY(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_INPLACE_MODULO(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_MOD; +} + +static void anal_INPLACE_MULTIPLY(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_MUL; +} + +static void anal_INPLACE_OR(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_OR; +} + +static void anal_INPLACE_POWER(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_INPLACE_RSHIFT(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_SHR; +} + +static void anal_INPLACE_SUBTRACT(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_SUB; +} + +static void anal_INPLACE_TRUE_DIVIDE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_DIV; +} + +static void anal_INPLACE_XOR(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_XOR; +} + +static void anal_LIST_APPEND(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void 
anal_LOAD_ASSERTION_ERROR(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_LOAD_ATTR(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_LOAD_BUILD_CLASS(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_LOAD_CLASSDEREF(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_LOAD_CLOSURE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_LOAD_CONST(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_LOAD_DEREF(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_LOAD_FAST(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_LOAD_GLOBAL(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_LOAD_GLOBALS(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + // https://github.com/python/cpython/blob/24260ec91623c18569225229d5becb852010ae2c/Include/opcode.h#L80 + // Can't find this opcode + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_LOAD_LOCAL(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_LOAD_LOCALS(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_LOAD_METHOD(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void 
anal_LOAD_NAME(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_MAKE_CLOSURE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, oparg); +} + +static void anal_MAKE_FUNCTION(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, oparg); +} + +static void anal_MAP_ADD(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_NOP(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_NOP; +} + +static void anal_POP_BLOCK(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_POP, 1); +} + +static void anal_POP_EXCEPT(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_POP, 1); +} + +static void anal_POP_FINALLY(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + // FIXME + // POP_FINALLY will pop 6 elements if TOS is an exception type + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_POP, 1); +} + +static void anal_POP_TOP(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_POP, 1); +} + +static void anal_PRINT_EXPR(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_PRINT_ITEM(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_PRINT_ITEM_TO(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 2); +} + +static void anal_PRINT_NEWLINE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_PRINT_NEWLINE_TO(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, 
R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_RAISE_EXCEPTION(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + // https://github.com/python/cpython/blob/6c3a3aa17b028f6b93067083d32c7eaa4338757c/Include/opcode.h#L89 + // Can't find this opcode + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_RAISE_VARARGS(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, oparg); +} + +static void anal_RERAISE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 3); +} + +static void anal_RESERVE_FAST(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + // https://github.com/python/cpython/blob/6c3a3aa17b028f6b93067083d32c7eaa4338757c/Include/opcode.h#L134 + // Can't find this opcode + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_RETURN_VALUE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_RET; + op->stackop = R_ANAL_STACK_INC; + op->stackptr = -OBJECT_SIZE_ON_STACK; + op->eob = true; +} + +static void anal_ROT_FOUR(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + // Lifts second, third and forth stack items one position up, moves top down to position four. + op->type = R_ANAL_OP_TYPE_XCHG; +} + +static void anal_ROT_THREE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + // Lifts second and third stack item one position up, moves top down to position three. + op->type = R_ANAL_OP_TYPE_XCHG; +} + +static void anal_ROT_TWO(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + // Swaps the two top-most stack items. 
+ op->type = R_ANAL_OP_TYPE_XCHG; +} + +static void anal_SETUP_ANNOTATIONS(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_SET_ADD(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_SET_FUNC_ARGS(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + // https://github.com/python/cpython/blob/v1.4/Python/ceval.c + // Can't find this opcode + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_SET_LINENO(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_SLICE_0(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_SLICE_1(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_SLICE_2(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_SLICE_3(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 2); +} + +static void anal_STOP_CODE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_TRAP; +} + +static void anal_STORE_ANNOTATION(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_STORE_ATTR(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_STORE_DEREF(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_STORE_FAST(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_STORE_GLOBAL(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); 
+} + +static void anal_STORE_LOCALS(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_STORE_MAP(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 2); +} + +static void anal_STORE_NAME(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_STORE_SLICE_0(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 2); +} + +static void anal_STORE_SLICE_1(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 3); +} + +static void anal_STORE_SLICE_2(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 3); +} + +static void anal_STORE_SLICE_3(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 4); +} + +static void anal_STORE_SUBSCR(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 3); +} + +static void anal_UNARY_CALL(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + // https://github.com/python/cpython/blob/v1.4b3/Include/opcode.h + // I can not find this opcode even in v1.4 version source code. 
+ op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_UNARY_CONVERT(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_UNARY_INVERT(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_NOT; +} + +static void anal_UNARY_NEGATIVE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_UNARY_NOT(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_NOT; +} + +static void anal_UNARY_POSITIVE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_UNPACK_ARG(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, oparg - 1); +} + +static void anal_UNPACK_EX(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, (oparg & 0xFF) + (oparg >> 8)); +} + +static void anal_UNPACK_LIST(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, oparg - 1); +} + +static void anal_UNPACK_SEQUENCE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, oparg - 1); +} + +static void anal_UNPACK_TUPLE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, oparg - 1); +} + +static void anal_UNPACK_VARARG(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + // https://github.com/python/cpython/blob/v1.4b3/Include/opcode.h + // I can not find this opcode even in v1.4 version source code. 
+ op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_WITH_CLEANUP(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + // Need the value on stack + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_WITH_CLEANUP_FINISH(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 2); +} + +static void anal_WITH_CLEANUP_START(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + // Need the value on stack + op->type = R_ANAL_OP_TYPE_UNK; +} + +static void anal_WITH_EXCEPT_START(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_push (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_YIELD_FROM(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_YIELD_VALUE(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + anal_pop (op, op_obj, oparg, R_ANAL_OP_TYPE_UNK, 1); +} + +static void anal_FOR_ITER(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_CJMP; + ut64 mid = op->jump; + op->jump = op->fail; + op->fail = mid; +} + +static void anal_SETUP_LOOP(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + ut64 mid = op->jump; + op->jump = op->fail; + op->fail = mid; +} + +static void anal_SETUP_EXCEPT(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + ut64 mid = op->jump; + op->jump = op->fail; + op->fail = mid; +} + +static void anal_SETUP_FINALLY(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + ut64 mid = op->jump; + op->jump = op->fail; + op->fail = mid; +} + +static void anal_SETUP_WITH(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_CJMP; + ut64 mid = op->jump; + op->jump = op->fail; + op->fail = mid; +} + +static void anal_SETUP_ASYNC_WITH(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + op->type = R_ANAL_OP_TYPE_CJMP; + ut64 mid = op->jump; + op->jump = op->fail; + op->fail = mid; +} + +static op_anal_func op_anal[] = { + { 
"BEFORE_ASYNC_WITH", anal_BEFORE_ASYNC_WITH }, + { "BEGIN_FINALLY", anal_BEGIN_FINALLY }, + { "BINARY_ADD", anal_BINARY_ADD }, + { "BINARY_AND", anal_BINARY_AND }, + { "BINARY_CALL", anal_BINARY_CALL }, + { "BINARY_DIVIDE", anal_BINARY_DIVIDE }, + { "BINARY_FLOOR_DIVIDE", anal_BINARY_FLOOR_DIVIDE }, + { "BINARY_LSHIFT", anal_BINARY_LSHIFT }, + { "BINARY_MATRIX_MULTIPLY", anal_BINARY_MATRIX_MULTIPLY }, + { "BINARY_MODULO", anal_BINARY_MODULO }, + { "BINARY_MULTIPLY", anal_BINARY_MULTIPLY }, + { "BINARY_OR", anal_BINARY_OR }, + { "BINARY_POWER", anal_BINARY_POWER }, + { "BINARY_RSHIFT", anal_BINARY_RSHIFT }, + { "BINARY_SUBSCR", anal_BINARY_SUBSCR }, + { "BINARY_SUBTRACT", anal_BINARY_SUBTRACT }, + { "BINARY_TRUE_DIVIDE", anal_BINARY_TRUE_DIVIDE }, + { "BINARY_XOR", anal_BINARY_XOR }, + { "BREAK_LOOP", anal_BREAK_LOOP }, + { "BUILD_CLASS", anal_BUILD_CLASS }, + { "BUILD_CONST_KEY_MAP", anal_BUILD_CONST_KEY_MAP }, + { "BUILD_FUNCTION", anal_BUILD_FUNCTION }, + { "BUILD_LIST", anal_BUILD_LIST }, + { "BUILD_LIST_UNPACK", anal_BUILD_LIST_UNPACK }, + { "BUILD_MAP", anal_BUILD_MAP }, + { "BUILD_MAP_UNPACK", anal_BUILD_MAP_UNPACK }, + { "BUILD_MAP_UNPACK_WITH_CALL", anal_BUILD_MAP_UNPACK_WITH_CALL }, + { "BUILD_SET", anal_BUILD_SET }, + { "BUILD_SET_UNPACK", anal_BUILD_SET_UNPACK }, + { "BUILD_SLICE", anal_BUILD_SLICE }, + { "BUILD_STRING", anal_BUILD_STRING }, + { "BUILD_TUPLE", anal_BUILD_TUPLE }, + { "BUILD_TUPLE_UNPACK", anal_BUILD_TUPLE_UNPACK }, + { "BUILD_TUPLE_UNPACK_WITH_CALL", anal_BUILD_TUPLE_UNPACK_WITH_CALL }, + { "CALL_FUNCTION", anal_CALL_FUNCTION }, + { "CALL_FUNCTION_EX", anal_CALL_FUNCTION_EX }, + { "CALL_FUNCTION_KW", anal_CALL_FUNCTION_KW }, + { "CALL_FUNCTION_VAR", anal_CALL_FUNCTION_VAR }, + { "CALL_FUNCTION_VAR_KW", anal_CALL_FUNCTION_VAR_KW }, + { "CALL_METHOD", anal_CALL_METHOD }, + { "DELETE_ATTR", anal_DELETE_ATTR }, + { "DELETE_DEREF", anal_DELETE_DEREF }, + { "DELETE_FAST", anal_DELETE_FAST }, + { "DELETE_GLOBAL", anal_DELETE_GLOBAL }, + { 
"DELETE_NAME", anal_DELETE_NAME }, + { "DELETE_SLICE_0", anal_DELETE_SLICE_0 }, + { "DELETE_SLICE_1", anal_DELETE_SLICE_1 }, + { "DELETE_SLICE_2", anal_DELETE_SLICE_2 }, + { "DELETE_SLICE_3", anal_DELETE_SLICE_3 }, + { "DELETE_SUBSCR", anal_DELETE_SUBSCR }, + { "DUP_TOP", anal_DUP_TOP }, + { "DUP_TOPX", anal_DUP_TOPX }, + { "DUP_TOP_TWO", anal_DUP_TOP_TWO }, + { "END_ASYNC_FOR", anal_END_ASYNC_FOR }, + { "END_FINALLY", anal_END_FINALLY }, + { "EXEC_STMT", anal_EXEC_STMT }, + { "EXTENDED_ARG", anal_EXTENDED_ARG }, + { "FORMAT_VALUE", anal_FORMAT_VALUE }, + { "FOR_LOOP", anal_FOR_LOOP }, + { "GET_AITER", anal_GET_AITER }, + { "GET_ANEXT", anal_GET_ANEXT }, + { "GET_AWAITABLE", anal_GET_AWAITABLE }, + { "GET_ITER", anal_GET_ITER }, + { "GET_YIELD_FROM_ITER", anal_GET_YIELD_FROM_ITER }, + { "IMPORT_FROM", anal_IMPORT_FROM }, + { "IMPORT_NAME", anal_IMPORT_NAME }, + { "IMPORT_STAR", anal_IMPORT_STAR }, + { "INPLACE_ADD", anal_INPLACE_ADD }, + { "INPLACE_AND", anal_INPLACE_AND }, + { "INPLACE_DIVIDE", anal_INPLACE_DIVIDE }, + { "INPLACE_FLOOR_DIVIDE", anal_INPLACE_FLOOR_DIVIDE }, + { "INPLACE_LSHIFT", anal_INPLACE_LSHIFT }, + { "INPLACE_MATRIX_MULTIPLY", anal_INPLACE_MATRIX_MULTIPLY }, + { "INPLACE_MODULO", anal_INPLACE_MODULO }, + { "INPLACE_MULTIPLY", anal_INPLACE_MULTIPLY }, + { "INPLACE_OR", anal_INPLACE_OR }, + { "INPLACE_POWER", anal_INPLACE_POWER }, + { "INPLACE_RSHIFT", anal_INPLACE_RSHIFT }, + { "INPLACE_SUBTRACT", anal_INPLACE_SUBTRACT }, + { "INPLACE_TRUE_DIVIDE", anal_INPLACE_TRUE_DIVIDE }, + { "INPLACE_XOR", anal_INPLACE_XOR }, + { "LIST_APPEND", anal_LIST_APPEND }, + { "LOAD_ASSERTION_ERROR", anal_LOAD_ASSERTION_ERROR }, + { "LOAD_ATTR", anal_LOAD_ATTR }, + { "LOAD_BUILD_CLASS", anal_LOAD_BUILD_CLASS }, + { "LOAD_CLASSDEREF", anal_LOAD_CLASSDEREF }, + { "LOAD_CLOSURE", anal_LOAD_CLOSURE }, + { "LOAD_CONST", anal_LOAD_CONST }, + { "LOAD_DEREF", anal_LOAD_DEREF }, + { "LOAD_FAST", anal_LOAD_FAST }, + { "LOAD_GLOBAL", anal_LOAD_GLOBAL }, + { "LOAD_GLOBALS", 
anal_LOAD_GLOBALS }, + { "LOAD_LOCAL", anal_LOAD_LOCAL }, + { "LOAD_LOCALS", anal_LOAD_LOCALS }, + { "LOAD_METHOD", anal_LOAD_METHOD }, + { "LOAD_NAME", anal_LOAD_NAME }, + { "MAKE_CLOSURE", anal_MAKE_CLOSURE }, + { "MAKE_FUNCTION", anal_MAKE_FUNCTION }, + { "MAP_ADD", anal_MAP_ADD }, + { "NOP", anal_NOP }, + { "POP_BLOCK", anal_POP_BLOCK }, + { "POP_EXCEPT", anal_POP_EXCEPT }, + { "POP_FINALLY", anal_POP_FINALLY }, + { "POP_TOP", anal_POP_TOP }, + { "PRINT_EXPR", anal_PRINT_EXPR }, + { "PRINT_ITEM", anal_PRINT_ITEM }, + { "PRINT_ITEM_TO", anal_PRINT_ITEM_TO }, + { "PRINT_NEWLINE", anal_PRINT_NEWLINE }, + { "PRINT_NEWLINE_TO", anal_PRINT_NEWLINE_TO }, + { "RAISE_EXCEPTION", anal_RAISE_EXCEPTION }, + { "RAISE_VARARGS", anal_RAISE_VARARGS }, + { "RERAISE", anal_RERAISE }, + { "RESERVE_FAST", anal_RESERVE_FAST }, + { "RETURN_VALUE", anal_RETURN_VALUE }, + { "ROT_FOUR", anal_ROT_FOUR }, + { "ROT_THREE", anal_ROT_THREE }, + { "ROT_TWO", anal_ROT_TWO }, + { "SETUP_ANNOTATIONS", anal_SETUP_ANNOTATIONS }, + { "SET_ADD", anal_SET_ADD }, + { "SET_FUNC_ARGS", anal_SET_FUNC_ARGS }, + { "SET_LINENO", anal_SET_LINENO }, + { "SLICE_0", anal_SLICE_0 }, + { "SLICE_1", anal_SLICE_1 }, + { "SLICE_2", anal_SLICE_2 }, + { "SLICE_3", anal_SLICE_3 }, + { "STOP_CODE", anal_STOP_CODE }, + { "STORE_ANNOTATION", anal_STORE_ANNOTATION }, + { "STORE_ATTR", anal_STORE_ATTR }, + { "STORE_DEREF", anal_STORE_DEREF }, + { "STORE_FAST", anal_STORE_FAST }, + { "STORE_GLOBAL", anal_STORE_GLOBAL }, + { "STORE_LOCALS", anal_STORE_LOCALS }, + { "STORE_MAP", anal_STORE_MAP }, + { "STORE_NAME", anal_STORE_NAME }, + { "STORE_SLICE_0", anal_STORE_SLICE_0 }, + { "STORE_SLICE_1", anal_STORE_SLICE_1 }, + { "STORE_SLICE_2", anal_STORE_SLICE_2 }, + { "STORE_SLICE_3", anal_STORE_SLICE_3 }, + { "STORE_SUBSCR", anal_STORE_SUBSCR }, + { "UNARY_CALL", anal_UNARY_CALL }, + { "UNARY_CONVERT", anal_UNARY_CONVERT }, + { "UNARY_INVERT", anal_UNARY_INVERT }, + { "UNARY_NEGATIVE", anal_UNARY_NEGATIVE }, + { "UNARY_NOT", 
anal_UNARY_NOT }, + { "UNARY_POSITIVE", anal_UNARY_POSITIVE }, + { "UNPACK_ARG", anal_UNPACK_ARG }, + { "UNPACK_EX", anal_UNPACK_EX }, + { "UNPACK_LIST", anal_UNPACK_LIST }, + { "UNPACK_SEQUENCE", anal_UNPACK_SEQUENCE }, + { "UNPACK_TUPLE", anal_UNPACK_TUPLE }, + { "UNPACK_VARARG", anal_UNPACK_VARARG }, + { "WITH_CLEANUP", anal_WITH_CLEANUP }, + { "WITH_CLEANUP_FINISH", anal_WITH_CLEANUP_FINISH }, + { "WITH_CLEANUP_START", anal_WITH_CLEANUP_START }, + { "WITH_EXCEPT_START", anal_WITH_EXCEPT_START }, + { "YIELD_FROM", anal_YIELD_FROM }, + { "YIELD_VALUE", anal_YIELD_VALUE }, + // Fix jump info + { "FOR_ITER", anal_FOR_ITER }, + { "SETUP_LOOP", anal_SETUP_LOOP }, + { "SETUP_EXCEPT", anal_SETUP_EXCEPT }, + { "SETUP_FINALLY", anal_SETUP_FINALLY }, + { "SETUP_WITH", anal_SETUP_WITH }, + { "SETUP_ASYNC_WITH", anal_SETUP_ASYNC_WITH }, +}; + +/* Runs the op_anal handler whose name equals op_obj->op_name; opcodes without an entry leave op untouched. */ +void anal_pyc_op(RAnalOp *op, pyc_opcode_object *op_obj, ut32 oparg) { + size_t i; + for (i = 0; i < (sizeof (op_anal) / sizeof (op_anal_func)); i++) { + if (!strcmp (op_anal[i].op_name, op_obj->op_name)) { + op_anal[i].func (op, op_obj, oparg); + break; + } + } +} diff --git a/libr/asm/arch/pyc/opcode_arg_fmt.c b/libr/asm/arch/pyc/opcode_arg_fmt.c new file mode 100644 index 0000000000..3dc45e752a --- /dev/null +++ b/libr/asm/arch/pyc/opcode_arg_fmt.c @@ -0,0 +1,69 @@ +#include "opcode.h" + +/* Returns a newly allocated decimal string holding oparg multiplied by 65536. */ +const char *format_extended_arg(ut32 oparg) { + /* widen before multiplying: %llu needs a 64-bit argument and the product must not wrap at 32 bits */ + return r_str_newf ("%llu", (ut64)oparg * (1 << 16)); +} + +const char *format_CALL_FUNCTION_pos_name_encoded(ut32 oparg) { + ut32 pos_args = oparg & 0xFF; + ut32 name = (oparg >> 8) & 0xFF; + return r_str_newf ("%u positional, %u named", pos_args, name); +} + +const char *format_MAKE_FUNCTION_arg_3x(ut32 oparg) { + ut32 pos_args = oparg & 0xFF; + ut32 name_default = (oparg >> 8) & 0xFF; + ut32 annotate_args = (oparg >> 16) & 0x7FFF; + return r_str_newf ("%u positional, %u name and default, %u annotations", pos_args, name_default, annotate_args); +} + +const char *format_CALL_FUNCTION_KW_36(ut32 oparg) { +
return r_str_newf ("%u total positional and keyword args", oparg); +} + +const char *format_CALL_FUNCTION_EX_36(ut32 oparg) { + return r_str_new ((oparg & 0x01)? "keyword args": ""); +} + +static const char *MAKE_FUNCTION_FLAGS[] = { "default", "keyword-only", "annotation", "closure" }; + +/* Returns a newly allocated, comma-separated list of the MAKE_FUNCTION_FLAGS names whose bit is set in oparg (bit 0 selects the first name); the string is empty when no bit is set. */ +const char *format_MAKE_FUNCTION_arg_36(ut32 oparg) { + size_t i; + char *ret = NULL; + for (i = 0; i < sizeof (MAKE_FUNCTION_FLAGS) / sizeof (char *); ++i) { + if (oparg & 0x1) { + /* r_str_appendf may reallocate, so keep the pointer it returns */ + ret = ret? r_str_appendf (ret, ", %s", MAKE_FUNCTION_FLAGS[i]): r_str_new (MAKE_FUNCTION_FLAGS[i]); + } + oparg >>= 1; + } + /* r_pyc_disasm frees what a formatter returns, so hand back an owned string even when it is empty */ + return ret? ret: r_str_new (""); +} + +/* Returns a newly allocated conversion suffix ("!s", "!r", "!a" or "") chosen by the low two bits of oparg; the result is empty whenever bit 0x4 is set. */ +const char *format_value_flags_36(ut32 oparg) { + const char *ret = ""; + switch (oparg & 0x3) { + case 1: + ret = "!s"; + break; + case 2: + ret = "!r"; + break; + case 3: + ret = "!a"; + break; + } + if ((oparg & 0x4) == 0x4) { + // pop fmt_spec from the stack and use it, else use an + // empty fmt_spec. + return r_str_new (""); /* heap copy: a string literal must never reach free() */ + } + return r_str_new (ret); +} + +/* Returns a newly allocated decimal string holding oparg multiplied by 256. */ +const char *format_extended_arg_36(ut32 oparg) { + /* widen before multiplying: %llu needs a 64-bit argument */ + return r_str_newf ("%llu", (ut64)oparg * (1 << 8)); +} diff --git a/libr/asm/arch/pyc/pyc_dis.c b/libr/asm/arch/pyc/pyc_dis.c new file mode 100644 index 0000000000..da7acbd2d6 --- /dev/null +++ b/libr/asm/arch/pyc/pyc_dis.c @@ -0,0 +1,208 @@ +/* radare - LGPL3 - Copyright 2016-2020 - c0riolis, x0urc3 */ + +#include "pyc_dis.h" + +static const char *cmp_op[] = { "<", "<=", "==", "!=", ">", ">=", "in", "not in", "is", "is not", "exception match", "BAD" }; + +static const char *parse_arg(pyc_opcode_object *op, ut32 oparg, RList *names, RList *consts, RList *varnames, RList *interned_table, RList *freevars, RList *cellvars, RList *opcode_arg_fmt); + +int r_pyc_disasm(RAsmOp *opstruct, const ut8 *code, RList *cobjs, RList *interned_table, ut64 pc, pyc_opcodes *ops) { + pyc_code_object *cobj = NULL, *t = NULL; + ut32 extended_arg = 0, i = 0, oparg; + st64 start_offset, end_offset; + RListIter *iter = NULL; + +
r_list_foreach (cobjs, iter, t) { + start_offset = t->start_offset; + end_offset = t->end_offset; + if (start_offset <= pc && pc < end_offset) { // pc in [start_offset, end_offset) + cobj = t; + break; + } + } + + if (cobj) { + /* TODO: adding line number and offset */ + RList *varnames = cobj->varnames->data; + RList *consts = cobj->consts->data; + RList *names = cobj->names->data; + RList *freevars = cobj->freevars->data; + RList *cellvars = cobj->cellvars->data; + + ut8 op = code[i]; + i++; + char *name = ops->opcodes[op].op_name; + r_strbuf_set (&opstruct->buf_asm, name); + if (!name) { + return 0; + } + if (op >= ops->have_argument) { /* this opcode is followed by an operand */ + if (ops->bits == 16) { /* two operand bytes, low byte first */ + oparg = code[i] + code[i + 1] * 256 + extended_arg; + i += 2; + } else { /* single operand byte */ + oparg = code[i] + extended_arg; + i += 1; + } + extended_arg = 0; /* NOTE(review): extended_arg is a local that starts at 0 on every call and is not read again after the assignments below, so the prefix value never reaches the next instruction - confirm whether that is intended */ + if (op == ops->extended_arg) { + if (ops->bits == 16) { + extended_arg = oparg * 65536; + } else { + extended_arg = oparg << 8; + } + } + const char *arg = parse_arg (&ops->opcodes[op], oparg, names, consts, varnames, interned_table, freevars, cellvars, ops->opcode_arg_fmt); + if (arg != NULL) { + r_strbuf_appendf (&opstruct->buf_asm, "%20s", arg); + free ((char *)arg); /* the string returned by parse_arg is owned here */ + } + } else if (ops->bits == 8) { /* an operand-less opcode still consumes a second byte in this layout */ + i += 1; + } + + return i; /* number of bytes consumed */ + } + return 0; /* pc lies outside every code object in cobjs */ +} + +static char *generic_array_obj_to_string(RList *l); + +static const char *parse_arg(pyc_opcode_object *op, ut32 oparg, RList *names, RList *consts, RList *varnames, RList *interned_table, RList *freevars, RList *cellvars, RList *opcode_arg_fmt) { + pyc_object *t = NULL; + const char *arg = NULL; + pyc_code_object *tmp_cobj; + pyc_arg_fmt *fmt; + RListIter *i = NULL; + + // version-specific formatter for certain opcodes + r_list_foreach (opcode_arg_fmt, i, fmt) + if (!strcmp (fmt->op_name, op->op_name)) { + return fmt->formatter (oparg); + } + + if (op->type & HASCONST) { + t = (pyc_object *)r_list_get_n (consts, oparg); + if (t == NULL) { + return NULL; + } + switch (t->type) { + case TYPE_CODE_v0: + case
TYPE_CODE_v1: + tmp_cobj = t->data; + arg = r_str_newf ("CodeObject(%s) from %s", tmp_cobj->name->data, tmp_cobj->filename->data); + break; + case TYPE_TUPLE: + case TYPE_SET: + case TYPE_FROZENSET: + case TYPE_LIST: + case TYPE_SMALL_TUPLE: + arg = generic_array_obj_to_string (t->data); + break; + case TYPE_STRING: + case TYPE_INTERNED: + case TYPE_STRINGREF: + arg = r_str_newf ("'%s'", t->data); + /* stop here: falling into default would leak this string and drop the quotes */ + break; + default: + arg = r_str_new (t->data); + } + } + if (op->type & HASNAME) { + t = (pyc_object *)r_list_get_n (names, oparg); + if (t == NULL) { + return NULL; + } + arg = r_str_new (t->data); + } + if ((op->type & HASJREL) || (op->type & HASJABS)) { + arg = r_str_newf ("%u", oparg); + } + if (op->type & HASLOCAL) { + t = (pyc_object *)r_list_get_n (varnames, oparg); + if (!t) + return NULL; + arg = r_str_new (t->data); + } + if (op->type & HASCOMPARE) { + /* oparg comes from the bytecode: print it as a number rather than read past the end of cmp_op */ + if (oparg < sizeof (cmp_op) / sizeof (cmp_op[0])) { + arg = r_str_new (cmp_op[oparg]); + } else { + arg = r_str_newf ("%u", oparg); + } + } + if (op->type & HASFREE) { + if (!cellvars || !freevars) { + arg = r_str_newf ("%u", oparg); + return arg; + } + + if (oparg < r_list_length (cellvars)) { + t = (pyc_object *)r_list_get_n (cellvars, oparg); + } else if ((oparg - r_list_length (cellvars)) < r_list_length (freevars)) { + /* indexes past the cell variables address freevars, counted from its own start */ + t = (pyc_object *)r_list_get_n (freevars, oparg - r_list_length (cellvars)); + } else { + arg = r_str_newf ("%u", oparg); + return arg; + } + if (!t) { + return NULL; + } + + arg = r_str_new (t->data); + } + if (op->type & HASNARGS) { + arg = r_str_newf ("%u", oparg); + } + if (op->type & HASVARGS) { + arg = r_str_newf ("%u", oparg); + } + + return arg; +} + +/* for debugging purpose */ +static void dump(RList *l) { + RListIter *it; + pyc_object *e = NULL; + + r_list_foreach (l, it, e) { + if (e->type == TYPE_TUPLE) { + /* the rendered tuple is heap-allocated: print it, then release it */ + char *s = generic_array_obj_to_string (e->data); + eprintf ("[TYPE_TUPLE] %s\n", s); + free (s); + return; + } + eprintf ("[DATA] %s\n", (char *)e->data); + } +} + +static char *generic_array_obj_to_string(RList *l) { + RListIter *iter = NULL; + pyc_object *e = NULL; + + RStrBuf *rbuf = r_strbuf_new (NULL); + + r_list_foreach (l, iter, e)
{ + r_strbuf_append (rbuf, e->data); + r_strbuf_append (rbuf, ","); + } + + char *buf = r_strbuf_get (rbuf); + size_t len = buf? strlen (buf): 0; + + /* remove last , - an empty list appended nothing, so there is nothing to trim */ + if (len > 0) { + buf[len - 1] = '\0'; + } + char *r = r_str_newf ("(%s)", buf? buf: ""); + + r_strbuf_free (rbuf); + return r; +} + +static void dump_cobj(pyc_code_object *c) { + eprintf ("[DUMP]\n"); + eprintf ("name: %s\n", (char *)c->name->data); + eprintf ("const_start\n"); + dump (c->consts->data); + eprintf ("consts_end\n"); + + eprintf ("names_start\n"); + dump (c->names->data); + eprintf ("names_end\n"); + + eprintf ("varnames_start\n"); + dump (c->varnames->data); + eprintf ("varnames_end\n"); +} diff --git a/libr/asm/arch/pyc/pyc_dis.h b/libr/asm/arch/pyc/pyc_dis.h new file mode 100644 index 0000000000..84f88960c7 --- /dev/null +++ b/libr/asm/arch/pyc/pyc_dis.h @@ -0,0 +1,79 @@ +/* radare - LGPL3 - Copyright 2016-2020 - c0riolis, x0urc3 */ + +#ifndef PYC_DIS_H +#define PYC_DIS_H + +#include +#include +#include +#include + +#include "opcode.h" + +typedef enum { + TYPE_ASCII = 'a', + TYPE_ASCII_INTERNED = 'A', + TYPE_BINARY_COMPLEX = 'y', + TYPE_BINARY_FLOAT = 'g', + TYPE_CODE_v0 = 'C', + TYPE_CODE_v1 = 'c', + TYPE_COMPLEX = 'x', + TYPE_DICT = '{', + TYPE_ELLIPSIS = '.', + TYPE_FALSE = 'F', + TYPE_FLOAT = 'f', + TYPE_FROZENSET = '>', + TYPE_INT64 = 'I', + TYPE_INTERNED = 't', + TYPE_INT = 'i', + TYPE_LIST = '[', + TYPE_LONG = 'l', + TYPE_NONE = 'N', + TYPE_NULL = '0', + TYPE_REF = 'r', + TYPE_SET = '<', + TYPE_SHORT_ASCII_INTERNED = 'Z', + TYPE_SHORT_ASCII = 'z', + TYPE_SMALL_TUPLE = ')', + TYPE_STOPITER = 'S', + TYPE_STRINGREF = 'R', + TYPE_STRING = 's', + TYPE_TRUE = 'T', + TYPE_TUPLE = '(', + TYPE_UNICODE = 'u', + TYPE_UNKNOWN = '?', +} pyc_marshal_type; + +typedef enum { + FLAG_REF = '\x80', +} pyc_marshal_flag; + +typedef struct { + pyc_marshal_type type; + void *data; +} pyc_object; + +typedef struct { + ut32 argcount; + ut32 posonlyargcount; + ut32 kwonlyargcount; + ut32 nlocals; + ut32 stacksize; + ut32 flags; + pyc_object *code; +
pyc_object *consts; + pyc_object *names; + pyc_object *varnames; + pyc_object *freevars; + pyc_object *cellvars; + pyc_object *filename; + pyc_object *name; + ut32 firstlineno; + pyc_object *lnotab; + st64 start_offset; + st64 end_offset; +} pyc_code_object; + +int r_pyc_disasm(RAsmOp *op, const ut8 *buf, RList *cobjs, RList *interned_table, ut64 pc, pyc_opcodes *opcodes); + +#endif diff --git a/libr/asm/meson.build b/libr/asm/meson.build index a96b6dabf2..3579dc51ee 100644 --- a/libr/asm/meson.build +++ b/libr/asm/meson.build @@ -45,6 +45,7 @@ r_asm_sources = [ 'p/asm_ppc_cs.c', 'p/asm_ppc_gnu.c', 'p/asm_propeller.c', + 'p/asm_pyc.c', 'p/asm_riscv.c', 'p/asm_rsp.c', 'p/asm_sh.c', @@ -126,6 +127,37 @@ r_asm_sources = [ 'arch/ppc/libvle/vle.c', 'arch/ppc/libps/libps.c', 'arch/propeller/propeller_disas.c', + 'arch/pyc/opcode_10.c', + 'arch/pyc/opcode_11.c', + 'arch/pyc/opcode_12.c', + 'arch/pyc/opcode_13.c', + 'arch/pyc/opcode_14.c', + 'arch/pyc/opcode_15.c', + 'arch/pyc/opcode_16.c', + 'arch/pyc/opcode_20.c', + 'arch/pyc/opcode_21.c', + 'arch/pyc/opcode_22.c', + 'arch/pyc/opcode_23.c', + 'arch/pyc/opcode_24.c', + 'arch/pyc/opcode_25.c', + 'arch/pyc/opcode_26.c', + 'arch/pyc/opcode_27.c', + 'arch/pyc/opcode_2x.c', + 'arch/pyc/opcode_30.c', + 'arch/pyc/opcode_31.c', + 'arch/pyc/opcode_32.c', + 'arch/pyc/opcode_33.c', + 'arch/pyc/opcode_34.c', + 'arch/pyc/opcode_35.c', + 'arch/pyc/opcode_36.c', + 'arch/pyc/opcode_37.c', + 'arch/pyc/opcode_38.c', + 'arch/pyc/opcode_39.c', + 'arch/pyc/opcode_3x.c', + 'arch/pyc/opcode_anal.c', + 'arch/pyc/opcode_arg_fmt.c', + 'arch/pyc/opcode.c', + 'arch/pyc/pyc_dis.c', #'arch/riscv/riscv-opc.c', #'arch/riscv/riscv.c', 'arch/rsp/rsp_idec.c', diff --git a/libr/asm/p/asm_pyc.c b/libr/asm/p/asm_pyc.c new file mode 100644 index 0000000000..3c2b60539d --- /dev/null +++ b/libr/asm/p/asm_pyc.c @@ -0,0 +1,61 @@ +/* radare - LGPL3 - Copyright 2016-2020 - c0riolis, x0urc3 */ + +#include +#include +#include +#include + +#include 
"../arch/pyc/pyc_dis.h" + +static pyc_opcodes *opcodes_cache = NULL; + +static int disassemble(RAsm *a, RAsmOp *opstruct, const ut8 *buf, int len) { + RList *shared = NULL; + + RBin *bin = a->binb.bin; + ut64 pc = a->pc; + + RBinPlugin *plugin = bin && bin->cur && bin->cur->o? bin->cur->o->plugin: NULL; + + if (plugin) { + if (!strcmp (plugin->name, "pyc")) { + shared = bin->cur->o->bin_obj; + } + } + RList *cobjs = r_list_get_n (shared, 0); + RList *interned_table = r_list_get_n (shared, 1); + if (!opcodes_cache || !pyc_opcodes_equal (opcodes_cache, a->cpu)) { + opcodes_cache = get_opcode_by_version (a->cpu); + opcodes_cache->bits = a->bits; + } + int r = r_pyc_disasm (opstruct, buf, cobjs, interned_table, pc, opcodes_cache); + opstruct->size = r; + return r; +} + +static bool finish(void *user) { + if (opcodes_cache) { + free_opcode (opcodes_cache); + opcodes_cache = NULL; + } + return true; +} + +RAsmPlugin r_asm_plugin_pyc = { + .name = "pyc", + .arch = "pyc", + .license = "LGPL3", + .bits = 16 | 8, + .desc = "PYC disassemble plugin", + .disassemble = &disassemble, + .fini = &finish, +}; + +#ifndef R2_PLUGIN_INCORE +R_API RLibStruct radare_plugin = { + .type = R_LIB_TYPE_ASM, + .data = &r_asm_plugin_pyc, + .version = R2_VERSION +}; + +#endif diff --git a/libr/asm/p/pyc.mk b/libr/asm/p/pyc.mk new file mode 100644 index 0000000000..6caf9af4c9 --- /dev/null +++ b/libr/asm/p/pyc.mk @@ -0,0 +1,43 @@ +PYC_ROOT=$(LIBR)/asm/arch/pyc +OBJ_PYC=asm_pyc.o +OBJ_PYC+=$(PYC_ROOT)/opcode_10.o +OBJ_PYC+=$(PYC_ROOT)/opcode_11.o +OBJ_PYC+=$(PYC_ROOT)/opcode_12.o +OBJ_PYC+=$(PYC_ROOT)/opcode_13.o +OBJ_PYC+=$(PYC_ROOT)/opcode_14.o +OBJ_PYC+=$(PYC_ROOT)/opcode_15.o +OBJ_PYC+=$(PYC_ROOT)/opcode_16.o +OBJ_PYC+=$(PYC_ROOT)/opcode_20.o +OBJ_PYC+=$(PYC_ROOT)/opcode_21.o +OBJ_PYC+=$(PYC_ROOT)/opcode_22.o +OBJ_PYC+=$(PYC_ROOT)/opcode_23.o +OBJ_PYC+=$(PYC_ROOT)/opcode_24.o +OBJ_PYC+=$(PYC_ROOT)/opcode_25.o +OBJ_PYC+=$(PYC_ROOT)/opcode_26.o +OBJ_PYC+=$(PYC_ROOT)/opcode_27.o 
+OBJ_PYC+=$(PYC_ROOT)/opcode_2x.o +OBJ_PYC+=$(PYC_ROOT)/opcode_30.o +OBJ_PYC+=$(PYC_ROOT)/opcode_31.o +OBJ_PYC+=$(PYC_ROOT)/opcode_32.o +OBJ_PYC+=$(PYC_ROOT)/opcode_33.o +OBJ_PYC+=$(PYC_ROOT)/opcode_34.o +OBJ_PYC+=$(PYC_ROOT)/opcode_35.o +OBJ_PYC+=$(PYC_ROOT)/opcode_36.o +OBJ_PYC+=$(PYC_ROOT)/opcode_37.o +OBJ_PYC+=$(PYC_ROOT)/opcode_38.o +OBJ_PYC+=$(PYC_ROOT)/opcode_39.o +OBJ_PYC+=$(PYC_ROOT)/opcode_3x.o +OBJ_PYC+=$(PYC_ROOT)/opcode_arg_fmt.o +OBJ_PYC+=$(PYC_ROOT)/opcode_anal.o +OBJ_PYC+=$(PYC_ROOT)/opcode.o +OBJ_PYC+=$(PYC_ROOT)/pyc_dis.o + +STATIC_OBJ+=${OBJ_PYC} +TARGET_PYC=asm_pyc.$(EXT_SO) + +ALL_TARGETS+=${TARGET_PYC} +CFLAGS+=-I$(PYC_ROOT) + +${TARGET_PYC}: ${OBJ_PYC} + ${CC} ${CFLAGS} $(LDFLAGS) -o ${TARGET_PYC} ${OBJ_PYC} -lr_util + diff --git a/libr/bin/format/pyc/marshal.c b/libr/bin/format/pyc/marshal.c new file mode 100644 index 0000000000..1102897c2b --- /dev/null +++ b/libr/bin/format/pyc/marshal.c @@ -0,0 +1,1208 @@ +/* radare - LGPL3 - Copyright 2016 - Matthieu (c0riolis) Tardy - l0stb1t*/ + +#include +#include +#include "marshal.h" +#include "pyc_magic.h" + +#define SIZE32_MAX 0x7FFFFFFF + +static ut32 magic_int; +static ut32 symbols_ordinal = 0; + +static RList *refs = NULL; // If you don't have a good reason, do not change this. 
And also checkout !refs in get_code_object() + +/* interned_table is used to handle TYPE_INTERNED object */ +extern RList *interned_table; + +static pyc_object *get_object(RBuffer *buffer); +static pyc_object *copy_object(pyc_object *object); +static void free_object(pyc_object *object); + +static ut8 get_ut8(RBuffer *buffer, bool *error) { + ut8 ret = 0; + int size = r_buf_read (buffer, &ret, sizeof (ret)); + if (size < sizeof (ret)) { + *error = true; + } + return ret; +} + +static ut16 get_ut16(RBuffer *buffer, bool *error) { + ut16 ret = 0; + + int size = r_buf_read (buffer, (ut8 *)&ret, sizeof (ret)); + if (size != sizeof (ret)) { + *error = true; + } + return ret; +} + +static ut32 get_ut32(RBuffer *buffer, bool *error) { + ut32 ret = 0; + int size = r_buf_read (buffer, (ut8 *)&ret, sizeof (ret)); + if (size != sizeof (ret)) { + *error = true; + } + return ret; +} + +static st32 get_st32(RBuffer *buffer, bool *error) { + st32 ret = 0; + int size = r_buf_read (buffer, (ut8 *)&ret, sizeof (ret)); + if (size < sizeof (ret)) { + *error = true; + } + return ret; +} + +static st64 get_st64(RBuffer *buffer, bool *error) { + st64 ret = 0; + int size = r_buf_read (buffer, (ut8 *)&ret, sizeof (ret)); + if (size < sizeof (ret)) { + *error = true; + } + return ret; +} + +static double get_float64(RBuffer *buffer, bool *error) { + double ret = 0; + int size = r_buf_read (buffer, (ut8 *)&ret, sizeof (ret)); + if (size < sizeof (ret)) { + *error = true; + } + return ret; +} + +static ut8 *get_bytes(RBuffer *buffer, ut32 size) { + ut8 *ret = R_NEWS0 (ut8, size + 1); + if (!ret) { + return NULL; + } + if (r_buf_read (buffer, ret, size) < size) { + free (ret); + return NULL; + } + return ret; +} + +static pyc_object *get_none_object(void) { + pyc_object *ret; + + ret = R_NEW0 (pyc_object); + if (!ret) { + return NULL; + } + ret->type = TYPE_NONE; + ret->data = strdup ("None"); + if (!ret->data) { + R_FREE (ret); + } + return ret; +} + +static pyc_object *get_false_object(void) 
{ + pyc_object *ret = R_NEW0 (pyc_object); + if (!ret) { + return NULL; + } + ret->type = TYPE_FALSE; + ret->data = strdup ("False"); + if (!ret->data) { + R_FREE (ret); + } + return ret; +} + +static pyc_object *get_true_object(void) { + pyc_object *ret = R_NEW0 (pyc_object); + if (!ret) { + return NULL; + } + ret->type = TYPE_TRUE; + ret->data = strdup ("True"); + if (!ret->data) { + R_FREE (ret); + } + return ret; +} + +static pyc_object *get_int_object(RBuffer *buffer) { + bool error = false; + pyc_object *ret = NULL; + + st32 i = get_st32 (buffer, &error); + if (error) { + return NULL; + } + ret = R_NEW0 (pyc_object); + if (!ret) { + return NULL; + } + ret->type = TYPE_INT; + ret->data = r_str_newf ("%d", i); + if (!ret->data) { + R_FREE (ret); + } + return ret; +} + +static pyc_object *get_int64_object(RBuffer *buffer) { + pyc_object *ret = NULL; + bool error = false; + st64 i; + + i = get_st64 (buffer, &error); + + if (error) { + return NULL; + } + ret = R_NEW0 (pyc_object); + if (!ret) { + return NULL; + } + ret->type = TYPE_INT64; + ret->data = r_str_newf ("%lld", i); + if (!ret->data) { + R_FREE (ret); + } + return ret; +} + +/* long is used when the number is > MAX_INT64 */ +static pyc_object *get_long_object(RBuffer *buffer) { + pyc_object *ret = NULL; + bool error = false; + bool neg = false; + ut32 tmp = 0; + size_t size; + size_t i, j = 0, left = 0; + ut16 n; + char *hexstr; + char digist2hex[] = "0123456789abcdef"; + + st32 ndigits = get_st32 (buffer, &error); + if (ndigits < -SIZE32_MAX) { + eprintf ("bad marshal data (long size out of range)"); + return NULL; + } + if (error) { + return NULL; + } + ret = R_NEW0 (pyc_object); + if (!ret) { + return NULL; + } + ret->type = TYPE_LONG; + if (ndigits < 0) { + ndigits = -ndigits; + neg = true; + } + if (ndigits == 0) { + ret->data = strdup ("0x0"); + } else { + size = ndigits * 15; + size = (size - 1) / 4 + 1; + size += 3 + (neg? 
1: 0); + hexstr = calloc (size, sizeof (char)); + if (!hexstr) { + free (ret); + return NULL; + } + j = size - 1; + + for (i = 0; i < ndigits; i++) { + n = get_ut16 (buffer, &error); + tmp |= n << left; + left += 15; + + while (left >= 4) { + hexstr[--j] = digist2hex[tmp & 0xf]; + tmp >>= 4; + left -= 4; + } + } + + if (tmp) { + hexstr[--j] = digist2hex[tmp & 0xf]; + } + + hexstr[--j] = 'x'; + hexstr[--j] = '0'; + if (neg) { + hexstr[--j] = '-'; + } + + ret->data = &hexstr[j]; + } + return ret; +} + +static pyc_object *get_stringref_object(RBuffer *buffer) { + pyc_object *ret = NULL; + bool error = false; + ut32 n = 0; + + n = get_st32 (buffer, &error); + if (n >= r_list_length (interned_table)) { + eprintf ("bad marshal data (string ref out of range)"); + return NULL; + } + if (error) { + return NULL; + } + ret = R_NEW0 (pyc_object); + if (!ret) { + return NULL; + } + ret->type = TYPE_STRINGREF; + ret->data = r_list_get_n (interned_table, n); + if (!ret->data) { + R_FREE (ret); + } + return ret; +} + +static pyc_object *get_float_object(RBuffer *buffer) { + pyc_object *ret = NULL; + bool error = false; + ut32 size = 0; + ut8 n = 0; + + n = get_ut8 (buffer, &error); + if (error) { + return NULL; + } + ret = R_NEW0 (pyc_object); + if (!ret) { + return NULL; + } + ut8 *s = malloc (n + 1); + if (!s) { + return NULL; + } + /* object contain string representation of the number */ + size = r_buf_read (buffer, s, n); + if (size != n) { + R_FREE (s); + R_FREE (ret); + return NULL; + } + s[n] = '\0'; + ret->type = TYPE_FLOAT; + ret->data = s; + return ret; +} + +static pyc_object *get_binary_float_object(RBuffer *buffer) { + pyc_object *ret = NULL; + bool error = false; + double f; + + f = get_float64 (buffer, &error); + if (error) { + return NULL; + } + ret = R_NEW0 (pyc_object); + if (!ret) { + return NULL; + } + ret->type = TYPE_FLOAT; + ret->data = r_str_newf ("%.15g", f); + if (!ret->data) { + R_FREE (ret); + return NULL; + } + return ret; +} + +static pyc_object 
*get_complex_object(RBuffer *buffer) {
+	/*
+	 * Read a TYPE_COMPLEX: two length-prefixed strings (real and imaginary
+	 * part) joined as "<real>+<imag>j" in ret->data. The length prefix is one
+	 * byte when (magic_int & 0xffff) <= 62061 and 32 bits otherwise.
+	 * Returns NULL on a short read, a negative length or allocation failure;
+	 * every failure path releases what was allocated so far.
+	 */
+	pyc_object *ret = NULL;
+	bool error = false;
+	st32 n1 = 0;
+	st32 n2 = 0;
+	ut8 *s1 = NULL;
+	ut8 *s2 = NULL;
+
+	ret = R_NEW0 (pyc_object);
+	if (!ret) {
+		return NULL;
+	}
+
+	if ((magic_int & 0xffff) <= 62061) {
+		n1 = get_ut8 (buffer, &error);
+	} else {
+		n1 = get_st32 (buffer, &error);
+	}
+	/* a negative length would wrap around once used as an allocation size */
+	if (error || n1 < 0) {
+		goto fail;
+	}
+	s1 = malloc ((size_t)n1 + 1);
+	if (!s1) {
+		goto fail;
+	}
+	/* object contain string representation of the number */
+	if (r_buf_read (buffer, s1, n1) != n1) {
+		goto fail;
+	}
+	s1[n1] = '\0';
+
+	if ((magic_int & 0xffff) <= 62061) {
+		n2 = get_ut8 (buffer, &error);
+	} else {
+		n2 = get_st32 (buffer, &error);
+	}
+	if (error || n2 < 0) {
+		goto fail;
+	}
+	s2 = malloc ((size_t)n2 + 1);
+	if (!s2) {
+		goto fail;
+	}
+	/* object contain string representation of the number */
+	if (r_buf_read (buffer, s2, n2) != n2) {
+		goto fail;
+	}
+	s2[n2] = '\0';
+
+	ret->type = TYPE_COMPLEX;
+	ret->data = r_str_newf ("%s+%sj", s1, s2);
+	if (!ret->data) {
+		goto fail;
+	}
+	free (s1);
+	free (s2);
+	return ret;
+fail:
+	/* free (NULL) is a no-op, so one exit path covers every stage */
+	free (s1);
+	free (s2);
+	free (ret);
+	return NULL;
+}
+
+/*
+ * Read a TYPE_BINARY_COMPLEX: two consecutive 8-byte doubles (real, imaginary)
+ * rendered as "<real>+<imag>j" with "%.15g" into ret->data.
+ */
+static pyc_object *get_binary_complex_object(RBuffer *buffer) {
+	pyc_object *ret = NULL;
+	bool error = false;
+	double a, b;
+
+	//a + bj
+	a = get_float64 (buffer, &error);
+	b = get_float64 (buffer, &error);
+	if (error) {
+		return NULL;
+	}
+	ret = R_NEW0 (pyc_object);
+	if (!ret) {
+		return NULL;
+	}
+	ret->type = TYPE_BINARY_COMPLEX;
+	ret->data = r_str_newf ("%.15g+%.15gj", a, b);
+	if (!ret->data) {
+		R_FREE (ret);
+		return NULL;
+	}
+	return ret;
+}
+
+static pyc_object *get_string_object(RBuffer *buffer) {
+	pyc_object *ret = NULL;
+	bool error = false;
+	ut32 n = 0;
+
+	n = get_ut32 (buffer, &error);
+	if (n > SIZE32_MAX) {
+		eprintf ("bad marshal data (string size out of range)");
+		return NULL;
+	}
+	if (error) {
+		return NULL;
+	}
+	ret = R_NEW0 (pyc_object);
+	if (!ret) {
+		return
NULL;
+	}
+	ret->type = TYPE_STRING;
+	ret->data = get_bytes (buffer, n);
+	if (!ret->data) {
+		R_FREE (ret);
+		return NULL;
+	}
+	return ret;
+}
+
+/*
+ * Read a TYPE_UNICODE: 32-bit length followed by that many bytes, stored
+ * NUL-terminated in ret->data. Returns NULL on a short read, an oversized
+ * length or allocation failure.
+ */
+static pyc_object *get_unicode_object(RBuffer *buffer) {
+	pyc_object *ret = NULL;
+	bool error = false;
+	ut32 n = 0;
+
+	n = get_ut32 (buffer, &error);
+	if (n > SIZE32_MAX) {
+		eprintf ("bad marshal data (unicode size out of range)");
+		return NULL;
+	}
+	if (error) {
+		return NULL;
+	}
+	ret = R_NEW0 (pyc_object);
+	if (!ret) {
+		/* the allocation can fail; do not write through a NULL pointer */
+		return NULL;
+	}
+	ret->type = TYPE_UNICODE;
+	ret->data = get_bytes (buffer, n);
+	if (!ret->data) {
+		R_FREE (ret);
+		return NULL;
+	}
+	return ret;
+}
+
+/*
+ * Read a TYPE_INTERNED string and register its data pointer in
+ * interned_table so later TYPE_STRINGREF objects can refer to it by index.
+ */
+static pyc_object *get_interned_object(RBuffer *buffer) {
+	pyc_object *ret = NULL;
+	bool error = false;
+	ut32 n = 0;
+
+	n = get_ut32 (buffer, &error);
+	if (n > SIZE32_MAX) {
+		eprintf ("bad marshal data (string size out of range)");
+		return NULL;
+	}
+	if (error) {
+		return NULL;
+	}
+	ret = R_NEW0 (pyc_object);
+	if (!ret) {
+		return NULL;
+	}
+	ret->type = TYPE_INTERNED;
+	ret->data = get_bytes (buffer, n);
+	if (!ret->data) {
+		/* nothing was read: do not register a NULL entry in the table */
+		R_FREE (ret);
+		return NULL;
+	}
+	/* add data pointer to interned table */
+	r_list_append (interned_table, ret->data);
+	return ret;
+}
+
+/*
+ * Read `size` consecutive objects into a freshly allocated RList stored in
+ * ret->data. The caller sets ret->type. Returns NULL as soon as one element
+ * cannot be read or appended; the container is released on every such path.
+ */
+static pyc_object *get_array_object_generic(RBuffer *buffer, ut32 size) {
+	pyc_object *tmp = NULL;
+	pyc_object *ret = NULL;
+	ut32 i = 0;
+
+	ret = R_NEW0 (pyc_object);
+	if (!ret) {
+		return NULL;
+	}
+	ret->data = r_list_new ();
+	if (!ret->data) {
+		free (ret);
+		return NULL;
+	}
+	for (i = 0; i < size; i++) {
+		tmp = get_object (buffer);
+		if (!tmp) {
+			r_list_free (ret->data);
+			R_FREE (ret);
+			return NULL;
+		}
+		if (!r_list_append (ret->data, tmp)) {
+			free (tmp);
+			r_list_free (ret->data);
+			/* the wrapper object itself must go too, not only its list */
+			free (ret);
+			return NULL;
+		}
+	}
+	return ret;
+}
+
+/* small TYPE_SMALL_TUPLE doesn't exist in python2 */
+/* */
+static pyc_object *get_small_tuple_object(RBuffer *buffer) {
+	pyc_object *ret = NULL;
+	bool error = false;
+	ut8 n = 0;
+
+	n = get_ut8 (buffer, &error);
+	if (error) {
+		return NULL;
+	}
+	ret
= get_array_object_generic (buffer, n);
+	if (ret) {
+		ret->type = TYPE_SMALL_TUPLE;
+		return ret;
+	}
+	return NULL;
+}
+
+/*
+ * Read a TYPE_TUPLE: 32-bit element count followed by the elements.
+ * Returns NULL on a short read, an oversized count or element failure.
+ */
+static pyc_object *get_tuple_object(RBuffer *buffer) {
+	pyc_object *ret = NULL;
+	bool error = false;
+	ut32 n = 0;
+
+	n = get_ut32 (buffer, &error);
+	if (n > SIZE32_MAX) {
+		eprintf ("bad marshal data (tuple size out of range)");
+		return NULL;
+	}
+	if (error) {
+		return NULL;
+	}
+	ret = get_array_object_generic (buffer, n);
+	if (ret) {
+		ret->type = TYPE_TUPLE;
+		return ret;
+	}
+	return NULL;
+}
+
+/*
+ * Read a TYPE_LIST: 32-bit element count followed by the elements.
+ * Returns NULL on a short read, an oversized count or element failure.
+ */
+static pyc_object *get_list_object(RBuffer *buffer) {
+	pyc_object *ret = NULL;
+	bool error = false;
+	ut32 n = 0;
+
+	n = get_ut32 (buffer, &error);
+	if (n > SIZE32_MAX) {
+		eprintf ("bad marshal data (list size out of range)");
+		return NULL;
+	}
+	if (error) {
+		return NULL;
+	}
+	ret = get_array_object_generic (buffer, n);
+	if (!ret) {
+		/* element parsing failed; there is no object to tag */
+		return NULL;
+	}
+	ret->type = TYPE_LIST;
+	return ret;
+}
+
+/*
+ * Read a TYPE_DICT: alternating key / value objects, stored flat in one RList
+ * (key, value, key, value, ...). Reading stops at the first NULL object
+ * returned by get_object(), which is how a TYPE_NULL terminator shows up here.
+ * Returns NULL on allocation or append failure.
+ */
+static pyc_object *get_dict_object(RBuffer *buffer) {
+	pyc_object *ret = NULL,
+		   *key = NULL,
+		   *val = NULL;
+
+	ret = R_NEW0 (pyc_object);
+	if (!ret) {
+		return NULL;
+	}
+	ret->data = r_list_new ();
+	if (!ret->data) {
+		R_FREE (ret);
+		return NULL;
+	}
+	for (;;) {
+		key = get_object (buffer);
+		if (key == NULL) {
+			break;
+		}
+		if (!r_list_append (ret->data, key)) {
+			r_list_free (ret->data);
+			R_FREE (ret);
+			R_FREE (key);
+			return NULL;
+		}
+		val = get_object (buffer);
+		if (val == NULL) {
+			/* stop before appending: a NULL element would be dereferenced by list walkers */
+			break;
+		}
+		if (!r_list_append (ret->data, val)) {
+			/* release everything built so far instead of leaking it */
+			free (val);
+			r_list_free (ret->data);
+			R_FREE (ret);
+			return NULL;
+		}
+	}
+	ret->type = TYPE_DICT;
+	return ret;
+}
+
+/*
+ * Read a set (also used for TYPE_FROZENSET by get_object): 32-bit element
+ * count followed by the elements. The object is tagged TYPE_SET.
+ */
+static pyc_object *get_set_object(RBuffer *buffer) {
+	pyc_object *ret = NULL;
+	bool error = false;
+	ut32 n = 0;
+
+	n = get_ut32 (buffer, &error);
+	if (n > SIZE32_MAX) {
+		eprintf ("bad marshal data (set size out of range)");
+		return NULL;
+	}
+	if (error) {
+		return NULL;
+	}
+	ret = get_array_object_generic (buffer, n);
+	if (!ret) {
+		return NULL;
+	}
+	ret->type = TYPE_SET;
+	return ret;
+}
+
+static pyc_object *get_ascii_object_generic(RBuffer
*buffer, ut32 size, bool interned) {
+	/* Shared reader for the four ASCII variants: `size` bytes stored
+	 * NUL-terminated in ret->data, always tagged TYPE_ASCII.
+	 * `interned` is accepted but not used by this implementation. */
+	pyc_object *ret = NULL;
+
+	ret = R_NEW0 (pyc_object);
+	if (!ret) {
+		return NULL;
+	}
+	ret->type = TYPE_ASCII;
+	ret->data = get_bytes (buffer, size);
+	if (!ret->data) {
+		R_FREE (ret);
+	}
+	return ret;
+}
+
+/* TYPE_ASCII: 32-bit length, not interned. */
+static pyc_object *get_ascii_object(RBuffer *buffer) {
+	bool error = false;
+	ut32 n = 0;
+
+	n = get_ut32 (buffer, &error);
+	if (error) {
+		return NULL;
+	}
+	/* plain ASCII is the non-interned variant, like get_short_ascii_object() */
+	return get_ascii_object_generic (buffer, n, false);
+}
+
+/* TYPE_ASCII_INTERNED: 32-bit length, interned. */
+static pyc_object *get_ascii_interned_object(RBuffer *buffer) {
+	bool error = false;
+	ut32 n;
+
+	n = get_ut32 (buffer, &error);
+	if (error) {
+		return NULL;
+	}
+	return get_ascii_object_generic (buffer, n, true);
+}
+
+/* TYPE_SHORT_ASCII: 8-bit length, not interned. */
+static pyc_object *get_short_ascii_object(RBuffer *buffer) {
+	bool error = false;
+	ut8 n;
+
+	n = get_ut8 (buffer, &error);
+	if (error) {
+		return NULL;
+	}
+	return get_ascii_object_generic (buffer, n, false);
+}
+
+/* TYPE_SHORT_ASCII_INTERNED: 8-bit length, interned. */
+static pyc_object *get_short_ascii_interned_object(RBuffer *buffer) {
+	bool error = false;
+	ut8 n;
+
+	n = get_ut8 (buffer, &error);
+	if (error) {
+		return NULL;
+	}
+	return get_ascii_object_generic (buffer, n, true);
+}
+
+/*
+ * Read a TYPE_REF: a 32-bit index into the `refs` list. Returns a copy of the
+ * referenced object, or NULL when the index is out of range, the slot is empty
+ * or copy_object() cannot copy that type.
+ */
+static pyc_object *get_ref_object(RBuffer *buffer) {
+	bool error = false;
+	pyc_object *obj;
+	ut32 index;
+
+	index = get_ut32 (buffer, &error);
+	if (error) {
+		return NULL;
+	}
+	if (index >= r_list_length (refs)) {
+		return NULL;
+	}
+	obj = r_list_get_n (refs, index);
+	if (!obj) {
+		return NULL;
+	}
+	/* obj stays owned by `refs` (released through its free callback), so it
+	 * must not be freed here even when the copy fails */
+	return copy_object (obj);
+}
+
+static void free_object(pyc_object *object) {
+	if (!object) {
+		return;
+	}
+	switch (object->type) {
+	case TYPE_SMALL_TUPLE:
+	case TYPE_TUPLE:
+		r_list_free (object->data);
+		break;
+	case TYPE_STRING:
+	case TYPE_TRUE:
+	case TYPE_FALSE:
+	case TYPE_INT:
+	case TYPE_NONE:
+	case TYPE_NULL:
+	case TYPE_ASCII_INTERNED:
+	case TYPE_SHORT_ASCII:
+	case TYPE_ASCII:
+	case TYPE_SHORT_ASCII_INTERNED:
+		free (object->data);
+		break;
+	case
TYPE_CODE_v0: + case TYPE_CODE_v1: { + pyc_code_object *cobj = object->data; + free_object (cobj->code); + free_object (cobj->consts); + free_object (cobj->names); + free_object (cobj->varnames); + free_object (cobj->freevars); + free_object (cobj->cellvars); + free_object (cobj->filename); + free_object (cobj->name); + free_object (cobj->lnotab); + free (object->data); + } break; + case TYPE_REF: + free_object (object->data); + break; + case TYPE_SET: + case TYPE_FROZENSET: + case TYPE_ELLIPSIS: + case TYPE_STOPITER: + case TYPE_BINARY_COMPLEX: + case TYPE_BINARY_FLOAT: + case TYPE_COMPLEX: + case TYPE_STRINGREF: + case TYPE_DICT: + case TYPE_FLOAT: + case TYPE_INT64: + case TYPE_INTERNED: + case TYPE_LIST: + case TYPE_LONG: + case TYPE_UNICODE: + case TYPE_UNKNOWN: + eprintf ("Free not implemented for type %x\n", object->type); + return; + default: + eprintf ("Undefined type in free_object (%x)\n", object->type); + return; + } + free (object); +} + +static pyc_object *copy_object(pyc_object *object) { + pyc_object *copy = R_NEW0 (pyc_object); + if (!copy || !object) { + free (copy); + return NULL; + } + copy->type = object->type; + switch (object->type) { + case TYPE_NULL: + break; + case TYPE_TUPLE: + case TYPE_SMALL_TUPLE: + copy->data = r_list_clone (object->data); + break; + case TYPE_INT: + case TYPE_INT64: + case TYPE_NONE: + case TYPE_TRUE: + case TYPE_FALSE: + case TYPE_STRING: + case TYPE_ASCII: + case TYPE_SHORT_ASCII: + case TYPE_ASCII_INTERNED: + case TYPE_SHORT_ASCII_INTERNED: + copy->data = strdup (object->data); + break; + case TYPE_CODE_v0: + case TYPE_CODE_v1: { + pyc_code_object *src = object->data; + pyc_code_object *dst = R_NEW0 (pyc_code_object); + if (!dst) { + break; + } + memcpy (dst, src, sizeof (*dst)); + dst->code = copy_object (src->code); + dst->consts = copy_object (src->consts); + dst->names = copy_object (src->names); + dst->varnames = copy_object (src->varnames); + dst->freevars = copy_object (src->freevars); + dst->cellvars = 
copy_object (src->cellvars); + dst->filename = copy_object (src->filename); + dst->name = copy_object (src->name); + dst->lnotab = copy_object (src->lnotab); + copy->data = dst; + } break; + case TYPE_REF: + copy->data = copy_object (object->data); + break; + case TYPE_ELLIPSIS: + case TYPE_STOPITER: + case TYPE_BINARY_COMPLEX: + case TYPE_BINARY_FLOAT: + case TYPE_COMPLEX: + case TYPE_STRINGREF: + case TYPE_DICT: + case TYPE_FLOAT: + case TYPE_FROZENSET: + case TYPE_INTERNED: + case TYPE_LIST: + case TYPE_LONG: + case TYPE_SET: + case TYPE_UNICODE: + case TYPE_UNKNOWN: + eprintf ("Copy not implemented for type %x\n", object->type); + return NULL; + default: + eprintf ("Undefined type in copy_object (%x)\n", object->type); + return NULL; + } + if (!copy->data) { + R_FREE (copy); + } + return copy; +} + +static pyc_object *get_code_object(RBuffer *buffer) { + bool error = false; + + pyc_object *ret = R_NEW0 (pyc_object); + pyc_code_object *cobj = R_NEW0 (pyc_code_object); + if (!ret || !cobj) { + free (ret); + free (cobj); + return NULL; + } + + //ret->type = TYPE_CODE_v1; + // support start from v1.0 + ret->data = cobj; + + bool v10_to_12 = magic_int_within (magic_int, 39170, 16679, &error); // 1.0.1 - 1.2 + bool v13_to_22 = magic_int_within (magic_int, 11913, 60718, &error); // 1.3b1 - 2.2a1 + bool v11_to_14 = magic_int_within (magic_int, 39170, 20117, &error); // 1.0.1 - 1.4 + bool v15_to_22 = magic_int_within (magic_int, 20121, 60718, &error); // 1.5a1 - 2.2a1 + bool v13_to_20 = magic_int_within (magic_int, 11913, 50824, &error); // 1.3b1 - 2.0b1 + //bool v21_to_27 = (!v13_to_20) && magic_int_within (magic_int, 60124, 62212, &error); + bool has_posonlyargcount = magic_int_within (magic_int, 3410, 3424, &error); // v3.8.0a4 - latest + if (error) { + free (ret); + free (cobj); + return NULL; + } + + if (v13_to_22) { + cobj->argcount = get_ut16 (buffer, &error); + } else if (v10_to_12) { + cobj->argcount = 0; + } else { + cobj->argcount = get_ut32 (buffer, &error); 
+ } + + if (has_posonlyargcount) { + cobj->posonlyargcount = get_ut32 (buffer, &error); // Included in argcount + } else { + cobj->posonlyargcount = 0; // None + } + + if (((3020 < (magic_int & 0xffff)) && ((magic_int & 0xffff) < 20121)) && (!v11_to_14)) { + cobj->kwonlyargcount = get_ut32 (buffer, &error); // Not included in argcount + } else { + cobj->kwonlyargcount = 0; + } + + if (v13_to_22) { + cobj->nlocals = get_ut16 (buffer, &error); + } else if (v10_to_12) { + cobj->nlocals = 0; + } else { + cobj->nlocals = get_ut32 (buffer, &error); + } + + if (v15_to_22) { + cobj->stacksize = get_ut16 (buffer, &error); + } else if (v11_to_14 || v10_to_12) { + cobj->stacksize = 0; + } else { + cobj->stacksize = get_ut32 (buffer, &error); + } + + if (v13_to_22) { + cobj->flags = get_ut16 (buffer, &error); + } else if (v10_to_12) { + cobj->flags = 0; + } else { + cobj->flags = get_ut32 (buffer, &error); + } + + //to help disassemble the code + cobj->start_offset = r_buf_tell (buffer) + 5; // 1 from get_object() and 4 from get_string_object() + if (!refs) { + return ret; //return for entried part to get the root object of this file + } + cobj->code = get_object (buffer); + cobj->end_offset = r_buf_tell (buffer); + + cobj->consts = get_object (buffer); + cobj->names = get_object (buffer); + + if (v10_to_12) { + cobj->varnames = NULL; + } else { + cobj->varnames = get_object (buffer); + } + + if (!(v10_to_12 || v13_to_20)) { + cobj->freevars = get_object (buffer); + cobj->cellvars = get_object (buffer); + } else { + cobj->freevars = NULL; + cobj->cellvars = NULL; + } + + cobj->filename = get_object (buffer); + cobj->name = get_object (buffer); + + if (v15_to_22) { + cobj->firstlineno = get_ut16 (buffer, &error); + } else if (v11_to_14) { + cobj->firstlineno = 0; + } else { + cobj->firstlineno = get_ut32 (buffer, &error); + } + + if (v11_to_14) { + cobj->lnotab = NULL; + } else { + cobj->lnotab = get_object (buffer); + } + + if (error) { + free_object (cobj->code); + 
free_object (cobj->consts);
+		free_object (cobj->names);
+		free_object (cobj->varnames);
+		free_object (cobj->freevars);
+		free_object (cobj->cellvars);
+		free_object (cobj->filename);
+		free_object (cobj->name);
+		free_object (cobj->lnotab);
+		free (cobj);
+		R_FREE (ret);
+		return NULL;
+	}
+	return ret;
+}
+
+/*
+ * Return the start offset recorded while reading the code object header at
+ * the buffer's current position, or 0 when no code object could be read.
+ * Also sets the file-level magic_int used by the version checks.
+ */
+ut64 get_code_object_addr(RBuffer *buffer, ut32 magic) {
+	magic_int = magic;
+	pyc_object *co = get_code_object (buffer);
+	ut64 result = 0;
+	if (!co) {
+		return 0;
+	}
+
+	pyc_code_object *cobj = co->data;
+	result = cobj->start_offset;
+	free (co->data);
+	free (co);
+
+	return result;
+}
+
+/*
+ * Read one marshalled object: a type byte, optionally carrying FLAG_REF,
+ * followed by a type-specific payload. When FLAG_REF is set a placeholder is
+ * appended to `refs` before the payload is parsed, so the object gets its
+ * slot ahead of any object nested inside it.
+ * Returns NULL for TYPE_NULL, unknown types and any read failure.
+ */
+static pyc_object *get_object(RBuffer *buffer) {
+	bool error = false;
+	pyc_object *ret = NULL;
+	ut8 code = get_ut8 (buffer, &error);
+	ut8 flag = code & FLAG_REF;
+	RListIter *ref_idx = NULL;
+	ut8 type = code & ~FLAG_REF;
+
+	if (error) {
+		return NULL;
+	}
+
+	if (flag) {
+		ret = get_none_object ();
+		if (!ret) {
+			return NULL;
+		}
+		ref_idx = r_list_append (refs, ret);
+		if (!ref_idx) {
+			free (ret);
+			return NULL;
+		}
+	}
+
+	switch (type) {
+	case TYPE_NULL:
+		return NULL;
+	case TYPE_TRUE:
+		return get_true_object ();
+	case TYPE_FALSE:
+		return get_false_object ();
+	case TYPE_NONE:
+		return get_none_object ();
+	case TYPE_REF:
+		return get_ref_object (buffer);
+	case TYPE_SMALL_TUPLE:
+		ret = get_small_tuple_object (buffer);
+		break;
+	case TYPE_TUPLE:
+		ret = get_tuple_object (buffer);
+		break;
+	case TYPE_STRING:
+		ret = get_string_object (buffer);
+		break;
+	case TYPE_CODE_v0:
+		ret = get_code_object (buffer);
+		/* get_code_object() returns NULL on allocation or read failure */
+		if (ret) {
+			ret->type = TYPE_CODE_v0;
+		}
+		break;
+	case TYPE_CODE_v1:
+		ret = get_code_object (buffer);
+		if (ret) {
+			ret->type = TYPE_CODE_v1;
+		}
+		break;
+	case TYPE_INT:
+		ret = get_int_object (buffer);
+		break;
+	case TYPE_ASCII_INTERNED:
+		ret = get_ascii_interned_object (buffer);
+		break;
+	case TYPE_SHORT_ASCII:
+		ret = get_short_ascii_object (buffer);
+		break;
+	case TYPE_ASCII:
+		ret = get_ascii_object (buffer);
+		break;
+	case TYPE_SHORT_ASCII_INTERNED:
+		ret =
get_short_ascii_interned_object (buffer); + break; + case TYPE_INT64: + ret = get_int64_object (buffer); + break; + case TYPE_INTERNED: + ret = get_interned_object (buffer); + break; + case TYPE_STRINGREF: + ret = get_stringref_object (buffer); + break; + case TYPE_FLOAT: + ret = get_float_object (buffer); + break; + case TYPE_BINARY_FLOAT: + ret = get_binary_float_object (buffer); + break; + case TYPE_COMPLEX: + ret = get_complex_object (buffer); // behaviour depends on Python version + break; + case TYPE_BINARY_COMPLEX: + ret = get_binary_complex_object (buffer); + break; + case TYPE_LIST: + ret = get_list_object (buffer); + break; + case TYPE_LONG: + ret = get_long_object (buffer); + break; + case TYPE_UNICODE: + ret = get_unicode_object (buffer); + break; + case TYPE_DICT: + ret = get_dict_object (buffer); + break; + case TYPE_FROZENSET: + case TYPE_SET: + ret = get_set_object (buffer); + break; + case TYPE_STOPITER: + ret = R_NEW0 (pyc_object); + break; + case TYPE_ELLIPSIS: + ret = R_NEW0 (pyc_object); + break; + case TYPE_UNKNOWN: + eprintf ("Get not implemented for type 0x%x\n", type); + return NULL; + default: + eprintf ("Undefined type in get_object (0x%x)\n", type); + return NULL; + } + + /* for debugging purpose + if (ret == NULL) { + eprintf("***%d***\n", type); + } + */ + + if (flag) { + free_object (ref_idx->data); + ref_idx->data = copy_object (ret); + } + + return ret; +} + +static bool extract_sections_symbols(pyc_object *obj, RList *sections, RList *symbols, RList *cobjs, char *prefix) { + pyc_code_object *cobj = NULL; + RBinSection *section = NULL; + RBinSymbol *symbol = NULL; + RListIter *i = NULL; + + //each code object is a section + if (!obj || (obj->type != TYPE_CODE_v1 && obj->type != TYPE_CODE_v0)) { + return false; + } + cobj = obj->data; + if (!cobj || !cobj->name) { + return false; + } + if (cobj->name->type != TYPE_ASCII && cobj->name->type != TYPE_STRING && cobj->name->type != TYPE_INTERNED) { + return false; + } + if 
(!cobj->name->data) { + return false; + } + //add the cobj to objs list + if (!r_list_append (cobjs, cobj)) { + goto fail; + } + section = R_NEW0 (RBinSection); + symbol = R_NEW0 (RBinSymbol); + prefix = r_str_newf ("%s%s%s", prefix? prefix: "", + prefix? ".": "", cobj->name->data); + if (!prefix || !section || !symbol) { + goto fail; + } + section->name = strdup (prefix); + if (!section->name) { + goto fail; + } + section->paddr = cobj->start_offset; + section->vaddr = cobj->start_offset; + section->size = cobj->end_offset - cobj->start_offset; + section->vsize = cobj->end_offset - cobj->start_offset; + if (!r_list_append (sections, section)) { + goto fail; + } + // start building symbol + symbol->name = strdup (prefix); + //symbol->bind; + symbol->type = R_BIN_TYPE_FUNC_STR; + symbol->size = cobj->end_offset - cobj->start_offset; + symbol->vaddr = cobj->start_offset; + symbol->paddr = cobj->start_offset; + symbol->ordinal = symbols_ordinal++; + if (!r_list_append (symbols, symbol)) { + goto fail; + } + if (cobj->consts->type != TYPE_TUPLE && cobj->consts->type != TYPE_SMALL_TUPLE) { + return false; + } + r_list_foreach (((RList *)(cobj->consts->data)), i, obj) + extract_sections_symbols (obj, sections, symbols, cobjs, prefix); + free (prefix); + return true; +fail: + + free (section); + free (prefix); + free (symbol); + return false; +} + +bool get_sections_symbols_from_code_objects(RBuffer *buffer, RList *sections, RList *symbols, RList *cobjs, ut32 magic) { + bool ret; + magic_int = magic; + refs = r_list_new (); + refs->free = (RListFree)free_object; + ret = extract_sections_symbols (get_object (buffer), sections, symbols, cobjs, NULL); + r_list_free (refs); + return ret; +} diff --git a/libr/bin/format/pyc/marshal.h b/libr/bin/format/pyc/marshal.h new file mode 100644 index 0000000000..aa732c806c --- /dev/null +++ b/libr/bin/format/pyc/marshal.h @@ -0,0 +1,76 @@ +/* radare - LGPL3 - Copyright 2016 - Matthieu (c0riolis) Tardy */ + +#ifndef MARSHAL_H +#define 
MARSHAL_H + +#include +#include + +typedef enum { + TYPE_ASCII = 'a', + TYPE_ASCII_INTERNED = 'A', + TYPE_BINARY_COMPLEX = 'y', + TYPE_BINARY_FLOAT = 'g', + TYPE_CODE_v0 = 'C', + TYPE_CODE_v1 = 'c', + TYPE_COMPLEX = 'x', + TYPE_DICT = '{', + TYPE_ELLIPSIS = '.', + TYPE_FALSE = 'F', + TYPE_FLOAT = 'f', + TYPE_FROZENSET = '>', + TYPE_INT64 = 'I', + TYPE_INTERNED = 't', + TYPE_INT = 'i', + TYPE_LIST = '[', + TYPE_LONG = 'l', + TYPE_NONE = 'N', + TYPE_NULL = '0', + TYPE_REF = 'r', + TYPE_SET = '<', + TYPE_SHORT_ASCII_INTERNED = 'Z', + TYPE_SHORT_ASCII = 'z', + TYPE_SMALL_TUPLE = ')', + TYPE_STOPITER = 'S', + TYPE_STRINGREF = 'R', + TYPE_STRING = 's', + TYPE_TRUE = 'T', + TYPE_TUPLE = '(', + TYPE_UNICODE = 'u', + TYPE_UNKNOWN = '?', +} pyc_marshal_type; + +typedef enum { + FLAG_REF = '\x80', +} pyc_marshal_flag; + +typedef struct { + pyc_marshal_type type; + void *data; +} pyc_object; + +typedef struct { + ut32 argcount; + ut32 posonlyargcount; + ut32 kwonlyargcount; + ut32 nlocals; + ut32 stacksize; + ut32 flags; + pyc_object *code; + pyc_object *consts; + pyc_object *names; + pyc_object *varnames; + pyc_object *freevars; + pyc_object *cellvars; + pyc_object *filename; + pyc_object *name; + ut32 firstlineno; + pyc_object *lnotab; + st64 start_offset; + st64 end_offset; +} pyc_code_object; + +bool get_sections_symbols_from_code_objects(RBuffer *buffer, RList *sections, RList *symbols, RList *objs, ut32 magic); +ut64 get_code_object_addr(RBuffer *buffer, ut32 magic); + +#endif diff --git a/libr/bin/format/pyc/pyc.c b/libr/bin/format/pyc/pyc.c new file mode 100644 index 0000000000..ee3037eff2 --- /dev/null +++ b/libr/bin/format/pyc/pyc.c @@ -0,0 +1,23 @@ +/* radare - LGPL3 - Copyright 2016-2020 - c0riolis, x0urc3 */ + +#include "pyc.h" +#include "marshal.h" + +bool pyc_get_sections_symbols(RList *sections, RList *symbols, RList *cobjs, RBuffer *buf, ut32 magic) { + return get_sections_symbols_from_code_objects (buf, sections, symbols, cobjs, magic); +} + +static bool 
pyc_is_object(ut8 b, pyc_marshal_type type) { + return b == type; +} + +bool pyc_is_code(ut8 b, ut32 magic) { + if ((magic == 0x00949494 || magic == 0x0099be2a || magic == 0x0099be3a || magic == 0x00999901) && (pyc_is_object ((b & ~FLAG_REF), TYPE_CODE_v0))) { + //TYPE_CODE_V0 for Python < 1.0 + return true; + } + if (pyc_is_object ((b & ~FLAG_REF), TYPE_CODE_v1)) { + return true; + } + return false; +} diff --git a/libr/bin/format/pyc/pyc.h b/libr/bin/format/pyc/pyc.h new file mode 100644 index 0000000000..ea69624ae8 --- /dev/null +++ b/libr/bin/format/pyc/pyc.h @@ -0,0 +1,16 @@ +/* radare - LGPL3 - Copyright 2016-2020 - c0riolis, x0urc3 */ + +#ifndef PYC_H +#define PYC_H + +#include +#include +#include +#include +#include "pyc_magic.h" +#include "marshal.h" + +bool pyc_get_sections_symbols(RList *sections, RList *symbols, RList *mem, RBuffer *buf, ut32 magic); +bool pyc_is_code(ut8 b, ut32 magic); + +#endif diff --git a/libr/bin/format/pyc/pyc_magic.c b/libr/bin/format/pyc/pyc_magic.c new file mode 100644 index 0000000000..81d7113a9a --- /dev/null +++ b/libr/bin/format/pyc/pyc_magic.c @@ -0,0 +1,274 @@ +/* radare - LGPL3 - Copyright 2016-2020 - c0riolis, x0urc3 */ + +#include "pyc_magic.h" +#include + +static struct pyc_version versions[] = { + { 0x00949494, "0.9.4 beta", "77b80a91d357c1d95d8e7cd4cbbe799e5deb777e" }, + { 0x0099be2a, "0.9.9", "1cabc2b6c9701aea29bb506b593946e67bf7593a" }, + { 0x0099be3a, "0.9.9", "f820e6917f07e5012bdd103ef97389318d5a10f8" }, + { 0x00999901, "0.9.9", "9fed5726a36d7ce1355c30592838d93321d580ee" }, + { 0x00999902, "1.0.1", "1808ca5d8883097c72c6a8a89143041c20ea13c1" }, + { 0x00999903, "1.1", "d1e6c9a64a563841f60177ac907739f953f15630" }, + { 0x0a0d4127, "1.2", "fb3daf9b2456dc1a3d99f56f78c5e6270eeaf1e8" }, + { 0x0a0d2e89, "1.3b1", "0261bf5b3819b03d83f254562947244634604026" }, + { 0x0a0d0767, "1.4b1", "d1ae0ea9a585f912d7aa3d004ff817d0dea112f8" }, + { 0x0a0d1704, "1.4b1", "22e29b1747e139d9598eaa5126c59313af39949d" }, + { 0x0a0d4e95, "1.4", 
"b82d847b3dcbccd63de075e3879a9369dfb34e0d" }, + { 0x0a0d4e99, "1.5a1", "104a646fc7a67f27df25d4e941b20035e5876f9f" }, + { 0x0a0dc4fc, "1.6a2", "84cd52b560e571eba371d7136abcc9c42c27b002" }, + { 0x0a0dc61b, "2.0b1", "f79434ee41fa86019216240ba32be660bcfc5419" }, + { 0x0a0dc67b, "2.0b1", "4eaa463c031a1bdb5e7791c370d04108e0682dd4" }, + { 0x0a0dc67c, "2.0b1", "4eaa463c031a1bdb5e7791c370d04108e0682dd4" }, + { 0x0a0dc67f, "2.0b1", "18385172fac0b7099bd2d2264df614ed4466f083" }, + { 0x0a0dc680, "2.0b1", "18385172fac0b7099bd2d2264df614ed4466f083" }, + { 0x0a0dc685, "2.0b1", "f657dc8ff25e93b877305bbcfc45e360191bb326" }, + { 0x0a0dc686, "2.0b1", "f657dc8ff25e93b877305bbcfc45e360191bb326" }, + { 0x0a0dc686, "2.0b1", "1b9fd0cbd914947cc421ba7e45aa093d7ba9af00" }, + { 0x0a0dc687, "2.0b1", "1b9fd0cbd914947cc421ba7e45aa093d7ba9af00" }, + { 0x0a0dc687, "2.0b1", "1f1a156ed5af8f7a50ce05fc85f85423a24f2aa4" }, + { 0x0a0dc688, "2.0b1", "1f1a156ed5af8f7a50ce05fc85f85423a24f2aa4" }, + { 0x0a0deadc, "2.1a1", "fd8c7203251ff37dbb397f7d423ae41f16a03c68" }, + { 0x0a0deadd, "2.1a1", "fd8c7203251ff37dbb397f7d423ae41f16a03c68" }, + { 0x0a0deb2a, "2.1a2", "0076e8d28f9eba9eff4508696dc33730af2b4001" }, + { 0x0a0deb2b, "2.1a2", "0076e8d28f9eba9eff4508696dc33730af2b4001" }, + { 0x0a0dec04, "2.2a0", "32e7d0898eab85de8134f932680a85c6e7abcec0" }, + { 0x0a0dec05, "2.2a0", "32e7d0898eab85de8134f932680a85c6e7abcec0" }, + { 0x0a0ded2d, "2.2a1", "09df3254b49d7c9306585302fe815ab0bdb53439" }, + { 0x0a0ded2e, "2.2a1", "09df3254b49d7c9306585302fe815ab0bdb53439" }, + { 0x0a0df231, "2.3a0", "abedb2418f6231adf24205092a59996f1f1e4c02" }, + { 0x0a0df232, "2.3a0", "abedb2418f6231adf24205092a59996f1f1e4c02" }, + { 0x0a0df23b, "2.3a0", "d6ae544acd24a1f72ce00534fed464fde91ee504" }, + { 0x0a0df23c, "2.3a0", "d6ae544acd24a1f72ce00534fed464fde91ee504" }, + { 0x0a0df245, "2.3a0", "cf5928fab108de9fbe02632d07176c717c2b3aa7" }, + { 0x0a0df246, "2.3a0", "cf5928fab108de9fbe02632d07176c717c2b3aa7" }, + { 0x0a0df24f, "2.4a0", 
"adb42a71169604d3609ac2fbdb64cf8cd1c7250b" }, + { 0x0a0df250, "2.4a0", "adb42a71169604d3609ac2fbdb64cf8cd1c7250b" }, + { 0x0a0df259, "2.4a0", "3df36e2e5ddc1149af8eb52e20bc225d806236d4" }, + { 0x0a0df25a, "2.4a0", "3df36e2e5ddc1149af8eb52e20bc225d806236d4" }, + { 0x0a0df263, "2.4a2", "1f9b9c226b43ed20cb61e6d21aea6cb966e8bcd3" }, + { 0x0a0df264, "2.4a2", "1f9b9c226b43ed20cb61e6d21aea6cb966e8bcd3" }, + { 0x0a0df26d, "2.4a3", "2b49b4a85d9eb4a4cfa9f29c21d649c383945671" }, + { 0x0a0df26e, "2.4a3", "2b49b4a85d9eb4a4cfa9f29c21d649c383945671" }, + { 0x0a0df277, "2.5a0", "44e3f21f052590ddfabc12909af5188a4cd89d8c" }, + { 0x0a0df278, "2.5a0", "44e3f21f052590ddfabc12909af5188a4cd89d8c" }, + { 0x0a0df281, "2.5a0", "eb15cdd4a2f1001792074ca0789026989452ff82" }, + { 0x0a0df282, "2.5a0", "eb15cdd4a2f1001792074ca0789026989452ff82" }, + { 0x0a0df28b, "2.5a0", "b6d2f6fd3e116b9b9fe95bc982ac763c359ff103" }, + { 0x0a0df28c, "2.5a0", "b6d2f6fd3e116b9b9fe95bc982ac763c359ff103" }, + { 0x0a0df28c, "2.5a0", "5c36c222e7ca5310e5cc8b8db283bd669d1e24d4" }, + { 0x0a0df28d, "2.5a0", "5c36c222e7ca5310e5cc8b8db283bd669d1e24d4" }, + { 0x0a0df295, "2.5b2", "2c3ec720aa7beb0da4002b847cc5ed7dc782566c" }, + { 0x0a0df296, "2.5b2", "2c3ec720aa7beb0da4002b847cc5ed7dc782566c" }, + { 0x0a0df29f, "2.5b2", "b745b3fd66a649a5fa540bdf47971c26af0a959e" }, + { 0x0a0df2a0, "2.5b2", "b745b3fd66a649a5fa540bdf47971c26af0a959e" }, + { 0x0a0df2a9, "2.5c3", "b90a8b0395bd43fd193842451d0c49573b4d7166" }, + { 0x0a0df2aa, "2.5c3", "b90a8b0395bd43fd193842451d0c49573b4d7166" }, + { 0x0a0df2b3, "2.6a0", "8dcb882ec3e1aac7d336a40aa64ec66561fc3dec" }, + { 0x0a0df2b4, "2.6a0", "8dcb882ec3e1aac7d336a40aa64ec66561fc3dec" }, + { 0x0a0df2bd, "2.6a0", "3985d7e2067db75f170e0891994b0fd70963e40b" }, + { 0x0a0df2be, "2.6a0", "3985d7e2067db75f170e0891994b0fd70963e40b" }, + { 0x0a0df2c7, "2.6a0", "07aa19170a673da6b3e8c4c66bfd868b2f90c0e4" }, + { 0x0a0df2c8, "2.6a0", "07aa19170a673da6b3e8c4c66bfd868b2f90c0e4" }, + { 0x0a0df2d1, "2.6a1+", 
"343597c7d682b3552580352deddd0cdb36978a04" }, + { 0x0a0df2d2, "2.6a1+", "343597c7d682b3552580352deddd0cdb36978a04" }, + { 0x0a0df2d3, "2.6a1+", "eac41f90296c69c6d07d29f1feb453a4c0e400d1" }, + { 0x0a0df2d4, "2.6a1+", "eac41f90296c69c6d07d29f1feb453a4c0e400d1" }, + { 0x0a0df2db, "2.7a0", "94e79d78dff0dfb5c53f49842c7df65ad5b79e66" }, + { 0x0a0df2dc, "2.7a0", "94e79d78dff0dfb5c53f49842c7df65ad5b79e66" }, + { 0x0a0df2e5, "2.7a0", "ef8fe90886968b1eb468cb91ebae103f773fa17f" }, + { 0x0a0df2e6, "2.7a0", "ef8fe90886968b1eb468cb91ebae103f773fa17f" }, + { 0x0a0df2ef, "2.7a0", "145376df3ad728f7052fdd8b6eba600a8317fece" }, + { 0x0a0df2f0, "2.7a0", "145376df3ad728f7052fdd8b6eba600a8317fece" }, + { 0x0a0df2f9, "2.7a2+", "c2fdf25329ff30cf8d68c0c0e7cf479d7b203745" }, + { 0x0a0df2fa, "2.7a2+", "c2fdf25329ff30cf8d68c0c0e7cf479d7b203745" }, + { 0x0a0df303, "2.7a2+", "edfed0e32cedf3b84c6e999052486a750a3f5bee" }, + { 0x0a0df304, "2.7a2+", "edfed0e32cedf3b84c6e999052486a750a3f5bee" }, + { 0x0a0d0bb8, "3.0x", "49c6eb688906b1dddabf578f08129e6729d6151f" }, + { 0x0a0d0bb9, "3.0x", "49c6eb688906b1dddabf578f08129e6729d6151f" }, + { 0x0a0d0bc2, "3.0x", "f87a3e61853d72b1d133992f991e397b31aac8e8" }, + { 0x0a0d0bc3, "3.0x", "f87a3e61853d72b1d133992f991e397b31aac8e8" }, + { 0x0a0d0bcc, "3.0x", "d0b83c4630c0924df661063543f3c5478c8c35ac" }, + { 0x0a0d0bcd, "3.0x", "d0b83c4630c0924df661063543f3c5478c8c35ac" }, + { 0x0a0d0bd6, "3.0x", "1cce0526d9b0a53f4ff95713dde153dc70dae2dc" }, + { 0x0a0d0bd7, "3.0x", "1cce0526d9b0a53f4ff95713dde153dc70dae2dc" }, + { 0x0a0d0be0, "3.0x", "7a35d3d1ac5a301ef3dc52f9140844f0422011a5" }, + { 0x0a0d0be1, "3.0x", "7a35d3d1ac5a301ef3dc52f9140844f0422011a5" }, + { 0x0a0d0bea, "3.0x", "793e0323d4d65bfc89b40d78162cd771c575a18e" }, + { 0x0a0d0beb, "3.0x", "793e0323d4d65bfc89b40d78162cd771c575a18e" }, + { 0x0a0d0bf4, "3.0x", "19f7ff443718f7a9da1aea9edbf00a135f860883" }, + { 0x0a0d0bf5, "3.0x", "19f7ff443718f7a9da1aea9edbf00a135f860883" }, + { 0x0a0d0bfe, "3.0a1", 
"a89d469e1ff77716914ce1a4244fa529a71ce68a" }, + { 0x0a0d0bff, "3.0a1", "a89d469e1ff77716914ce1a4244fa529a71ce68a" }, + { 0x0a0d0c08, "3.0a1", "19319e70fc3edbb45b2d007161c1b3a1de094181" }, + { 0x0a0d0c09, "3.0a1", "19319e70fc3edbb45b2d007161c1b3a1de094181" }, + { 0x0a0d0c12, "3.0a1+", "0d462d789b18ec6a59ebe2116688d5b6985c215d" }, + { 0x0a0d0c13, "3.0a1+", "0d462d789b18ec6a59ebe2116688d5b6985c215d" }, + { 0x0a0d0c1c, "3.0a2", "4dc01402d78afe2c9b4a4bd8004eb08e2647335d" }, + { 0x0a0d0c1d, "3.0a2", "4dc01402d78afe2c9b4a4bd8004eb08e2647335d" }, + { 0x0a0d0c1e, "3.0a2+", "73e1bf179a01ad7824ff5aa2b29ce068a457cd67" }, + { 0x0a0d0c1f, "3.0a2+", "73e1bf179a01ad7824ff5aa2b29ce068a457cd67" }, + { 0x0a0d0c26, "3.0a3+", "832c820e9d144cb76c8778ad6fcffe232b1f5c46" }, + { 0x0a0d0c27, "3.0a3+", "832c820e9d144cb76c8778ad6fcffe232b1f5c46" }, + { 0x0a0d0c3a, "3.0a5+", "212a1fee6bf93f8b74f81dd3567bf964e627ea20" }, + { 0x0a0d0c3b, "3.0a5+", "212a1fee6bf93f8b74f81dd3567bf964e627ea20" }, + { 0x0a0d0c44, "3.1a0", "2ee4653927f72f9bb3ff14b3083d4a203d684dfc" }, + { 0x0a0d0c45, "3.1a0", "2ee4653927f72f9bb3ff14b3083d4a203d684dfc" }, + { 0x0a0d0c4e, "3.1a0", "643d8d4fc8ebcc69155d3416357aadca9c053388" }, + { 0x0a0d0c4f, "3.1a0", "643d8d4fc8ebcc69155d3416357aadca9c053388" }, + { 0x0a0d0c58, "3.2a0", "3aaf2e065db05401803705ed4bfa3fd2f9030df8" }, + { 0x0a0d0c59, "3.2a0", "3aaf2e065db05401803705ed4bfa3fd2f9030df8" }, + { 0x0a0d0c62, "3.2a1+", "72523121127327c022096d30e7b28a4a5a89495d" }, + { 0x0a0d0c6c, "3.2a2+", "252895d491570d5a27452809b582717be409b24d" }, + { 0x0a0d0c76, "3.3a0", "9a6d9ac6fb2e1bb15bbb4e8c2a6c939d07088477" }, + { 0x0a0d0c80, "3.3.0a0", "e1dbc72bd97f36c1aed7e3ba2a58278f4da807be" }, + { 0x0a0d0c8a, "3.3.0a0", "87331661042b89022f6f49506ae9c1ae459a95be" }, + { 0x0a0d0c94, "3.3.0a1+", "c0a6569fdad624cc89cdd24b68331dc2a9b64827" }, + { 0x0a0d0c9e, "3.3.0a3+", "96ab78ef82a775da11a538fc47aebe70d9c34f04" }, + { 0x0a0d0ca8, "3.4.0a0", "d296cf1600a8c2c7098737944b5ee793b67a6883" }, + { 
0x0a0d0cb2, "3.4.0a0", "2528e4aea33801b40ec902a77b5451ebc925a331" }, + { 0x0a0d0cbc, "3.4.0a0", "cf65c7a75f558e6cd68903f4c2800f6b9574a35f" }, + { 0x0a0d0cc6, "3.4.0a0", "3d858f1eef546e6adb2c073be9384065cfc2537e" }, + { 0x0a0d0cd0, "3.4.0a0", "6db3741e59be2b6427032a0f51d8a06625d64c28" }, + { 0x0a0d0cda, "3.4.0a3+", "35b384ed594b4618a7ea345dad7d2149eabcf3e7" }, + { 0x0a0d0ce4, "3.4.0a3+", "bb2affc1e317a85c4edfe450c119bdec851a08ee" }, + { 0x0a0d0cee, "3.4.0rc1+", "e301a515f8f4c5cdde3b9726ec298bd4de1af963" }, + { 0x0a0d0cf8, "3.5.0a0", "c553d8f72d659b3bc14fe326662ba53ca97bf38a" }, + { 0x0a0d0d02, "3.5.0a4+", "a65f685ba8c011bf117cadf26c13ab7a0cbb122c" }, + { 0x0a0d0d0c, "3.5.0b1+", "6f05f83c7010764aff53793fbff162c42018f57e" }, + { 0x0a0d0d16, "3.5.0b2+", "7a0a1a4ac63942f4ea3c7804e323adf668d40a21" }, + { 0x0a0d0d20, "3.6.0a0", "1ddeb2e175df5009571b3632a709c6b74995cb29" }, + { 0x0a0d0d21, "3.6.0a0", "775b74e0e103f816382a0fc009b6ac51ea956750" }, + { 0x0a0d0d33, "v3.6.0", "5c4568a05a0a62b5947c55f68f9f2ecfb90a4f12" }, + { 0x0a0d0d2c, "v3.6.0a2", "fa42893546010a0c649ba0d85d41a8bb980086f0" }, + { 0x0a0d0d2c, "v3.6.0a3", "a731a68cf6611b0b23da758d735f056ff661757e" }, + { 0x0a0d0d2c, "v3.6.0a4", "b87d6000f38e6158bbe1d9df5c6136f27aeace12" }, + { 0x0a0d0d31, "v3.6.0b1", "beb798cad6a6013d5a606ea0cd19640b35b468ea" }, + { 0x0a0d0d32, "v3.6.0b2", "7e16af499b92def6fc4ab1bbcecd2c055a38de29" }, + { 0x0a0d0d32, "v3.6.0b3", "0ef256c2b09cca0990d8d3767de943096dd61a07" }, + { 0x0a0d0d32, "v3.6.0b4", "38c508a00c32a6ce45a10b705adf8c818fa49dcd" }, + { 0x0a0d0d33, "v3.6.0rc1", "ad2c2d380e7ebbd31712ceb59e87e84b8a7c131d" }, + { 0x0a0d0d33, "v3.6.0rc2", "f7b280956df077b90c5983eeabc8accdbb0aeb8d" }, + { 0x0a0d0d33, "v3.6.1", "208f61cc7a5dbc9879ae6e5c2f95891e270f09ef" }, + { 0x0a0d0d33, "v3.6.10", "ff1e26c1da1d89c5ddb3bfdfdbe5bcdf68b14990" }, + { 0x0a0d0d33, "v3.6.10rc", "6e40b45ef295e91febc75b9597033c18425cc36f" }, + { 0x0a0d0d33, "v3.6.1rc1", "ef16e250bd9864c4dd07e9d128ea871a7604c0f6" }, + { 
0x0a0d0d33, "v3.6.2", "84d6b204565614fc9ae672fb5b8c6f2fd13afd34" }, + { 0x0a0d0d33, "v3.6.2rc1", "c34b7ba8183311504042966c658116083c0fd1ec" }, + { 0x0a0d0d33, "v3.6.2rc2", "62922b8b6550c0e80580e9a79dcce9d792358300" }, + { 0x0a0d0d33, "v3.6.3", "7a8e13423f3cc8cbacece5b8d40c9a78ed2ce468" }, + { 0x0a0d0d33, "v3.6.3rc1", "5dea35ed4d74f4a660e0cb848c76cb91a80ef284" }, + { 0x0a0d0d33, "v3.6.4", "f40976d661609cba85458040512ac2bbceeb3756" }, + { 0x0a0d0d33, "v3.6.4rc1", "73f3fb83724c0d3cc7361e57988196d657e21933" }, + { 0x0a0d0d33, "v3.6.5", "0a295395451a7f0366995f7c645da35255d640d7" }, + { 0x0a0d0d33, "v3.6.5rc1", "87c4b938b9d22bc17113d9548a11c24b6bf44490" }, + { 0x0a0d0d33, "v3.6.6", "5a62cf854bec500e3ee252624e39dbdaf66362a0" }, + { 0x0a0d0d33, "v3.6.6rc1", "9d7889210ba48b6fde9fac464fff1725d2dbdc1d" }, + { 0x0a0d0d33, "v3.6.7", "5ebb4a6fe4fc0981d427c9d417d12b6d92cb9fea" }, + { 0x0a0d0d33, "v3.6.7rc1", "9883b7245756a44f5c51870abb32d711dfc46df7" }, + { 0x0a0d0d33, "v3.6.7rc2", "923364c5da68e958d69383a56036ca3bb4def006" }, + { 0x0a0d0d33, "v3.6.8", "50dca05a9c8574e293a5486bb36f0e41f3786628" }, + { 0x0a0d0d33, "v3.6.8rc1", "036b0b3833a10aef6e326d8369524fd61f49ffc7" }, + { 0x0a0d0d33, "v3.6.9", "3406378668cca081c0747e765cfe9dc80bdefa89" }, + { 0x0a0d0d33, "v3.6.9rc1", "734d1d9fbb7cc685b13a11f081e6afa35df3b27f" }, + { 0x0a0d0d3e, "v3.7.0a1", "c9a8ad52ed621cd429361c12bf96d019e79eac84" }, + { 0x0a0d0d3f, "v3.7.0a2", "02ffd31e928bfb492ec4f23635590df36ddda134" }, + { 0x0a0d0d3f, "v3.7.0a3", "4ccd273feeb9692d7171d2923969359e58c96498" }, + { 0x0a0d0d40, "v3.7.0a4", "682d0dbdd1e7436f54a9a8f57e22cbfc5147c4c3" }, + { 0x0a0d0d41, "v3.7.0b1", "1401315d067812555e5f45d2111cdf4a2564fcef" }, + { 0x0a0d0d41, "v3.7.0b2", "511db7b8ecceb74fb2e738ce41e5394516b871f8" }, + { 0x0a0d0d41, "v3.7.0b3", "aa8b7b7c6c1dbe44789745108396b20b85dbec39" }, + { 0x0a0d0d41, "v3.7.0b4", "58bb10ac350a934a2cd75506d6cc70cdb2e0ee3b" }, + { 0x0a0d0d42, "v3.7.0", "ae1f6af15f3e4110616801e235873e47fd7d1977" }, + { 
0x0a0d0d42, "v3.7.0b5", "6f05d12b03c4681d6488645e027b5bc5c19ce406" }, + { 0x0a0d0d42, "v3.7.0rc1", "2cbc466248a1a5b6b2639d6cf63945e71446b857" }, + { 0x0a0d0d42, "v3.7.1", "520d6b8e38c078e5560597592c790ce160c8d75c" }, + { 0x0a0d0d42, "v3.7.1rc1", "c05eb2f1bba48c803d54ce12fc00de87b69d5e06" }, + { 0x0a0d0d42, "v3.7.1rc2", "a2644156afbb499582326df7c7e2ec95d6a3373e" }, + { 0x0a0d0d42, "v3.7.2", "e15c3ed43c574400443edc785b5b44b812df0407" }, + { 0x0a0d0d42, "v3.7.2rc1", "fc9123bd8b773d25ba03f04a85139caf53a91715" }, + { 0x0a0d0d42, "v3.7.3", "0b8794d19c9f51451155b1f7ad235aa046632c8c" }, + { 0x0a0d0d42, "v3.7.3rc1", "cb702f0f6b05d1e6d1e1e4449a1e61cd535617d8" }, + { 0x0a0d0d42, "v3.7.4", "e4539bae82b5dc645fd99cbc869d2fba3067a4ee" }, + { 0x0a0d0d42, "v3.7.4rc1", "da3644267b7c7614e55b7e33228ce31ce2749f2a" }, + { 0x0a0d0d42, "v3.7.4rc2", "50745cbf2d7ca594e256fa96ea00a93a53f2ac96" }, + { 0x0a0d0d42, "v3.7.5", "dc8ada53cff5e8e8f9c20587ab6afc2152b2888f" }, + { 0x0a0d0d42, "v3.7.5rc1", "4d4c87da17c34d9eb169801d6bc01158c00171dc" }, + { 0x0a0d0d42, "v3.7.6", "ef8e77cea43fb83c1398d058b8f639fede8fba76" }, + { 0x0a0d0d42, "v3.7.6rc1", "73ffb22c7f371ff4ea04fdee86a8e71ce1ba56f9" }, + { 0x0a0d0d49, "v3.8.0a1", "8cb4789728241d25bca2c15568317c6655389f1b" }, + { 0x0a0d0d49, "v3.8.0a2", "c31af9d3dd4560d12dfe943347379f0fc6f47a50" }, + { 0x0a0d0d49, "v3.8.0a3", "d53bead39cd475e581c13307b7838160e603a6fd" }, + { 0x0a0d0d52, "v3.8.0a4", "1b3497f679823b0368fabc95ccd1a1c24b8d429e" }, + { 0x0a0d0d52, "v3.8.0b1", "5191895b326e520473c501736239271685a2a077" }, + { 0x0a0d0d53, "v3.8.0b2", "0bb25c6b3dc78355870758bdf88d1d543cdf4203" }, + { 0x0a0d0d54, "v3.8.0b3", "108336b63a31356dc9c1f35f91843d6893e26e00" }, + { 0x0a0d0d55, "v3.8.0", "5d714034866ce1e9f89dc141fe4cc0b50cf20a8e" }, + { 0x0a0d0d55, "v3.8.0b4", "122a9b489cfe94b04801d057e5b510d51710fab3" }, + { 0x0a0d0d55, "v3.8.0rc1", "41f60748364a6afda7360e6cc6e846af569b7ab9" }, + { 0x0a0d0d55, "v3.8.1", "d2529ef779ce819a5ea833b264e47440efcbac29" }, + { 
0x0a0d0d55, "v3.8.1rc1", "827f6399a61be9d14f8ccfa5be73a6030ec45f1d" },
+	{ 0x0a0d0d5c, "v3.9.0a1", "fd757083df79c21eee862e8d89aeefefe45f64a0" },
+	{ 0x0a0d0d5e, "v3.9.0a2", "bf0a31c8fb782e03e9530c2488ab2d0e29fc0495" },
+	{ 0x0a0d0d60, "v3.9.0a3", "a36ea266c6470f6c65416f24de4497637e59af23" },
+};
+
+/* Number of rows in versions[]. */
+#define PYC_VERSIONS_COUNT (sizeof (versions) / sizeof (*versions))
+
+/*
+ * Look up the version record for a .pyc magic number.
+ *
+ * Returns a copy of the first versions[] row whose magic equals `magic`.
+ * When no row matches, the returned record has magic == (ut32)-1 and NULL
+ * version/revision strings.
+ */
+struct pyc_version get_pyc_version(ut32 magic) {
+	const struct pyc_version fail = { (ut32)-1, NULL, NULL };
+	size_t i;
+	for (i = 0; i < PYC_VERSIONS_COUNT; i++) {
+		if (versions[i].magic == magic) {
+			return versions[i];
+		}
+	}
+	return fail;
+}
+
+/*
+ * Position of the first versions[] row for which (magic & mask) == value,
+ * or PYC_VERSIONS_COUNT when there is none.
+ */
+static size_t pyc_find_version_index(ut32 value, ut32 mask) {
+	size_t i;
+	for (i = 0; i < PYC_VERSIONS_COUNT; i++) {
+		if ((versions[i].magic & mask) == value) {
+			break;
+		}
+	}
+	return i;
+}
+
+/*
+ * Tell whether target_magic lies between two entries of versions[].
+ *
+ * target_magic is matched against the full 32-bit magic; lower and upper
+ * are matched against the low 16 bits only. The comparison is made on table
+ * positions (first matching row each), not on the numeric values, so the
+ * answer depends on the order of versions[].
+ *
+ * `error` is sticky: when *error is already set the function returns false
+ * without searching. It is set to true, and a message is printed with
+ * eprintf, when any of the three values is missing from the table.
+ */
+bool magic_int_within(ut32 target_magic, ut32 lower, ut32 upper, bool *error) {
+	if (*error) {
+		return false;
+	}
+	const size_t ti = pyc_find_version_index (target_magic, 0xffffffffU);
+	if (ti == PYC_VERSIONS_COUNT) {
+		*error = true;
+		eprintf ("target_magic not found in versions[]\n");
+		return false;
+	}
+
+	const size_t li = pyc_find_version_index (lower, 0xffff);
+	if (li == PYC_VERSIONS_COUNT) {
+		*error = true;
+		eprintf ("lower magic_int not found in versions[]\n");
+		return false;
+	}
+
+	const size_t ui = pyc_find_version_index (upper, 0xffff);
+	if (ui == PYC_VERSIONS_COUNT) {
+		*error = true;
+		eprintf ("upper magic_int not found in versions[]\n");
+		return false;
+	}
+
+	return (li <= ti) && (ti <= ui);
+}
+
+/* ASCII decimal digit test that is safe for any char value. */
+static bool pyc_is_digit(char c) {
+	return '0' <= c && c <= '9';
+}
+
+/*
+ * Convert the leading "major.minor" of a version string to a double.
+ *
+ * Any non-digit prefix is skipped ("v3.6.10" -> 3.6, "0.9.4 beta" -> 0.9)
+ * and everything after the minor digits is ignored. Returns 0 for NULL or
+ * for a string that holds no digits.
+ *
+ * Every scan stops at the string terminator and at the end of buf, so a
+ * malformed version string can neither run off the input nor overflow buf.
+ *
+ * NOTE(review): the minor part is parsed as a decimal fraction, so "3.10"
+ * yields 3.1, which compares below 3.6.
+ */
+double version2double(const char *version) {
+	char buf[20];
+	size_t len = 0;
+	double result = 0;
+
+	if (!version) {
+		return 0;
+	}
+	while (*version && !pyc_is_digit (*version)) {
+		version++;
+	}
+	while (pyc_is_digit (*version) && len < sizeof (buf) - 1) {
+		buf[len++] = *version++;
+	}
+	if (*version == '.' && len < sizeof (buf) - 1) {
+		buf[len++] = *version++;
+		while (pyc_is_digit (*version) && len < sizeof (buf) - 1) {
+			buf[len++] = *version++;
+		}
+	}
+	buf[len] = '\x00';
+	/* sscanf keeps the exact value a literal such as 3.6 would have. */
+	if (sscanf (buf, "%lf", &result) != 1) {
+		return 0;
+	}
+	return result;
+}
diff --git a/libr/bin/format/pyc/pyc_magic.h b/libr/bin/format/pyc/pyc_magic.h
new file mode 100644
index 0000000000..b6e04663cd
--- /dev/null
+++ b/libr/bin/format/pyc/pyc_magic.h
@@ -0,0
+1,21 @@
+/* radare - LGPL3 - Copyright 2016-2020 - c0riolis, x0urc3 */
+
+#ifndef PYC_MAGIC_H
+#define PYC_MAGIC_H
+
+#include <r_types.h>
+
+/* One known .pyc magic number and the release it identifies. */
+struct pyc_version {
+	ut32 magic; /* value looked up by get_pyc_version () */
+	char *version; /* release name, e.g. "3.6.0a0" */
+	char *revision; /* revision string printed as "rev" in the bin info */
+};
+
+struct pyc_version get_pyc_version(ut32 magic);
+
+double version2double(const char *version);
+
+bool magic_int_within(ut32 target_magic, ut32 lower, ut32 upper, bool *error);
+
+#endif
diff --git a/libr/bin/meson.build b/libr/bin/meson.build
index 79a9557998..7589643700 100644
--- a/libr/bin/meson.build
+++ b/libr/bin/meson.build
@@ -51,6 +51,7 @@ r_bin_sources = [
   'p/bin_pebble.c',
   'p/bin_prg.c',
   'p/bin_psxexe.c',
+  'p/bin_pyc.c',
   'p/bin_qnx.c',
   'p/bin_sfc.c',
   'p/bin_smd.c',
@@ -104,6 +105,9 @@ r_bin_sources = [
   'format/pe/pe64_write.c',
   'format/pe/pe_write.c',
   'format/pe/pemixed.c',
+  'format/pyc/marshal.c',
+  'format/pyc/pyc.c',
+  'format/pyc/pyc_magic.c',
   'format/te/te.c',
   'format/wasm/wasm.c',
   'format/zimg/zimg.c',
diff --git a/libr/bin/p/bin_pyc.c b/libr/bin/p/bin_pyc.c
new file mode 100644
index 0000000000..3915fda8ed
--- /dev/null
+++ b/libr/bin/p/bin_pyc.c
@@ -0,0 +1,179 @@
+/* radare - LGPL3 - Copyright 2016-2020 - c0riolis, x0urc3 */
+
+#include <r_bin.h>
+#include "../format/pyc/pyc.h"
+
+// XXX: to not use globals
+
+/* Offset of the code-object type byte found by get_entrypoint (). */
+static ut64 code_start_offset = 0;
+/* Version record looked up by check_buffer (). */
+static struct pyc_version version;
+/* used from marshall.c */
+RList *interned_table = NULL;
+/* Section list built by symbols () and returned by sections (). */
+static RList *sections_cache = NULL;
+
+/*
+ * Accept the buffer when its first four bytes are a magic known to
+ * get_pyc_version (). The record is stored in `version` for the other
+ * callbacks of this plugin.
+ */
+static bool check_buffer(RBuffer *b) {
+	ut8 raw[4] = { 0 };
+	if (r_buf_size (b) <= sizeof (raw)) {
+		return false;
+	}
+	if (r_buf_read_at (b, 0, raw, sizeof (raw)) != (st64)sizeof (raw)) {
+		return false;
+	}
+	/* Decode as little-endian explicitly rather than reading into a ut32,
+	 * so the lookup does not depend on host byte order. */
+	version = get_pyc_version (r_read_le32 (raw));
+	return version.magic != UT32_MAX;
+}
+
+/* Does not fill *bin_obj; it only re-runs the magic check. */
+static bool load_buffer(RBinFile *bf, void **bin_obj, RBuffer *buf, ut64 loadaddr, Sdb *sdb) {
+	return check_buffer (buf);
+}
+
+/*
+ * Find the top-level code object.
+ *
+ * Probes offsets 0x8, 0xc and 0x10 for a byte that pyc_is_code () accepts.
+ * On a hit the offset is saved in code_start_offset and the result of
+ * get_code_object_addr () is returned, or the probe offset itself when
+ * that result is 0. Returns 0 when no probe matches.
+ */
+static ut64 get_entrypoint(RBuffer *buf) {
+	ut8 b;
+	ut64 result;
+	int addr;
+	for (addr = 0x8; addr <= 0x10; addr += 0x4) {
+		/* Skip probes that cannot be read instead of testing garbage. */
+		if (r_buf_read_at (buf, addr, &b, sizeof (b)) != (st64)sizeof (b)) {
+			continue;
+		}
+		if (!pyc_is_code (b, version.magic)) {
+			continue;
+		}
+		code_start_offset = addr;
+		r_buf_seek (buf, addr + 1, R_BUF_SET);
+		result = get_code_object_addr (buf, version.magic);
+		return result? result: (ut64)addr;
+	}
+	return 0;
+}
+
+/* Describe the file using the version record found by check_buffer (). */
+static RBinInfo *info(RBinFile *arch) {
+	RBinInfo *ret = R_NEW0 (RBinInfo);
+	if (!ret) {
+		return NULL;
+	}
+	ret->file = arch->file? strdup (arch->file): NULL;
+	ret->type = r_str_newf ("Python %s byte-compiled file", version.version);
+	ret->bclass = strdup ("Python byte-compiled file");
+	ret->rclass = strdup ("pyc");
+	ret->arch = strdup ("pyc");
+	ret->machine = r_str_newf ("Python %s VM (rev %s)", version.version,
+		version.revision);
+	ret->os = strdup ("any");
+	ret->bits = version2double (version.version) < 3.6? 16: 8;
+	ret->cpu = strdup (version.version); // pass version info in cpu, Asm plugin will get it
+	return ret;
+}
+
+/* Sections are produced as a side product of symbols (); NULL before that. */
+static RList *sections(RBinFile *arch) {
+	return sections_cache;
+}
+
+/* Return a one-element list holding the entrypoint (paddr == vaddr). */
+static RList *entries(RBinFile *arch) {
+	RList *ret = r_list_new ();
+	if (!ret) {
+		return NULL;
+	}
+	RBinAddr *addr = R_NEW0 (RBinAddr);
+	if (!addr) {
+		/* Do not leak the list when the entry cannot be allocated. */
+		r_list_free (ret);
+		return NULL;
+	}
+	ut64 entrypoint = get_entrypoint (arch->buf);
+	addr->paddr = entrypoint;
+	addr->vaddr = entrypoint;
+	r_buf_seek (arch->buf, entrypoint, R_BUF_SET);
+	r_list_append (ret, addr);
+	return ret;
+}
+
+static ut64 baddr(RBinFile *bf) {
+	return 0;
+}
+
+/*
+ * Parse the code objects starting at code_start_offset.
+ *
+ * Stores a two-element list (cobjs, interned_table) in arch->o->bin_obj,
+ * caches the section list for sections () and returns the symbol list.
+ * Returns NULL when a list cannot be allocated.
+ */
+static RList *symbols(RBinFile *arch) {
+	RList *shared = r_list_new ();
+	if (!shared) {
+		return NULL;
+	}
+	RList *cobjs = r_list_new ();
+	if (!cobjs) {
+		r_list_free (shared);
+		return NULL;
+	}
+	interned_table = r_list_new ();
+	if (!interned_table) {
+		r_list_free (shared);
+		r_list_free (cobjs);
+		return NULL;
+	}
+	r_list_append (shared, cobjs);
+	r_list_append (shared, interned_table);
+	arch->o->bin_obj = shared;
+	/* From here on the lists above stay reachable through bin_obj. */
+	RList *sects = r_list_new ();
+	if (!sects) {
+		return NULL;
+	}
+	RList *syms = r_list_new ();
+	if (!syms) {
+		r_list_free (sects);
+		return NULL;
+	}
+	RBuffer *buffer = arch->buf;
+	r_buf_seek (buffer, code_start_offset, R_BUF_SET);
+	pyc_get_sections_symbols (sects, syms, cobjs, buffer, version.magic);
+	sections_cache = sects;
+	return syms;
+}
+
+RBinPlugin r_bin_plugin_pyc = {
+	.name = "pyc",
+	.desc = "Python byte-compiled file plugin",
+	.license = "LGPL3",
+	.info = &info,
+	.load_buffer = &load_buffer,
+	.check_buffer = &check_buffer,
+	.entries = &entries,
+	.sections = &sections,
+	.baddr = &baddr,
+	.symbols = &symbols,
+};
+
+#ifndef R2_PLUGIN_INCORE
+R_API RLibStruct radare_plugin = {
+	.type = R_LIB_TYPE_BIN,
+	.data = &r_bin_plugin_pyc,
+	.version = R2_VERSION,
+};
+#endif
diff --git a/libr/bin/p/pyc.mk b/libr/bin/p/pyc.mk
new file mode 100644
index 0000000000..6cb3ce6873
--- /dev/null
+++ b/libr/bin/p/pyc.mk
@@ -0,0 +1,14 @@
+OBJ_PYC=bin_pyc.o
+OBJ_PYC+=../format/pyc/marshal.o
+OBJ_PYC+=../format/pyc/pyc_magic.o
+OBJ_PYC+=../format/pyc/pyc.o
+
+STATIC_OBJ+=${OBJ_PYC}
+TARGET_PYC=bin_pyc.${EXT_SO}
+CFLAGS+=-I../format/pyc/
+
+ALL_TARGETS+=${TARGET_PYC}
+
+${TARGET_PYC}: ${OBJ_PYC}
+	${CC} ${CFLAGS} -o ${TARGET_PYC} ${OBJ_PYC} $(R2_CFLAGS) $(R2_LDFLAGS) -lr_util
+
diff --git a/libr/include/r_anal.h b/libr/include/r_anal.h
index 3a0a67f944..a1b617d5f8 100644
--- a/libr/include/r_anal.h
+++ b/libr/include/r_anal.h
@@ -2068,6 +2068,7 @@ extern RAnalPlugin r_anal_plugin_xap;
 extern RAnalPlugin r_anal_plugin_xcore_cs;
 extern RAnalPlugin r_anal_plugin_xtensa;
 extern RAnalPlugin r_anal_plugin_z80;
+extern RAnalPlugin r_anal_plugin_pyc;
 #ifdef __cplusplus
 }
 #endif
diff --git a/libr/include/r_asm.h b/libr/include/r_asm.h
index 18aab339ff..488da15161 100644
--- a/libr/include/r_asm.h
+++ b/libr/include/r_asm.h
@@ -267,6 +267,7 @@ extern RAsmPlugin r_asm_plugin_xap;
 extern RAsmPlugin r_asm_plugin_xcore_cs;
 extern RAsmPlugin r_asm_plugin_xtensa;
 extern RAsmPlugin r_asm_plugin_z80;
+extern RAsmPlugin r_asm_plugin_pyc;
 
 #endif
 
diff --git a/libr/include/r_bin.h b/libr/include/r_bin.h
index 611c2f4117..83715e0c15 100644
--- a/libr/include/r_bin.h
+++ b/libr/include/r_bin.h
@@
-877,6 +877,7 @@ extern RBinPlugin r_bin_plugin_sfc; extern RBinPlugin r_bin_plugin_z64; extern RBinPlugin r_bin_plugin_prg; extern RBinPlugin r_bin_plugin_dmp64; +extern RBinPlugin r_bin_plugin_pyc; #ifdef __cplusplus } diff --git a/plugins.def.cfg b/plugins.def.cfg index f08ceb168b..0da6edabbc 100644 --- a/plugins.def.cfg +++ b/plugins.def.cfg @@ -51,6 +51,7 @@ anal.riscv_cs anal.pic anal.rsp anal.wasm +anal.pyc asm.8051 asm.6502 asm.6502_cs @@ -117,6 +118,7 @@ asm.v810 asm.vax asm.mcs96 asm.pic +asm.pyc bin.any bin.wasm bin.nro @@ -174,6 +176,7 @@ bin_xtr.xtr_sep64 bin_ldr.ldr_linux bin.zimg bin.psxexe +bin.pyc bp.arm bp.bf bp.mips diff --git a/sys/clang-format-diff.py b/sys/clang-format-diff.py index e575e817fd..c0e7ae0668 100755 --- a/sys/clang-format-diff.py +++ b/sys/clang-format-diff.py @@ -107,7 +107,7 @@ def main(): i = 0 while True: # stop iterating when finding the next diff - if lineidx + i >= len(input) or input[lineidx + i].startswith('diff'): + if lineidx + i >= len(input): break debug('lineidx : ' + input[lineidx + i]) @@ -124,7 +124,8 @@ def main(): debug('set range_end: ' + str(start_line + range_line)) lines_by_file.setdefault(filename, []).append([range_start, range_end - 1]) range_start, range_end = None, None - + if input[lineidx + i].startswith('diff'): + break i += 1 # Reformat files containing changes in place.