diff --git a/.gitignore b/.gitignore
index 3fcb8c32fc..eae52c80d2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -56,6 +56,8 @@ libr/include/r_userconf.h
 libr/include/r_version.h
 libr/include/r_version.h.tmp
 shlr/capstone/
+shlr/tree-sitter/
+shlr/radare2-shell-parser/
 shlr/java/out
 shlr/java/out.exe
 shlr/sdb/sdb
@@ -119,4 +121,4 @@ doc/doxygen/html
 __pycache__
 # Dynamic docker building folder to minimize docker context
 .docker_alpine
-.ccls-cache/
\ No newline at end of file
+.ccls-cache/
diff --git a/libr/core/Makefile b/libr/core/Makefile
index ded857d4c3..c63df67e01 100644
--- a/libr/core/Makefile
+++ b/libr/core/Makefile
@@ -14,6 +14,7 @@ OBJS+=task.o panels.o pseudo.o vmarks.o anal_tp.o anal_objc.o blaze.o cundo.o
 OBJS+=esil_data_flow.o
 
 CFLAGS+=-I../../shlr/heap/include
+CFLAGS+=-I../../shlr/tree-sitter/lib/include -I../../shlr/radare2-shell-parser/src/tree_sitter
 CFLAGS+=-DR2_PLUGIN_INCORE -I../../shlr
 
 LDFLAGS+=${DL_LIBS}
@@ -41,6 +42,7 @@ OBJS+=$(STATIC_OBJS)
 #STATIC_OBJS=$(subst ..,p/..,$(subst core_,p/core_,$(STATIC_OBJ)))
 
 include $(TOP)/shlr/gdb/deps.mk
+include $(TOP)/shlr/radare2-shell-parser-deps.mk
 include $(LTOP)/rules.mk
 
 # include plugins
diff --git a/libr/core/cconfig.c b/libr/core/cconfig.c
index 2cecc602dd..bc1950d34d 100644
--- a/libr/core/cconfig.c
+++ b/libr/core/cconfig.c
@@ -2017,6 +2017,14 @@ static bool cb_scrhtml(void *user, void *data) {
 	return true;
 }
 
+/* cfg.newshell: make r_core_cmd() use the tree-sitter based command parser */
+static bool cb_newshell(void *user, void *data) {
+	RConfigNode *node = (RConfigNode *)data;
+	RCore *core = (RCore *)user;
+	core->use_tree_sitter_r2cmd = node->i_value != 0;
+	return true;
+}
+
 static bool cb_scrhighlight(void *user, void *data) {
 	RConfigNode *node = (RConfigNode *) data;
 	r_cons_highlight (node->value);
@@ -3151,6 +3159,7 @@ R_API int r_core_config_init(RCore *core) {
 	SETCB ("cfg.sandbox", "false", &cb_cfgsanbox, "Sandbox mode disables systems and open on upper directories");
 	SETPREF ("cfg.wseek", "false", "Seek after write");
 	SETCB ("cfg.bigendian", "false", &cb_bigendian, "Use little (false) or big (true) endianness");
+	SETCB ("cfg.newshell", "false", &cb_newshell, "Use new commands parser");
 	SETI ("cfg.cpuaffinity", 0, "Run on cpuid");
 
 	/* log */
diff --git a/libr/core/cmd.c b/libr/core/cmd.c
index abb711f723..c251875b09 100644
--- a/libr/core/cmd.c
+++ b/libr/core/cmd.c
@@ -21,12 +21,16 @@
 #include
 #include
 #include
+#include <tree_sitter/api.h>
 #include
 #include
 #if __UNIX__
 #include
 #endif
 
+/* r2cmd grammar entry point; presumably defined in radare2-shell-parser's parser.c */
+TSLanguage *tree_sitter_r2cmd (void);
+
 R_API void r_save_panels_layout(RCore *core, const char *_name);
 R_API void r_load_panels_layout(RCore *core, const char *_name);
 
@@ -4336,7 +4340,199 @@ R_API void run_pending_anal(RCore *core) {
 	}
 }
 
+/* True when the grammar type name of node equals type. */
+static inline bool is_ts_node_type(TSNode node, const char *type) {
+	return !strcmp (ts_node_type (node), type);
+}
+
+static inline bool is_ts_commands(TSNode node) {
+	return is_ts_node_type (node, "commands");
+}
+
+static inline bool is_ts_arged_command(TSNode node) {
+	return is_ts_node_type (node, "arged_command");
+}
+
+static inline bool is_ts_tmp_seek_command(TSNode node) {
+	return is_ts_node_type (node, "tmp_seek_command");
+}
+
+static inline bool is_ts_interpret_command(TSNode node) {
+	return is_ts_node_type (node, "interpret_command");
+}
+
+static bool handle_ts_command(RCore *core, const char *cstr, TSNode node, bool log);
+static bool core_cmd_tsr2cmd(RCore *core, const char *cstr, bool log);
+
+/* Copy bytes [start, end) of cstr into a new string; NULL if r_str_newf() fails. */
+static char *ts_text_dup(const char *cstr, ut32 start, ut32 end) {
+	return r_str_newf ("%.*s", (int)(end - start), cstr + start);
+}
+
+/*
+ * Run an `arged_command` node. Named child 0 is the command name and the
+ * remaining named children are its arguments. The text from the start of the
+ * command to the furthest end byte of any argument is given to r_cmd_call()
+ * as one string. Returns false if r_cmd_call() yields -1.
+ */
+static bool handle_ts_arged_command(RCore *core, const char *cstr, TSNode node) {
+	TSNode command = ts_node_named_child (node, 0);
+	ut32 cmd_start_byte = ts_node_start_byte (command);
+	ut32 cmd_end_byte = ts_node_end_byte (command);
+	/* the precision argument of %.*s must be an int, hence the casts */
+	R_LOG_DEBUG ("command: '%.*s'\n", (int)(cmd_end_byte - cmd_start_byte), cstr + cmd_start_byte);
+
+	/* children are fetched with ts_node_named_child(), so bound the loop with
+	 * the named child count; ts_node_child_count() also counts anonymous nodes */
+	ut32 child_count = ts_node_named_child_count (node);
+	ut32 last_end_byte = cmd_end_byte;
+	ut32 i;
+	for (i = 1; i < child_count; i++) {
+		TSNode arg = ts_node_named_child (node, i);
+		ut32 start_byte = ts_node_start_byte (arg);
+		ut32 end_byte = ts_node_end_byte (arg);
+		if (last_end_byte < end_byte) {
+			last_end_byte = end_byte;
+		}
+		R_LOG_DEBUG ("arg: '%.*s'\n", (int)(end_byte - start_byte), cstr + start_byte);
+	}
+	char *cmd_string = ts_text_dup (cstr, cmd_start_byte, last_end_byte);
+	if (!cmd_string) {
+		return false;
+	}
+	bool res = r_cmd_call (core->rcmd, cmd_string) != -1;
+	free (cmd_string);
+	return res;
+}
+
+/*
+ * Run a `tmp_seek_command` node: named child 0 is the command, named child 1
+ * the offset expression. Seeks to the evaluated offset, runs the command and
+ * seeks back to the offset the core had on entry.
+ */
+static bool handle_ts_tmp_seek_command(RCore *core, const char *cstr, TSNode node, bool log) {
+	TSNode command = ts_node_named_child (node, 0);
+	TSNode offset = ts_node_named_child (node, 1);
+	char *offset_string = ts_text_dup (cstr, ts_node_start_byte (offset), ts_node_end_byte (offset));
+	if (!offset_string) {
+		return false;
+	}
+	ut64 orig_offset = core->offset;
+	R_LOG_DEBUG ("tmp_seek command, command X on tmp_seek %s\n", offset_string);
+	r_core_seek (core, r_num_math (core->num, offset_string), 1);
+	free (offset_string);
+	bool res = handle_ts_command (core, cstr, command, log);
+	r_core_seek (core, orig_offset, 1);
+	return res;
+}
+
+/*
+ * Run an `interpret_command` node: run the wrapped command (named child 0)
+ * through r_core_cmd_str() and execute the returned string as new commands.
+ */
+static bool handle_ts_interpret_command(RCore *core, const char *cstr, TSNode node, bool log) {
+	TSNode command = ts_node_named_child (node, 0);
+	char *cmd_string = ts_text_dup (cstr, ts_node_start_byte (command), ts_node_end_byte (command));
+	if (!cmd_string) {
+		return false;
+	}
+	char *str = r_core_cmd_str (core, cmd_string);
+	R_LOG_DEBUG ("interpret_command cmd_string = '%s', result to interpret = '%s'\n", cmd_string, str? str: "(null)");
+	free (cmd_string);
+	if (!str) {
+		/* nothing to interpret: never hand a NULL string to the parser */
+		return false;
+	}
+	bool res = core_cmd_tsr2cmd (core, str, log);
+	free (str);
+	return res;
+}
+
+/*
+ * Dispatch one command node to the handler of its type, then run the pending
+ * analysis commands. cstr is added to the line history when log is true.
+ * Returns false on failure and for node types without a handler.
+ */
+static bool handle_ts_command(RCore *core, const char *cstr, TSNode node, bool log) {
+	bool ret = false;
+
+	if (log) {
+		r_line_hist_add (cstr);
+	}
+	if (is_ts_arged_command (node)) {
+		ret = handle_ts_arged_command (core, cstr, node);
+	} else if (is_ts_tmp_seek_command (node)) {
+		ret = handle_ts_tmp_seek_command (core, cstr, node, log);
+	} else if (is_ts_interpret_command (node)) {
+		ret = handle_ts_interpret_command (core, cstr, node, log);
+	}
+	/* run pending analysis commands */
+	run_pending_anal (core);
+	return ret;
+}
+
+/*
+ * Run the named children of a `commands` node in order and stop at the first
+ * failure. cstr is the whole input, so it is added to the history once here
+ * rather than once per sub-command (children are run with log disabled).
+ */
+static bool handle_ts_commands(RCore *core, const char *cstr, TSNode node, bool log) {
+	ut32 child_count = ts_node_named_child_count (node);
+	ut32 i;
+
+	R_LOG_DEBUG ("commands with %u childs\n", child_count);
+	if (log && child_count > 0) {
+		r_line_hist_add (cstr);
+	}
+	for (i = 0; i < child_count; i++) {
+		TSNode command = ts_node_named_child (node, i);
+		if (!handle_ts_command (core, cstr, command, false)) {
+			eprintf ("Error while parsing command: %s\n", cstr);
+			return false;
+		}
+	}
+	return true;
+}
+
+/*
+ * Parse cstr with the r2cmd grammar and run it if the tree has no errors and
+ * its root is a `commands` node. Returns true only if every command succeeded;
+ * false for a NULL cstr, a parse failure or a failing command.
+ */
+static bool core_cmd_tsr2cmd(RCore *core, const char *cstr, bool log) {
+	if (!cstr) {
+		return false;
+	}
+	bool res = false;
+	bool parsed = false;
+	TSTree *tree = NULL;
+	TSParser *parser = ts_parser_new ();
+
+	/* ts_parser_set_language() can refuse the grammar and
+	 * ts_parser_parse_string() can return NULL: never touch a missing tree */
+	if (ts_parser_set_language (parser, tree_sitter_r2cmd ())) {
+		tree = ts_parser_parse_string (parser, NULL, cstr, (ut32)strlen (cstr));
+	}
+	if (tree) {
+		TSNode root = ts_tree_root_node (tree);
+		parsed = is_ts_commands (root) && !ts_node_has_error (root);
+		if (parsed) {
+			res = handle_ts_commands (core, cstr, root, log);
+		}
+		ts_tree_delete (tree);
+	}
+	if (!parsed) {
+		eprintf ("Error while parsing command: `%s`\n", cstr);
+	}
+	ts_parser_delete (parser);
+	return res;
+}
+
 R_API int r_core_cmd(RCore *core, const char *cstr, int log) {
+	if (core->use_tree_sitter_r2cmd) {
+		return core_cmd_tsr2cmd (core, cstr, log)? 0: 1;
+	}
+
 	char *cmd, *ocmd, *ptr, *rcmd;
 	int ret = false, i;
 
diff --git a/libr/core/core.c b/libr/core/core.c
index 2d3d003e13..c2aa077bd9 100644
--- a/libr/core/core.c
+++ b/libr/core/core.c
@@ -2642,6 +2642,7 @@ R_API bool r_core_init(RCore *core) {
 	core->incomment = false;
 	core->config = NULL;
 	core->http_up = false;
+	core->use_tree_sitter_r2cmd = false;
 	ZERO_FILL (core->root_cmd_descriptor);
 	core->print = r_print_new ();
 	core->ropchain = r_list_newf ((RListFree)free);
diff --git a/libr/core/meson.build b/libr/core/meson.build
index bdf7e9a728..ddefabebf3 100644
--- a/libr/core/meson.build
+++ b/libr/core/meson.build
@@ -77,7 +77,6 @@ else
 endif
 
 r_core_inc = [platform_inc, include_directories(r_core_inc)]
-
 r_core_deps = [
   r_util_dep,
   r_reg_dep,
@@ -103,7 +102,8 @@ r_core_deps = [
   platform_deps,
   spp_dep,
   gdb_dep,
-  java_dep
+  java_dep,
+  shell_parser_dep
 ]
 
 if use_libuv
diff --git a/libr/include/r_core.h b/libr/include/r_core.h
index 1127cf72c3..ec71f428e8 100644
--- a/libr/include/r_core.h
+++ b/libr/include/r_core.h
@@ -333,6 +333,7 @@ typedef struct r_core_t {
 	bool scr_gadgets;
 	bool log_events; // core.c:cb_event_handler : log actions from events if cfg.log.events is set
 	RList *ropchain;
+	bool use_tree_sitter_r2cmd; // set by cfg.newshell: r_core_cmd() uses the tree-sitter parser
 
 	RMainCallback r_main_radare2;
 	// int (*r_main_radare2)(int argc, char **argv);
diff --git a/meson_options.txt b/meson_options.txt
index a1d5169a8d..d8076d16f7 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -30,3 +30,6 @@ option('use_libuv', type: 'boolean', value: true)
 
 option('debugger', type: 'boolean', value: true)
 option('use_webui', type: 'boolean', value: false, description: 'install different WebUIs for radare2')
+
+option('shell_parser_in_builddir', type: 'boolean', value: true, description: 'When true, radare2-shell-parser is downloaded in the build directory')
+option('tree_sitter_in_builddir', type: 'boolean', value: true, description: 'When true, tree-sitter is downloaded in the build directory')
diff --git a/shlr/Makefile b/shlr/Makefile
index 0259be2774..771fb745b7 100644
--- a/shlr/Makefile
+++ b/shlr/Makefile
@@ -26,6 +26,16 @@ else
 WGET?=curl -o
 endif
 
+# NOTE: when you update TS_TIP or TS_BRA, also update them in shlr/meson.build
+TS_URL=https://github.com/tree-sitter/tree-sitter.git
+TS_BRA=master
+TS_TIP=80008b0bccbddffc8e68f66a5f173ef71fd125e3
+
+# NOTE: when you update SHELLPARSER_TIP or SHELLPARSER_BRA, also update them in shlr/meson.build
+SHELLPARSER_URL=https://github.com/ret2libc/radare2-shell-parser.git
+SHELLPARSER_BRA=master
+SHELLPARSER_TIP=dfb12492f4052b5a6c64fc626e9bf65ccb7b5522
+
 ifeq ($(CS_RELEASE),1)
 CS_VER=4.0.1
 CS_TAR=https://codeload.github.com/aquynh/capstone/tar.gz/$(CS_VER)
@@ -37,6 +47,7 @@ CS_URL_BASE=github.com/aquynh/capstone
 CS_URL=$(GIT_PREFIX)$(CS_URL_BASE).git
 CS_ARCHIVE=https://$(CS_URL_BASE)/archive
 CS_UPD=20190515
+# NOTE: when you update CS_TIP or CS_BRA, also update them in shlr/meson.build
 ifeq ($(CS_NEXT),1)
 CS_TIP=5809774f62847e6755aa054746822ce32e369b3a
 CS_BRA=next
@@ -49,15 +60,12 @@ CS_ARCHIVE_URL=$(CS_ARCHIVE)/$(CS_TIP).zip
 else
 CS_ARCHIVE_URL=
 endif
-# NOTE: when you update CS_TIP or CS_BRA, also update them in shlr/meson.build
-#CS_BRA=next
-#CS_TIP=38607453f3de85733f9604dffc27778db3b53766
 # REVERT THIS COMMIT BECAUSE ITS WRONG
 CS_REV=
 CS_PATCHES=1
 endif
 
-.PHONY: capstone-sync capstone-build all clean mrproper libgdbr libwindbg bochs
+.PHONY: capstone-sync capstone-build all clean mrproper libgdbr libwindbg bochs tree-sitter-sync tree-sitter-build radare2-shell-parser-sync radare2-shell-parser-build
 
 HOST_CC?=gcc
 SHLR?=$(shell pwd)
@@ -89,7 +97,7 @@ all:
 	exit 1
 endif
 
-preall: targets libwindbg capstone-build bochs
+preall: targets libwindbg capstone-build tree-sitter-build radare2-shell-parser-build bochs
 	@for MOD in ${MODS} ; do \
 		echo $(MAKE) -C $$MOD ; \
 		$(MAKE) -C $$MOD HAVE_VALA= ROOT="${PWD}/../" CC="${CC}" ; \
@@ -321,6 +329,31 @@ else
 	cd ../../radare2-webui/www/m && git pull ; npm i ; $(MAKE) release
 endif
 
+tree-sitter-build: tree-sitter/libtree-sitter.$(EXT_AR)
+
+tree-sitter/libtree-sitter.$(EXT_AR): tree-sitter/lib/src/lib.o
+	$(AR) rvs $@ $<
+	$(RANLIB) $@
+
+# the *-sync targets are phony: keep them order-only (after `|`) or the objects are rebuilt on every run
+tree-sitter/lib/src/lib.o: | tree-sitter-sync
+	$(CC) -c tree-sitter/lib/src/lib.c -o $@ -Itree-sitter/lib/include -Itree-sitter/lib/src $(CFLAGS)
+
+tree-sitter-sync:
+	"$(SHELL)" clone_3rd_repo.sh tree-sitter "${TS_URL}" "${TS_BRA}" "${TS_TIP}"
+
+radare2-shell-parser-build: radare2-shell-parser/libshell-parser.$(EXT_AR)
+
+radare2-shell-parser/libshell-parser.$(EXT_AR): radare2-shell-parser/src/parser.o
+	$(AR) rvs $@ $<
+	$(RANLIB) $@
+
+radare2-shell-parser/src/parser.o: | radare2-shell-parser-sync
+	$(CC) -c radare2-shell-parser/src/parser.c -o $@ -Iradare2-shell-parser/src/tree_sitter -Itree-sitter/lib/include $(CFLAGS)
+
+radare2-shell-parser-sync: tree-sitter-sync
+	"$(SHELL)" clone_3rd_repo.sh radare2-shell-parser "${SHELLPARSER_URL}" "${SHELLPARSER_BRA}" "${SHELLPARSER_TIP}"
+
 www-sync-m sync-www-m: ../../radare2-webui/dist/m
 	cp -rf ../../radare2-webui/dist/m www/m.tmp
 	rm -rf www/m
@@ -361,6 +394,8 @@ SHLRS+=grub/libgrubfs.a
 SHLRS+=java/libr_java.a
 SHLRS+=lz4/liblz4.a
 SHLRS+=qnx/lib/libqnxr.a
+SHLRS+=tree-sitter/libtree-sitter.a
+SHLRS+=radare2-shell-parser/libshell-parser.a
 #SHLRS+=sdb/src/libsdb.a
 #SHLRS+=tcc/libr_tcc.a
 SHLRS+=windbg/libr_windbg.a
diff --git a/shlr/clone_3rd_repo.sh b/shlr/clone_3rd_repo.sh
new file mode 100644
index 0000000000..f73bf34b95
--- /dev/null
+++ b/shlr/clone_3rd_repo.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+# Clone a third-party repository and check out a pinned commit.
+# Usage: clone_3rd_repo.sh <name> <url> <branch> <commit>
+# The clone goes into ./<name>; nothing is done if that directory exists.
+
+REPONAME="$1" # repository name
+URL="$2" # url
+BRA="$3" # branch name
+TIP="$4" # commit id
+
+# Report an error for this repository on stderr and abort.
+fatal_msg() {
+	echo "[${REPONAME}] $1" >&2
+	exit 1
+}
+
+# Check that git can be run. With "check" as first argument a missing git
+# makes the function return 1; otherwise the script exits.
+git_assert() {
+	git --help > /dev/null 2>&1
+	if [ $? != 0 ]; then
+		echo "ERROR: Cannot find git command in PATH" >&2
+		if [ "$1" = check ]; then
+			return 1
+		fi
+		exit 1
+	fi
+	return 0
+}
+
+# Clone ${BRA} of ${URL} into ${REPONAME} and check out ${TIP}.
+git_clone() {
+	echo "[${REPONAME}] Cloning ${REPONAME} from git..." >&2
+	git clone --quiet --single-branch --branch "${BRA}" "${URL}" "${REPONAME}" \
+	|| fatal_msg "Cannot clone $REPONAME from git"
+	if ! (cd "${REPONAME}" && git checkout --quiet "${TIP}"); then
+		# Do not keep a clone at the wrong commit: the directory check in
+		# MAIN would treat it as up to date on the next run.
+		rm -rf "${REPONAME}"
+		fatal_msg "Cannot checkout $TIP"
+	fi
+}
+
+### MAIN ###
+
+if [ $# -ne 4 ] || [ -z "${REPONAME}" ]; then
+	echo "Usage: $0 <name> <url> <branch> <commit>" >&2
+	exit 1
+fi
+if [ -d "$REPONAME" ]; then
+	echo "[${REPONAME}] Nothing to do"
+	exit 0
+fi
+git_assert
+git_clone
diff --git a/shlr/meson.build b/shlr/meson.build
index 5c4aa4dd10..35788da61f 100644
--- a/shlr/meson.build
+++ b/shlr/meson.build
@@ -233,6 +233,98 @@ sdb_gen_cmd = [
 ]
 
 
+# handle tree-sitter dependency
+if get_option('tree_sitter_in_builddir')
+  tree_sitter_path = join_paths(meson.current_build_dir(), 'tree-sitter')
+else
+  tree_sitter_path = join_paths(meson.current_source_dir(), 'tree-sitter')
+endif
+# the python one-liner exits with 1 when the path exists, so 0 means "missing"
+res = run_command(py3_exe, '-c', '__import__("sys").exit(__import__("os").path.exists("@0@"))'.format(tree_sitter_path))
+if res.returncode() == 0
+  if not git_exe.found()
+    error('Cannot load tree-sitter library. Either provide tree-sitter in ./shlr/tree-sitter or install git, so it can be downloaded')
+  endif
+
+  # NOTE: when you update TS_TIP or TS_BRA, also update them in shlr/Makefile
+  TS_TIP = '80008b0bccbddffc8e68f66a5f173ef71fd125e3'
+  TS_BRA = 'master'
+  TS_URL = 'https://github.com/tree-sitter/tree-sitter.git'
+
+  message('Cloning tree-sitter ' + TS_BRA + ' branch, commit ' + TS_TIP + ', into ' + tree_sitter_path)
+  # arguments are passed one by one (no string split) so that a path
+  # containing spaces is not broken into several arguments
+  clone_cmd = run_command(git_exe, 'clone', '-b', TS_BRA, TS_URL, tree_sitter_path)
+  if clone_cmd.returncode() != 0
+    error('Cannot execute git clone command')
+  endif
+
+  reset_cmd = run_command(git_exe, '-C', tree_sitter_path, 'reset', '--hard', TS_TIP)
+  if reset_cmd.returncode() != 0
+    error('Cannot execute git reset command')
+  endif
+endif
+
+tree_sitter_files = [
+  join_paths(tree_sitter_path, 'lib/src/lib.c'),
+]
+
+tree_sitter_inc = [platform_inc, include_directories('tree-sitter/lib/src'), include_directories('tree-sitter/lib/include')]
+
+libtree_sitter = static_library('tree_sitter', tree_sitter_files,
+  include_directories: tree_sitter_inc,
+  implicit_include_directories: false
+)
+
+tree_sitter_dep = declare_dependency(
+  link_with: libtree_sitter,
+  include_directories: tree_sitter_inc
+)
+
+
+# handle radare2-shell-parser dependency
+if get_option('shell_parser_in_builddir')
+  shell_parser_path = join_paths(meson.current_build_dir(), 'radare2-shell-parser')
+else
+  shell_parser_path = join_paths(meson.current_source_dir(), 'radare2-shell-parser')
+endif
+# the python one-liner exits with 1 when the path exists, so 0 means "missing"
+res = run_command(py3_exe, '-c', '__import__("sys").exit(__import__("os").path.exists("@0@"))'.format(shell_parser_path))
+if res.returncode() == 0
+  if not git_exe.found()
+    error('Cannot load radare2-shell-parser library. Either provide radare2-shell-parser in ./shlr/radare2-shell-parser or install git, so it can be downloaded')
+  endif
+
+  # NOTE: when you update SHELLPARSER_TIP or SHELLPARSER_BRA, also update them in shlr/Makefile
+  SHELLPARSER_TIP = 'dfb12492f4052b5a6c64fc626e9bf65ccb7b5522'
+  SHELLPARSER_BRA = 'master'
+  SHELLPARSER_URL = 'https://github.com/ret2libc/radare2-shell-parser.git'
+
+  message('Cloning radare2-shell-parser ' + SHELLPARSER_BRA + ' branch, commit ' + SHELLPARSER_TIP + ', into ' + shell_parser_path)
+  # one argument per item, so a path with spaces stays a single argument
+  clone_cmd = run_command(git_exe, 'clone', '-b', SHELLPARSER_BRA, SHELLPARSER_URL, shell_parser_path)
+  if clone_cmd.returncode() != 0
+    error('Cannot execute git clone command')
+  endif
+
+  reset_cmd = run_command(git_exe, '-C', shell_parser_path, 'reset', '--hard', SHELLPARSER_TIP)
+  if reset_cmd.returncode() != 0
+    error('Cannot execute git reset command')
+  endif
+endif
+
+shell_parser_files = [
+  join_paths(shell_parser_path, 'src/parser.c'),
+]
+
+shell_parser_inc = [platform_inc, include_directories('radare2-shell-parser/src/tree_sitter')]
+
+libshell_parser = static_library('shell_parser', shell_parser_files,
+  include_directories: shell_parser_inc + tree_sitter_inc,
+  implicit_include_directories: true
+)
+
+shell_parser_dep = declare_dependency(
+  link_with: libshell_parser,
+  include_directories: shell_parser_inc,
+  dependencies: tree_sitter_dep
+)
+
+
 # handle bochs dependency
 bochs_files = [
   'bochs/src/libbochs.c'
diff --git a/shlr/radare2-shell-parser-deps.mk b/shlr/radare2-shell-parser-deps.mk
new file mode 100644
index 0000000000..8893abdc25
--- /dev/null
+++ b/shlr/radare2-shell-parser-deps.mk
@@ -0,0 +1,3 @@
+# static libraries required by the tree-sitter based command parser in libr/core
+LINK+=$(STOP)/radare2-shell-parser/libshell-parser.$(EXT_AR)
+LINK+=$(STOP)/tree-sitter/libtree-sitter.$(EXT_AR)