Very simple tree sitter integration (#15414) ##command

* tree-sitter-integration: support tmp-seek, arged, and interpret commands
* Make meson automatically download tree-sitter and radare2-shell-parser grammar
* Add tree-sitter/radare2-shell-parser directories in gitignore
* Add Support for tree-sitter in acr/makefile
* Just use one script to download 3rd party repositories in shlr
* Use cfg.newshell
This commit is contained in:
Riccardo Schirone 2019-11-06 10:33:23 +01:00 committed by GitHub
parent 88c848cc37
commit 4ebd400fa7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 333 additions and 8 deletions

2
.gitignore vendored
View File

@ -56,6 +56,8 @@ libr/include/r_userconf.h
libr/include/r_version.h
libr/include/r_version.h.tmp
shlr/capstone/
shlr/tree-sitter/
shlr/radare2-shell-parser/
shlr/java/out
shlr/java/out.exe
shlr/sdb/sdb

View File

@ -14,6 +14,7 @@ OBJS+=task.o panels.o pseudo.o vmarks.o anal_tp.o anal_objc.o blaze.o cundo.o
OBJS+=esil_data_flow.o
CFLAGS+=-I../../shlr/heap/include
CFLAGS+=-I../../shlr/tree-sitter/lib/include -I../../shlr/radare2-shell-parser/src/tree_parser
CFLAGS+=-DR2_PLUGIN_INCORE -I../../shlr
LDFLAGS+=${DL_LIBS}
@ -41,6 +42,7 @@ OBJS+=$(STATIC_OBJS)
#STATIC_OBJS=$(subst ..,p/..,$(subst core_,p/core_,$(STATIC_OBJ)))
include $(TOP)/shlr/gdb/deps.mk
include $(TOP)/shlr/radare2-shell-parser-deps.mk
include $(LTOP)/rules.mk
# include plugins

View File

@ -2017,6 +2017,13 @@ static bool cb_scrhtml(void *user, void *data) {
return true;
}
static bool cb_newshell(void *user, void *data) {
RConfigNode *node = (RConfigNode *)data;
RCore *core = (RCore *)user;
core->use_tree_sitter_r2cmd = node->i_value;
return true;
}
static bool cb_scrhighlight(void *user, void *data) {
RConfigNode *node = (RConfigNode *) data;
r_cons_highlight (node->value);
@ -3151,6 +3158,7 @@ R_API int r_core_config_init(RCore *core) {
SETCB ("cfg.sandbox", "false", &cb_cfgsanbox, "Sandbox mode disables systems and open on upper directories");
SETPREF ("cfg.wseek", "false", "Seek after write");
SETCB ("cfg.bigendian", "false", &cb_bigendian, "Use little (false) or big (true) endianness");
SETCB ("cfg.newshell", "false", &cb_newshell, "Use new commands parser");
SETI ("cfg.cpuaffinity", 0, "Run on cpuid");
/* log */

View File

@ -21,12 +21,15 @@
#include <r_cmd.h>
#include <stdint.h>
#include <sys/types.h>
#include <tree_sitter/api.h>
#include <ctype.h>
#include <stdarg.h>
#if __UNIX__
#include <sys/utsname.h>
#endif
TSLanguage *tree_sitter_r2cmd ();
R_API void r_save_panels_layout(RCore *core, const char *_name);
R_API void r_load_panels_layout(RCore *core, const char *_name);
@ -4336,7 +4339,136 @@ R_API void run_pending_anal(RCore *core) {
}
}
/* True when the node's type name is exactly "commands". */
static inline bool is_ts_commands(TSNode node) {
	const char *type_name = ts_node_type (node);
	return !strcmp (type_name, "commands");
}
/* True when the node's type name is exactly "arged_command". */
static inline bool is_ts_arged_command(TSNode node) {
	const char *type_name = ts_node_type (node);
	return !strcmp (type_name, "arged_command");
}
/* True when the node's type name is exactly "tmp_seek_command". */
static inline bool is_ts_tmp_seek_command(TSNode node) {
	const char *type_name = ts_node_type (node);
	return !strcmp (type_name, "tmp_seek_command");
}
/* True when the node's type name is exactly "interpret_command". */
static inline bool is_ts_interpret_command(TSNode node) {
	const char *type_name = ts_node_type (node);
	return !strcmp (type_name, "interpret_command");
}
static bool handle_ts_command(RCore *core, const char *cstr, TSNode node, bool log);
static bool core_cmd_tsr2cmd(RCore *core, const char *cstr, bool log);
/*
 * Runs an `arged_command` node through the classic command dispatcher.
 *
 * The text handed to r_cmd_call() is the slice of `cstr` that starts at the
 * first named child (the command) and ends at the furthest end byte of any
 * named child (the arguments).
 *
 * core: core whose `rcmd` dispatcher executes the command.
 * cstr: the full input string the syntax tree was parsed from.
 * node: the `arged_command` node.
 * Returns false when the node has no command child, when the command string
 * cannot be allocated, or when r_cmd_call() returns -1; true otherwise.
 */
static bool handle_ts_arged_command(RCore *core, const char *cstr, TSNode node) {
	TSNode command = ts_node_named_child (node, 0);
	if (ts_node_is_null (command)) {
		return false;
	}
	ut32 cmd_start_byte = ts_node_start_byte (command);
	ut32 cmd_end_byte = ts_node_end_byte (command);
	/* "%.*s" expects an int precision, hence the explicit casts */
	R_LOG_DEBUG ("command: '%.*s'\n", (int)(cmd_end_byte - cmd_start_byte), cstr + cmd_start_byte);

	/*
	 * Children are fetched with ts_node_named_child(), so the bound has to be
	 * the *named* child count; the plain child count also includes anonymous
	 * nodes and would let the index run past the last named child.
	 */
	ut32 child_count = ts_node_named_child_count (node);
	ut32 last_end_byte = cmd_end_byte;
	ut32 i;
	for (i = 1; i < child_count; i++) {
		TSNode arg = ts_node_named_child (node, i);
		ut32 start_byte = ts_node_start_byte (arg);
		ut32 end_byte = ts_node_end_byte (arg);
		if (last_end_byte < end_byte) {
			last_end_byte = end_byte;
		}
		R_LOG_DEBUG ("arg: '%.*s'\n", (int)(end_byte - start_byte), cstr + start_byte);
	}

	char *cmd_string = r_str_newf ("%.*s", (int)(last_end_byte - cmd_start_byte), cstr + cmd_start_byte);
	if (!cmd_string) {
		return false;
	}
	bool res = r_cmd_call (core->rcmd, cmd_string) != -1;
	free (cmd_string);
	return res;
}
/*
 * Executes a `tmp_seek_command` node: named child 0 is the command to run,
 * named child 1 is the offset expression. The core is moved to the evaluated
 * offset, the inner command is executed, and the previous offset is restored.
 *
 * core: core to seek and execute on.
 * cstr: the full input string the syntax tree was parsed from.
 * node: the `tmp_seek_command` node.
 * log:  forwarded unchanged to handle_ts_command().
 * Returns the result of the inner command.
 */
static bool handle_ts_tmp_seek_command(RCore *core, const char *cstr, TSNode node, bool log) {
	TSNode inner_cmd = ts_node_named_child (node, 0);
	TSNode seek_node = ts_node_named_child (node, 1);
	ut32 expr_from = ts_node_start_byte (seek_node);
	ut32 expr_to = ts_node_end_byte (seek_node);
	char *seek_expr = r_str_newf ("%.*s", expr_to - expr_from, cstr + expr_from);
	/* remember where we are so the seek is only temporary */
	ut64 saved_offset = core->offset;
	R_LOG_DEBUG ("tmp_seek command, command X on tmp_seek %s\n", seek_expr);

	ut64 target = r_num_math (core->num, seek_expr);
	r_core_seek (core, target, 1);
	bool ok = handle_ts_command (core, cstr, inner_cmd, log);
	r_core_seek (core, saved_offset, 1);

	free (seek_expr);
	return ok;
}
/*
 * Executes an `interpret_command` node: runs the command held in named
 * child 0, captures its output as a string, and then feeds that output back
 * into the tree-sitter command parser.
 *
 * core: core to execute on.
 * cstr: the full input string the syntax tree was parsed from.
 * node: the `interpret_command` node.
 * log:  forwarded unchanged to core_cmd_tsr2cmd().
 * Returns false when either string could not be obtained, otherwise the
 * result of interpreting the captured output.
 */
static bool handle_ts_interpret_command(RCore *core, const char *cstr, TSNode node, bool log) {
	TSNode command = ts_node_named_child (node, 0);
	ut32 command_start = ts_node_start_byte (command);
	ut32 command_end = ts_node_end_byte (command);
	/* "%.*s" expects an int precision, hence the explicit cast */
	char *cmd_string = r_str_newf ("%.*s", (int)(command_end - command_start), cstr + command_start);
	if (!cmd_string) {
		return false;
	}
	char *str = r_core_cmd_str (core, cmd_string);
	if (!str) {
		/* nothing to interpret; avoid passing NULL to "%s" and to the parser */
		free (cmd_string);
		return false;
	}
	R_LOG_DEBUG ("interpret_command cmd_string = '%s', result to interpret = '%s'\n", cmd_string, str);
	free (cmd_string);
	bool res = core_cmd_tsr2cmd (core, str, log);
	free (str);
	return res;
}
/*
 * Dispatches a single command node to the handler matching its type
 * ("arged_command", "tmp_seek_command" or "interpret_command").
 *
 * core: core to execute on.
 * cstr: the full input string the syntax tree was parsed from.
 * node: the command node to execute.
 * log:  when true, `cstr` is added to the line history before dispatching.
 * Returns the handler's result, or false for any other node type. Pending
 * analysis commands are run in every case before returning.
 */
static bool handle_ts_command(RCore *core, const char *cstr, TSNode node, bool log) {
	if (log) {
		r_line_hist_add (cstr);
	}

	bool handled = false;
	const char *kind = ts_node_type (node);
	if (!strcmp (kind, "arged_command")) {
		handled = handle_ts_arged_command (core, cstr, node);
	} else if (!strcmp (kind, "tmp_seek_command")) {
		handled = handle_ts_tmp_seek_command (core, cstr, node, log);
	} else if (!strcmp (kind, "interpret_command")) {
		handled = handle_ts_interpret_command (core, cstr, node, log);
	}

	/* run pending analysis commands */
	run_pending_anal (core);
	return handled;
}
/*
 * Executes every named child of a `commands` node in order, stopping at the
 * first one that fails.
 *
 * core: core to execute on.
 * cstr: the full input string the syntax tree was parsed from.
 * node: the `commands` node.
 * log:  forwarded unchanged to handle_ts_command().
 * Returns true when all children succeeded (or there were none); on the
 * first failure an error is printed and false is returned.
 */
static bool handle_ts_commands(RCore *core, const char *cstr, TSNode node, bool log) {
	const ut32 total = ts_node_named_child_count (node);
	R_LOG_DEBUG ("commands with %d childs\n", total);

	ut32 idx = 0;
	while (idx < total) {
		TSNode child = ts_node_named_child (node, idx);
		if (!handle_ts_command (core, cstr, child, log)) {
			eprintf ("Error while parsing command: %s\n", cstr);
			return false;
		}
		idx++;
	}
	return true;
}
/*
 * Parses `cstr` with the r2cmd tree-sitter grammar and executes the
 * resulting command tree.
 *
 * core: core to execute on.
 * cstr: command line to parse; NULL is rejected.
 * log:  forwarded unchanged to handle_ts_commands().
 * Returns true only when the input parsed into an error-free `commands`
 * root and handle_ts_commands() succeeded. An error is printed when the
 * input cannot be parsed.
 */
static bool core_cmd_tsr2cmd(RCore *core, const char *cstr, bool log) {
	if (!cstr) {
		/* callers may hand over the NULL result of a failed command */
		return false;
	}
	TSParser *parser = ts_parser_new ();
	if (!parser) {
		return false;
	}

	bool res = false;
	bool parsed = false;
	TSTree *tree = NULL;
	/* parsing is only attempted once the language has been accepted */
	if (ts_parser_set_language (parser, tree_sitter_r2cmd ())) {
		tree = ts_parser_parse_string (parser, NULL, cstr, strlen (cstr));
	}
	if (tree) {
		TSNode root = ts_tree_root_node (tree);
		if (is_ts_commands (root) && !ts_node_has_error (root)) {
			parsed = true;
			res = handle_ts_commands (core, cstr, root, log);
		}
	}
	if (!parsed) {
		eprintf ("Error while parsing command: `%s`\n", cstr);
	}

	if (tree) {
		ts_tree_delete (tree);
	}
	ts_parser_delete (parser);
	return res;
}
R_API int r_core_cmd(RCore *core, const char *cstr, int log) {
if (core->use_tree_sitter_r2cmd) {
return core_cmd_tsr2cmd (core, cstr, log)? 0: 1;
}
char *cmd, *ocmd, *ptr, *rcmd;
int ret = false, i;

View File

@ -2642,6 +2642,7 @@ R_API bool r_core_init(RCore *core) {
core->incomment = false;
core->config = NULL;
core->http_up = false;
core->use_tree_sitter_r2cmd = false;
ZERO_FILL (core->root_cmd_descriptor);
core->print = r_print_new ();
core->ropchain = r_list_newf ((RListFree)free);

View File

@ -77,7 +77,6 @@ else
endif
r_core_inc = [platform_inc, include_directories(r_core_inc)]
r_core_deps = [
r_util_dep,
r_reg_dep,
@ -103,7 +102,8 @@ r_core_deps = [
platform_deps,
spp_dep,
gdb_dep,
java_dep
java_dep,
shell_parser_dep
]
if use_libuv

View File

@ -333,6 +333,7 @@ typedef struct r_core_t {
bool scr_gadgets;
bool log_events; // core.c:cb_event_handler : log actions from events if cfg.log.events is set
RList *ropchain;
bool use_tree_sitter_r2cmd;
RMainCallback r_main_radare2;
// int (*r_main_radare2)(int argc, char **argv);

View File

@ -30,3 +30,6 @@ option('use_libuv', type: 'boolean', value: true)
option('debugger', type: 'boolean', value: true)
option('use_webui', type: 'boolean', value: false, description: 'install different WebUIs for radare2')
# Where the third-party sources used by the new command parser are placed:
# the build directory when true, otherwise the source tree.
option('shell_parser_in_builddir', type: 'boolean', value: true, description: 'When true, radare2-shell-parser is downloaded in the build directory')
option('tree_sitter_in_builddir', type: 'boolean', value: true, description: 'When true, tree-sitter is downloaded in the build directory')

View File

@ -26,6 +26,16 @@ else
WGET?=curl -o
endif
# NOTE: when you update TS_TIP or TS_BRA, also update them in shlr/meson.build
TS_URL=https://github.com/tree-sitter/tree-sitter.git
TS_BRA=master
TS_TIP=80008b0bccbddffc8e68f66a5f173ef71fd125e3
# NOTE: when you update SHELLPARSER_TIP or SHELLPARSER_BRA, also update them in shlr/meson.build
SHELLPARSER_URL=https://github.com/ret2libc/radare2-shell-parser.git
SHELLPARSER_BRA=master
SHELLPARSER_TIP=dfb12492f4052b5a6c64fc626e9bf65ccb7b5522
ifeq ($(CS_RELEASE),1)
CS_VER=4.0.1
CS_TAR=https://codeload.github.com/aquynh/capstone/tar.gz/$(CS_VER)
@ -37,6 +47,7 @@ CS_URL_BASE=github.com/aquynh/capstone
CS_URL=$(GIT_PREFIX)$(CS_URL_BASE).git
CS_ARCHIVE=https://$(CS_URL_BASE)/archive
CS_UPD=20190515
# NOTE: when you update CS_TIP or CS_BRA, also update them in shlr/meson.build
ifeq ($(CS_NEXT),1)
CS_TIP=5809774f62847e6755aa054746822ce32e369b3a
CS_BRA=next
@ -49,15 +60,12 @@ CS_ARCHIVE_URL=$(CS_ARCHIVE)/$(CS_TIP).zip
else
CS_ARCHIVE_URL=
endif
# NOTE: when you update CS_TIP or CS_BRA, also update them in shlr/meson.build
#CS_BRA=next
#CS_TIP=38607453f3de85733f9604dffc27778db3b53766
# REVERT THIS COMMIT BECAUSE ITS WRONG
CS_REV=
CS_PATCHES=1
endif
.PHONY: capstone-sync capstone-build all clean mrproper libgdbr libwindbg bochs
.PHONY: capstone-sync capstone-build all clean mrproper libgdbr libwindbg bochs tree-sitter-sync radare2-shell-parser-sync
HOST_CC?=gcc
SHLR?=$(shell pwd)
@ -89,7 +97,7 @@ all:
exit 1
endif
preall: targets libwindbg capstone-build bochs
preall: targets libwindbg capstone-build tree-sitter-build radare2-shell-parser-build bochs
@for MOD in ${MODS} ; do \
echo $(MAKE) -C $$MOD ; \
$(MAKE) -C $$MOD HAVE_VALA= ROOT="${PWD}/../" CC="${CC}" ; \
@ -321,6 +329,31 @@ else
cd ../../radare2-webui/www/m && git pull ; npm i ; $(MAKE) release
endif
# Build the static tree-sitter archive from tree-sitter/lib/src/lib.c.
tree-sitter-build: tree-sitter/libtree-sitter.$(EXT_AR)

tree-sitter/libtree-sitter.$(EXT_AR): tree-sitter/lib/src/lib.o
	$(AR) rvs $@ $<
	$(RANLIB) $@

# tree-sitter-sync is a phony target, so it is listed as an order-only
# prerequisite: the sources are fetched before compiling, but the object is
# not recompiled (and the archive not rebuilt) on every make invocation.
tree-sitter/lib/src/lib.o: | tree-sitter-sync
	$(CC) -c tree-sitter/lib/src/lib.c -o $@ -Itree-sitter/lib/include -Itree-sitter/lib/src $(CFLAGS)

# Fetch the pinned tree-sitter revision (no-op when the directory exists).
tree-sitter-sync:
	"$(SHELL)" clone_3rd_repo.sh tree-sitter "${TS_URL}" "${TS_BRA}" "${TS_TIP}"
# Build the static archive holding the generated radare2 shell parser.
radare2-shell-parser-build: radare2-shell-parser/libshell-parser.$(EXT_AR)

radare2-shell-parser/libshell-parser.$(EXT_AR): radare2-shell-parser/src/parser.o
	$(AR) rvs $@ $<
	$(RANLIB) $@

# radare2-shell-parser-sync is a phony target, so it is listed as an
# order-only prerequisite: the sources are fetched before compiling, but the
# object is not recompiled (and the archive not rebuilt) on every make run.
radare2-shell-parser/src/parser.o: | radare2-shell-parser-sync
	$(CC) -c radare2-shell-parser/src/parser.c -o $@ -Iradare2-shell-parser/src/tree_sitter -Itree-sitter/lib/include $(CFLAGS)

# Fetch the pinned parser revision; tree-sitter is fetched first because the
# parser is compiled against its headers.
radare2-shell-parser-sync: tree-sitter-sync
	"$(SHELL)" clone_3rd_repo.sh radare2-shell-parser "${SHELLPARSER_URL}" "${SHELLPARSER_BRA}" "${SHELLPARSER_TIP}"
www-sync-m sync-www-m: ../../radare2-webui/dist/m
cp -rf ../../radare2-webui/dist/m www/m.tmp
rm -rf www/m
@ -361,6 +394,8 @@ SHLRS+=grub/libgrubfs.a
SHLRS+=java/libr_java.a
SHLRS+=lz4/liblz4.a
SHLRS+=qnx/lib/libqnxr.a
SHLRS+=tree-sitter/libtree-sitter.a
SHLRS+=radare2-shell-parser/libshell-parser.a
#SHLRS+=sdb/src/libsdb.a
#SHLRS+=tcc/libr_tcc.a
SHLRS+=windbg/libr_windbg.a

43
shlr/clone_3rd_repo.sh Normal file
View File

@ -0,0 +1,43 @@
#!/bin/sh
REPONAME="$1" # repository name
URL="$2" # url
BRA="$3" # branch name
TIP="$4" # commit id
# Make sure a usable `git` can be run from PATH.
#   $1 - optional; when it is "check" a failure returns 1 instead of
#        terminating the script.
# Returns 0 when git is available. The error is written to stderr so it does
# not mix with regular output.
git_assert() {
	if git --help > /dev/null 2>&1; then
		return 0
	fi
	echo "ERROR: Cannot find git command in PATH" >&2
	if [ "$1" = check ]; then
		return 1
	fi
	exit 1
}
# Print "$1" prefixed with the repository name on stderr and terminate the
# script with exit status 1.
fatal_msg() {
	echo "[${REPONAME}] $1" >&2
	exit 1
}
# Clone branch ${BRA} of ${URL} into ./${REPONAME} and check out commit ${TIP}.
# Any failure terminates the script through fatal_msg.
git_clone() {
	git_assert
	echo "[${REPONAME}] Cloning ${REPONAME} from git..." >&2
	git clone --quiet --single-branch --branch "${BRA}" "${URL}" "${REPONAME}" \
		|| fatal_msg "Cannot clone $REPONAME from git"
	# The checkout runs in a subshell so the caller's working directory is
	# left untouched.
	if ! ( cd "${REPONAME}" && git checkout --quiet "$TIP" ); then
		# Drop the fresh clone: the script only tests whether the directory
		# exists, so leaving it behind would make the next run report
		# "Nothing to do" while the wrong revision is checked out.
		rm -rf "${REPONAME}"
		fatal_msg "Cannot checkout $TIP"
	fi
}
# Fetch the repository via git_clone; abort with a fatal message when the
# clone reports failure.
get_repo() {
	if ! git_clone; then
		fatal_msg 'Clone failed'
	fi
}
### MAIN ###
# Only clone when the target directory is not there yet; an existing
# directory is left untouched.
if [ ! -d "$REPONAME" ]; then
	git_assert
	get_repo
else
	echo "[${REPONAME}] Nothing to do"
	exit 0
fi

View File

@ -233,6 +233,102 @@ sdb_gen_cmd = [
]
# handle tree-sitter dependency
# The sources live either in the build directory or in the source tree,
# depending on the 'tree_sitter_in_builddir' option. When the directory is
# missing, the pinned revision is cloned with git.
if get_option('tree_sitter_in_builddir')
  tree_sitter_path = join_paths(meson.current_build_dir(), 'tree-sitter')
else
  tree_sitter_path = join_paths(meson.current_source_dir(), 'tree-sitter')
endif

# The helper exits with 1 when the path exists and 0 when it does not. The
# path is passed as an argument instead of being pasted into the Python
# source, so backslashes or quotes in it cannot be misread as string syntax.
res = run_command(py3_exe, '-c', 'import os, sys; sys.exit(os.path.exists(sys.argv[1]))', tree_sitter_path)
if res.returncode() == 0
  if not git_exe.found()
    error('Cannot load tree-sitter library. Either provide tree-sitter in ./shlr/tree-sitter or install git, so it can be downloaded')
  endif

  # NOTE: when you update TS_TIP or TS_BRA, also update them in shlr/Makefile
  TS_TIP = '80008b0bccbddffc8e68f66a5f173ef71fd125e3'
  TS_BRA = 'master'

  message('Cloning tree-sitter ' + TS_BRA + ' branch, commit ' + TS_TIP + ', into ' + tree_sitter_path)
  # Arguments are passed one by one: building a single string and splitting
  # it on whitespace breaks as soon as the path contains a space.
  clone_cmd = run_command(git_exe, 'clone', '-b', TS_BRA, 'https://github.com/tree-sitter/tree-sitter.git', tree_sitter_path)
  if clone_cmd.returncode() != 0
    error('Cannot execute git clone command')
  endif

  reset_cmd = run_command(git_exe, '-C', tree_sitter_path, 'reset', '--hard', TS_TIP)
  if reset_cmd.returncode() != 0
    error('Cannot execute git reset command')
  endif
endif

tree_sitter_files = [
  join_paths(tree_sitter_path, 'lib/src/lib.c'),
]

tree_sitter_inc = [platform_inc, include_directories('tree-sitter/lib/src'), include_directories('tree-sitter/lib/include')]

libtree_sitter = static_library('tree_sitter', tree_sitter_files,
  include_directories: tree_sitter_inc,
  implicit_include_directories: false
)

tree_sitter_dep = declare_dependency(
  link_with: libtree_sitter,
  include_directories: tree_sitter_inc
)
# handle radare2-shell-parser dependency
# The sources live either in the build directory or in the source tree,
# depending on the 'shell_parser_in_builddir' option. When the directory is
# missing, the pinned revision is cloned with git.
if get_option('shell_parser_in_builddir')
  shell_parser_path = join_paths(meson.current_build_dir(), 'radare2-shell-parser')
else
  shell_parser_path = join_paths(meson.current_source_dir(), 'radare2-shell-parser')
endif

# The helper exits with 1 when the path exists and 0 when it does not. The
# path is passed as an argument instead of being pasted into the Python
# source, so backslashes or quotes in it cannot be misread as string syntax.
res = run_command(py3_exe, '-c', 'import os, sys; sys.exit(os.path.exists(sys.argv[1]))', shell_parser_path)
if res.returncode() == 0
  if not git_exe.found()
    error('Cannot load radare2-shell-parser library. Either provide radare2-shell-parser in ./shlr/radare2-shell-parser or install git, so it can be downloaded')
  endif

  # NOTE: when you update SHELLPARSER_TIP or SHELLPARSER_BRA, also update them in shlr/Makefile
  SHELLPARSER_TIP = 'dfb12492f4052b5a6c64fc626e9bf65ccb7b5522'
  SHELLPARSER_BRA = 'master'

  shell_parser_user = 'ret2libc'
  shell_parser_url = 'https://github.com/@0@/radare2-shell-parser.git'.format(shell_parser_user)
  message('Cloning radare2-shell-parser ' + SHELLPARSER_BRA + ' branch, commit ' + SHELLPARSER_TIP + ', into ' + shell_parser_path)
  # Arguments are passed one by one: building a single string and splitting
  # it on whitespace breaks as soon as the path contains a space.
  clone_cmd = run_command(git_exe, 'clone', '-b', SHELLPARSER_BRA, shell_parser_url, shell_parser_path)
  if clone_cmd.returncode() != 0
    error('Cannot execute git clone command')
  endif

  reset_cmd = run_command(git_exe, '-C', shell_parser_path, 'reset', '--hard', SHELLPARSER_TIP)
  if reset_cmd.returncode() != 0
    error('Cannot execute git reset command')
  endif
endif

shell_parser_files = [
  join_paths(shell_parser_path, 'src/parser.c'),
]

shell_parser_inc = [platform_inc, include_directories('radare2-shell-parser/src/tree_sitter')]

libshell_parser = static_library('shell_parser', shell_parser_files,
  include_directories: shell_parser_inc + tree_sitter_inc,
  implicit_include_directories: true
)

# Users of the parser also get tree-sitter through the declared dependency.
shell_parser_dep = declare_dependency(
  link_with: libshell_parser,
  include_directories: shell_parser_inc,
  dependencies: tree_sitter_dep
)
# handle bochs dependency
bochs_files = [
'bochs/src/libbochs.c'

View File

@ -0,0 +1,2 @@
# Static archives appended to LINK by Makefiles that include this fragment:
# the radare2 shell parser and the tree-sitter library.
# NOTE(review): STOP is presumably the path to the shlr directory - confirm.
LINK+=$(STOP)/radare2-shell-parser/libshell-parser.$(EXT_AR)
LINK+=$(STOP)/tree-sitter/libtree-sitter.$(EXT_AR)