From 60ae40fa5c4956f7c38c0da2e936b5429aeee38a Mon Sep 17 00:00:00 2001 From: GustavoLCR Date: Sun, 22 Sep 2019 18:58:42 -0300 Subject: [PATCH] Fix #5446 - Implement msvc RTTI demangling (#15086) * Implement demangling msvc template parameters * Add demangle func pointer in RBinBind struct and use it in avrD * Extra fixes in r_bin_demangle_list and r_bin_demangle_plugin --- libr/anal/rtti.c | 2 +- libr/anal/rtti_msvc.c | 34 ++--- libr/bin/bin.c | 1 + libr/bin/demangle.c | 4 +- libr/bin/mangling/demangler.c | 1 + libr/bin/mangling/microsoft_demangle.c | 167 ++++++++++++++++++++++++- libr/include/r_anal.h | 2 +- libr/include/r_bin.h | 2 + 8 files changed, 185 insertions(+), 28 deletions(-) diff --git a/libr/anal/rtti.c b/libr/anal/rtti.c index e4e352ed0e..f6b322173a 100644 --- a/libr/anal/rtti.c +++ b/libr/anal/rtti.c @@ -6,7 +6,7 @@ R_API char *r_anal_rtti_demangle_class_name(RAnal *anal, const char *name) { RVTableContext context; r_anal_vtable_begin (anal, &context); if (context.abi == R_ANAL_CPP_ABI_MSVC) { - return r_anal_rtti_msvc_demangle_class_name (name); + return r_anal_rtti_msvc_demangle_class_name (&context, name); } // TODO: implement class name demangling for itanium return NULL; diff --git a/libr/anal/rtti_msvc.c b/libr/anal/rtti_msvc.c index 7a5106f8e5..6db4cd798a 100644 --- a/libr/anal/rtti_msvc.c +++ b/libr/anal/rtti_msvc.c @@ -377,7 +377,7 @@ static void rtti_msvc_print_base_class_descriptor_json(rtti_base_class_descripto * .?AVClassInInnerNamespace@InnerNamespace@OuterNamespace@@ * => OuterNamespace::InnerNamespace::AVClassInInnerNamespace */ -R_API char *r_anal_rtti_msvc_demangle_class_name(const char *name) { +R_API char *r_anal_rtti_msvc_demangle_class_name(RVTableContext *context, const char *name) { if (!name) { return NULL; } @@ -387,27 +387,19 @@ R_API char *r_anal_rtti_msvc_demangle_class_name(const char *name) { || strncmp (name + original_len - 2, "@@", 2) != 0) { return NULL; } - char *ret = malloc ((original_len - 6) * 2 + 1); - if 
(!ret) { - return NULL; - } - char *c = ret; - const char *oc = name + original_len - 3; - size_t part_len = 0; - while (oc >= name + 4) { - if (*oc == '@') { - memcpy (c, oc + 1, part_len); - c += part_len; - *c++ = ':'; - *c++ = ':'; - part_len = 0; + char *ret = context->anal->binb.demangle (NULL, "msvc", name, 0, false); + if (ret && *ret) { + char *n = strchr (ret, ' '); + if (n && *(++n)) { + char *tmp = strdup (n); + free (ret); + ret = tmp; } else { - part_len++; + R_FREE (ret); } - oc--; + } else { + R_FREE (ret); } - memcpy (c, oc + 1, part_len); - c[part_len] = '\0'; return ret; } @@ -880,7 +872,7 @@ static const char *recovery_apply_complete_object_locator(RRTTIMSVCAnalContext * return existing; } - char *name = r_anal_rtti_msvc_demangle_class_name (col->td->td.name); + char *name = r_anal_rtti_msvc_demangle_class_name (context->vt_context, col->td->td.name); if (!name) { if (context->vt_context->anal->verbose) { eprintf ("Failed to demangle a class name: \"%s\"\n", col->td->td.name); @@ -921,7 +913,7 @@ static const char *recovery_apply_type_descriptor(RRTTIMSVCAnalContext *context, return existing; } - char *name = r_anal_rtti_msvc_demangle_class_name (td->td.name); + char *name = r_anal_rtti_msvc_demangle_class_name (context->vt_context, td->td.name); if (!name) { if (context->vt_context->anal->verbose) { eprintf("Failed to demangle a class name: \"%s\"\n", td->td.name); diff --git a/libr/bin/bin.c b/libr/bin/bin.c index 2c156e49c3..c07a131c1b 100644 --- a/libr/bin/bin.c +++ b/libr/bin/bin.c @@ -1166,6 +1166,7 @@ R_API void r_bin_bind(RBin *bin, RBinBind *b) { b->get_name = __getname; b->get_sections = r_bin_get_sections; b->get_vsect_at = __get_vsection_at; + b->demangle = r_bin_demangle; } } diff --git a/libr/bin/demangle.c b/libr/bin/demangle.c index 1e7ba389d3..70d118b00c 100644 --- a/libr/bin/demangle.c +++ b/libr/bin/demangle.c @@ -5,7 +5,7 @@ #include R_API void r_bin_demangle_list(RBin *bin) { - const char *langs[] = { "c++", "java", "objc", 
"swift", "dlang", "msvc", NULL }; + const char *langs[] = { "c++", "java", "objc", "swift", "dlang", "msvc", "rust", NULL }; RBinPlugin *plugin; RListIter *it; int i; @@ -27,7 +27,7 @@ R_API char *r_bin_demangle_plugin(RBin *bin, const char *name, const char *str) RListIter *it; if (bin && name && str) { r_list_foreach (bin->plugins, it, plugin) { - if (plugin->demangle) { + if (plugin->demangle && !strncmp (plugin->name, name, strlen (plugin->name))) { return plugin->demangle (str); } } diff --git a/libr/bin/mangling/demangler.c b/libr/bin/mangling/demangler.c index b83d60233f..e1013b63b9 100644 --- a/libr/bin/mangling/demangler.c +++ b/libr/bin/mangling/demangler.c @@ -24,6 +24,7 @@ static EManglingType get_mangling_type(char *sym) } switch (*sym) { + case '.': case '?': mangling_type = eManglingMicrosoft; break; diff --git a/libr/bin/mangling/microsoft_demangle.c b/libr/bin/mangling/microsoft_demangle.c index 27f882fa20..267bbf0b9a 100644 --- a/libr/bin/mangling/microsoft_demangle.c +++ b/libr/bin/mangling/microsoft_demangle.c @@ -105,9 +105,11 @@ static state_func const state_table[eTCStateMax] = { // State machine for parsing type codes functions /////////////////////////////////////////////////////////////////////////////// +static void init_state_struct(SStateInfo *state, char *buff_for_parsing); static EDemanglerErr get_type_code_string(char *sym, unsigned int *amount_of_read_chars, char **str_type_code); static int init_type_code_str_struct(STypeCodeStr *type_coder_str); static void free_type_code_str_struct(STypeCodeStr *type_code_str); +char *get_num(SStateInfo *state); static void run_state(SStateInfo *state_info, STypeCodeStr *type_code_str) { state_table[state_info->state](state_info, type_code_str); @@ -159,6 +161,126 @@ copy_string_err: return res; } +int get_template_params(char *sym, unsigned int *amount_of_read_chars, char **str_type_code) { + EDemanglerErr err = eDemanglerErrOK; + SStateInfo state; + init_state_struct (&state, sym); + const char 
template_param[] = "template-parameter-"; + char *tmp, *res = NULL; + if (!strncmp (sym, "?", 1)) { + // anonymous template param + state.amount_of_read_chars += 1; + state.buff_for_parsing += 1; + res = get_num (&state); + if (res) { + tmp = r_str_newf("%s%s", template_param, res); + free (res); + res = tmp; + } + } else { + if (strncmp (sym, "$", 1)) { + goto get_template_params_err; + } + sym++; + state.amount_of_read_chars += 2; + state.buff_for_parsing += 2; + if (!strncmp (sym, "0", 1)) { + // Signed integer + tmp = get_num (&state); + if (tmp) { + int signed_a = atoi (tmp); + res = r_str_newf ("%d", signed_a); + free (tmp); + } + } else if (!strncmp (sym, "2", 1)) { + // real value a ^ b + char *a = get_num (&state); + char *b = get_num (&state); + if (a && b) { + int signed_b = atoi (b); + res = r_str_newf ("%sE%d", a, signed_b); + } + free (a); + free (b); + } else if (!strncmp (sym, "D", 1)) { + // anonymous template param + res = get_num (&state); + if (res) { + tmp = r_str_newf("%s%s", template_param, res); + free (res); + res = tmp; + } + } else if (!strncmp (sym, "F", 1)) { + // Signed {a, b} + char *a = get_num (&state); + char *b = get_num (&state); + if (a && b) { + int signed_a = atoi (a); + int signed_b = atoi (b); + res = r_str_newf ("{%d, %d}", signed_a, signed_b); + } + free (a); + free (b); + } else if (!strncmp (sym, "G", 1)) { + // Signed {a, b, c} + char *a = get_num (&state); + char *b = get_num (&state); + char *c = get_num (&state); + if (a && b && c) { + int signed_a = atoi (a); + int signed_b = atoi (b); + int signed_c = atoi (c); + res = r_str_newf ("{%d, %d, %d}", signed_a, signed_b, signed_c); + } + free (a); + free (b); + free (c); + } else if (!strncmp (sym, "H", 1)) { + // Unsigned integer + res = get_num (&state); + } else if (!strncmp (sym, "I", 1)) { + // Unsigned {x, y} + char *a = get_num (&state); + char *b = get_num (&state); + if (a && b) { + res = r_str_newf ("{%s, %s}", a, b); + } + free (a); + free (b); + } else if 
(!strncmp (sym, "J", 1)) { + // Unsigned {x, y, z} + char *a = get_num (&state); + char *b = get_num (&state); + char *c = get_num (&state); + if (a && b && c) { + res = r_str_newf ("{%s, %s, %s}", a, b, c); + } + free (a); + free (b); + free (c); + } else if (!strncmp (sym, "Q", 1)) { + // anonymous non-type template parameter + res = get_num (&state); + if (res) { + tmp = r_str_newf("non-type-%s%s", template_param, res); + free (res); + res = tmp; + } + } + } + + if (!res) { + err = eDemanglerErrUnsupportedMangling; + goto get_template_params_err; + } + + *str_type_code = res; + *amount_of_read_chars = state.amount_of_read_chars; + +get_template_params_err: + return err; +} + /////////////////////////////////////////////////////////////////////////////// int get_template (char *buf, SStrInfo *str_info) { int len = 0; @@ -191,8 +313,10 @@ int get_template (char *buf, SStrInfo *str_info) { copy_string (&type_code_str, ", ", 0); } if (get_type_code_string (buf, &i, &str_type_code) != eDemanglerErrOK) { - len = 0; - goto get_template_err; + if (get_template_params (buf, &i, &str_type_code) != eDemanglerErrOK) { + len = 0; + goto get_template_err; + } } copy_string (&type_code_str, str_type_code, 0); buf += i; @@ -303,6 +427,7 @@ int get_namespace_and_name( char *buf, STypeCodeStr *type_code_str, i = get_template (buf + 1, str_info); if (!i) { R_FREE (str_info); + read_len--; goto get_namespace_and_name_err; } r_list_append (names_l, str_info); @@ -1421,6 +1546,37 @@ parse_microsoft_mangled_name_err: return err; } +static EDemanglerErr parse_microsoft_rtti_mangled_name(char *sym, char **demangled_name) { + EDemanglerErr err = eDemanglerErrOK; + char *type = NULL; + if (!strncmp (sym, "AT", 2)) { + type = "union"; + } else if (!strncmp (sym, "AU", 2)) { + type = "struct"; + } else if (!strncmp (sym, "AV", 2)) { + type = "class"; + } else if (!strncmp (sym, "AW", 2)) { + type = "enum"; + } else { + err = eDemanglerErrUncorrectMangledSymbol; + goto 
parse_microsoft_rtti_mangled_name_err; + } + int read = 0; + STypeCodeStr type_code_str; + init_type_code_str_struct (&type_code_str); + int len = get_namespace_and_name (sym + 2, &type_code_str, NULL); + if (!len) { + err = eDemanglerErrUncorrectMangledSymbol; + goto parse_microsoft_rtti_mangled_name_err; + } + + *demangled_name = r_str_newf ("%s %s", type, type_code_str.type_str); + free (type_code_str.type_str); + +parse_microsoft_rtti_mangled_name_err: + return err; +} + /////////////////////////////////////////////////////////////////////////////// EDemanglerErr microsoft_demangle(SDemangler *demangler, char **demangled_name) { EDemanglerErr err = eDemanglerErrOK; @@ -1435,7 +1591,12 @@ EDemanglerErr microsoft_demangle(SDemangler *demangler, char **demangled_name) { err = eDemanglerErrMemoryAllocation; goto microsoft_demangle_err; } - err = parse_microsoft_mangled_name(demangler->symbol + 1, demangled_name); + + if (!strncmp (demangler->symbol, ".?", 2)) { + err = parse_microsoft_rtti_mangled_name (demangler->symbol + 2, demangled_name); + } else { + err = parse_microsoft_mangled_name (demangler->symbol + 1, demangled_name); + } microsoft_demangle_err: r_list_free (abbr_names); diff --git a/libr/include/r_anal.h b/libr/include/r_anal.h index 74998b97b0..621319b3c4 100644 --- a/libr/include/r_anal.h +++ b/libr/include/r_anal.h @@ -1859,7 +1859,7 @@ R_API RList *r_anal_vtable_search(RVTableContext *context); R_API void r_anal_list_vtables(RAnal *anal, int rad); /* rtti */ -R_API char *r_anal_rtti_msvc_demangle_class_name(const char *name); +R_API char *r_anal_rtti_msvc_demangle_class_name(RVTableContext *context, const char *name); R_API void r_anal_rtti_msvc_print_complete_object_locator(RVTableContext *context, ut64 addr, int mode); R_API void r_anal_rtti_msvc_print_type_descriptor(RVTableContext *context, ut64 addr, int mode); R_API void r_anal_rtti_msvc_print_class_hierarchy_descriptor(RVTableContext *context, ut64 addr, int mode); diff --git 
a/libr/include/r_bin.h b/libr/include/r_bin.h index 76bf94b7d3..c8f5a0fd96 100644 --- a/libr/include/r_bin.h +++ b/libr/include/r_bin.h @@ -617,6 +617,7 @@ typedef int (*RBinGetOffset)(RBin *bin, int type, int idx); typedef const char *(*RBinGetName)(RBin *bin, int type, int idx, bool sd); typedef RList *(*RBinGetSections)(RBin *bin); typedef RBinSection *(*RBinGetSectionAt)(RBin *bin, ut64 addr); +typedef char *(*RBinDemangle)(RBinFile *bf, const char *def, const char *str, ut64 vaddr, bool libs); typedef struct r_bin_bind_t { RBin *bin; @@ -624,6 +625,7 @@ typedef struct r_bin_bind_t { RBinGetName get_name; RBinGetSections get_sections; RBinGetSectionAt get_vsect_at; + RBinDemangle demangle; ut32 visibility; } RBinBind;