asm.strenc: guess whether string is utf32le (#8127)

* asm.strenc: guess whether string is utf32le

* Don't guess utf32le if any char is > 0x10ffff
This commit is contained in:
Khairul Azhar Kasmiran 2017-08-06 09:08:17 +08:00 committed by radare
parent e82a40ac78
commit 19fa6abbb8
2 changed files with 21 additions and 1 deletions

View File

@ -703,6 +703,7 @@ static int cb_asmstrenc (void *user, void *data) {
if (node->value[0] == '?') {
print_node_options (node);
r_cons_printf (" -- if string's 2nd & 4th bytes are 0 then utf16le else "
"if 2nd - 4th & 6th bytes are 0 & no char > 0x10ffff then utf32le else "
"if utf8 char detected then utf8 else latin1\n");
return false;
}

View File

@ -2679,9 +2679,28 @@ static void ds_print_str(RDisasmState *ds, const char *str, int len) {
default:
str_len = strlen (str);
if (str_len == 1 && len > 3 && str[2] && !str[3]) {
// could be a utf16le string
escstr = r_str_escape_utf16le (str, len, ds->show_asciidot);
prefix = "u";
} else if (str_len == 1 && len > 7 && !str[2] && !str[3] && str[4] && !str[5]) {
RStrEnc enc = R_STRING_ENC_UTF32LE;
RRune ch;
const char *ptr, *end;
end = (const char *)r_mem_mem_aligned ((ut8 *)str, len, (ut8 *)"\0\0\0\0", 4, 4);
if (!end) {
end = str + len - 1;
}
for (ptr = str; ptr < end; ptr += 4) {
if (r_utf32le_decode ((ut8 *)ptr, end - ptr, &ch) > 0 && ch > 0x10ffff) {
enc = R_STRING_ENC_LATIN1;
break;
}
}
if (enc == R_STRING_ENC_UTF32LE) {
escstr = r_str_escape_utf32le (str, len, ds->show_asciidot);
prefix = "U";
} else {
escstr = r_str_escape_latin1 (str, ds->show_asciidot);
}
} else {
RStrEnc enc = R_STRING_ENC_LATIN1;
const char *ptr = str, *end = str + str_len;