mirror of
https://github.com/radareorg/radare2.git
synced 2024-12-01 00:51:19 +00:00
asm.strenc: guess whether string is utf32le (#8127)
* asm.strenc: guess whether string is utf32le
* Don't guess utf32le if there is a char > 0x10ffff
This commit is contained in:
parent
e82a40ac78
commit
19fa6abbb8
@ -703,6 +703,7 @@ static int cb_asmstrenc (void *user, void *data) {
|
||||
if (node->value[0] == '?') {
|
||||
print_node_options (node);
|
||||
r_cons_printf (" -- if string's 2nd & 4th bytes are 0 then utf16le else "
|
||||
"if 2nd - 4th & 6th bytes are 0 & no char > 0x10ffff then utf32le else "
|
||||
"if utf8 char detected then utf8 else latin1\n");
|
||||
return false;
|
||||
}
|
||||
|
@ -2679,9 +2679,28 @@ static void ds_print_str(RDisasmState *ds, const char *str, int len) {
|
||||
default:
|
||||
str_len = strlen (str);
|
||||
if (str_len == 1 && len > 3 && str[2] && !str[3]) {
|
||||
// could be a utf16le string
|
||||
escstr = r_str_escape_utf16le (str, len, ds->show_asciidot);
|
||||
prefix = "u";
|
||||
} else if (str_len == 1 && len > 7 && !str[2] && !str[3] && str[4] && !str[5]) {
|
||||
RStrEnc enc = R_STRING_ENC_UTF32LE;
|
||||
RRune ch;
|
||||
const char *ptr, *end;
|
||||
end = (const char *)r_mem_mem_aligned ((ut8 *)str, len, (ut8 *)"\0\0\0\0", 4, 4);
|
||||
if (!end) {
|
||||
end = str + len - 1;
|
||||
}
|
||||
for (ptr = str; ptr < end; ptr += 4) {
|
||||
if (r_utf32le_decode ((ut8 *)ptr, end - ptr, &ch) > 0 && ch > 0x10ffff) {
|
||||
enc = R_STRING_ENC_LATIN1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (enc == R_STRING_ENC_UTF32LE) {
|
||||
escstr = r_str_escape_utf32le (str, len, ds->show_asciidot);
|
||||
prefix = "U";
|
||||
} else {
|
||||
escstr = r_str_escape_latin1 (str, ds->show_asciidot);
|
||||
}
|
||||
} else {
|
||||
RStrEnc enc = R_STRING_ENC_LATIN1;
|
||||
const char *ptr = str, *end = str + str_len;
|
||||
|
Loading…
Reference in New Issue
Block a user