Support UTF32-BE decoding (#15472) ##bin

This commit is contained in:
Khairul Azhar Kasmiran 2019-11-16 18:27:07 +08:00 committed by radare
parent ac84c4ee2f
commit c4e80f8c6d
7 changed files with 54 additions and 25 deletions

View File

@ -3122,7 +3122,7 @@ R_API int r_core_config_init(RCore *core) {
SETICB ("bin.maxstrbuf", 1024*1024*10, & cb_binmaxstrbuf, "Maximum size of range to load strings from");
n = NODECB ("bin.str.enc", "guess", &cb_binstrenc);
SETDESC (n, "Default string encoding of binary");
SETOPTIONS (n, "latin1", "utf8", "utf16le", "utf32le", "utf16be", "guess", NULL);
SETOPTIONS (n, "latin1", "utf8", "utf16le", "utf32le", "utf16be", "utf32be", "guess", NULL);
SETCB ("bin.prefix", NULL, &cb_binprefix, "Prefix all symbols/sections/relocs with a specific string");
SETCB ("bin.rawstr", "false", &cb_rawstr, "Load strings from raw binaries");
SETCB ("bin.strings", "true", &cb_binstrings, "Load strings from rbin on startup");

View File

@ -746,6 +746,8 @@ static RDisasmState * ds_init(RCore *core) {
ds->strenc = R_STRING_ENC_UTF32LE;
} else if (!strcmp (strenc_str, "utf16be")) {
ds->strenc = R_STRING_ENC_UTF16BE;
} else if (!strcmp (strenc_str, "utf32be")) {
ds->strenc = R_STRING_ENC_UTF32BE;
} else {
ds->strenc = R_STRING_ENC_GUESS;
}
@ -3720,6 +3722,10 @@ static char *ds_esc_str(RDisasmState *ds, const char *str, int len, const char *
escstr = r_str_escape_utf16be (str, len, ds->show_asciidot, esc_bslash);
prefix = "ub";
break;
case R_STRING_ENC_UTF32BE:
escstr = r_str_escape_utf32be (str, len, ds->show_asciidot, esc_bslash);
prefix = "Ub";
break;
default:
str_len = strlen (str);
if ((str_len == 1 && len > 3 && str[2] && !str[3])

View File

@ -15,6 +15,7 @@ typedef enum {
R_STRING_ENC_UTF16LE = 'u',
R_STRING_ENC_UTF32LE = 'U',
R_STRING_ENC_UTF16BE = 'b',
R_STRING_ENC_UTF32BE = 'B',
R_STRING_ENC_GUESS = 'g',
} RStrEnc;
@ -155,6 +156,7 @@ R_API char *r_str_escape_utf8(const char *buf, bool show_asciidot, bool esc_bsla
R_API char *r_str_escape_utf16le(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash);
R_API char *r_str_escape_utf32le(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash);
R_API char *r_str_escape_utf16be(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash);
R_API char *r_str_escape_utf32be(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash);
R_API void r_str_byte_escape(const char *p, char **dst, int dot_nl, bool default_dot, bool esc_bslash);
R_API void r_str_uri_decode(char *buf);
R_API char *r_str_uri_encode(const char *buf);

View File

@ -4,6 +4,8 @@
/* For RRune definition */
#include "r_utf8.h"
R_API int r_utf32_decode(const ut8 *ptr, int ptrlen, RRune *ch, bool bigendian);
R_API int r_utf32le_decode(const ut8 *ptr, int ptrlen, RRune *ch);
/* Big-endian counterpart of r_utf32le_decode(): decodes a UTF-32BE buffer into *ch */
R_API int r_utf32be_decode(const ut8 *ptr, int ptrlen, RRune *ch);
#endif // R_UTF32_H

View File

@ -1349,6 +1349,7 @@ static char *r_str_escape_utf(const char *buf, int buf_size, RStrEnc enc, bool s
case R_STRING_ENC_UTF16LE:
case R_STRING_ENC_UTF16BE:
case R_STRING_ENC_UTF32LE:
case R_STRING_ENC_UTF32BE:
if (buf_size < 0) {
return NULL;
}
@ -1378,10 +1379,11 @@ static char *r_str_escape_utf(const char *buf, int buf_size, RStrEnc enc, bool s
case R_STRING_ENC_UTF16LE:
case R_STRING_ENC_UTF16BE:
case R_STRING_ENC_UTF32LE:
case R_STRING_ENC_UTF32BE:
if (enc == R_STRING_ENC_UTF16LE || enc == R_STRING_ENC_UTF16BE) {
ch_bytes = r_utf16_decode ((ut8 *)p, end - p, &ch, enc == R_STRING_ENC_UTF16BE);
} else {
ch_bytes = r_utf32le_decode ((ut8 *)p, end - p, &ch);
ch_bytes = r_utf32_decode ((ut8 *)p, end - p, &ch, enc == R_STRING_ENC_UTF32BE);
}
if (ch_bytes == 0) {
p++;
@ -1404,7 +1406,8 @@ static char *r_str_escape_utf(const char *buf, int buf_size, RStrEnc enc, bool s
*q++ = "0123456789abcdef"[ch >> 4 * i & 0xf];
}
} else {
r_str_byte_escape (p + (enc == R_STRING_ENC_UTF16BE), &q, false, false, esc_bslash);
int offset = enc == R_STRING_ENC_UTF16BE ? 1 : enc == R_STRING_ENC_UTF32BE ? 3 : 0;
r_str_byte_escape (p + offset, &q, false, false, esc_bslash);
}
switch (enc) {
case R_STRING_ENC_UTF16LE:
@ -1412,6 +1415,7 @@ static char *r_str_escape_utf(const char *buf, int buf_size, RStrEnc enc, bool s
p += ch_bytes < 2 ? 2 : ch_bytes;
break;
case R_STRING_ENC_UTF32LE:
case R_STRING_ENC_UTF32BE:
p += 4;
break;
default:
@ -1438,6 +1442,10 @@ R_API char *r_str_escape_utf16be(const char *buf, int buf_size, bool show_asciid
return r_str_escape_utf (buf, buf_size, R_STRING_ENC_UTF16BE, show_asciidot, esc_bslash);
}
/* Escape a UTF-32BE buffer of buf_size bytes; forwards to r_str_escape_utf() with the UTF-32BE encoding selected */
R_API char *r_str_escape_utf32be(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash) {
	const RStrEnc enc = R_STRING_ENC_UTF32BE;
	return r_str_escape_utf (buf, buf_size, enc, show_asciidot, esc_bslash);
}
// JSON has special escaping requirements
// TODO: merge with r_str_escape_utf() and r_str_byte_escape() using RStrEsc
R_API char *r_str_escape_utf8_for_json(const char *buf, int buf_size) {

View File

@ -3,34 +3,43 @@
#include <r_types.h>
#include <r_util.h>
/* Convert an UTF-32LE buf into a unicode RRune */
R_API int r_utf32le_decode(const ut8 *ptr, int ptrlen, RRune *ch) {
/* Convert an UTF-32 buf into a unicode RRune */
R_API int r_utf32_decode(const ut8 *ptr, int ptrlen, RRune *ch, bool bigendian) {
if (ptrlen < 1) {
return 0;
}
int low = 0;
int high = 3;
if (bigendian) {
low = 3;
high = 0;
}
if (ptrlen > 3) {
if (ptr[3]) {
if (ch) {
*ch = (ut32)ptr[3] << 24 | (ut32)ptr[2] << 16 | (ut32)ptr[1] << 8 | ptr[0];
}
return 4;
}
if (ptr[2]) {
if (ch) {
*ch = (ut32)ptr[2] << 16 | (ut32)ptr[1] << 8 | ptr[0];
}
return 4;
}
if (ptr[1]) {
if (ch) {
*ch = (ut32)ptr[1] << 8 | ptr[0];
}
return 2;
}
int sign = bigendian ? -1 : 1;
if (ch) {
*ch = (ut32)ptr[0];
int i;
*ch = (ut32)ptr[low];
for (i = 1; i < 4; i++) {
*ch |= (ut32)ptr[3 - high + i * sign] << 8 * i;
}
}
if (ptr[high] || ptr[high - 1 * sign]) {
return 4;
}
if (ptr[low + 1 * sign]) {
return 2;
}
return 1;
}
return 0;
}
/* Convert a UTF-32LE buf into a unicode RRune.
 * Thin wrapper: calls r_utf32_decode() with bigendian = false and returns its result. */
R_API int r_utf32le_decode(const ut8 *ptr, int ptrlen, RRune *ch) {
	const bool bigendian = false;
	return r_utf32_decode (ptr, ptrlen, ch, bigendian);
}
/* Convert a UTF-32BE buf into a unicode RRune.
 * Thin wrapper: calls r_utf32_decode() with bigendian = true and returns its result. */
R_API int r_utf32be_decode(const ut8 *ptr, int ptrlen, RRune *ch) {
	const bool bigendian = true;
	return r_utf32_decode (ptr, ptrlen, ch, bigendian);
}

View File

@ -791,7 +791,9 @@ R_API RStrEnc r_utf_bom_encoding(const ut8 *ptr, int ptrlen) {
if (ptr[0] == 0xff && ptr[1] == 0xfe && !ptr[2] && !ptr[3]) {
return R_STRING_ENC_UTF32LE;
}
/* TODO: R_STRING_ENC_UTF32BE */
if (!ptr[0] && !ptr[1] && ptr[2] == 0xfe && ptr[3] == 0xff) {
return R_STRING_ENC_UTF32BE;
}
}
if (ptrlen > 2) {
if (ptr[0] == 0xef && ptr[1] == 0xbb && ptr[2] == 0xbf) {