mirror of
https://github.com/radareorg/radare2.git
synced 2025-01-26 07:44:29 +00:00
Support UTF32-BE decoding (#15472) ##bin
This commit is contained in:
parent
ac84c4ee2f
commit
c4e80f8c6d
@ -3122,7 +3122,7 @@ R_API int r_core_config_init(RCore *core) {
|
||||
SETICB ("bin.maxstrbuf", 1024*1024*10, & cb_binmaxstrbuf, "Maximum size of range to load strings from");
|
||||
n = NODECB ("bin.str.enc", "guess", &cb_binstrenc);
|
||||
SETDESC (n, "Default string encoding of binary");
|
||||
SETOPTIONS (n, "latin1", "utf8", "utf16le", "utf32le", "utf16be", "guess", NULL);
|
||||
SETOPTIONS (n, "latin1", "utf8", "utf16le", "utf32le", "utf16be", "utf32be", "guess", NULL);
|
||||
SETCB ("bin.prefix", NULL, &cb_binprefix, "Prefix all symbols/sections/relocs with a specific string");
|
||||
SETCB ("bin.rawstr", "false", &cb_rawstr, "Load strings from raw binaries");
|
||||
SETCB ("bin.strings", "true", &cb_binstrings, "Load strings from rbin on startup");
|
||||
|
@ -746,6 +746,8 @@ static RDisasmState * ds_init(RCore *core) {
|
||||
ds->strenc = R_STRING_ENC_UTF32LE;
|
||||
} else if (!strcmp (strenc_str, "utf16be")) {
|
||||
ds->strenc = R_STRING_ENC_UTF16BE;
|
||||
} else if (!strcmp (strenc_str, "utf32be")) {
|
||||
ds->strenc = R_STRING_ENC_UTF32BE;
|
||||
} else {
|
||||
ds->strenc = R_STRING_ENC_GUESS;
|
||||
}
|
||||
@ -3720,6 +3722,10 @@ static char *ds_esc_str(RDisasmState *ds, const char *str, int len, const char *
|
||||
escstr = r_str_escape_utf16be (str, len, ds->show_asciidot, esc_bslash);
|
||||
prefix = "ub";
|
||||
break;
|
||||
case R_STRING_ENC_UTF32BE:
|
||||
escstr = r_str_escape_utf32be (str, len, ds->show_asciidot, esc_bslash);
|
||||
prefix = "Ub";
|
||||
break;
|
||||
default:
|
||||
str_len = strlen (str);
|
||||
if ((str_len == 1 && len > 3 && str[2] && !str[3])
|
||||
|
@ -15,6 +15,7 @@ typedef enum {
|
||||
R_STRING_ENC_UTF16LE = 'u',
|
||||
R_STRING_ENC_UTF32LE = 'U',
|
||||
R_STRING_ENC_UTF16BE = 'b',
|
||||
R_STRING_ENC_UTF32BE = 'B',
|
||||
R_STRING_ENC_GUESS = 'g',
|
||||
} RStrEnc;
|
||||
|
||||
@ -155,6 +156,7 @@ R_API char *r_str_escape_utf8(const char *buf, bool show_asciidot, bool esc_bsla
|
||||
R_API char *r_str_escape_utf16le(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash);
|
||||
R_API char *r_str_escape_utf32le(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash);
|
||||
R_API char *r_str_escape_utf16be(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash);
|
||||
R_API char *r_str_escape_utf32be(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash);
|
||||
R_API void r_str_byte_escape(const char *p, char **dst, int dot_nl, bool default_dot, bool esc_bslash);
|
||||
R_API void r_str_uri_decode(char *buf);
|
||||
R_API char *r_str_uri_encode(const char *buf);
|
||||
|
@ -4,6 +4,8 @@
|
||||
/* For RRune definition */
|
||||
#include "r_utf8.h"
|
||||
|
||||
R_API int r_utf32_decode(const ut8 *ptr, int ptrlen, RRune *ch, bool bigendian);
|
||||
R_API int r_utf32le_decode(const ut8 *ptr, int ptrlen, RRune *ch);
|
||||
/* Big-endian counterpart of r_utf32le_decode */
R_API int r_utf32be_decode(const ut8 *ptr, int ptrlen, RRune *ch);
|
||||
|
||||
#endif // R_UTF32_H
|
||||
|
@ -1349,6 +1349,7 @@ static char *r_str_escape_utf(const char *buf, int buf_size, RStrEnc enc, bool s
|
||||
case R_STRING_ENC_UTF16LE:
|
||||
case R_STRING_ENC_UTF16BE:
|
||||
case R_STRING_ENC_UTF32LE:
|
||||
case R_STRING_ENC_UTF32BE:
|
||||
if (buf_size < 0) {
|
||||
return NULL;
|
||||
}
|
||||
@ -1378,10 +1379,11 @@ static char *r_str_escape_utf(const char *buf, int buf_size, RStrEnc enc, bool s
|
||||
case R_STRING_ENC_UTF16LE:
|
||||
case R_STRING_ENC_UTF16BE:
|
||||
case R_STRING_ENC_UTF32LE:
|
||||
case R_STRING_ENC_UTF32BE:
|
||||
if (enc == R_STRING_ENC_UTF16LE || enc == R_STRING_ENC_UTF16BE) {
|
||||
ch_bytes = r_utf16_decode ((ut8 *)p, end - p, &ch, enc == R_STRING_ENC_UTF16BE);
|
||||
} else {
|
||||
ch_bytes = r_utf32le_decode ((ut8 *)p, end - p, &ch);
|
||||
ch_bytes = r_utf32_decode ((ut8 *)p, end - p, &ch, enc == R_STRING_ENC_UTF32BE);
|
||||
}
|
||||
if (ch_bytes == 0) {
|
||||
p++;
|
||||
@ -1404,7 +1406,8 @@ static char *r_str_escape_utf(const char *buf, int buf_size, RStrEnc enc, bool s
|
||||
*q++ = "0123456789abcdef"[ch >> 4 * i & 0xf];
|
||||
}
|
||||
} else {
|
||||
r_str_byte_escape (p + (enc == R_STRING_ENC_UTF16BE), &q, false, false, esc_bslash);
|
||||
int offset = enc == R_STRING_ENC_UTF16BE ? 1 : enc == R_STRING_ENC_UTF32BE ? 3 : 0;
|
||||
r_str_byte_escape (p + offset, &q, false, false, esc_bslash);
|
||||
}
|
||||
switch (enc) {
|
||||
case R_STRING_ENC_UTF16LE:
|
||||
@ -1412,6 +1415,7 @@ static char *r_str_escape_utf(const char *buf, int buf_size, RStrEnc enc, bool s
|
||||
p += ch_bytes < 2 ? 2 : ch_bytes;
|
||||
break;
|
||||
case R_STRING_ENC_UTF32LE:
|
||||
case R_STRING_ENC_UTF32BE:
|
||||
p += 4;
|
||||
break;
|
||||
default:
|
||||
@ -1438,6 +1442,10 @@ R_API char *r_str_escape_utf16be(const char *buf, int buf_size, bool show_asciid
|
||||
return r_str_escape_utf (buf, buf_size, R_STRING_ENC_UTF16BE, show_asciidot, esc_bslash);
|
||||
}
|
||||
|
||||
R_API char *r_str_escape_utf32be(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash) {
|
||||
return r_str_escape_utf (buf, buf_size, R_STRING_ENC_UTF32BE, show_asciidot, esc_bslash);
|
||||
}
|
||||
|
||||
// JSON has special escaping requirements
|
||||
// TODO: merge with r_str_escape_utf() and r_str_byte_escape() using RStrEsc
|
||||
R_API char *r_str_escape_utf8_for_json(const char *buf, int buf_size) {
|
||||
|
@ -3,34 +3,43 @@
|
||||
#include <r_types.h>
|
||||
#include <r_util.h>
|
||||
|
||||
/* Convert an UTF-32LE buf into a unicode RRune */
|
||||
R_API int r_utf32le_decode(const ut8 *ptr, int ptrlen, RRune *ch) {
|
||||
/* Convert an UTF-32 buf into a unicode RRune */
|
||||
R_API int r_utf32_decode(const ut8 *ptr, int ptrlen, RRune *ch, bool bigendian) {
|
||||
if (ptrlen < 1) {
|
||||
return 0;
|
||||
}
|
||||
int low = 0;
|
||||
int high = 3;
|
||||
if (bigendian) {
|
||||
low = 3;
|
||||
high = 0;
|
||||
}
|
||||
if (ptrlen > 3) {
|
||||
if (ptr[3]) {
|
||||
if (ch) {
|
||||
*ch = (ut32)ptr[3] << 24 | (ut32)ptr[2] << 16 | (ut32)ptr[1] << 8 | ptr[0];
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
if (ptr[2]) {
|
||||
if (ch) {
|
||||
*ch = (ut32)ptr[2] << 16 | (ut32)ptr[1] << 8 | ptr[0];
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
if (ptr[1]) {
|
||||
if (ch) {
|
||||
*ch = (ut32)ptr[1] << 8 | ptr[0];
|
||||
}
|
||||
return 2;
|
||||
}
|
||||
int sign = bigendian ? -1 : 1;
|
||||
if (ch) {
|
||||
*ch = (ut32)ptr[0];
|
||||
int i;
|
||||
*ch = (ut32)ptr[low];
|
||||
for (i = 1; i < 4; i++) {
|
||||
*ch |= (ut32)ptr[3 - high + i * sign] << 8 * i;
|
||||
}
|
||||
}
|
||||
if (ptr[high] || ptr[high - 1 * sign]) {
|
||||
return 4;
|
||||
}
|
||||
if (ptr[low + 1 * sign]) {
|
||||
return 2;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Convert an UTF-32LE buf into a unicode RRune */
|
||||
R_API int r_utf32le_decode(const ut8 *ptr, int ptrlen, RRune *ch) {
|
||||
return r_utf32_decode (ptr, ptrlen, ch, false);
|
||||
}
|
||||
|
||||
/* Convert an UTF-32BE buf into a unicode RRune */
|
||||
R_API int r_utf32be_decode(const ut8 *ptr, int ptrlen, RRune *ch) {
|
||||
return r_utf32_decode (ptr, ptrlen, ch, true);
|
||||
}
|
||||
|
@ -791,7 +791,9 @@ R_API RStrEnc r_utf_bom_encoding(const ut8 *ptr, int ptrlen) {
|
||||
if (ptr[0] == 0xff && ptr[1] == 0xfe && !ptr[2] && !ptr[3]) {
|
||||
return R_STRING_ENC_UTF32LE;
|
||||
}
|
||||
/* UTF-32BE byte order mark: 00 00 FE FF */
|
||||
if (!ptr[0] && !ptr[1] && ptr[2] == 0xfe && ptr[3] == 0xff) {
|
||||
return R_STRING_ENC_UTF32BE;
|
||||
}
|
||||
}
|
||||
if (ptrlen > 2) {
|
||||
if (ptr[0] == 0xef && ptr[1] == 0xbb && ptr[2] == 0xbf) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user