Escape broken UTF-8 in Filename (#16351)

This commit is contained in:
Florian Märkl 2020-03-29 21:00:17 +02:00 committed by GitHub
parent 2c7ecc9736
commit b9538189cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 29 additions and 16 deletions

View File

@ -289,7 +289,11 @@ static void r_core_file_info(RCore *core, int mode) {
pair ("fd", sdb_fmt ("%d", desc->fd));
}
if (fn || (desc && desc->uri)) {
pair ("file", fn? fn: desc->uri);
char *escaped = r_str_escape_utf8_keep_printable (fn? fn: desc->uri, false, false);
if (escaped) {
pair ("file", escaped);
free (escaped);
}
}
if (desc) {
ut64 fsz = r_io_desc_size (desc);

View File

@ -149,6 +149,7 @@ R_API char *r_str_escape(const char *buf);
R_API char *r_str_escape_dot(const char *buf);
R_API char *r_str_escape_latin1(const char *buf, bool show_asciidot, bool esc_bslash, bool colors);
R_API char *r_str_escape_utf8(const char *buf, bool show_asciidot, bool esc_bslash);
R_API char *r_str_escape_utf8_keep_printable(const char *buf, bool show_asciidot, bool esc_bslash); // like r_str_escape_utf8, but a decoded multi-byte code point is re-encoded as UTF-8 in the output instead of being written as \uXXXX / \UXXXXXXXX
R_API char *r_str_escape_utf16le(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash);
R_API char *r_str_escape_utf32le(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash);
R_API char *r_str_escape_utf16be(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash);

View File

@ -1351,7 +1351,7 @@ R_API char *r_str_escape_latin1(const char *buf, bool show_asciidot, bool esc_bs
return r_str_escape_ (buf, false, colors, !colors, show_asciidot, esc_bslash);
}
static char *r_str_escape_utf(const char *buf, int buf_size, RStrEnc enc, bool show_asciidot, bool esc_bslash) {
static char *r_str_escape_utf(const char *buf, int buf_size, RStrEnc enc, bool show_asciidot, bool esc_bslash, bool keep_printable) {
char *new_buf, *q;
const char *p, *end;
RRune ch;
@ -1414,11 +1414,15 @@ static char *r_str_escape_utf(const char *buf, int buf_size, RStrEnc enc, bool s
if (show_asciidot && !IS_PRINTABLE(ch)) {
*q++ = '.';
} else if (ch_bytes > 1) {
*q++ = '\\';
*q++ = ch_bytes == 4 ? 'U' : 'u';
for (i = ch_bytes == 4 ? 6 : 2; i >= 0; i -= 2) {
*q++ = "0123456789abcdef"[ch >> 4 * (i + 1) & 0xf];
*q++ = "0123456789abcdef"[ch >> 4 * i & 0xf];
if (keep_printable) {
q += r_utf8_encode ((ut8 *)q, ch);
} else {
*q++ = '\\';
*q++ = ch_bytes == 4 ? 'U' : 'u';
for (i = ch_bytes == 4 ? 6 : 2; i >= 0; i -= 2) {
*q++ = "0123456789abcdef"[ch >> 4 * (i + 1) & 0xf];
*q++ = "0123456789abcdef"[ch >> 4 * i & 0xf];
}
}
} else {
int offset = enc == R_STRING_ENC_UTF16BE ? 1 : enc == R_STRING_ENC_UTF32BE ? 3 : 0;
@ -1442,23 +1446,27 @@ static char *r_str_escape_utf(const char *buf, int buf_size, RStrEnc enc, bool s
}
// Escape a UTF-8 string by forwarding to r_str_escape_utf with
// R_STRING_ENC_UTF8 and -1 as buf_size (presumably "NUL-terminated"; confirm
// against r_str_escape_utf).
// NOTE(review): this listing interleaves the removed and added lines of a
// diff. The first return appears to be the pre-change 5-argument call and the
// second its replacement, which passes keep_printable=false so multi-byte
// code points are still written as \uXXXX / \UXXXXXXXX.
R_API char *r_str_escape_utf8(const char *buf, bool show_asciidot, bool esc_bslash) {
return r_str_escape_utf (buf, -1, R_STRING_ENC_UTF8, show_asciidot, esc_bslash);
return r_str_escape_utf (buf, -1, R_STRING_ENC_UTF8, show_asciidot, esc_bslash, false);
}
// UTF-8 escaping variant that asks r_str_escape_utf to keep multi-byte code
// points as UTF-8 in the result rather than turning them into \u / \U escapes.
// Returns whatever r_str_escape_utf returns; the caller in r_core_file_info
// checks it for NULL and frees it.
R_API char *r_str_escape_utf8_keep_printable(const char *buf, bool show_asciidot, bool esc_bslash) {
	const bool keep_printable = true;
	char *escaped = r_str_escape_utf (buf, -1, R_STRING_ENC_UTF8, show_asciidot, esc_bslash, keep_printable);
	return escaped;
}
// Escape a UTF-16LE buffer: forwards buf and buf_size to r_str_escape_utf
// with R_STRING_ENC_UTF16LE.
// NOTE(review): this listing interleaves the removed and added lines of a
// diff. The first return appears to be the pre-change 5-argument call and the
// second its replacement, which passes keep_printable=false.
R_API char *r_str_escape_utf16le(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash) {
return r_str_escape_utf (buf, buf_size, R_STRING_ENC_UTF16LE, show_asciidot, esc_bslash);
return r_str_escape_utf (buf, buf_size, R_STRING_ENC_UTF16LE, show_asciidot, esc_bslash, false);
}
// Escape a UTF-32LE buffer: forwards buf and buf_size to r_str_escape_utf
// with R_STRING_ENC_UTF32LE.
// NOTE(review): this listing interleaves the removed and added lines of a
// diff. The first return appears to be the pre-change 5-argument call and the
// second its replacement, which passes keep_printable=false.
R_API char *r_str_escape_utf32le(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash) {
return r_str_escape_utf (buf, buf_size, R_STRING_ENC_UTF32LE, show_asciidot, esc_bslash);
return r_str_escape_utf (buf, buf_size, R_STRING_ENC_UTF32LE, show_asciidot, esc_bslash, false);
}
// Escape a UTF-16BE buffer: forwards buf and buf_size to r_str_escape_utf
// with R_STRING_ENC_UTF16BE.
// NOTE(review): this listing interleaves the removed and added lines of a
// diff. The first return appears to be the pre-change 5-argument call and the
// second its replacement, which passes keep_printable=false.
R_API char *r_str_escape_utf16be(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash) {
return r_str_escape_utf (buf, buf_size, R_STRING_ENC_UTF16BE, show_asciidot, esc_bslash);
return r_str_escape_utf (buf, buf_size, R_STRING_ENC_UTF16BE, show_asciidot, esc_bslash, false);
}
// Escape a UTF-32BE buffer: forwards buf and buf_size to r_str_escape_utf
// with R_STRING_ENC_UTF32BE.
// NOTE(review): this listing interleaves the removed and added lines of a
// diff. The first return appears to be the pre-change 5-argument call and the
// second its replacement, which passes keep_printable=false.
R_API char *r_str_escape_utf32be(const char *buf, int buf_size, bool show_asciidot, bool esc_bslash) {
return r_str_escape_utf (buf, buf_size, R_STRING_ENC_UTF32BE, show_asciidot, esc_bslash);
return r_str_escape_utf (buf, buf_size, R_STRING_ENC_UTF32BE, show_asciidot, esc_bslash, false);
}
// JSON has special escaping requirements

View File

@ -10,9 +10,9 @@ e io.cache=true
# INVALID FILENAME .(show_fname B\x1b¢\302\200\200€𝄞\363\240\201\201\\.bin)
EOF
EXPECT=<<EOF
file A<EFBFBD>€€𝄞󠁁\.bin
file A\x1b¢€€𝄞󠁁\.bin
{"core":{"type":"","file":"A\u001b¢\u0080€𝄞\udb40\udc41\\.bin","fd":4,"size":256,"humansz":"256","iorw":true,"mode":"r-x","obsz":0,"block":256,"format":"any"}}
file B<EFBFBD>€€𝄞󠁁\.bin
file B\x1b¢€€𝄞󠁁\.bin
{"core":{"type":"","file":"B\u001b¢\u0080€𝄞\udb40\udc41\\.bin","fd":5,"size":256,"humansz":"256","iorw":true,"mode":"r-x","obsz":0,"block":256,"format":"any"}}
EOF
RUN

View File

@ -10,9 +10,9 @@ e io.cache=true
# INVALID FILENAME .(show_fname B\x1b¢\302\200\200€𝄞\363\240\201\201\\.bin)
EOF
EXPECT=<<EOF
file A<EFBFBD>€€𝄞󠁁\.bin
file A\x1b¢€€𝄞󠁁\.bin
{"core":{"type":"","file":"A\u001b¢\u0080€𝄞\udb40\udc41\\.bin","fd":4,"size":256,"humansz":"256","iorw":true,"mode":"r-x","obsz":0,"block":256,"format":"any"}}
file B<EFBFBD>€€𝄞󠁁\.bin
file B\x1b¢€€𝄞󠁁\.bin
{"core":{"type":"","file":"B\u001b¢\u0080€𝄞\udb40\udc41\\.bin","fd":5,"size":256,"humansz":"256","iorw":true,"mode":"r-x","obsz":0,"block":256,"format":"any"}}
EOF
RUN