Fix rafind2 -S for UTF8 chars > U+00ff (#12866)

* Fix rafind2 -S for UTF8 chars > U+00ff

* Use '< 1' instead of '== 0'

* Remove unnecessary else

* Add a 0
This commit is contained in:
Khairul Azhar Kasmiran 2019-01-22 16:43:10 +08:00 committed by radare
parent b9f4e09b31
commit 01d5dfe306
4 changed files with 40 additions and 7 deletions

View File

@ -97,7 +97,7 @@ static int show_help(char *argv0, int line) {
" -n do not stop on read errors\n"
" -r print using radare commands\n"
" -s [str] search for a specific string (can be used multiple times)\n"
" -S [str] search for a specific wide string (can be used multiple times)\n"
" -S [str] search for a specific wide string (can be used multiple times). Assumes str is UTF-8.\n"
" -t [to] stop search at address 'to'\n"
" -q quiet - do not show headings (filenames) above matching contents (default for searching a single file)\n"
" -v print version and exit\n"

View File

@ -5,5 +5,6 @@
#include "r_utf8.h"
R_API int r_utf16le_decode(const ut8 *ptr, int ptrlen, RRune *ch);
R_API int r_utf16le_encode(ut8 *ptr, RRune ch);
#endif // R_UTF16_H

View File

@ -99,16 +99,27 @@ R_API RSearchKeyword* r_search_keyword_new_wide(const char *kwbuf, const char *b
len = strlen(kwbuf);
str = malloc((len+1)*2);
for (p2=kwbuf, p=str; *p2; p+=2, p2++) {
if (ignore_case) {
p[0] = tolower((const unsigned char)*p2);
} else {
for (p2=kwbuf, p=str; *p2; ) {
RRune ch;
int num_utf8_bytes = r_utf8_decode ((const ut8 *)p2, kwbuf + len - p2, &ch);
if (num_utf8_bytes < 1) {
eprintf ("WARNING: Malformed UTF8 at pos %td\n", p2 - kwbuf);
p[0] = *p2;
p[1] = 0;
p2++;
p += 2;
continue;
}
p[1] = 0;
if (ignore_case && ch <= 0xff) {
ch = tolower (ch);
}
int num_wide_bytes = r_utf16le_encode ((ut8 *)p, ch);
r_warn_if_fail (num_wide_bytes != 0);
p2 += num_utf8_bytes;
p += num_wide_bytes;
}
kw = r_search_keyword_new ((ut8 *)str, len*2, bmbuf, bmlen, data);
kw = r_search_keyword_new ((ut8 *)str, p - str, bmbuf, bmlen, data);
free(str);
if (kw) {
kw->icase = ignore_case;

View File

@ -28,3 +28,24 @@ R_API int r_utf16le_decode(const ut8 *ptr, int ptrlen, RRune *ch) {
}
return 0;
}
/*
 * Encode the code point `ch` as UTF-16LE into the buffer at `ptr`.
 *
 * Each 16-bit code unit is stored low byte first.
 *   - ch < 0x10000:  one code unit is written (2 bytes). No surrogate-range
 *     filtering is done here; the low 16 bits of `ch` are stored as-is.
 *   - ch < 0x110000: a high/low surrogate pair is written (4 bytes).
 *   - otherwise:     nothing is written.
 *
 * The caller must provide room for up to 4 bytes at `ptr`.
 *
 * Returns the number of bytes written (2 or 4), or 0 when `ch` is not
 * encodable.
 */
R_API int r_utf16le_encode(ut8 *ptr, RRune ch) {
	/* Beyond the last code point representable in UTF-16. */
	if (ch >= 0x110000) {
		return 0;
	}
	if (ch >= 0x10000) {
		/* Supplementary plane: split the 20-bit offset into two 10-bit halves. */
		const RRune offset = ch - 0x10000;
		const RRune lead = 0xd800 + ((offset >> 10) & 0x3ff);
		const RRune trail = 0xdc00 + (offset & 0x3ff);
		ptr[0] = lead & 0xff;
		ptr[1] = (lead >> 8) & 0xff;
		ptr[2] = trail & 0xff;
		ptr[3] = (trail >> 8) & 0xff;
		return 4;
	}
	/* Single 16-bit code unit. */
	ptr[0] = ch & 0xff;
	ptr[1] = (ch >> 8) & 0xff;
	return 2;
}