simplify the search engine

This commit is contained in:
LemonBoy 2014-06-03 22:23:08 +02:00 committed by pancake
parent 315e8866e1
commit 9a9c5e750a
5 changed files with 142 additions and 128 deletions

View File

@ -557,9 +557,6 @@ static int cmd_search(void *data, const char *input) {
ignorecase = R_TRUE;
case ' ': /* search string */
inp = strdup (input+1+ignorecase);
if (ignorecase)
for (i=1; inp[i]; i++)
inp[i] = tolower (inp[i]);
len = r_str_unescape (inp);
eprintf ("Searching %d bytes from 0x%08"PFMT64x" to 0x%08"PFMT64x": ", len, from, to);
for (i=0; i<len; i++) eprintf ("%02x ", (ut8)inp[i]);

View File

@ -29,8 +29,6 @@ enum {
#define R_SEARCH_KEYWORD_TYPE_STRING 's'
typedef struct r_search_keyword_t {
char keyword[128];
char binmask[128];
ut8 bin_keyword[128];
ut8 bin_binmask[128];
ut32 keyword_length;
@ -101,6 +99,8 @@ R_API int r_search_range_set(RSearch *s, ut64 from, ut64 to);
R_API int r_search_range_reset(RSearch *s);
R_API int r_search_set_blocksize(RSearch *s, ut32 bsize);
R_API int r_search_bmh(const RSearchKeyword *kw, const ut64 from, const ut8 *buf, const int len, ut64 *out);
// TODO: is this an internal API?
R_API int r_search_mybinparse_update(void *s, ut64 from, const ut8 *buf, int len);
R_API int r_search_aes_update(void *s, ut64 from, const ut8 *buf, int len);

View File

@ -2,71 +2,91 @@
#include <r_search.h>
/**
 * Create a binary search keyword from a raw byte buffer and an optional mask.
 *
 * @param kwbuf  bytes to search for; must not be NULL
 * @param kwlen  number of bytes in kwbuf; must be at least 1 and smaller
 *               than the bin_keyword array
 * @param bmbuf  optional mask bytes, may be NULL
 * @param bmlen  number of bytes in bmbuf; must be non-negative and smaller
 *               than the bin_binmask array
 * @param data   not used by this function
 * @return a newly allocated keyword of type R_SEARCH_KEYWORD_TYPE_BINARY,
 *         or NULL on invalid arguments or allocation failure
 */
R_API RSearchKeyword* r_search_keyword_new(const ut8 *kwbuf, int kwlen, const ut8 *bmbuf, int bmlen, const char *data) {
	RSearchKeyword *kw;

	/* Validate the input buffer, not the (still uninitialized) result pointer. */
	if (!kwbuf || kwlen < 1 || bmlen < 0)
		return NULL;
	/*
	 * bin_keyword and bin_binmask are fixed-size arrays, so the lengths must be
	 * bounded before the memcpy calls below. One byte is kept free so that
	 * bin_keyword can still be handed to code that reads it as a string
	 * (the regexp search passes it to r_regex_comp).
	 * sizeof does not evaluate its operand, so using `kw` here is safe.
	 */
	if (kwlen >= (int)sizeof (kw->bin_keyword) || bmlen >= (int)sizeof (kw->bin_binmask))
		return NULL;

	/* NOTE(review): the remaining fields (icase, binmask_length without a mask)
	 * are never assigned here; this presumably relies on R_NEW0 zero-filling
	 * the structure -- confirm. */
	kw = R_NEW0 (RSearchKeyword);
	if (!kw)
		return NULL;
	kw->type = R_SEARCH_KEYWORD_TYPE_BINARY;
	kw->keyword_length = kwlen;
	memcpy (kw->bin_keyword, kwbuf, kwlen);
	if (bmbuf && bmlen > 0) {
		memcpy (kw->bin_binmask, bmbuf, bmlen);
		kw->binmask_length = bmlen;
	}
	return kw;
}
/**
 * Create a string search keyword from a NUL-terminated string and an
 * optional hex-encoded mask.
 *
 * @param kwbuf        NUL-terminated text to search for; must not be NULL
 * @param bmstr        optional mask as a hex string, may be NULL
 * @param data         forwarded unchanged to r_search_keyword_new
 * @param ignore_case  stored in the keyword's icase field
 * @return a newly allocated keyword of type R_SEARCH_KEYWORD_TYPE_STRING,
 *         or NULL if kwbuf is NULL, an allocation fails, or
 *         r_search_keyword_new rejects the arguments
 */
R_API RSearchKeyword* r_search_keyword_new_str(const char *kwbuf, const char *bmstr, const char *data, int ignore_case) {
	RSearchKeyword *kw;
	ut8 *bmbuf = NULL;
	/* Must start at 0: it is passed on even when no mask string was given. */
	int bmlen = 0;

	if (!kwbuf)
		return NULL;
	if (bmstr) {
		/* The decoded mask is written into a buffer sized from the hex text. */
		bmbuf = malloc (strlen (bmstr) + 1);
		if (!bmbuf)
			return NULL;
		bmlen = r_hex_str2bin (bmstr, bmbuf);
		if (bmlen < 1) {
			/* No usable mask bytes: drop the buffer. bmlen is passed through
			 * as returned, so a negative value still makes
			 * r_search_keyword_new reject the request. */
			free (bmbuf);
			bmbuf = NULL;
		}
	}
	kw = r_search_keyword_new ((const ut8 *)kwbuf, strlen (kwbuf), bmbuf, bmlen, data);
	if (kw) {
		kw->icase = ignore_case;
		kw->type = R_SEARCH_KEYWORD_TYPE_STRING;
	}
	/* r_search_keyword_new copies the mask, so the temporary can go. */
	free (bmbuf);
	return kw;
}
/**
 * Create a binary search keyword from a hex-encoded keyword and an optional
 * hex-encoded mask.
 *
 * @param kwstr  keyword as a hex string; must not be NULL and must decode to
 *               at least one byte
 * @param bmstr  optional mask as a hex string; NULL or an empty string means
 *               "no mask"
 * @param data   forwarded unchanged to r_search_keyword_new
 * @return a newly allocated keyword, or NULL if kwstr is NULL, either string
 *         fails to decode, an allocation fails, or r_search_keyword_new
 *         rejects the arguments
 */
R_API RSearchKeyword* r_search_keyword_new_hex(const char *kwstr, const char *bmstr, const char *data) {
	RSearchKeyword *kw = NULL;
	ut8 *kwbuf, *bmbuf = NULL;
	/* bmlen must start at 0: it is passed on even when no mask was given. */
	int kwlen, bmlen = 0;

	if (!kwstr)
		return NULL;
	kwbuf = malloc (strlen (kwstr) + 1);
	if (!kwbuf)
		return NULL;
	kwlen = r_hex_str2bin (kwstr, kwbuf);
	if (kwlen < 1)
		goto beach;

	/* An empty mask string is treated as "no mask" rather than as a
	 * malformed one. */
	if (bmstr && *bmstr) {
		bmbuf = malloc (strlen (bmstr) + 1);
		if (!bmbuf)
			goto beach;
		bmlen = r_hex_str2bin (bmstr, bmbuf);
		if (bmlen < 1)
			goto beach;
	}
	kw = r_search_keyword_new (kwbuf, kwlen, bmbuf, bmlen, data);

beach:
	/* Single cleanup path; free (NULL) is a no-op. */
	free (kwbuf);
	free (bmbuf);
	return kw;
}
R_API RSearchKeyword* r_search_keyword_new_hexmask(const char *kwstr, const char *data) {

View File

@ -20,11 +20,11 @@ R_API int r_search_regexp_update(void *_s, ut64 from, const ut8 *buf, int len) {
RRegexMatch matches[10];
RRegex compiled;
if (strchr (kw->binmask, 'i'))
if (kw->icase)
reflags |= R_REGEX_ICASE;
if (r_regex_comp (&compiled, kw->keyword, reflags)) {
eprintf ("Cannot compile '%s' regexp\n",kw->keyword);
if (r_regex_comp (&compiled, kw->bin_keyword, reflags)) {
eprintf ("Cannot compile '%s' regexp\n",kw->bin_keyword);
free(buffer);
return -1;
}

View File

@ -148,77 +148,74 @@ R_API int r_search_deltakey_update(void *_s, ut64 from, const ut8 *buf, int len)
return count;
}
// TODO: move into a plugin */
// TODO: This algorithm can be simplified by just using a non-distance search
// ... split this algorithm in two for performance
R_API int r_search_mybinparse_update(void *_s, ut64 from, const ut8 *buf, int len) {
RListIter *iter;
int i, j, hit, count = 0;
RSearch *s = (RSearch*)_s;
/* Boyer-Moore-Horspool pattern matching */
R_API int r_search_bmh (const RSearchKeyword *kw, const ut64 from, const ut8 *buf, const int len, ut64 *out) {
ut64 bad_char_shift[UT8_MAX + 1];
ut64 pos = from;
int i, kw_len;
ut8 ch;
for (i=0; i<len; i++) {
RSearchKeyword *kw;
r_list_foreach (s->kws, iter, kw) {
if (s->inverse && s->nhits>0) {
//eprintf ("nhits = %d\n", s->nhits);
return -1;
kw_len = kw->keyword_length - 1;
if (kw_len < 0)
return R_FALSE;
for (i = 0; i < 256; i++)
bad_char_shift[i] = kw->keyword_length;
for (i = 0; i < kw_len; i++) {
ch = kw->bin_keyword[i];
bad_char_shift[kw->icase?tolower(ch):ch] = kw_len - i;
}
while (pos < len) {
for (i = kw_len; ; i--) {
ut8 ch1 = buf[pos + i];
ut8 ch2 = kw->bin_keyword[i];
if (kw->icase) {
ch1 = tolower(ch1);
ch2 = tolower(ch2);
}
for (j=0; j<=kw->distance; j++) {
ut8 ch = kw->bin_keyword[kw->idx[j]];
ut8 ch2 = buf[i];
if (kw->icase) {
ch = tolower (ch);
ch2 = tolower (ch2);
}
if (kw->binmask_length != 0 && kw->idx[j]<kw->binmask_length) {
ch &= kw->bin_binmask[kw->idx[j]];
ch2 &= kw->bin_binmask[kw->idx[j]];
}
if (ch != ch2) {
if (s->inverse) {
if (!r_search_hit_new (s, kw, (ut64)
from+i-kw->keyword_length+1))
return -1;
kw->idx[j] = 0;
//kw->idx[0] = 0;
kw->distance = 0;
//eprintf ("HIT FOUND !!! %x %x 0x%llx %d\n", ch, ch2, from+i, i);
kw->count++;
s->nhits++;
return 1; // only return 1 keyword if inverse mode
}
if (kw->distance<s->distance) {
kw->idx[kw->distance+1] = kw->idx[kw->distance];
kw->distance++;
hit = R_TRUE;
} else {
kw->idx[0] = 0;
kw->distance = 0;
hit = R_FALSE;
}
} else hit = R_TRUE;
if (hit) {
kw->idx[j]++;
if (kw->idx[j] == kw->keyword_length) {
if (s->inverse) {
kw->idx[j] = 0;
continue;
}
if (!r_search_hit_new (s, kw, (ut64)
from+i-kw->keyword_length+1))
return -1;
kw->idx[j] = 0;
//kw->idx[0] = 0;
kw->distance = 0;
kw->count++;
count++;
//s->nhits++;
}
}
if (kw->binmask_length && i < kw->binmask_length) {
ch1 &= kw->bin_binmask[i];
ch2 &= kw->bin_binmask[i];
}
if (ch1 != ch2)
break;
if (i == 0) {
if (out)
*out = pos;
return R_TRUE;
}
}
count = 0;
ch = buf[pos + kw_len];
pos += bad_char_shift[kw->icase?tolower(ch):ch];
}
return R_FALSE;
}
/**
 * Search one buffer for every keyword registered in the search context and
 * record a hit for each occurrence.
 *
 * @param _s    the RSearch context (passed as void *)
 * @param from  address corresponding to buf[0]; added to each match offset
 *              before the hit is recorded
 * @param buf   buffer to scan
 * @param len   number of valid bytes in buf
 * @return the number of hits recorded for this buffer, or -1 if a hit could
 *         not be recorded or if s->inverse is set and a keyword was found
 */
R_API int r_search_mybinparse_update(void *_s, ut64 from, const ut8 *buf, int len) {
	RSearch *s = (RSearch*)_s;
	RListIter *iter;
	RSearchKeyword *kw;
	ut64 match_pos, offset;
	int count = 0;

	r_list_foreach (s->kws, iter, kw) {
		/* Keep searching after each match so that every occurrence inside
		 * the buffer is reported, not just the first one. */
		offset = 0;
		while (r_search_bmh (kw, offset, buf, len, &match_pos)) {
			if (!r_search_hit_new (s, kw, from + match_pos)) {
				eprintf ("Something very bad has happened...\n");
				return -1;
			}
			kw->count++;
			count++;
			/* Stop at the first occurrence */
			if (s->inverse)
				return -1;
			/* Resume right after the matched bytes (non-overlapping hits).
			 * keyword_length is at least 1 whenever a match was returned,
			 * so the offset always advances. */
			offset = match_pos + kw->keyword_length;
		}
	}
	return count;
}