Add Rabin Karp algorythm to ##search

This commit is contained in:
Dennis Goodlett 2021-12-30 04:30:30 -05:00 committed by GitHub
parent d76bb693f2
commit 2a117d5f4f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 263 additions and 3 deletions

View File

@ -113,6 +113,7 @@ static const char *help_msg_slash[] = {
"/g", "[g] [from]", "find all graph paths A to B (/gg follow jumps, see search.count and anal.depth)",
"/h", "[t] [hash] [len]", "find block matching this hash. See ph",
"/i", " foo", "search for string 'foo' ignoring case",
"/k", " foo", "search for string 'foo' using Rabin Karp alg",
"/m", "[?][ebm] magicfile", "search for magic, filesystems or binary headers",
"/o", " [n]", "show offset of n instructions backward",
"/O", " [n]", "same as /o, but with a different fallback if anal cannot be used",
@ -4051,6 +4052,27 @@ reread:
r_search_begin (core->search);
dosearch = true;
break;
case 'k': // "/k" Rabin Karp String search
inp = r_str_trim_dup (input + 1);
len = r_str_unescape (inp);
r_search_reset (core->search, R_SEARCH_RABIN_KARP);
r_search_set_distance (core->search, (int)r_config_get_i (core->config, "search.distance"));
{
RSearchKeyword *skw;
skw = r_search_keyword_new ((const ut8 *)inp, len, NULL, 0, NULL);
free (inp);
if (skw) {
skw->icase = ignorecase;
skw->type = R_SEARCH_KEYWORD_TYPE_STRING;
r_search_kw_add (core->search, skw);
} else {
eprintf ("Invalid keyword\n");
break;
}
}
r_search_begin (core->search);
dosearch_read = true;
break;
case 'e': // "/e" match regexp
if (input[1] == '?') {
eprintf ("Usage: /e /foo/i or /e/foo/i\n");

View File

@ -24,6 +24,7 @@ enum {
R_SEARCH_PRIV_KEY,
R_SEARCH_DELTAKEY,
R_SEARCH_MAGIC,
R_SEARCH_RABIN_KARP,
R_SEARCH_LAST
};

View File

@ -2,7 +2,7 @@ include ../config.mk
NAME=r_search
OBJS=search.o bytepat.o strings.o aes_find.o privkey_find.o
OBJS+=regexp.o keyword.o uds.o
OBJS+=regexp.o keyword.o uds.o rabin_karp.o
# OBJ+=rsakey.o
R2DEPS=r_util
CFLAGS+=-g

View File

@ -5,6 +5,7 @@ r_search_sources = [
'regexp.c',
'uds.c',
'privkey_find.c',
'rabin_karp.c',
'search.c',
'strings.c'
]

192
libr/search/rabin_karp.c Normal file
View File

@ -0,0 +1,192 @@
/* radare - LGPL - Copyright 2021 pancake bemodtwz */
#include <r_search.h>
#include "search.h"
#define rhash ut64
#define RHASH_BITS (sizeof (rhash) * 8)
#define RSHIFT 17
#define ROL(x, n) ((x << n) | (x >> (RHASH_BITS - n)))
#define ROR(x, n) ((x >> n) | (x << (RHASH_BITS - n)))
const ut64 UT_MAP[256] = {
0x000000000000000, 0x000000000000001, 0x000000000000100, 0x000000000000101,
0x000000000010000, 0x000000000010001, 0x000000000010100, 0x000000000010101,
0x000000001000000, 0x000000001000001, 0x000000001000100, 0x000000001000101,
0x000000001010000, 0x000000001010001, 0x000000001010100, 0x000000001010101,
0x000000100000000, 0x000000100000001, 0x000000100000100, 0x000000100000101,
0x000000100010000, 0x000000100010001, 0x000000100010100, 0x000000100010101,
0x000000101000000, 0x000000101000001, 0x000000101000100, 0x000000101000101,
0x000000101010000, 0x000000101010001, 0x000000101010100, 0x000000101010101,
0x000010000000000, 0x000010000000001, 0x000010000000100, 0x000010000000101,
0x000010000010000, 0x000010000010001, 0x000010000010100, 0x000010000010101,
0x000010001000000, 0x000010001000001, 0x000010001000100, 0x000010001000101,
0x000010001010000, 0x000010001010001, 0x000010001010100, 0x000010001010101,
0x000010100000000, 0x000010100000001, 0x000010100000100, 0x000010100000101,
0x000010100010000, 0x000010100010001, 0x000010100010100, 0x000010100010101,
0x000010101000000, 0x000010101000001, 0x000010101000100, 0x000010101000101,
0x000010101010000, 0x000010101010001, 0x000010101010100, 0x000010101010101,
0x001000000000000, 0x001000000000001, 0x001000000000100, 0x001000000000101,
0x001000000010000, 0x001000000010001, 0x001000000010100, 0x001000000010101,
0x001000001000000, 0x001000001000001, 0x001000001000100, 0x001000001000101,
0x001000001010000, 0x001000001010001, 0x001000001010100, 0x001000001010101,
0x001000100000000, 0x001000100000001, 0x001000100000100, 0x001000100000101,
0x001000100010000, 0x001000100010001, 0x001000100010100, 0x001000100010101,
0x001000101000000, 0x001000101000001, 0x001000101000100, 0x001000101000101,
0x001000101010000, 0x001000101010001, 0x001000101010100, 0x001000101010101,
0x001010000000000, 0x001010000000001, 0x001010000000100, 0x001010000000101,
0x001010000010000, 0x001010000010001, 0x001010000010100, 0x001010000010101,
0x001010001000000, 0x001010001000001, 0x001010001000100, 0x001010001000101,
0x001010001010000, 0x001010001010001, 0x001010001010100, 0x001010001010101,
0x001010100000000, 0x001010100000001, 0x001010100000100, 0x001010100000101,
0x001010100010000, 0x001010100010001, 0x001010100010100, 0x001010100010101,
0x001010101000000, 0x001010101000001, 0x001010101000100, 0x001010101000101,
0x001010101010000, 0x001010101010001, 0x001010101010100, 0x001010101010101,
0x100000000000000, 0x100000000000001, 0x100000000000100, 0x100000000000101,
0x100000000010000, 0x100000000010001, 0x100000000010100, 0x100000000010101,
0x100000001000000, 0x100000001000001, 0x100000001000100, 0x100000001000101,
0x100000001010000, 0x100000001010001, 0x100000001010100, 0x100000001010101,
0x100000100000000, 0x100000100000001, 0x100000100000100, 0x100000100000101,
0x100000100010000, 0x100000100010001, 0x100000100010100, 0x100000100010101,
0x100000101000000, 0x100000101000001, 0x100000101000100, 0x100000101000101,
0x100000101010000, 0x100000101010001, 0x100000101010100, 0x100000101010101,
0x100010000000000, 0x100010000000001, 0x100010000000100, 0x100010000000101,
0x100010000010000, 0x100010000010001, 0x100010000010100, 0x100010000010101,
0x100010001000000, 0x100010001000001, 0x100010001000100, 0x100010001000101,
0x100010001010000, 0x100010001010001, 0x100010001010100, 0x100010001010101,
0x100010100000000, 0x100010100000001, 0x100010100000100, 0x100010100000101,
0x100010100010000, 0x100010100010001, 0x100010100010100, 0x100010100010101,
0x100010101000000, 0x100010101000001, 0x100010101000100, 0x100010101000101,
0x100010101010000, 0x100010101010001, 0x100010101010100, 0x100010101010101,
0x101000000000000, 0x101000000000001, 0x101000000000100, 0x101000000000101,
0x101000000010000, 0x101000000010001, 0x101000000010100, 0x101000000010101,
0x101000001000000, 0x101000001000001, 0x101000001000100, 0x101000001000101,
0x101000001010000, 0x101000001010001, 0x101000001010100, 0x101000001010101,
0x101000100000000, 0x101000100000001, 0x101000100000100, 0x101000100000101,
0x101000100010000, 0x101000100010001, 0x101000100010100, 0x101000100010101,
0x101000101000000, 0x101000101000001, 0x101000101000100, 0x101000101000101,
0x101000101010000, 0x101000101010001, 0x101000101010100, 0x101000101010101,
0x101010000000000, 0x101010000000001, 0x101010000000100, 0x101010000000101,
0x101010000010000, 0x101010000010001, 0x101010000010100, 0x101010000010101,
0x101010001000000, 0x101010001000001, 0x101010001000100, 0x101010001000101,
0x101010001010000, 0x101010001010001, 0x101010001010100, 0x101010001010101,
0x101010100000000, 0x101010100000001, 0x101010100000100, 0x101010100000101,
0x101010100010000, 0x101010100010001, 0x101010100010100, 0x101010100010101,
0x101010101000000, 0x101010101000001, 0x101010101000100, 0x101010101000101,
0x101010101010000, 0x101010101010001, 0x101010101010100, 0x101010101010101,
};
/* This really just reorders the bits put into it for the first 4 bytes. This
* means there are no collisions in the first 8 bytes.
*/
static rhash hash_full(const ut8 *buf, ut32 len) {
rhash hsh = 0;
int i;
for (i = 0; i < len; i++) {
hsh = ROL (hsh, RSHIFT) ^ UT_MAP[buf[i]];
}
return hsh;
}
// pre-compute params for unrolling/re-rolling hash
typedef struct ROLLDATA {
rhash roll;
ut8 right, left;
} RollData;
static inline void roll_forward(RollData *rd, ut8 prev, ut8 next) {
rd->roll = ROR (rd->roll, rd->right) ^ UT_MAP[prev];
rd->roll = ROL (rd->roll, rd->left) ^ UT_MAP[next];
}
static inline void rdata_init(RollData *rd, const ut8 *buf, ut32 len) {
rd->roll = hash_full (buf, len);
rd->right = (len - 1) * RSHIFT % RHASH_BITS;
rd->left = (rd->right + RSHIFT) % RHASH_BITS;
}
static inline bool kw_cmp(const ut8 *buf, RSearchKeyword *kw) {
int i = memcmp (buf, kw->bin_keyword, kw->keyword_length);
return i? false: true;
}
static inline int rk_many(RSearch *srch, ut64 from, ut64 to) {
// TODO handle many with hash table
eprintf ("Can't use RK on many inputs yet\n");
return -1;
}
R_IPI int search_rk(RSearch *srch, ut64 from, ut64 to) {
int cnt = r_list_length (srch->kws);
r_return_val_if_fail (cnt > 0, -1);
if (cnt > 1) {
return rk_many (srch, from, to);
}
RSearchKeyword *kw = r_list_get_top (srch->kws);
if (!kw) {
return -1;
}
ut32 klen = kw->keyword_length;
if (klen > to - from) {
return 0; // no possible matches
}
// fill buffer
const ut32 maxbuf = R_MAX (0x1000, klen * 2);
ut32 blen = R_MIN (maxbuf, to - from);
ut8 *buf = malloc (blen);
if (!buf || !srch->iob.read_at (srch->iob.io, from, buf, blen)) {
free (buf);
return -1;
}
// init hashes
RollData hay = { 0 };
rhash needle = hash_full (kw->bin_keyword, klen);
rdata_init (&hay, buf, klen);
int skip = 0;
int hits = 0;
ut64 addr = from;
while (true) {
// eat through data in buffer
ut32 i;
for (i = 0; i < blen - klen; i++) {
if (skip) {
skip--;
} else {
if (needle == hay.roll && kw_cmp (buf + i, kw)) {
int t = r_search_hit_sz (srch, kw, addr + i, klen);
hits++;
if (!t || t > 1) {
free (buf);
return t? hits: -1;
}
if (!srch->overlap) {
skip = klen - 1;
}
}
}
// remove first and add next ut8's in buff to hash
roll_forward (&hay, buf[i], buf[i + klen]);
}
addr += i;
if (addr >= to - klen || srch->consb.is_breaked ()) {
break;
}
// move leftover to start of buffer, and fill the rest
memmove (buf, buf + i, klen);
blen = R_MIN (maxbuf, to - addr);
if (!srch->iob.read_at (srch->iob.io, addr + klen, buf + klen, blen - klen)) {
free (buf);
return -1;
}
}
free (buf);
return 0;
}

View File

@ -81,6 +81,7 @@ static int search_magic_update(RSearch *s, ut64 from, const ut8 *buf, int len) {
R_API int r_search_set_mode(RSearch *s, int mode) {
s->update = NULL;
bool ok = true;
switch (mode) {
case R_SEARCH_KEYWORD: s->update = search_kw_update; break;
case R_SEARCH_REGEXP: s->update = search_regexp_update; break;
@ -89,9 +90,16 @@ R_API int r_search_set_mode(RSearch *s, int mode) {
case R_SEARCH_STRING: s->update = search_strings_update; break;
case R_SEARCH_DELTAKEY: s->update = search_deltakey_update; break;
case R_SEARCH_MAGIC: s->update = search_magic_update; break;
case R_SEARCH_PATTERN: s->update = NULL; break;
// no r_search_update for these
case R_SEARCH_RABIN_KARP:
case R_SEARCH_PATTERN:
break;
default:
ok = false;
break;
}
if (s->update || mode == R_SEARCH_PATTERN) {
if (ok) {
s->mode = mode;
return true;
}
@ -523,6 +531,8 @@ R_API int r_search_update_read(RSearch *s, ut64 from, ut64 to) {
return search_pattern (s, from, to);
case R_SEARCH_REGEXP:
return search_regex_read (s, from, to);
case R_SEARCH_RABIN_KARP:
return search_rk (s, from, to);
default:
eprintf ("Unsupported mode\n");
return -1;

View File

@ -9,5 +9,6 @@ R_IPI int search_regexp_update(RSearch *s, ut64 from, const ut8 *buf, int len);
// update read API's use RSearch.iob instead of provided buf
R_IPI int search_pattern(RSearch *s, ut64 from, ut64 to);
R_IPI int search_regex_read(RSearch *s, ut64 from, ut64 to);
R_IPI int search_rk(RSearch *s, ut64 from, ut64 to);
R_IPI int r_search_hit_sz(RSearch *s, RSearchKeyword *kw, ut64 addr, ut32 sz);

View File

@ -1042,3 +1042,36 @@ EXPECT=<<EOF
0x00000005 hit0_0 a5a5bf
EOF
RUN
NAME=/k at start
FILE=-
CMDS=<<EOF
w AAABAAABAAA
/k AAABAAA
EOF
EXPECT=<<EOF
0x00000000 hit0_0 "AAABAAABAAA"
EOF
RUN
NAME=/k with overlap, middle
FILE=-
CMDS=<<EOF
w ZAAABAAABAAA
/k AAABAAA
EOF
EXPECT=<<EOF
0x00000001 hit0_0 "ZAAABAAABAAA"
EOF
RUN
NAME=/k at end
FILE=-
CMDS=<<EOF
w ZZZZAAABAAA
/k AAABAAA
EOF
EXPECT=<<EOF
0x00000004 hit0_0 "ZZZZAAABAAA"
EOF
RUN