mirror of
https://github.com/radareorg/radare2.git
synced 2024-11-23 05:09:43 +00:00
Add Rabin Karp algorithm to ##search
This commit is contained in:
parent
d76bb693f2
commit
2a117d5f4f
@ -113,6 +113,7 @@ static const char *help_msg_slash[] = {
|
||||
"/g", "[g] [from]", "find all graph paths A to B (/gg follow jumps, see search.count and anal.depth)",
|
||||
"/h", "[t] [hash] [len]", "find block matching this hash. See ph",
|
||||
"/i", " foo", "search for string 'foo' ignoring case",
|
||||
"/k", " foo", "search for string 'foo' using Rabin Karp alg",
|
||||
"/m", "[?][ebm] magicfile", "search for magic, filesystems or binary headers",
|
||||
"/o", " [n]", "show offset of n instructions backward",
|
||||
"/O", " [n]", "same as /o, but with a different fallback if anal cannot be used",
|
||||
@ -4051,6 +4052,27 @@ reread:
|
||||
r_search_begin (core->search);
|
||||
dosearch = true;
|
||||
break;
|
||||
case 'k': // "/k" Rabin Karp String search
|
||||
inp = r_str_trim_dup (input + 1);
|
||||
len = r_str_unescape (inp);
|
||||
r_search_reset (core->search, R_SEARCH_RABIN_KARP);
|
||||
r_search_set_distance (core->search, (int)r_config_get_i (core->config, "search.distance"));
|
||||
{
|
||||
RSearchKeyword *skw;
|
||||
skw = r_search_keyword_new ((const ut8 *)inp, len, NULL, 0, NULL);
|
||||
free (inp);
|
||||
if (skw) {
|
||||
skw->icase = ignorecase;
|
||||
skw->type = R_SEARCH_KEYWORD_TYPE_STRING;
|
||||
r_search_kw_add (core->search, skw);
|
||||
} else {
|
||||
eprintf ("Invalid keyword\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
r_search_begin (core->search);
|
||||
dosearch_read = true;
|
||||
break;
|
||||
case 'e': // "/e" match regexp
|
||||
if (input[1] == '?') {
|
||||
eprintf ("Usage: /e /foo/i or /e/foo/i\n");
|
||||
|
@ -24,6 +24,7 @@ enum {
|
||||
R_SEARCH_PRIV_KEY,
|
||||
R_SEARCH_DELTAKEY,
|
||||
R_SEARCH_MAGIC,
|
||||
R_SEARCH_RABIN_KARP,
|
||||
R_SEARCH_LAST
|
||||
};
|
||||
|
||||
|
@ -2,7 +2,7 @@ include ../config.mk
|
||||
|
||||
NAME=r_search
|
||||
OBJS=search.o bytepat.o strings.o aes_find.o privkey_find.o
|
||||
OBJS+=regexp.o keyword.o uds.o
|
||||
OBJS+=regexp.o keyword.o uds.o rabin_karp.o
|
||||
# OBJ+=rsakey.o
|
||||
R2DEPS=r_util
|
||||
CFLAGS+=-g
|
||||
|
@ -5,6 +5,7 @@ r_search_sources = [
|
||||
'regexp.c',
|
||||
'uds.c',
|
||||
'privkey_find.c',
|
||||
'rabin_karp.c',
|
||||
'search.c',
|
||||
'strings.c'
|
||||
]
|
||||
|
192
libr/search/rabin_karp.c
Normal file
192
libr/search/rabin_karp.c
Normal file
@ -0,0 +1,192 @@
|
||||
/* radare - LGPL - Copyright 2021 pancake bemodtwz */
|
||||
|
||||
#include <r_search.h>
|
||||
#include "search.h"
|
||||
|
||||
// Hash word type and its width in bits.
#define rhash ut64
#define RHASH_BITS (sizeof (rhash) * 8)
// Number of bits the running hash is rotated left for every byte consumed.
#define RSHIFT 17

/* Rotate x left / right by n bits, with 0 <= n < RHASH_BITS.
 *
 * The complementary shift count is reduced modulo RHASH_BITS so that n == 0
 * never produces a shift by the full word width, which is undefined behaviour
 * in C. Rotation counts of 0 do occur: the per-keyword counts derived from
 * the keyword length are 0 for some lengths (e.g. a 1-byte keyword).
 *
 * Both arguments are evaluated twice; do not pass expressions with side
 * effects.
 */
#define ROL(x, n) (((x) << (n)) | ((x) >> ((RHASH_BITS - (n)) % RHASH_BITS)))
#define ROR(x, n) (((x) >> (n)) | ((x) << ((RHASH_BITS - (n)) % RHASH_BITS)))
|
||||
|
||||
/* Byte -> 64-bit word lookup used by the rolling hash.
 *
 * Entry b has bit (8 * k) set exactly when bit k of b is set, i.e. the eight
 * bits of the byte are spread out to one bit per output byte.
 *
 * File-local: only the hash helpers in this file read it.
 */
static const ut64 UT_MAP[256] = {
	0x000000000000000, 0x000000000000001, 0x000000000000100, 0x000000000000101,
	0x000000000010000, 0x000000000010001, 0x000000000010100, 0x000000000010101,
	0x000000001000000, 0x000000001000001, 0x000000001000100, 0x000000001000101,
	0x000000001010000, 0x000000001010001, 0x000000001010100, 0x000000001010101,
	0x000000100000000, 0x000000100000001, 0x000000100000100, 0x000000100000101,
	0x000000100010000, 0x000000100010001, 0x000000100010100, 0x000000100010101,
	0x000000101000000, 0x000000101000001, 0x000000101000100, 0x000000101000101,
	0x000000101010000, 0x000000101010001, 0x000000101010100, 0x000000101010101,
	0x000010000000000, 0x000010000000001, 0x000010000000100, 0x000010000000101,
	0x000010000010000, 0x000010000010001, 0x000010000010100, 0x000010000010101,
	0x000010001000000, 0x000010001000001, 0x000010001000100, 0x000010001000101,
	0x000010001010000, 0x000010001010001, 0x000010001010100, 0x000010001010101,
	0x000010100000000, 0x000010100000001, 0x000010100000100, 0x000010100000101,
	0x000010100010000, 0x000010100010001, 0x000010100010100, 0x000010100010101,
	0x000010101000000, 0x000010101000001, 0x000010101000100, 0x000010101000101,
	0x000010101010000, 0x000010101010001, 0x000010101010100, 0x000010101010101,
	0x001000000000000, 0x001000000000001, 0x001000000000100, 0x001000000000101,
	0x001000000010000, 0x001000000010001, 0x001000000010100, 0x001000000010101,
	0x001000001000000, 0x001000001000001, 0x001000001000100, 0x001000001000101,
	0x001000001010000, 0x001000001010001, 0x001000001010100, 0x001000001010101,
	0x001000100000000, 0x001000100000001, 0x001000100000100, 0x001000100000101,
	0x001000100010000, 0x001000100010001, 0x001000100010100, 0x001000100010101,
	0x001000101000000, 0x001000101000001, 0x001000101000100, 0x001000101000101,
	0x001000101010000, 0x001000101010001, 0x001000101010100, 0x001000101010101,
	0x001010000000000, 0x001010000000001, 0x001010000000100, 0x001010000000101,
	0x001010000010000, 0x001010000010001, 0x001010000010100, 0x001010000010101,
	0x001010001000000, 0x001010001000001, 0x001010001000100, 0x001010001000101,
	0x001010001010000, 0x001010001010001, 0x001010001010100, 0x001010001010101,
	0x001010100000000, 0x001010100000001, 0x001010100000100, 0x001010100000101,
	0x001010100010000, 0x001010100010001, 0x001010100010100, 0x001010100010101,
	0x001010101000000, 0x001010101000001, 0x001010101000100, 0x001010101000101,
	0x001010101010000, 0x001010101010001, 0x001010101010100, 0x001010101010101,
	0x100000000000000, 0x100000000000001, 0x100000000000100, 0x100000000000101,
	0x100000000010000, 0x100000000010001, 0x100000000010100, 0x100000000010101,
	0x100000001000000, 0x100000001000001, 0x100000001000100, 0x100000001000101,
	0x100000001010000, 0x100000001010001, 0x100000001010100, 0x100000001010101,
	0x100000100000000, 0x100000100000001, 0x100000100000100, 0x100000100000101,
	0x100000100010000, 0x100000100010001, 0x100000100010100, 0x100000100010101,
	0x100000101000000, 0x100000101000001, 0x100000101000100, 0x100000101000101,
	0x100000101010000, 0x100000101010001, 0x100000101010100, 0x100000101010101,
	0x100010000000000, 0x100010000000001, 0x100010000000100, 0x100010000000101,
	0x100010000010000, 0x100010000010001, 0x100010000010100, 0x100010000010101,
	0x100010001000000, 0x100010001000001, 0x100010001000100, 0x100010001000101,
	0x100010001010000, 0x100010001010001, 0x100010001010100, 0x100010001010101,
	0x100010100000000, 0x100010100000001, 0x100010100000100, 0x100010100000101,
	0x100010100010000, 0x100010100010001, 0x100010100010100, 0x100010100010101,
	0x100010101000000, 0x100010101000001, 0x100010101000100, 0x100010101000101,
	0x100010101010000, 0x100010101010001, 0x100010101010100, 0x100010101010101,
	0x101000000000000, 0x101000000000001, 0x101000000000100, 0x101000000000101,
	0x101000000010000, 0x101000000010001, 0x101000000010100, 0x101000000010101,
	0x101000001000000, 0x101000001000001, 0x101000001000100, 0x101000001000101,
	0x101000001010000, 0x101000001010001, 0x101000001010100, 0x101000001010101,
	0x101000100000000, 0x101000100000001, 0x101000100000100, 0x101000100000101,
	0x101000100010000, 0x101000100010001, 0x101000100010100, 0x101000100010101,
	0x101000101000000, 0x101000101000001, 0x101000101000100, 0x101000101000101,
	0x101000101010000, 0x101000101010001, 0x101000101010100, 0x101000101010101,
	0x101010000000000, 0x101010000000001, 0x101010000000100, 0x101010000000101,
	0x101010000010000, 0x101010000010001, 0x101010000010100, 0x101010000010101,
	0x101010001000000, 0x101010001000001, 0x101010001000100, 0x101010001000101,
	0x101010001010000, 0x101010001010001, 0x101010001010100, 0x101010001010101,
	0x101010100000000, 0x101010100000001, 0x101010100000100, 0x101010100000101,
	0x101010100010000, 0x101010100010001, 0x101010100010100, 0x101010100010101,
	0x101010101000000, 0x101010101000001, 0x101010101000100, 0x101010101000101,
	0x101010101010000, 0x101010101010001, 0x101010101010100, 0x101010101010101,
};
|
||||
|
||||
/* This really just reorders the bits put into it for the first 4 bytes. This
|
||||
* means there are no collisions in the first 8 bytes.
|
||||
*/
|
||||
static rhash hash_full(const ut8 *buf, ut32 len) {
|
||||
rhash hsh = 0;
|
||||
int i;
|
||||
for (i = 0; i < len; i++) {
|
||||
hsh = ROL (hsh, RSHIFT) ^ UT_MAP[buf[i]];
|
||||
}
|
||||
return hsh;
|
||||
}
|
||||
|
||||
// pre-compute params for unrolling/re-rolling hash
|
||||
typedef struct ROLLDATA {
|
||||
rhash roll;
|
||||
ut8 right, left;
|
||||
} RollData;
|
||||
|
||||
// Slide the hashed window one byte forward: `prev` is the byte leaving the
// window, `next` is the byte entering it. Updates rd->roll in place.
static inline void roll_forward(RollData *rd, ut8 prev, ut8 next) {
	rhash h = rd->roll;
	// rotate back and XOR out the leaving byte
	h = ROR (h, rd->right);
	h ^= UT_MAP[prev];
	// rotate forward again and XOR in the entering byte
	h = ROL (h, rd->left);
	h ^= UT_MAP[next];
	rd->roll = h;
}
|
||||
|
||||
// Prepare `rd` for a window of `len` bytes: store the rotation counts used by
// roll_forward and seed the hash with the first `len` bytes of `buf`.
static inline void rdata_init(RollData *rd, const ut8 *buf, ut32 len) {
	// total left-rotation the first byte of the window has received,
	// reduced to the hash width
	const ut8 undo = (len - 1) * RSHIFT % RHASH_BITS;
	rd->right = undo;
	// one more RSHIFT step than `right`
	rd->left = (undo + RSHIFT) % RHASH_BITS;
	rd->roll = hash_full (buf, len);
}
|
||||
|
||||
// Byte-exact check that `buf` starts with the keyword's binary pattern.
// Reads kw->keyword_length bytes from both buffers.
static inline bool kw_cmp(const ut8 *buf, RSearchKeyword *kw) {
	return memcmp (buf, kw->bin_keyword, kw->keyword_length) == 0;
}
|
||||
|
||||
// Placeholder for searching several keywords at once: not implemented, so it
// only prints a message and reports failure (-1). Arguments are unused.
static inline int rk_many(RSearch *srch, ut64 from, ut64 to) {
	// TODO handle many with hash table
	const int unsupported = -1;
	eprintf ("Can't use RK on many inputs yet\n");
	return unsupported;
}
|
||||
|
||||
/* Rabin-Karp search for the single keyword registered in `srch` over the
 * address range [from, to).
 *
 * Data is pulled through srch->iob.read_at in chunks of at most
 * max(0x1000, 2 * keyword length) bytes. A rolling hash of the current
 * window is compared with the keyword hash and every hash match is confirmed
 * with kw_cmp before it is reported through r_search_hit_sz.
 *
 * When srch->overlap is not set, the keyword-length - 1 positions following
 * a hit are skipped so reported hits do not overlap.
 *
 * Returns the number of hits reported, or -1 on error: no keyword, empty
 * keyword, allocation or read failure, or r_search_hit_sz returning 0.
 * A value greater than 1 from r_search_hit_sz stops the search early and the
 * hits found so far are returned. More than one keyword is delegated to
 * rk_many.
 */
R_IPI int search_rk(RSearch *srch, ut64 from, ut64 to) {
	int cnt = r_list_length (srch->kws);
	r_return_val_if_fail (cnt > 0, -1);

	if (cnt > 1) {
		return rk_many (srch, from, to);
	}

	RSearchKeyword *kw = r_list_get_top (srch->kws);
	if (!kw) {
		return -1;
	}
	const ut32 klen = kw->keyword_length;
	if (!klen) {
		return -1; // an empty keyword cannot be hashed or matched
	}
	if (to < from || klen > to - from) {
		return 0; // no possible matches
	}

	// fill buffer
	const ut32 maxbuf = R_MAX (0x1000, klen * 2);
	ut32 blen = R_MIN (maxbuf, to - from);
	ut8 *buf = malloc (blen);
	if (!buf || !srch->iob.read_at (srch->iob.io, from, buf, blen)) {
		free (buf);
		return -1;
	}

	// init hashes
	RollData hay = { 0 };
	rhash needle = hash_full (kw->bin_keyword, klen);
	rdata_init (&hay, buf, klen);

	int skip = 0;
	int hits = 0;
	ut64 addr = from; // address of buf[0]
	while (true) {
		// The buffer is the final one when it reaches `to`. Only then is
		// the window starting at buf[blen - klen] checked here; otherwise
		// that window is carried over and becomes buf[0] of the next chunk.
		const bool final_buf = (to - addr) == blen;
		const ut32 last = blen - klen;
		const ut32 windows = final_buf? last + 1: last;

		// eat through data in buffer
		ut32 i;
		for (i = 0; i < windows; i++) {
			if (skip) {
				skip--;
			} else if (needle == hay.roll && kw_cmp (buf + i, kw)) {
				int t = r_search_hit_sz (srch, kw, addr + i, klen);
				hits++;
				if (!t || t > 1) {
					free (buf);
					return t? hits: -1;
				}
				if (!srch->overlap) {
					skip = klen - 1;
				}
			}
			// remove first and add next ut8's in buff to hash; the very
			// last window of the range has no following byte to roll in
			if (i < last) {
				roll_forward (&hay, buf[i], buf[i + klen]);
			}
		}

		if (final_buf || srch->consb.is_breaked ()) {
			break;
		}

		// move leftover to start of buffer, and fill the rest
		addr += last;
		memmove (buf, buf + last, klen);
		blen = R_MIN (maxbuf, to - addr);
		if (!srch->iob.read_at (srch->iob.io, addr + klen, buf + klen, blen - klen)) {
			free (buf);
			return -1;
		}
	}
	free (buf);
	return hits;
}
|
@ -81,6 +81,7 @@ static int search_magic_update(RSearch *s, ut64 from, const ut8 *buf, int len) {
|
||||
|
||||
R_API int r_search_set_mode(RSearch *s, int mode) {
|
||||
s->update = NULL;
|
||||
bool ok = true;
|
||||
switch (mode) {
|
||||
case R_SEARCH_KEYWORD: s->update = search_kw_update; break;
|
||||
case R_SEARCH_REGEXP: s->update = search_regexp_update; break;
|
||||
@ -89,9 +90,16 @@ R_API int r_search_set_mode(RSearch *s, int mode) {
|
||||
case R_SEARCH_STRING: s->update = search_strings_update; break;
|
||||
case R_SEARCH_DELTAKEY: s->update = search_deltakey_update; break;
|
||||
case R_SEARCH_MAGIC: s->update = search_magic_update; break;
|
||||
case R_SEARCH_PATTERN: s->update = NULL; break;
|
||||
|
||||
// no r_search_update for these
|
||||
case R_SEARCH_RABIN_KARP:
|
||||
case R_SEARCH_PATTERN:
|
||||
break;
|
||||
default:
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
if (s->update || mode == R_SEARCH_PATTERN) {
|
||||
if (ok) {
|
||||
s->mode = mode;
|
||||
return true;
|
||||
}
|
||||
@ -523,6 +531,8 @@ R_API int r_search_update_read(RSearch *s, ut64 from, ut64 to) {
|
||||
return search_pattern (s, from, to);
|
||||
case R_SEARCH_REGEXP:
|
||||
return search_regex_read (s, from, to);
|
||||
case R_SEARCH_RABIN_KARP:
|
||||
return search_rk (s, from, to);
|
||||
default:
|
||||
eprintf ("Unsupported mode\n");
|
||||
return -1;
|
||||
|
@ -9,5 +9,6 @@ R_IPI int search_regexp_update(RSearch *s, ut64 from, const ut8 *buf, int len);
|
||||
// update read API's use RSearch.iob instead of provided buf
|
||||
R_IPI int search_pattern(RSearch *s, ut64 from, ut64 to);
|
||||
R_IPI int search_regex_read(RSearch *s, ut64 from, ut64 to);
|
||||
R_IPI int search_rk(RSearch *s, ut64 from, ut64 to);
|
||||
|
||||
R_IPI int r_search_hit_sz(RSearch *s, RSearchKeyword *kw, ut64 addr, ut32 sz);
|
||||
|
@ -1042,3 +1042,36 @@ EXPECT=<<EOF
|
||||
0x00000005 hit0_0 a5a5bf
|
||||
EOF
|
||||
RUN
|
||||
|
||||
NAME=/k at start
|
||||
FILE=-
|
||||
CMDS=<<EOF
|
||||
w AAABAAABAAA
|
||||
/k AAABAAA
|
||||
EOF
|
||||
EXPECT=<<EOF
|
||||
0x00000000 hit0_0 "AAABAAABAAA"
|
||||
EOF
|
||||
RUN
|
||||
|
||||
NAME=/k with overlap, middle
|
||||
FILE=-
|
||||
CMDS=<<EOF
|
||||
w ZAAABAAABAAA
|
||||
/k AAABAAA
|
||||
EOF
|
||||
EXPECT=<<EOF
|
||||
0x00000001 hit0_0 "ZAAABAAABAAA"
|
||||
EOF
|
||||
RUN
|
||||
|
||||
NAME=/k at end
|
||||
FILE=-
|
||||
CMDS=<<EOF
|
||||
w ZZZZAAABAAA
|
||||
/k AAABAAA
|
||||
EOF
|
||||
EXPECT=<<EOF
|
||||
0x00000004 hit0_0 "ZZZZAAABAAA"
|
||||
EOF
|
||||
RUN
|
||||
|
Loading…
Reference in New Issue
Block a user