radare2/libr/search/karp.c

193 lines
7.8 KiB
C

/* radare - LGPL - Copyright 2021 pancake bemodtwz */
#include <r_search.h>
#include "search.h"
#define rhash ut64
#define RHASH_BITS (sizeof (rhash) * 8)
#define RSHIFT 17
#define ROL(x, n) ((x << n) | (x >> (RHASH_BITS - n)))
#define ROR(x, n) ((x >> n) | (x << (RHASH_BITS - n)))
const ut64 UT_MAP[256] = {
0x000000000000000, 0x000000000000001, 0x000000000000100, 0x000000000000101,
0x000000000010000, 0x000000000010001, 0x000000000010100, 0x000000000010101,
0x000000001000000, 0x000000001000001, 0x000000001000100, 0x000000001000101,
0x000000001010000, 0x000000001010001, 0x000000001010100, 0x000000001010101,
0x000000100000000, 0x000000100000001, 0x000000100000100, 0x000000100000101,
0x000000100010000, 0x000000100010001, 0x000000100010100, 0x000000100010101,
0x000000101000000, 0x000000101000001, 0x000000101000100, 0x000000101000101,
0x000000101010000, 0x000000101010001, 0x000000101010100, 0x000000101010101,
0x000010000000000, 0x000010000000001, 0x000010000000100, 0x000010000000101,
0x000010000010000, 0x000010000010001, 0x000010000010100, 0x000010000010101,
0x000010001000000, 0x000010001000001, 0x000010001000100, 0x000010001000101,
0x000010001010000, 0x000010001010001, 0x000010001010100, 0x000010001010101,
0x000010100000000, 0x000010100000001, 0x000010100000100, 0x000010100000101,
0x000010100010000, 0x000010100010001, 0x000010100010100, 0x000010100010101,
0x000010101000000, 0x000010101000001, 0x000010101000100, 0x000010101000101,
0x000010101010000, 0x000010101010001, 0x000010101010100, 0x000010101010101,
0x001000000000000, 0x001000000000001, 0x001000000000100, 0x001000000000101,
0x001000000010000, 0x001000000010001, 0x001000000010100, 0x001000000010101,
0x001000001000000, 0x001000001000001, 0x001000001000100, 0x001000001000101,
0x001000001010000, 0x001000001010001, 0x001000001010100, 0x001000001010101,
0x001000100000000, 0x001000100000001, 0x001000100000100, 0x001000100000101,
0x001000100010000, 0x001000100010001, 0x001000100010100, 0x001000100010101,
0x001000101000000, 0x001000101000001, 0x001000101000100, 0x001000101000101,
0x001000101010000, 0x001000101010001, 0x001000101010100, 0x001000101010101,
0x001010000000000, 0x001010000000001, 0x001010000000100, 0x001010000000101,
0x001010000010000, 0x001010000010001, 0x001010000010100, 0x001010000010101,
0x001010001000000, 0x001010001000001, 0x001010001000100, 0x001010001000101,
0x001010001010000, 0x001010001010001, 0x001010001010100, 0x001010001010101,
0x001010100000000, 0x001010100000001, 0x001010100000100, 0x001010100000101,
0x001010100010000, 0x001010100010001, 0x001010100010100, 0x001010100010101,
0x001010101000000, 0x001010101000001, 0x001010101000100, 0x001010101000101,
0x001010101010000, 0x001010101010001, 0x001010101010100, 0x001010101010101,
0x100000000000000, 0x100000000000001, 0x100000000000100, 0x100000000000101,
0x100000000010000, 0x100000000010001, 0x100000000010100, 0x100000000010101,
0x100000001000000, 0x100000001000001, 0x100000001000100, 0x100000001000101,
0x100000001010000, 0x100000001010001, 0x100000001010100, 0x100000001010101,
0x100000100000000, 0x100000100000001, 0x100000100000100, 0x100000100000101,
0x100000100010000, 0x100000100010001, 0x100000100010100, 0x100000100010101,
0x100000101000000, 0x100000101000001, 0x100000101000100, 0x100000101000101,
0x100000101010000, 0x100000101010001, 0x100000101010100, 0x100000101010101,
0x100010000000000, 0x100010000000001, 0x100010000000100, 0x100010000000101,
0x100010000010000, 0x100010000010001, 0x100010000010100, 0x100010000010101,
0x100010001000000, 0x100010001000001, 0x100010001000100, 0x100010001000101,
0x100010001010000, 0x100010001010001, 0x100010001010100, 0x100010001010101,
0x100010100000000, 0x100010100000001, 0x100010100000100, 0x100010100000101,
0x100010100010000, 0x100010100010001, 0x100010100010100, 0x100010100010101,
0x100010101000000, 0x100010101000001, 0x100010101000100, 0x100010101000101,
0x100010101010000, 0x100010101010001, 0x100010101010100, 0x100010101010101,
0x101000000000000, 0x101000000000001, 0x101000000000100, 0x101000000000101,
0x101000000010000, 0x101000000010001, 0x101000000010100, 0x101000000010101,
0x101000001000000, 0x101000001000001, 0x101000001000100, 0x101000001000101,
0x101000001010000, 0x101000001010001, 0x101000001010100, 0x101000001010101,
0x101000100000000, 0x101000100000001, 0x101000100000100, 0x101000100000101,
0x101000100010000, 0x101000100010001, 0x101000100010100, 0x101000100010101,
0x101000101000000, 0x101000101000001, 0x101000101000100, 0x101000101000101,
0x101000101010000, 0x101000101010001, 0x101000101010100, 0x101000101010101,
0x101010000000000, 0x101010000000001, 0x101010000000100, 0x101010000000101,
0x101010000010000, 0x101010000010001, 0x101010000010100, 0x101010000010101,
0x101010001000000, 0x101010001000001, 0x101010001000100, 0x101010001000101,
0x101010001010000, 0x101010001010001, 0x101010001010100, 0x101010001010101,
0x101010100000000, 0x101010100000001, 0x101010100000100, 0x101010100000101,
0x101010100010000, 0x101010100010001, 0x101010100010100, 0x101010100010101,
0x101010101000000, 0x101010101000001, 0x101010101000100, 0x101010101000101,
0x101010101010000, 0x101010101010001, 0x101010101010100, 0x101010101010101,
};
/* This really just reorders the bits put into it for the first 4 bytes. This
* means there are no collisions in the first 8 bytes.
*/
static rhash hash_full(const ut8 *buf, ut32 len) {
rhash hsh = 0;
int i;
for (i = 0; i < len; i++) {
hsh = ROL (hsh, RSHIFT) ^ UT_MAP[buf[i]];
}
return hsh;
}
// pre-compute params for unrolling/re-rolling hash
typedef struct ROLLDATA {
rhash roll;
ut8 right, left;
} RollData;
static inline void roll_forward(RollData *rd, ut8 prev, ut8 next) {
rd->roll = ROR (rd->roll, rd->right) ^ UT_MAP[prev];
rd->roll = ROL (rd->roll, rd->left) ^ UT_MAP[next];
}
static inline void rdata_init(RollData *rd, const ut8 *buf, ut32 len) {
rd->roll = hash_full (buf, len);
rd->right = (len - 1) * RSHIFT % RHASH_BITS;
rd->left = (rd->right + RSHIFT) % RHASH_BITS;
}
static inline bool kw_cmp(const ut8 *buf, RSearchKeyword *kw) {
int i = memcmp (buf, kw->bin_keyword, kw->keyword_length);
return i? false: true;
}
static inline int rk_many(RSearch *srch, ut64 from, ut64 to) {
// TODO handle many with hash table
eprintf ("Can't use RK on many inputs yet\n");
return -1;
}
R_IPI int search_rk(RSearch *srch, ut64 from, ut64 to) {
int cnt = r_list_length (srch->kws);
R_RETURN_VAL_IF_FAIL (cnt > 0, -1);
if (cnt > 1) {
return rk_many (srch, from, to);
}
RSearchKeyword *kw = r_list_last (srch->kws);
if (!kw) {
return -1;
}
ut32 klen = kw->keyword_length;
if (klen > to - from) {
return 0; // no possible matches
}
// fill buffer
const ut32 maxbuf = R_MAX (0x1000, klen * 2);
ut32 blen = R_MIN (maxbuf, to - from);
ut8 *buf = malloc (blen);
if (!buf || !srch->iob.read_at (srch->iob.io, from, buf, blen)) {
free (buf);
return -1;
}
// init hashes
RollData hay = {0};
rhash needle = hash_full (kw->bin_keyword, klen);
rdata_init (&hay, buf, klen);
int skip = 0;
int hits = 0;
ut64 addr = from;
while (true) {
// eat through data in buffer
ut32 i;
for (i = 0; i < blen - klen; i++) {
if (skip) {
skip--;
} else {
if (needle == hay.roll && kw_cmp (buf + i, kw)) {
int t = r_search_hit_sz (srch, kw, addr + i, klen);
hits++;
if (!t || t > 1) {
free (buf);
return t? hits: -1;
}
if (!srch->overlap) {
skip = klen - 1;
}
}
}
// remove first and add next ut8's in buff to hash
roll_forward (&hay, buf[i], buf[i + klen]);
}
addr += i;
if (addr >= to - klen || srch->consb.is_breaked ()) {
break;
}
// move leftover to start of buffer, and fill the rest
memmove (buf, buf + i, klen);
blen = R_MIN (maxbuf, to - addr);
if (!srch->iob.read_at (srch->iob.io, addr + klen, buf + klen, blen - klen)) {
free (buf);
return -1;
}
}
free (buf);
return 0;
}