mirror of
https://github.com/radareorg/radare2.git
synced 2025-02-17 20:58:14 +00:00
fix off by one when diffing
This commit is contained in:
parent
392dd94bf7
commit
8ad4d74316
122
libr/anal/diff.c
122
libr/anal/diff.c
@ -88,7 +88,7 @@ R_API int r_anal_diff_fingerprint_fcn(RAnal *anal, RAnalFunction *fcn) {
|
||||
fcn->fingerprint = NULL;
|
||||
r_list_foreach (fcn->bbs, iter, bb) {
|
||||
len += bb->size;
|
||||
fcn->fingerprint = realloc (fcn->fingerprint, len);
|
||||
fcn->fingerprint = realloc (fcn->fingerprint, len + 1);
|
||||
if (!fcn->fingerprint)
|
||||
return 0;
|
||||
memcpy (fcn->fingerprint+len-bb->size, bb->fingerprint, bb->size);
|
||||
@ -150,52 +150,60 @@ R_API int r_anal_diff_fcn(RAnal *anal, RList *fcns, RList *fcns2) {
|
||||
ut64 maxsize, minsize;
|
||||
double t, ot;
|
||||
|
||||
if (!anal)
|
||||
if (!anal) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (anal->cur && anal->cur->diff_fcn)
|
||||
if (anal->cur && anal->cur->diff_fcn) {
|
||||
return (anal->cur->diff_fcn (anal, fcns, fcns2));
|
||||
}
|
||||
|
||||
/* Compare functions with the same name */
|
||||
if (fcns)
|
||||
r_list_foreach (fcns, iter, fcn) {
|
||||
if (fcn->type != R_ANAL_FCN_TYPE_SYM || fcn->name == NULL)
|
||||
continue;
|
||||
r_list_foreach (fcns2, iter2, fcn2) {
|
||||
if (fcn2->type != R_ANAL_FCN_TYPE_SYM || fcn2->name == NULL ||
|
||||
strcmp (fcn->name, fcn2->name))
|
||||
if (fcns) {
|
||||
r_list_foreach (fcns, iter, fcn) {
|
||||
if (fcn->type != R_ANAL_FCN_TYPE_SYM || fcn->name == NULL) {
|
||||
continue;
|
||||
r_diff_buffers_distance (NULL, fcn->fingerprint, r_anal_fcn_size (fcn),
|
||||
fcn2->fingerprint, r_anal_fcn_size (fcn2), NULL, &t);
|
||||
#if 0
|
||||
eprintf ("FCN NAME (NAME): %s - %s => %lli - %lli => %f\n", fcn->name, fcn2->name,
|
||||
fcn->size, fcn2->size, t);
|
||||
#endif
|
||||
/* Set flag in matched functions */
|
||||
fcn->diff->type = fcn2->diff->type = (t==1)?
|
||||
R_ANAL_DIFF_TYPE_MATCH: R_ANAL_DIFF_TYPE_UNMATCH;
|
||||
fcn->diff->dist = fcn2->diff->dist = t;
|
||||
R_FREE (fcn->fingerprint);
|
||||
R_FREE (fcn2->fingerprint);
|
||||
fcn->diff->addr = fcn2->addr;
|
||||
fcn2->diff->addr = fcn->addr;
|
||||
fcn->diff->size = r_anal_fcn_size (fcn2);
|
||||
fcn2->diff->size = r_anal_fcn_size (fcn);
|
||||
R_FREE (fcn->diff->name);
|
||||
if (fcn2->name)
|
||||
fcn->diff->name = strdup (fcn2->name);
|
||||
R_FREE (fcn2->diff->name);
|
||||
if (fcn->name)
|
||||
fcn2->diff->name = strdup (fcn->name);
|
||||
r_anal_diff_bb (anal, fcn, fcn2);
|
||||
break;
|
||||
}
|
||||
r_list_foreach (fcns2, iter2, fcn2) {
|
||||
if (fcn2->type != R_ANAL_FCN_TYPE_SYM ||
|
||||
fcn2->name == NULL ||
|
||||
strcmp (fcn->name, fcn2->name)) {
|
||||
continue;
|
||||
}
|
||||
r_diff_buffers_distance (NULL, fcn->fingerprint, r_anal_fcn_size (fcn),
|
||||
fcn2->fingerprint, r_anal_fcn_size (fcn2),
|
||||
NULL, &t);
|
||||
/* Set flag in matched functions */
|
||||
fcn->diff->type = fcn2->diff->type = (t == 1)
|
||||
? R_ANAL_DIFF_TYPE_MATCH
|
||||
: R_ANAL_DIFF_TYPE_UNMATCH;
|
||||
fcn->diff->dist = fcn2->diff->dist = t;
|
||||
R_FREE (fcn->fingerprint);
|
||||
R_FREE (fcn2->fingerprint);
|
||||
fcn->diff->addr = fcn2->addr;
|
||||
fcn2->diff->addr = fcn->addr;
|
||||
fcn->diff->size = r_anal_fcn_size (fcn2);
|
||||
fcn2->diff->size = r_anal_fcn_size (fcn);
|
||||
R_FREE (fcn->diff->name);
|
||||
if (fcn2->name) {
|
||||
fcn->diff->name = strdup (fcn2->name);
|
||||
}
|
||||
R_FREE (fcn2->diff->name);
|
||||
if (fcn->name) {
|
||||
fcn2->diff->name = strdup (fcn->name);
|
||||
}
|
||||
r_anal_diff_bb (anal, fcn, fcn2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Compare remaining functions */
|
||||
r_list_foreach (fcns, iter, fcn) {
|
||||
if ((fcn->type != R_ANAL_FCN_TYPE_FCN && fcn->type != R_ANAL_FCN_TYPE_SYM) ||
|
||||
fcn->diff->type != R_ANAL_DIFF_TYPE_NULL)
|
||||
if ((fcn->type != R_ANAL_FCN_TYPE_FCN &&
|
||||
fcn->type != R_ANAL_FCN_TYPE_SYM) ||
|
||||
fcn->diff->type != R_ANAL_DIFF_TYPE_NULL) {
|
||||
continue;
|
||||
}
|
||||
ot = 0;
|
||||
mfcn = mfcn2 = NULL;
|
||||
r_list_foreach (fcns2, iter2, fcn2) {
|
||||
@ -208,40 +216,28 @@ R_API int r_anal_diff_fcn(RAnal *anal, RList *fcns, RList *fcns2) {
|
||||
maxsize = fcn2_size;
|
||||
minsize = fcn_size;
|
||||
}
|
||||
if ((fcn2->type != R_ANAL_FCN_TYPE_FCN && fcn2->type != R_ANAL_FCN_TYPE_SYM) ||
|
||||
fcn2->diff->type != R_ANAL_DIFF_TYPE_NULL || (maxsize * anal->diff_thfcn > minsize))
|
||||
if ((fcn2->type != R_ANAL_FCN_TYPE_FCN
|
||||
&& fcn2->type != R_ANAL_FCN_TYPE_SYM) ||
|
||||
fcn2->diff->type != R_ANAL_DIFF_TYPE_NULL ||
|
||||
(maxsize * anal->diff_thfcn > minsize)) {
|
||||
continue;
|
||||
r_diff_buffers_distance (NULL, fcn->fingerprint, fcn_size,
|
||||
fcn2->fingerprint, fcn2_size, NULL, &t);
|
||||
}
|
||||
r_diff_buffers_distance (NULL, fcn->fingerprint, fcn_size, fcn2->fingerprint, fcn2_size, NULL, &t);
|
||||
fcn->diff->dist = fcn2->diff->dist = t;
|
||||
#if 0
|
||||
int i;
|
||||
eprintf ("FP0 ");
|
||||
for (i=0;i<fcn->size;i++)
|
||||
eprintf ("%02x", fcn->fingerprint[i]);
|
||||
eprintf ("\n");
|
||||
|
||||
eprintf ("FP1 ");
|
||||
for (i=0;i<fcn2->size;i++)
|
||||
eprintf ("%02x", fcn2->fingerprint[i]);
|
||||
eprintf ("\n");
|
||||
eprintf ("FCN: %s - %s => %lli - %lli => %f\n", fcn->name, fcn2->name,
|
||||
fcn->size, fcn2->size, t);
|
||||
#endif
|
||||
if (t > anal->diff_thfcn && t > ot) {
|
||||
ot = t;
|
||||
mfcn = fcn;
|
||||
mfcn2 = fcn2;
|
||||
if (t == 1) break;
|
||||
if (t == 1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (mfcn && mfcn2) {
|
||||
#if 0
|
||||
eprintf ("Match => %s - %s\n", mfcn->name, mfcn2->name);
|
||||
#endif
|
||||
/* Set flag in matched functions */
|
||||
mfcn->diff->type = mfcn2->diff->type = (ot==1)?
|
||||
R_ANAL_DIFF_TYPE_MATCH: R_ANAL_DIFF_TYPE_UNMATCH;
|
||||
mfcn->diff->type = mfcn2->diff->type = (ot == 1)
|
||||
? R_ANAL_DIFF_TYPE_MATCH
|
||||
: R_ANAL_DIFF_TYPE_UNMATCH;
|
||||
R_FREE (mfcn->fingerprint);
|
||||
R_FREE (mfcn2->fingerprint);
|
||||
mfcn->diff->addr = mfcn2->addr;
|
||||
@ -249,11 +245,13 @@ R_API int r_anal_diff_fcn(RAnal *anal, RList *fcns, RList *fcns2) {
|
||||
mfcn->diff->size = r_anal_fcn_size (mfcn2);
|
||||
mfcn2->diff->size = r_anal_fcn_size (mfcn);
|
||||
R_FREE (mfcn->diff->name);
|
||||
if (mfcn2->name)
|
||||
if (mfcn2->name) {
|
||||
mfcn->diff->name = strdup (mfcn2->name);
|
||||
}
|
||||
R_FREE (mfcn2->diff->name);
|
||||
if (mfcn->name)
|
||||
if (mfcn->name) {
|
||||
mfcn2->diff->name = strdup (mfcn->name);
|
||||
}
|
||||
r_anal_diff_bb (anal, mfcn, mfcn2);
|
||||
}
|
||||
}
|
||||
|
@ -215,6 +215,30 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
|
||||
ut32 aLen;
|
||||
ut32 bLen;
|
||||
|
||||
// temp pointer will be used to switch v0 and v1 after processing the inner loop.
|
||||
int *temp;
|
||||
int *v0, *v1;
|
||||
|
||||
// We need these variables outside the context of the loops as we need to
|
||||
// survive multiple loop iterations.
|
||||
// start and stop are used in our inner loop
|
||||
// colMin tells us the current 'best' edit distance.
|
||||
// extendStop & extendStart are used when we get 'double up' edge conditions
|
||||
// that require us to keep some more data.
|
||||
int start = 0;
|
||||
int stop = 0;
|
||||
int smallest;
|
||||
int colMin = 0;
|
||||
int extendStop = 0;
|
||||
int extendStart = 0;
|
||||
|
||||
//we could move cost into the 'i' loop.
|
||||
int cost = 0;
|
||||
|
||||
// loops can get very big, this can be removed, but it's currently in there for debugging
|
||||
// and optimisation testing.
|
||||
ut64 loops = 0;
|
||||
|
||||
// We need the longest file to be 'A' because our optimisation tries to stop and start
|
||||
// around the diagonal.
|
||||
// AAAAAAA
|
||||
@ -241,7 +265,7 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
|
||||
aLen = la;
|
||||
bLen = lb;
|
||||
}
|
||||
|
||||
stop = bLen;
|
||||
// Preliminary tests
|
||||
|
||||
//Do we have both files a & b, and are they at least one byte?
|
||||
@ -259,27 +283,22 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Only calloc if we have to do some processing
|
||||
|
||||
// calloc v0 & v1 and check they initialised
|
||||
int *v0 = (int*) calloc ((bLen + 3), sizeof (int));
|
||||
v0 = (int*) calloc ((bLen + 3), sizeof (int));
|
||||
if (!v0) {
|
||||
eprintf("Error: cannot allocate %i bytes.", bLen + 3);
|
||||
eprintf ("Error: cannot allocate %i bytes.", bLen + 3);
|
||||
return false;
|
||||
}
|
||||
|
||||
int *v1 = (int*) calloc ((bLen + 3), sizeof (int));
|
||||
v1 = (int*) calloc ((bLen + 3), sizeof (int));
|
||||
if (!v1) {
|
||||
eprintf("Error: cannot allocate %i bytes", 2 * (bLen + 3));
|
||||
free(v0);
|
||||
eprintf ("Error: cannot allocate %i bytes", 2 * (bLen + 3));
|
||||
free (v0);
|
||||
return false;
|
||||
}
|
||||
|
||||
// temp pointer will be used to switch v0 and v1 after processing the inner loop.
|
||||
int *temp;
|
||||
|
||||
|
||||
// initialise v0 and v1.
|
||||
// With optimisation we strictly only need to initialise v0[0..2]=0..2 & v1[0] = 1;
|
||||
for (i = 0; i < bLen + 1 ; i++) {
|
||||
@ -287,27 +306,6 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
|
||||
v1[i] = i + 1;
|
||||
}
|
||||
|
||||
|
||||
// We need these variables outside the context of the loops as we need to
|
||||
// survive multiple loop iterations.
|
||||
// start and stop are used in our inner loop
|
||||
// colMin tells us the current 'best' edit distance.
|
||||
// extendStop & extendStart are used when we get 'double up' edge conditions
|
||||
// that require us to keep some more data.
|
||||
int start = 0;
|
||||
int stop = bLen;
|
||||
int smallest;
|
||||
int colMin = 0;
|
||||
int extendStop = 0;
|
||||
int extendStart = 0;
|
||||
|
||||
//we could move cost into the 'i' loop.
|
||||
int cost = 0;
|
||||
|
||||
// loops can get very big, this can be removed, but it's currently in there for debugging
|
||||
// and optimisation testing.
|
||||
ut64 loops = 0;
|
||||
|
||||
// Outer loop = the length of the longest input file.
|
||||
for (i = 0; i < aLen; i++) {
|
||||
|
||||
@ -315,7 +313,7 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
|
||||
// bLen (so we don't run off the end of our array)
|
||||
// or 'two below the diagonal' PLUS any extension we need for 'double up' edge values
|
||||
// (see extendStop for logic)
|
||||
stop = R_MIN((i + extendStop + 2), bLen);
|
||||
stop = R_MIN ((i + extendStop + 2), bLen);
|
||||
|
||||
// We need a value in the result column (v1[start]).
|
||||
// If you look at the loop below, we need it because we look at v1[j] as one of the
|
||||
@ -342,7 +340,7 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
|
||||
v1[j + 2] = smallest + 1;
|
||||
|
||||
// If we have seen a smaller number, it's the new column Minimum
|
||||
colMin=R_MIN((colMin),(smallest));
|
||||
colMin = R_MIN ((colMin), (smallest));
|
||||
|
||||
}
|
||||
|
||||
@ -370,7 +368,6 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
|
||||
start --;
|
||||
extendStart ++;
|
||||
}
|
||||
|
||||
//Switch v0 and v1 pointers via temp pointer
|
||||
temp = v0;
|
||||
v0 = v1;
|
||||
@ -381,12 +378,10 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
|
||||
eprintf ("\rProcessing %d of %d\r", i, aLen);
|
||||
}
|
||||
}
|
||||
|
||||
//Clean up output on loop exit (purely aesthetic)
|
||||
if (verbose) {
|
||||
eprintf ("\rProcessing %d of %d (loops=%llu)\n", i, aLen,loops);
|
||||
}
|
||||
|
||||
if (distance) {
|
||||
// the final distance is the last byte we processed in the inner loop.
|
||||
// v0 is used instead of v1 because we switched the pointers before exiting the outer loop
|
||||
@ -396,9 +391,7 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
|
||||
*similarity = (double)1 - diff;
|
||||
}
|
||||
}
|
||||
|
||||
free (v0);
|
||||
free (v1);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user