Fix off-by-one when diffing

This commit is contained in:
Álvaro Felipe Melchor 2016-07-17 12:09:31 +02:00
parent 392dd94bf7
commit 8ad4d74316
2 changed files with 92 additions and 101 deletions

View File

@ -88,7 +88,7 @@ R_API int r_anal_diff_fingerprint_fcn(RAnal *anal, RAnalFunction *fcn) {
fcn->fingerprint = NULL;
r_list_foreach (fcn->bbs, iter, bb) {
len += bb->size;
fcn->fingerprint = realloc (fcn->fingerprint, len);
fcn->fingerprint = realloc (fcn->fingerprint, len + 1);
if (!fcn->fingerprint)
return 0;
memcpy (fcn->fingerprint+len-bb->size, bb->fingerprint, bb->size);
@ -150,52 +150,60 @@ R_API int r_anal_diff_fcn(RAnal *anal, RList *fcns, RList *fcns2) {
ut64 maxsize, minsize;
double t, ot;
if (!anal)
if (!anal) {
return false;
}
if (anal->cur && anal->cur->diff_fcn)
if (anal->cur && anal->cur->diff_fcn) {
return (anal->cur->diff_fcn (anal, fcns, fcns2));
}
/* Compare functions with the same name */
if (fcns)
r_list_foreach (fcns, iter, fcn) {
if (fcn->type != R_ANAL_FCN_TYPE_SYM || fcn->name == NULL)
continue;
r_list_foreach (fcns2, iter2, fcn2) {
if (fcn2->type != R_ANAL_FCN_TYPE_SYM || fcn2->name == NULL ||
strcmp (fcn->name, fcn2->name))
if (fcns) {
r_list_foreach (fcns, iter, fcn) {
if (fcn->type != R_ANAL_FCN_TYPE_SYM || fcn->name == NULL) {
continue;
r_diff_buffers_distance (NULL, fcn->fingerprint, r_anal_fcn_size (fcn),
fcn2->fingerprint, r_anal_fcn_size (fcn2), NULL, &t);
#if 0
eprintf ("FCN NAME (NAME): %s - %s => %lli - %lli => %f\n", fcn->name, fcn2->name,
fcn->size, fcn2->size, t);
#endif
/* Set flag in matched functions */
fcn->diff->type = fcn2->diff->type = (t==1)?
R_ANAL_DIFF_TYPE_MATCH: R_ANAL_DIFF_TYPE_UNMATCH;
fcn->diff->dist = fcn2->diff->dist = t;
R_FREE (fcn->fingerprint);
R_FREE (fcn2->fingerprint);
fcn->diff->addr = fcn2->addr;
fcn2->diff->addr = fcn->addr;
fcn->diff->size = r_anal_fcn_size (fcn2);
fcn2->diff->size = r_anal_fcn_size (fcn);
R_FREE (fcn->diff->name);
if (fcn2->name)
fcn->diff->name = strdup (fcn2->name);
R_FREE (fcn2->diff->name);
if (fcn->name)
fcn2->diff->name = strdup (fcn->name);
r_anal_diff_bb (anal, fcn, fcn2);
break;
}
r_list_foreach (fcns2, iter2, fcn2) {
if (fcn2->type != R_ANAL_FCN_TYPE_SYM ||
fcn2->name == NULL ||
strcmp (fcn->name, fcn2->name)) {
continue;
}
r_diff_buffers_distance (NULL, fcn->fingerprint, r_anal_fcn_size (fcn),
fcn2->fingerprint, r_anal_fcn_size (fcn2),
NULL, &t);
/* Set flag in matched functions */
fcn->diff->type = fcn2->diff->type = (t == 1)
? R_ANAL_DIFF_TYPE_MATCH
: R_ANAL_DIFF_TYPE_UNMATCH;
fcn->diff->dist = fcn2->diff->dist = t;
R_FREE (fcn->fingerprint);
R_FREE (fcn2->fingerprint);
fcn->diff->addr = fcn2->addr;
fcn2->diff->addr = fcn->addr;
fcn->diff->size = r_anal_fcn_size (fcn2);
fcn2->diff->size = r_anal_fcn_size (fcn);
R_FREE (fcn->diff->name);
if (fcn2->name) {
fcn->diff->name = strdup (fcn2->name);
}
R_FREE (fcn2->diff->name);
if (fcn->name) {
fcn2->diff->name = strdup (fcn->name);
}
r_anal_diff_bb (anal, fcn, fcn2);
break;
}
}
}
/* Compare remaining functions */
r_list_foreach (fcns, iter, fcn) {
if ((fcn->type != R_ANAL_FCN_TYPE_FCN && fcn->type != R_ANAL_FCN_TYPE_SYM) ||
fcn->diff->type != R_ANAL_DIFF_TYPE_NULL)
if ((fcn->type != R_ANAL_FCN_TYPE_FCN &&
fcn->type != R_ANAL_FCN_TYPE_SYM) ||
fcn->diff->type != R_ANAL_DIFF_TYPE_NULL) {
continue;
}
ot = 0;
mfcn = mfcn2 = NULL;
r_list_foreach (fcns2, iter2, fcn2) {
@ -208,40 +216,28 @@ R_API int r_anal_diff_fcn(RAnal *anal, RList *fcns, RList *fcns2) {
maxsize = fcn2_size;
minsize = fcn_size;
}
if ((fcn2->type != R_ANAL_FCN_TYPE_FCN && fcn2->type != R_ANAL_FCN_TYPE_SYM) ||
fcn2->diff->type != R_ANAL_DIFF_TYPE_NULL || (maxsize * anal->diff_thfcn > minsize))
if ((fcn2->type != R_ANAL_FCN_TYPE_FCN
&& fcn2->type != R_ANAL_FCN_TYPE_SYM) ||
fcn2->diff->type != R_ANAL_DIFF_TYPE_NULL ||
(maxsize * anal->diff_thfcn > minsize)) {
continue;
r_diff_buffers_distance (NULL, fcn->fingerprint, fcn_size,
fcn2->fingerprint, fcn2_size, NULL, &t);
}
r_diff_buffers_distance (NULL, fcn->fingerprint, fcn_size, fcn2->fingerprint, fcn2_size, NULL, &t);
fcn->diff->dist = fcn2->diff->dist = t;
#if 0
int i;
eprintf ("FP0 ");
for (i=0;i<fcn->size;i++)
eprintf ("%02x", fcn->fingerprint[i]);
eprintf ("\n");
eprintf ("FP1 ");
for (i=0;i<fcn2->size;i++)
eprintf ("%02x", fcn2->fingerprint[i]);
eprintf ("\n");
eprintf ("FCN: %s - %s => %lli - %lli => %f\n", fcn->name, fcn2->name,
fcn->size, fcn2->size, t);
#endif
if (t > anal->diff_thfcn && t > ot) {
ot = t;
mfcn = fcn;
mfcn2 = fcn2;
if (t == 1) break;
if (t == 1) {
break;
}
}
}
if (mfcn && mfcn2) {
#if 0
eprintf ("Match => %s - %s\n", mfcn->name, mfcn2->name);
#endif
/* Set flag in matched functions */
mfcn->diff->type = mfcn2->diff->type = (ot==1)?
R_ANAL_DIFF_TYPE_MATCH: R_ANAL_DIFF_TYPE_UNMATCH;
mfcn->diff->type = mfcn2->diff->type = (ot == 1)
? R_ANAL_DIFF_TYPE_MATCH
: R_ANAL_DIFF_TYPE_UNMATCH;
R_FREE (mfcn->fingerprint);
R_FREE (mfcn2->fingerprint);
mfcn->diff->addr = mfcn2->addr;
@ -249,11 +245,13 @@ R_API int r_anal_diff_fcn(RAnal *anal, RList *fcns, RList *fcns2) {
mfcn->diff->size = r_anal_fcn_size (mfcn2);
mfcn2->diff->size = r_anal_fcn_size (mfcn);
R_FREE (mfcn->diff->name);
if (mfcn2->name)
if (mfcn2->name) {
mfcn->diff->name = strdup (mfcn2->name);
}
R_FREE (mfcn2->diff->name);
if (mfcn->name)
if (mfcn->name) {
mfcn2->diff->name = strdup (mfcn->name);
}
r_anal_diff_bb (anal, mfcn, mfcn2);
}
}

View File

@ -215,6 +215,30 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
ut32 aLen;
ut32 bLen;
// temp pointer will be used to switch v0 and v1 after processing the inner loop.
int *temp;
int *v0, *v1;
// We need these variables outside the context of the loops as we need to
// survive multiple loop iterations.
// start and stop are used in our inner loop
// colMin tells us the current 'best' edit distance.
// extendStop & extendStart are used when we get 'double up' edge conditions
// that require us to keep some more data.
int start = 0;
int stop = 0;
int smallest;
int colMin = 0;
int extendStop = 0;
int extendStart = 0;
//we could move cost into the 'i' loop.
int cost = 0;
// loops can get very big, this can be removed, but it's currently in there for debugging
// and optimisation testing.
ut64 loops = 0;
// We need the longest file to be 'A' because our optimisation tries to stop and start
// around the diagonal.
// AAAAAAA
@ -241,7 +265,7 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
aLen = la;
bLen = lb;
}
stop = bLen;
// Preliminary tests
//Do we have both files a & b, and are they at least one byte?
@ -259,27 +283,22 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
}
return true;
}
// Only calloc if we have to do some processing
// calloc v0 & v1 and check they initialised
int *v0 = (int*) calloc ((bLen + 3), sizeof (int));
v0 = (int*) calloc ((bLen + 3), sizeof (int));
if (!v0) {
eprintf("Error: cannot allocate %i bytes.", bLen + 3);
eprintf ("Error: cannot allocate %i bytes.", bLen + 3);
return false;
}
int *v1 = (int*) calloc ((bLen + 3), sizeof (int));
v1 = (int*) calloc ((bLen + 3), sizeof (int));
if (!v1) {
eprintf("Error: cannot allocate %i bytes", 2 * (bLen + 3));
free(v0);
eprintf ("Error: cannot allocate %i bytes", 2 * (bLen + 3));
free (v0);
return false;
}
// temp pointer will be used to switch v0 and v1 after processing the inner loop.
int *temp;
// initialise v0 and v1.
// With optimisation, strictly we only need to initialise v0[0..2] = 0..2 and v1[0] = 1;
for (i = 0; i < bLen + 1 ; i++) {
@ -287,27 +306,6 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
v1[i] = i + 1;
}
// We need these variables outside the context of the loops as we need to
// survive multiple loop iterations.
// start and stop are used in our inner loop
// colMin tells us the current 'best' edit distance.
// extendStop & extendStart are used when we get 'double up' edge conditions
// that require us to keep some more data.
int start = 0;
int stop = bLen;
int smallest;
int colMin = 0;
int extendStop = 0;
int extendStart = 0;
//we could move cost into the 'i' loop.
int cost = 0;
// loops can get very big, this can be removed, but it's currently in there for debugging
// and optimisation testing.
ut64 loops = 0;
// Outer loop = the length of the longest input file.
for (i = 0; i < aLen; i++) {
@ -315,7 +313,7 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
// bLen (so we don't run off the end of our array)
// or 'two below the diagonal' PLUS any extension we need for 'double up' edge values
// (see extendStop for logic)
stop = R_MIN((i + extendStop + 2), bLen);
stop = R_MIN ((i + extendStop + 2), bLen);
// We need a value in the result column (v1[start]).
// If you look at the loop below, we need it because we look at v1[j] as one of the
@ -342,7 +340,7 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
v1[j + 2] = smallest + 1;
// If we have seen a smaller number, it's the new column Minimum
colMin=R_MIN((colMin),(smallest));
colMin = R_MIN ((colMin), (smallest));
}
@ -370,7 +368,6 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
start --;
extendStart ++;
}
//Switch v0 and v1 pointers via temp pointer
temp = v0;
v0 = v1;
@ -381,12 +378,10 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
eprintf ("\rProcessing %d of %d\r", i, aLen);
}
}
//Clean up output on loop exit (purely aesthetic)
if (verbose) {
eprintf ("\rProcessing %d of %d (loops=%llu)\n", i, aLen,loops);
}
if (distance) {
// the final distance is the last byte we processed in the inner loop.
// v0 is used instead of v1 because we switched the pointers before exiting the outer loop
@ -396,9 +391,7 @@ R_API bool r_diff_buffers_distance(RDiff *d, const ut8 *a, ut32 la, const ut8 *b
*similarity = (double)1 - diff;
}
}
free (v0);
free (v1);
return true;
}