Moved cbin.c's string filtering code into bin/filter.c

This commit is contained in:
Khairul Kasmiran 2018-02-15 20:42:05 +08:00
parent 8fef260ff2
commit 6484c12547
5 changed files with 223 additions and 223 deletions

View File

@ -130,3 +130,221 @@ R_API void r_bin_filter_classes(RList *list) {
}
sdb_free (db);
}
/*
 * Heuristic behind the "true" strpurge rule: guesses whether `str` is noise
 * rather than a meaningful string.
 *
 * Strings of at most 2 chars, or starting with '_', are never flagged.
 * Any other string is flagged when one of these holds:
 *   - it is shorter than 10 chars
 *   - non-alphanumeric chars (a backslash counts twice) are at least as
 *     many as digits + letters
 *   - it has fewer than 3 lowercase letters
 *
 * Returns true when the string looks like a false positive.
 */
static bool false_positive(const char *str) {
	int up = 0; // uppercase letters
	int lo = 0; // lowercase letters
	int nm = 0; // digits
	int ot = 0; // everything else
	int ln = 0; // total length
	int i;
	for (i = 0; str[i]; i++) {
		if (IS_DIGIT (str[i])) {
			nm++;
		} else if (str[i] >= 'a' && str[i] <= 'z') {
			lo++;
		} else if (str[i] >= 'A' && str[i] <= 'Z') {
			up++;
		} else {
			ot++;
		}
		if (str[i] == '\\') {
			// already counted once by the else branch above
			ot++;
		}
		ln++;
	}
	if (ln > 2 && str[0] != '_') {
		if (ln < 10) {
			return true;
		}
		if (ot >= (nm + up + lo)) {
			return true;
		}
		if (lo < 3) {
			return true;
		}
	}
	return false;
}
/*
 * Decides whether the string `str`, referenced at `refaddr`, has to be
 * purged according to the comma-separated rule list in bin->strpurge.
 *
 * Rules are evaluated left to right; the last rule that matches wins:
 *   true      purge when false_positive (str) flags the string
 *   all       matches every string
 *   FROM-TO   matches when refaddr lies in the inclusive range
 *   ADDR      matches when refaddr equals ADDR
 * A leading '!' on all/FROM-TO/ADDR inverts the outcome: a match keeps the
 * string instead of purging it.
 *
 * Returns true when the string must be purged. Returns false when no rule
 * matched, when bin->strpurge is not set, or when the rule list could not
 * be duplicated.
 */
R_API bool r_bin_strpurge(RBin *bin, const char *str, ut64 refaddr) {
	bool purge = false;
	if (!bin->strpurge) {
		return false;
	}
	// the list is modified while parsing, so work on a private copy
	char *addrs = strdup (bin->strpurge);
	if (!addrs) {
		return false;
	}
	int splits = r_str_split (addrs, ',');
	char *ptr = addrs;
	int i;
	// ptr always stays inside the current token, so strlen (ptr) + 1
	// lands on the start of the next one
	for (i = 0; i < splits; i++, ptr += strlen (ptr) + 1) {
		bool bang = false;
		if (!strcmp (ptr, "true") && false_positive (str)) {
			purge = true;
			continue;
		}
		if (*ptr == '!') {
			bang = true;
			ptr++;
		}
		if (!strcmp (ptr, "all")) {
			purge = !bang;
			continue;
		}
		char *range_sep = strchr (ptr, '-');
		if (range_sep) {
			*range_sep = 0;
			ut64 from = r_num_get (NULL, ptr);
			ptr = range_sep + 1;
			ut64 to = r_num_get (NULL, ptr);
			if (refaddr >= from && refaddr <= to) {
				purge = !bang;
			}
			// a range rule is never re-read as the single address TO;
			// otherwise an inverted range (FROM > TO) would still match TO
			continue;
		}
		ut64 addr = r_num_get (NULL, ptr);
		// addr == 0 only counts when the token really starts with '0'
		if ((addr != 0 || *ptr == '0') && refaddr == addr) {
			purge = !bang;
		}
	}
	free (addrs);
	return purge;
}
/*
 * Checks `str` against the filter selected by bin->strfilter.
 *
 *   'U'  every char is a space or a printable char in '@'..'Z', and none of
 *        the chars at index 2..5 repeats one of the first two chars
 *   'a'  every byte is 7-bit and printable
 *   'e'  contains '@' and '.' somewhere after the first char
 *   'f'  contains '%' somewhere after the first char
 *   'u'  contains "://"
 *   'i'  contains three dot-terminated digit groups (each < 256) followed
 *        by another digit
 *   'p'  starts with '/'
 *   '8'  contains at least one byte with the high bit set
 * Any other filter value accepts every string.
 *
 * Returns true when the string passes the filter.
 */
static bool bin_strfilter(RBin *bin, const char *str) {
	int i;
	switch (bin->strfilter) {
	case 'U': // only uppercase strings
		for (i = 0; str[i]; i++) {
			char ch = str[i];
			if (ch == ' ') {
				continue;
			}
			if (ch < '@' || ch > 'Z') {
				return false;
			}
			if (!IS_PRINTABLE (ch)) {
				return false;
			}
		}
		// rm false positives: early repetition of the leading chars.
		// This loop also covers str[2] == str[0], so no separate check is
		// done; that would read past the terminator of 0/1-char strings.
		if (str[0] && str[1]) {
			for (i = 2; i < 6 && str[i]; i++) {
				if (str[i] == str[0] || str[i] == str[1]) {
					return false;
				}
			}
		}
		break;
	case 'a': // only printable plain ascii
		for (i = 0; str[i]; i++) {
			char ch = str[i];
			// test the high bit explicitly: char may be unsigned
			if (((unsigned char)ch & 0x80) || !IS_PRINTABLE (ch)) {
				return false;
			}
		}
		break;
	case 'e': // emails
		if (!str || !*str) {
			return false;
		}
		if (!strstr (str + 1, "@")) {
			return false;
		}
		if (!strstr (str + 1, ".")) {
			return false;
		}
		break;
	case 'f': // format-string
		if (!str || !*str) {
			return false;
		}
		if (!strstr (str + 1, "%")) {
			return false;
		}
		break;
	case 'u': // URLs
		if (!strstr (str, "://")) {
			return false;
		}
		break;
	case 'i': // IPV4
		{
			int segment = 0;    // dot-terminated groups accepted so far
			int segmentsum = 0; // value of the group being read
			bool prevd = false; // previous char was a digit
			for (i = 0; str[i]; i++) {
				char ch = str[i];
				if (IS_DIGIT (ch)) {
					// once the group is out of range it stays out of
					// range; stop accumulating to avoid int overflow
					if (segmentsum < 256) {
						segmentsum = segmentsum * 10 + (ch - '0');
					}
					if (segment == 3) {
						return true;
					}
					prevd = true;
				} else if (ch == '.') {
					if (prevd && segmentsum < 256) {
						segment++;
					} else {
						segment = 0;
					}
					segmentsum = 0;
					prevd = false;
				} else {
					segmentsum = 0;
					prevd = false;
					segment = 0;
				}
			}
			return false;
		}
	case 'p': // path
		if (str[0] != '/') {
			return false;
		}
		break;
	case '8': // utf8
		for (i = 0; str[i]; i++) {
			// test the high bit explicitly: char may be unsigned
			if ((unsigned char)str[i] & 0x80) {
				return true;
			}
		}
		return false;
	default:
		break;
	}
	return true;
}
/*
 * Returns true when `str` (located at `addr`) should be kept: it is not
 * purged by r_bin_strpurge () and it passes bin_strfilter ().
 * The strfilter is only consulted for strings that were not purged.
 */
R_API bool r_bin_string_filter(RBin *bin, const char *str, ut64 addr) {
	const bool purged = r_bin_strpurge (bin, str, addr);
	return !purged && bin_strfilter (bin, str);
}

View File

@ -148,224 +148,6 @@ R_API RBinFile * r_core_bin_cur(RCore *core) {
return binfile;
}
/*
 * Heuristic behind the "true" strpurge rule: guesses whether `str` is noise
 * rather than a meaningful string.
 *
 * Strings of at most 2 chars, or starting with '_', are never flagged.
 * Any other string is flagged when one of these holds:
 *   - it is shorter than 10 chars
 *   - non-alphanumeric chars (a backslash counts twice) are at least as
 *     many as digits + letters
 *   - it has fewer than 3 lowercase letters
 *
 * Returns true when the string looks like a false positive.
 */
static bool false_positive(const char *str) {
	int up = 0; // uppercase letters
	int lo = 0; // lowercase letters
	int nm = 0; // digits
	int ot = 0; // everything else
	int ln = 0; // total length
	int i;
	for (i = 0; str[i]; i++) {
		if (IS_DIGIT (str[i])) {
			nm++;
		} else if (str[i] >= 'a' && str[i] <= 'z') {
			lo++;
		} else if (str[i] >= 'A' && str[i] <= 'Z') {
			up++;
		} else {
			ot++;
		}
		if (str[i] == '\\') {
			// already counted once by the else branch above
			ot++;
		}
		ln++;
	}
	if (ln > 2 && str[0] != '_') {
		if (ln < 10) {
			return true;
		}
		if (ot >= (nm + up + lo)) {
			return true;
		}
		if (lo < 3) {
			return true;
		}
	}
	return false;
}
/*
 * Decides whether the string `str`, referenced at `refaddr`, has to be
 * purged according to the comma-separated rule list in core->bin->strpurge.
 *
 * Rules are evaluated left to right; the last rule that matches wins:
 *   true      purge when false_positive (str) flags the string
 *   all       matches every string
 *   FROM-TO   matches when refaddr lies in the inclusive range
 *   ADDR      matches when refaddr equals ADDR
 * A leading '!' on all/FROM-TO/ADDR inverts the outcome: a match keeps the
 * string instead of purging it.
 *
 * Returns true when the string must be purged. Returns false when no rule
 * matched, when the rule list is not set, or when it could not be
 * duplicated.
 */
R_API bool r_core_bin_strpurge(RCore *core, const char *str, ut64 refaddr) {
	bool purge = false;
	if (!core->bin->strpurge) {
		return false;
	}
	// the list is modified while parsing, so work on a private copy
	char *addrs = strdup (core->bin->strpurge);
	if (!addrs) {
		return false;
	}
	int splits = r_str_split (addrs, ',');
	char *ptr = addrs;
	int i;
	// ptr always stays inside the current token, so strlen (ptr) + 1
	// lands on the start of the next one
	for (i = 0; i < splits; i++, ptr += strlen (ptr) + 1) {
		bool bang = false;
		if (!strcmp (ptr, "true") && false_positive (str)) {
			purge = true;
			continue;
		}
		if (*ptr == '!') {
			bang = true;
			ptr++;
		}
		if (!strcmp (ptr, "all")) {
			purge = !bang;
			continue;
		}
		char *range_sep = strchr (ptr, '-');
		if (range_sep) {
			*range_sep = 0;
			ut64 from = r_num_get (NULL, ptr);
			ptr = range_sep + 1;
			ut64 to = r_num_get (NULL, ptr);
			if (refaddr >= from && refaddr <= to) {
				purge = !bang;
			}
			// a range rule is never re-read as the single address TO;
			// otherwise an inverted range (FROM > TO) would still match TO
			continue;
		}
		ut64 addr = r_num_get (NULL, ptr);
		// addr == 0 only counts when the token really starts with '0'
		if ((addr != 0 || *ptr == '0') && refaddr == addr) {
			purge = !bang;
		}
	}
	free (addrs);
	return purge;
}
/*
 * Checks `str` against the filter selected by core->bin->strfilter.
 *
 *   'U'  every char is a space or a printable char in '@'..'Z', and none of
 *        the chars at index 2..5 repeats one of the first two chars
 *   'a'  every byte is 7-bit and printable
 *   'e'  contains '@' and '.' somewhere after the first char
 *   'f'  contains '%' somewhere after the first char
 *   'u'  contains "://"
 *   'i'  contains three dot-terminated digit groups (each < 256) followed
 *        by another digit
 *   'p'  starts with '/'
 *   '8'  contains at least one byte with the high bit set
 * Any other filter value accepts every string.
 *
 * Returns true when the string passes the filter.
 */
static bool bin_strfilter(RCore *core, const char *str) {
	int i;
	switch (core->bin->strfilter) {
	case 'U': // only uppercase strings
		for (i = 0; str[i]; i++) {
			char ch = str[i];
			if (ch == ' ') {
				continue;
			}
			if (ch < '@' || ch > 'Z') {
				return false;
			}
			if (!IS_PRINTABLE (ch)) {
				return false;
			}
		}
		// rm false positives: early repetition of the leading chars.
		// This loop also covers str[2] == str[0], so no separate check is
		// done; that would read past the terminator of 0/1-char strings.
		if (str[0] && str[1]) {
			for (i = 2; i < 6 && str[i]; i++) {
				if (str[i] == str[0] || str[i] == str[1]) {
					return false;
				}
			}
		}
		break;
	case 'a': // only printable plain ascii
		for (i = 0; str[i]; i++) {
			char ch = str[i];
			// test the high bit explicitly: char may be unsigned
			if (((unsigned char)ch & 0x80) || !IS_PRINTABLE (ch)) {
				return false;
			}
		}
		break;
	case 'e': // emails
		if (!str || !*str) {
			return false;
		}
		if (!strstr (str + 1, "@")) {
			return false;
		}
		if (!strstr (str + 1, ".")) {
			return false;
		}
		break;
	case 'f': // format-string
		if (!str || !*str) {
			return false;
		}
		if (!strstr (str + 1, "%")) {
			return false;
		}
		break;
	case 'u': // URLs
		if (!strstr (str, "://")) {
			return false;
		}
		break;
	case 'i': // IPV4
		{
			int segment = 0;    // dot-terminated groups accepted so far
			int segmentsum = 0; // value of the group being read
			bool prevd = false; // previous char was a digit
			for (i = 0; str[i]; i++) {
				char ch = str[i];
				if (IS_DIGIT (ch)) {
					// once the group is out of range it stays out of
					// range; stop accumulating to avoid int overflow
					if (segmentsum < 256) {
						segmentsum = segmentsum * 10 + (ch - '0');
					}
					if (segment == 3) {
						return true;
					}
					prevd = true;
				} else if (ch == '.') {
					if (prevd && segmentsum < 256) {
						segment++;
					} else {
						segment = 0;
					}
					segmentsum = 0;
					prevd = false;
				} else {
					segmentsum = 0;
					prevd = false;
					segment = 0;
				}
			}
			return false;
		}
	case 'p': // path
		if (str[0] != '/') {
			return false;
		}
		break;
	case '8': // utf8
		for (i = 0; str[i]; i++) {
			// test the high bit explicitly: char may be unsigned
			if ((unsigned char)str[i] & 0x80) {
				return true;
			}
		}
		return false;
	default:
		break;
	}
	return true;
}
/*
 * Returns true when `str` (located at `addr`) should be kept: it is not
 * purged by r_core_bin_strpurge () and it passes bin_strfilter ().
 * The strfilter is only consulted for strings that were not purged.
 */
R_API bool r_core_bin_string_filter(RCore *core, const char *str, ut64 addr) {
	const bool purged = r_core_bin_strpurge (core, str, addr);
	return !purged && bin_strfilter (core, str);
}
static void _print_strings(RCore *r, RList *list, int mode, int va) {
bool b64str = r_config_get_i (r->config, "bin.b64str");
int minstr = r_config_get_i (r->config, "bin.minstr");
@ -396,7 +178,7 @@ static void _print_strings(RCore *r, RList *list, int mode, int va) {
paddr = string->paddr;
vaddr = r_bin_get_vaddr (bin, paddr, string->vaddr);
addr = va ? vaddr : paddr;
if (!r_core_bin_string_filter (r, string->string, addr)) {
if (!r_bin_string_filter (bin, string->string, addr)) {
continue;
}
if (string->length < minstr) {

View File

@ -3144,7 +3144,7 @@ static char *ds_esc_str(RDisasmState *ds, const char *str, int len, const char *
static void ds_print_str(RDisasmState *ds, const char *str, int len, ut64 refaddr) {
const char *prefix;
if (!r_core_bin_string_filter (ds->core, str, refaddr)) {
if (!r_bin_string_filter (ds->core->bin, str, refaddr)) {
return;
}
char *escstr = ds_esc_str (ds, str, len, &prefix);
@ -3520,7 +3520,7 @@ static int myregwrite(RAnalEsil *esil, const char *name, ut64 *val) {
if (*val) {
(void)r_io_read_at (esil->anal->iob.io, *val, (ut8*)str, sizeof (str)-1);
str[sizeof (str)-1] = 0;
if (ds && *str && !r_core_bin_strpurge (ds->core, str, *val) && r_str_is_printable (str)) {
if (ds && *str && !r_bin_strpurge (ds->core->bin, str, *val) && r_str_is_printable (str)) {
bool jump_op = false;
switch (ds->analop.type) {
case R_ANAL_OP_TYPE_JMP:

View File

@ -685,6 +685,8 @@ R_API void r_bin_filter_name(Sdb *db, ut64 addr, char *name, int maxlen);
R_API void r_bin_filter_symbols (RList *list);
R_API void r_bin_filter_sections (RList *list);
R_API void r_bin_filter_classes (RList *list);
/* true when str, referenced at addr, must be purged according to bin->strpurge */
R_API bool r_bin_strpurge(RBin *bin, const char *str, ut64 addr);
/* true when str is not purged by r_bin_strpurge and passes the bin->strfilter check */
R_API bool r_bin_string_filter(RBin *bin, const char *str, ut64 addr);
/* plugin pointers */
extern RBinPlugin r_bin_plugin_any;

View File

@ -447,8 +447,6 @@ R_API bool r_core_bin_delete (RCore *core, ut32 binfile_idx, ut32 binobj_idx);
// XXX - this is kinda hacky, maybe there should be a way to
// refresh the bin environment without specific calls?
R_API int r_core_bin_refresh_strings(RCore *core);
/* true when str, referenced at addr, must be purged according to core->bin->strpurge */
R_API bool r_core_bin_strpurge(RCore *core, const char *str, ut64 addr);
/* true when str is not purged by r_core_bin_strpurge and passes the core->bin->strfilter check */
R_API bool r_core_bin_string_filter(RCore *core, const char *str, ut64 addr);
R_API int r_core_pseudo_code (RCore *core, const char *input);
/* gdiff.c */