From 6ae041727854f8685973af1bc8b5c1d680091388 Mon Sep 17 00:00:00 2001
From: pancake <pancake@nopcode.org>
Date: Fri, 24 Jul 2015 12:49:02 +0200
Subject: [PATCH] Update SDB to optimize dupped symbol checks in mach0 parser

---
 libr/bin/format/mach0/mach0.c | 23 +++++++++++-----------
 libr/include/sdb/sdb.h        |  2 ++
 shlr/sdb/src/cdb.c            |  1 -
 shlr/sdb/src/disk.c           |  4 +---
 shlr/sdb/src/fmt.c            |  5 +++--
 shlr/sdb/src/main.c           | 15 ++++++++++++++-
 shlr/sdb/src/sdb.c            | 36 ++++++++++++++++++++++++++++++++++-
 shlr/sdb/src/sdb.h            |  2 ++
 8 files changed, 68 insertions(+), 20 deletions(-)

diff --git a/libr/bin/format/mach0/mach0.c b/libr/bin/format/mach0/mach0.c
index a2edb7a9c1..c57d420c88 100644
--- a/libr/bin/format/mach0/mach0.c
+++ b/libr/bin/format/mach0/mach0.c
@@ -876,22 +876,19 @@ static ut64 get_text_base(struct MACH0_(obj_t)* bin) {
 }
 #endif
 
-static int inSymtab (struct symbol_t *symbols, int last, const char *name, ut64 addr) {
-	int i;
-	for (i=0; i<last; i++) {
-		if (symbols[i].addr != addr)
-			continue;
-		if (!strcmp (symbols[i].name, name)) {
-			return 1;
-		}
-	}
-	return 0;
+static int inSymtab (Sdb *db, struct symbol_t *symbols, int last, const char *name, ut64 addr) { // test-and-insert: R_TRUE if this (name, addr) pair was already recorded in db
+	const char *key = sdb_fmt (0, "%s.%"PFMT64x, name, addr); // key is "<name>.<addr>" (PFMT64x is presumably the 64-bit hex format); symbols and last are not used in this body
+	if (sdb_const_get (db, key, NULL))
+		return R_TRUE; // key already present, so the caller treats the symbol as a duplicate
+	sdb_set (db, key, "1", 0); // first sighting: record the key so a later call with the same pair returns R_TRUE
+	return R_FALSE;
 }
 
 struct symbol_t* MACH0_(get_symbols)(struct MACH0_(obj_t)* bin) {
 	const char *symstr;
 	struct symbol_t *symbols;
 	int from, to, i, j, s, stridx, symbols_size, symbols_count;
+	Sdb *db;
 	//ut64 text_base = get_text_base (bin);
 
 	if (!bin || !bin->symtab || !bin->symstr)
@@ -910,6 +907,7 @@ struct symbol_t* MACH0_(get_symbols)(struct MACH0_(obj_t)* bin) {
 
 	if (!(symbols = calloc (1, symbols_size)))
 		return NULL;
+	db = sdb_new0 ();
 
 	j = 0; // symbol_idx
 	for (s = 0; s < 2; s++) {
@@ -982,7 +980,7 @@ struct symbol_t* MACH0_(get_symbols)(struct MACH0_(obj_t)* bin) {
 				}
 				symbols[j].last = 0;
 			}
-			if (inSymtab (symbols, j, symbols[j].name, symbols[j].addr)) {
+			if (inSymtab (db, symbols, j, symbols[j].name, symbols[j].addr)) {
 				symbols[j].name[0] = 0;
 				j--;
 			}
@@ -1019,7 +1017,7 @@ struct symbol_t* MACH0_(get_symbols)(struct MACH0_(obj_t)* bin) {
 			strncpy (symbols[j].name, symstr, R_BIN_MACH0_STRING_LENGTH);
 			symbols[j].name[R_BIN_MACH0_STRING_LENGTH-1] = 0;
 			symbols[j].last = 0;
-			if (inSymtab (symbols, j, symbols[j].name, symbols[j].addr)) {
+			if (inSymtab (db, symbols, j, symbols[j].name, symbols[j].addr)) {
 				symbols[j].name[0] = 0;
 			} else {
 				j++;
@@ -1027,6 +1025,7 @@ struct symbol_t* MACH0_(get_symbols)(struct MACH0_(obj_t)* bin) {
 		}
 	}
 #endif
+	sdb_free (db);
 	symbols[j].last = 1;
 	return symbols;
 }
diff --git a/libr/include/sdb/sdb.h b/libr/include/sdb/sdb.h
index d7f035bfc2..8c15c7d764 100644
--- a/libr/include/sdb/sdb.h
+++ b/libr/include/sdb/sdb.h
@@ -106,6 +106,8 @@ void sdb_file (Sdb* s, const char *dir);
 void sdb_reset (Sdb* s);
 void sdb_setup (Sdb* s, int options);
 void sdb_drain (Sdb*, Sdb*);
+int sdb_stats(Sdb *s, ut32 *disk, ut32 *mem);
+int sdb_dump_hasnext (Sdb* s);
 
 typedef int (*SdbForeachCallback)(void *user, const char *k, const char *v);
 int sdb_foreach (Sdb* s, SdbForeachCallback cb, void *user);
diff --git a/shlr/sdb/src/cdb.c b/shlr/sdb/src/cdb.c
index e38868fd1a..81e6932db4 100644
--- a/shlr/sdb/src/cdb.c
+++ b/shlr/sdb/src/cdb.c
@@ -130,7 +130,6 @@ int cdb_findnext(struct cdb *c, ut32 u, const char *key, ut32 len) {
 		ut32_unpack (buf, &u);
 		if (u == c->khash) {
 			if (!seek_set (c->fd, pos)) {
-
 				return -1;
 			}
 			if (!cdb_getkvlen (c->fd, &u, &c->dlen)) {
diff --git a/shlr/sdb/src/disk.c b/shlr/sdb/src/disk.c
index 907ec3c917..6d2302098c 100644
--- a/shlr/sdb/src/disk.c
+++ b/shlr/sdb/src/disk.c
@@ -28,9 +28,7 @@ static inline int r_sys_rmkdir(char *dir) {
         if (*ptr==slash) ptr++;
 #if __WINDOWS__
         char *p = strstr (ptr, ":\\");
-        if (p) {
-                ptr = p + 2;
-        }
+        if (p) ptr = p + 2;
 #endif
         while ((ptr = strchr (ptr, slash))) {
                 *ptr = 0;
diff --git a/shlr/sdb/src/fmt.c b/shlr/sdb/src/fmt.c
index 0976f5a473..1e002688db 100644
--- a/shlr/sdb/src/fmt.c
+++ b/shlr/sdb/src/fmt.c
@@ -35,7 +35,7 @@ SDB_API char *sdb_fmt(int n, const char *fmt, ...) {
 		return Key[n];
 	va_start (ap, fmt);
 	*Key[n] = 0;
-	vsnprintf (Key[n], 255, fmt, ap);
+	vsnprintf (Key[n], sizeof (Key[n]), fmt, ap);
 	Key[n][255] = 0;
 	va_end (ap);
 	return Key[n];
@@ -191,7 +191,8 @@ SDB_API char** sdb_fmt_array(const char *list) {
 		}
 		do {
 			const char *str = sdb_anext2 (ptr, &next);
-			int slen = next?(next-str)-1:strlen (str)+1;
+			int slen = next? (next-str) - 1:
+				(int)strlen (str) + 1;
 			memcpy (_s, str, slen);
 			_s[slen]=0;
 			*retp++ = _s;
diff --git a/shlr/sdb/src/main.c b/shlr/sdb/src/main.c
index 737edf892a..38970e8be5 100644
--- a/shlr/sdb/src/main.c
+++ b/shlr/sdb/src/main.c
@@ -302,10 +302,11 @@ static int createdb(const char *f, const char **args, int nargs) {
 }
 
 static int showusage(int o) {
-	printf ("usage: sdb [-0dehjJv|-D A B] [-|db] "
+	printf ("usage: sdb [-0cdehjJv|-D A B] [-|db] "
 		"[.file]|[-=]|[-+][(idx)key[:json|=value] ..]\n");
 	if (o==2) {
 		printf ("  -0      terminate results with \\x00\n"
+			"  -c      count the number of keys database\n"
 			"  -d      decode base64 from stdin\n"
 			"  -D      diff two databases\n"
 			"  -e      encode stdin as base64\n"
@@ -409,6 +410,17 @@ static int dbdiff (const char *a, const char *b) {
 	return n;
 }
 
+int showcount (const char *db) { // print the on-disk key count that sdb_stats reports for `db`; always returns 0
+	ut32 d;
+	s = sdb_new (NULL, db, 0); // NOTE(review): `s` is not declared in this function; presumably a file-scope Sdb handle, confirm
+	if (sdb_stats (s, &d, NULL)) {
+		printf ("%u\n", d); // d is a ut32 (unsigned), so it needs an unsigned conversion rather than %d
+	}
+	// TODO: show version, timestamp information
+	sdb_free (s);
+	return 0;
+}
+
 int main(int argc, const char **argv) {
 	char *line;
 	const char *arg, *grep = NULL;
@@ -451,6 +463,7 @@ int main(int argc, const char **argv) {
 				return showusage(1);
 			}
 			break;
+		case 'c': return (argc<3)? showusage (1) : showcount (argv[2]);
 		case 'v': return showversion ();
 		case 'h': return showusage (2);
 		case 'e': return base64encode ();
diff --git a/shlr/sdb/src/sdb.c b/shlr/sdb/src/sdb.c
index 332252d547..f2d984aa44 100644
--- a/shlr/sdb/src/sdb.c
+++ b/shlr/sdb/src/sdb.c
@@ -616,6 +616,39 @@ SDB_API SdbKv *sdb_dump_next (Sdb* s) {
 	return &s->tmpkv;
 }
 
+SDB_API int sdb_dump_hasnext (Sdb* s) { // skip over one on-disk record; returns 1 if a record was skipped, 0 otherwise (s is not NULL-checked)
+	ut32 k, v;
+	if (s->fd==-1) // no file descriptor to read from
+		return 0;
+	if (!cdb_getkvlen (s->fd, &k, &v)) // fetch the key and value lengths of the next record
+		return 0;
+	if (k<1 || v<1) // a zero key or value length ends the walk
+		return 0;
+	if (lseek (s->fd, k+v, SEEK_CUR) == -1) { // step past the key and value bytes without reading them
+		return 0;
+	}
+	s->pos += k + v + 4; // NOTE(review): the extra 4 is presumably the length header consumed by cdb_getkvlen, confirm
+	return 1;
+}
+
+SDB_API int sdb_stats(Sdb *s, ut32 *disk, ut32 *mem) { // report record counts through the optional out-params; returns 0 if s is NULL, else 1
+	if (!s) return 0;
+	if (disk) { // optional out-param: number of records walked via the fd
+		ut32 count = 0;
+		if (s->fd != -1) {
+			sdb_dump_begin (s); // presumably rewinds the dump cursor to the first record, confirm
+			while (sdb_dump_hasnext (s)) { // each successful call skips exactly one record
+				count ++;
+			}
+		}
+		*disk = count; // stays 0 when there is no open fd
+	}
+	if (mem) { // optional out-param: length of the hashtable's list
+		*mem = s->ht->list->length;
+	}
+	return 1;
+}
+
 // TODO: make it static? internal api?
 SDB_API int sdb_dump_dupnext (Sdb* s, char **key, char **value, int *_vlen) {
 	ut32 vlen = 0, klen = 0;
@@ -627,8 +660,9 @@ SDB_API int sdb_dump_dupnext (Sdb* s, char **key, char **value, int *_vlen) {
 		return 0;
 	if (!cdb_getkvlen (s->fd, &klen, &vlen))
 		return 0;
-	if (klen<1 || vlen<1)
+	if (klen<1 || vlen<1) {
 		return 0;
+	}
 	if (_vlen)
 		*_vlen = vlen;
 	if (key) {
diff --git a/shlr/sdb/src/sdb.h b/shlr/sdb/src/sdb.h
index d7f035bfc2..8c15c7d764 100644
--- a/shlr/sdb/src/sdb.h
+++ b/shlr/sdb/src/sdb.h
@@ -106,6 +106,8 @@ void sdb_file (Sdb* s, const char *dir);
 void sdb_reset (Sdb* s);
 void sdb_setup (Sdb* s, int options);
 void sdb_drain (Sdb*, Sdb*);
+int sdb_stats(Sdb *s, ut32 *disk, ut32 *mem);
+int sdb_dump_hasnext (Sdb* s);
 
 typedef int (*SdbForeachCallback)(void *user, const char *k, const char *v);
 int sdb_foreach (Sdb* s, SdbForeachCallback cb, void *user);