fixing androdd and adding tests

2024-11-27 06:50:41 +00:00 · 2018-04-25 11:48:13 +02:00 · 2018-04-25 11:48:13 +02:00 · 3ee22996ce
commit 3ee22996ce
parent 861a62139b
4 changed files with 109 additions and 32 deletions
--- a/androdd.py
+++ b/androdd.py
@ -9,6 +9,7 @@ import sys
 from optparse import OptionParser

 from androguard import session
+from androguard.misc import clean_file_name
 from androguard.core import androconf
 from androguard.core.bytecode import method2dot, method2format
 from androguard.core.bytecodes import dvm
@ -52,22 +53,13 @@ options = [option_0, option_1, option_2, option_3, option_4, option_5]

 def valid_class_name(class_name):
    if class_name[-1] == ";":
-        return class_name[1:-1]
-    return class_name
+        class_name = class_name[1:-1]
+    return os.path.join(*class_name.split("/"))


-def create_directory(class_name, output):
-    output_name = output
-    if output_name[-1] != "/":
-        output_name = output_name + "/"
-
-    pathdir = output_name + class_name
-    try:
-        if not os.path.exists(pathdir):
-            os.makedirs(pathdir)
-    except OSError:
-        # FIXME
-        pass
+def create_directory(pathdir):
+    if not os.path.exists(pathdir):
+        os.makedirs(pathdir)


 def export_apps_to_format(filename,
@ -91,10 +83,6 @@ def export_apps_to_format(filename,
    if methods_filter:
        methods_filter_expr = re.compile(methods_filter)

-    output_name = output
-    if output_name[-1] != "/":
-        output_name = output_name + "/"
-
    dump_classes = []
    for _, vm, vmx in s.get_objects_dex():
        print("Decompilation ...", end=' ')
@ -128,7 +116,7 @@ def export_apps_to_format(filename,
            filenamejar = decompiler.Dex2Jar(vm,
                                             androconf.CONF["BIN_DEX2JAR"],
                                             androconf.CONF["TMP_DIRECTORY"]).get_jar()
-            shutil.move(filenamejar, output + "classes.jar")
+            shutil.move(filenamejar, os.path.join(output, "classes.jar"))
            print("End")

        for method in vm.get_methods():
@ -138,17 +126,15 @@ def export_apps_to_format(filename,
                if not methods_filter_expr.search(msig):
                    continue

+            # Current Folder to write to
            filename_class = valid_class_name(method.get_class_name())
-            create_directory(filename_class, output)
+            filename_class = os.path.join(output, filename_class)
+            create_directory(filename_class)

            print("Dump %s %s %s ..." % (method.get_class_name(),
                                         method.get_name(),
                                         method.get_descriptor()), end=' ')

-            filename_class = output_name + filename_class
-            if filename_class[-1] != "/":
-                filename_class = filename_class + "/"
-
            descriptor = method.get_descriptor()
            descriptor = descriptor.replace(";", "")
            descriptor = descriptor.replace(" ", "")
@ -156,7 +142,7 @@ def export_apps_to_format(filename,
            descriptor = descriptor.replace(")", "-")
            descriptor = descriptor.replace("/", "_")

-            filename = filename_class + method.get_name() + descriptor
+            filename = clean_file_name(os.path.join(filename_class, method.get_name() + descriptor))
            if len(method.get_name() + descriptor) > 250:
                all_identical_name_methods = vm.get_methods_descriptor(
                    method.get_class_name(), method.get_name())
@ -166,25 +152,29 @@ def export_apps_to_format(filename,
                        break
                    pos += 1

-                filename = filename_class + method.get_name() + "_%d" % pos
+                filename = clean_file_name(os.path.join(filename_class, method.get_name() + "_%d" % pos))
+
+            print(filename)

            buff = method2dot(vmx.get_method(method))

+            # Write Graph of method
            if form:
                print("%s ..." % form, end=' ')
                method2format(filename + "." + form, form, None, buff)

+            # Write the Java file for the whole class
            if method.get_class_name() not in dump_classes:
                print("source codes ...", end=' ')
                current_class = vm.get_class(method.get_class_name())
-                current_filename_class = valid_class_name(
-                    current_class.get_name())
+                current_filename_class = valid_class_name(current_class.get_name())

-                current_filename_class = output_name + current_filename_class + ".java"
+                current_filename_class = os.path.join(output, current_filename_class + ".java")
                with open(current_filename_class, "w") as fd:
                    fd.write(current_class.get_source())
                dump_classes.append(method.get_class_name())

+            # Write SMALI like code
            print("bytecodes ...", end=' ')
            bytecode_buff = dvm.get_bytecodes_method(vm, vmx, method)
            with open(filename + ".ag", "w") as fd:
--- a/androguard/core/bytecode.py
+++ b/androguard/core/bytecode.py
@ -377,7 +377,9 @@ def method2format(output, _format="png", mx=None, raw=None):
        data = method2dot(mx)

    # subgraphs cluster
-    buff += "subgraph cluster_" + hashlib.md5(bytearray(output, "UTF-8")).hexdigest() + " {\nlabel=\"%s\"\n" % data['name']
+    buff += "subgraph cluster_{} ".format(hashlib.md5(bytearray(output, "UTF-8")).hexdigest())
+    buff += "{\n"
+    buff += "label=\"{}\"\n".format(data['name'])
    buff += data['nodes']
    buff += "}\n"

@ -385,9 +387,10 @@ def method2format(output, _format="png", mx=None, raw=None):
    buff += data['edges']
    buff += "}\n"

-    d = pydot.graph_from_dot_data(buff.encode("UTF-8"))
+    d = pydot.graph_from_dot_data(buff)
    if d:
-        getattr(d, "write_" + _format.lower())(output)
+        for g in d:
+            getattr(g, "write_" + _format.lower())(output)


 def method2png(output, mx, raw=False):
--- a/androguard/misc.py
+++ b/androguard/misc.py
@ -2,6 +2,8 @@ from androguard import session
 from androguard.decompiler import decompiler
 from androguard.core import androconf
 import hashlib
+import re
+import os

 import logging
 log = logging.getLogger("androguard.misc")
@ -148,3 +150,65 @@ def sign_apk(filename, keystore, storepass):
                stdout=PIPE,
                stderr=STDOUT)
    stdout, stderr = cmd.communicate()
+
+
+def clean_file_name(filename, unique=True, replace="_", force_nt=False):
+    """
+    Return a version of filename whose last path component contains no
+    characters that are forbidden on Windows (for example <, /, ?, ...).
+
+    The intention of this function is to allow distribution of files to different OSes.
+
+    :param filename: path to clean; only the part after the last separator is changed
+    :param unique: check if the filename is already taken and append an integer to be unique (default: True)
+    :param replace: replacement string, must not contain forbidden characters itself (default: '_')
+    :param force_nt: Force shortening of paths like on NT systems (default: False)
+    :return: the directory part joined with the cleaned file name
+    :raises ValueError: if replace contains a forbidden character, a space or a dot
+    """
+    if re.search(r'[<>:"/\\|?* .\x00-\x1f]', replace):
+        raise ValueError("replacement character is not allowed!")
+
+    path, fname = os.path.split(filename)
+    # For Windows see: https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx
+    # Other operating systems seems to be more tolerant...
+
+    # Reserved device names (any case, bare or followed by an extension): pad the stem
+    fname = re.sub(r'^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])(?=\.|$)',
+                   lambda m: m.group(0) + replace, fname, flags=re.IGNORECASE)
+
+    # reserved characters
+    fname = re.sub(r'[<>:"/\\|?*\x00-\x1f]', replace, fname)
+    # Do not end with dot or space
+    fname = re.sub(r'[ .]$', replace, fname)
+
+    if force_nt or os.name == 'nt':
+        PATH_MAX_LENGTH = 230  # give extra space for other stuff...
+        # Check filename length limit, usually a problem on older Windows versions
+        if len(fname) > PATH_MAX_LENGTH:
+            if "." in fname:
+                f, ext = fname.rsplit(".", 1)
+                fname = "{}.{}".format(f[:PATH_MAX_LENGTH-(len(ext)+1)], ext)
+            else:
+                fname = fname[:PATH_MAX_LENGTH]
+
+        # Special behaviour... On Windows, there is also a problem with the maximum path length in explorer.exe
+        # maximum length is limited to 260 chars, so use 250 to have room for other stuff
+        if len(os.path.abspath(os.path.join(path, fname))) > 250:
+            fname = fname[:250 - (len(os.path.abspath(path)) + 1)]
+
+    if unique:
+        counter = 0
+        origname = fname
+        while os.path.isfile(os.path.join(path, fname)):
+            if "." in fname:
+                # assume extension
+                f, ext = origname.rsplit(".", 1)
+                fname = "{}_{}.{}".format(f, counter, ext)
+            else:
+                fname = "{}_{}".format(origname, counter)
+            counter += 1
+
+    return os.path.join(path, fname)
+
+
--- a/tests/test_misc.py
+++ b/tests/test_misc.py
@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+import unittest
+
+from androguard.misc import clean_file_name
+import tempfile
+
+class MiscTest(unittest.TestCase):
+    def testCleanPath(self):
+        self.assertEqual("foobarfoo_", clean_file_name("foobarfoo ", unique=False))
+        self.assertEqual("foobarsdf_", clean_file_name("foobarsdf.", unique=False))
+        self.assertEqual("_init_", clean_file_name("<init>", unique=False))
+        self.assertEqual("C:\\" + "a" * 230, clean_file_name("C:\\" + "a" * 999, unique=False, force_nt=True))
+        self.assertEqual("C:\\" + "a" * 226 + ".foo", clean_file_name("C:\\" + "a" * 999 + ".foo", unique=False, force_nt=True))
+        # An open NamedTemporaryFile exists on disk as fp.name, so a counter must be appended
+        with tempfile.NamedTemporaryFile() as fp:
+            self.assertEqual(fp.name + "_0", clean_file_name(fp.name, unique=True))
+
+
+if __name__ == '__main__':
+    unittest.main()