From b420793c78d5161824d6f865cd9a55e2fedff350 Mon Sep 17 00:00:00 2001 From: ehrenb Date: Sat, 3 Feb 2024 20:21:51 -0500 Subject: [PATCH 1/6] testing out wrapping fields --- androguard/core/analysis/analysis.py | 31 ++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/androguard/core/analysis/analysis.py b/androguard/core/analysis/analysis.py index cb262bd6..42d43958 100644 --- a/androguard/core/analysis/analysis.py +++ b/androguard/core/analysis/analysis.py @@ -1118,6 +1118,18 @@ class ClassAnalysis: def get_field_analysis(self, field): return self._fields.get(field) + def add_field(self, field_analysis): + """ + Add the given field to this analyis. + usually only called during Analysis.add and Analysis._resolve_method + + :param FieldAnalysis field_analysis: + """ + self._fields[field_analysis.get_field()] = field_analysis + # if self.external: + # # Propagate ExternalField to ExternalClass + # self.orig_class.add_method(field_analysis.get_field()) + def add_field_xref_read(self, method, classobj, field, off): """ Add a Field Read to this class @@ -1391,7 +1403,7 @@ class Analysis: It encapsulates all the Dalvik related functions into a single place, while you have still the ability to use the functions from :class:`~androguard.core.bytecodes.dvm.DEX` and the related classes. - :param Optional[androguard.core.bytecodes.dvm.DEX] vm: inital DEX object (default None) + :param Optional[androguard.core.dex.DEX] vm: inital DEX object (default None) """ def __init__(self, vm=None): # Contains DEX objects @@ -1402,6 +1414,8 @@ class Analysis: self.strings = dict() # A dict of {EncodedMethod: MethodAnalysis}, populated on add(vm) self.methods = dict() + # A dict of {EncodedField: FieldAnalysis}, populated on add(vm) + self.fields = dict() # Used to quickly look up methods self.__method_hashes = dict() @@ -1415,7 +1429,7 @@ class Analysis: """ Add a DEX to this Analysis. 
- :param androguard.core.bytecodes.dvm.DEX vm: :class:`dvm.DEX` to add to this Analysis + :param androguard.core.dex.DEX vm: :class:`androguard.core.dex.DEX` to add to this Analysis """ self.vms.append(vm) @@ -1442,6 +1456,10 @@ class Analysis: m_hash = (current_class.get_name(), method.get_name(), str(method.get_descriptor())) self.__method_hashes[m_hash] = self.methods[method] + for field in current_class.get_fields(): + self.fields[field] = FieldAnalysis(field) + new_class.add_field(self.fields[field]) + logger.info("Added DEX in the analysis took : {:0d}min {:02d}s".format(*divmod(int(time.time() - tic), 60))) def create_xref(self): @@ -1493,7 +1511,7 @@ class Analysis: Note that this might be quite slow, as all instructions are parsed. - :param androguard.core.bytecodes.dvm.ClassDefItem current_class: The class to create xrefs for + :param androguard.core.dex.ClassDefItem current_class: The class to create xrefs for """ cur_cls_name = current_class.get_name() @@ -1772,9 +1790,10 @@ class Analysis: :rtype: Iterator[FieldAnalysis] """ - for c in self.classes.values(): - for f in c.get_fields(): - yield f + # for c in self.classes.values(): + # for f in c.get_fields(): + # yield f + yield from self.fields.values() def find_classes(self, name=".*", no_external=False): """ From b8fe1d5120db9c8f592d629e8dbf72df898a0f54 Mon Sep 17 00:00:00 2001 From: ehrenb Date: Sun, 4 Feb 2024 19:24:29 -0500 Subject: [PATCH 2/6] revert to using Analysis classes' fields, since they'll contain xrefs.
If we make a new Analysis field attribute, none of the downstream XREF analysis will get propagated to it --- androguard/core/analysis/analysis.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/androguard/core/analysis/analysis.py b/androguard/core/analysis/analysis.py index 42d43958..24e14bb2 100644 --- a/androguard/core/analysis/analysis.py +++ b/androguard/core/analysis/analysis.py @@ -1014,6 +1014,8 @@ class ClassAnalysis: # Propagate ExternalMethod to ExternalClass self.orig_class.add_method(method_analysis.get_method()) + + @property def implements(self): """ @@ -1414,8 +1416,6 @@ class Analysis: self.strings = dict() # A dict of {EncodedMethod: MethodAnalysis}, populated on add(vm) self.methods = dict() - # A dict of {EncodedField: FieldAnalysis}, populated on add(vm) - self.fields = dict() # Used to quickly look up methods self.__method_hashes = dict() @@ -1425,6 +1425,11 @@ class Analysis: self.__created_xrefs = False + @property + def fields(self): + """Returns FieldAnalysis list""" + return self.get_fields() + def add(self, vm): """ Add a DEX to this Analysis. 
@@ -1447,6 +1452,7 @@ class Analysis: new_class.set_restriction_flag(rf) new_class.set_domain_flag(df) + # seed MethodAnalysis objects into new class analysis for method in current_class.get_methods(): self.methods[method] = MethodAnalysis(vm, method) @@ -1456,9 +1462,10 @@ class Analysis: m_hash = (current_class.get_name(), method.get_name(), str(method.get_descriptor())) self.__method_hashes[m_hash] = self.methods[method] + # seed FieldAnalysis objects into new class analysis for field in current_class.get_fields(): - self.fields[field] = FieldAnalysis(field) - new_class.add_field(self.fields[field]) + new_field_analysis = FieldAnalysis(field) + new_class.add_field(new_field_analysis) logger.info("Added DEX in the analysis took : {:0d}min {:02d}s".format(*divmod(int(time.time() - tic), 60))) @@ -1790,10 +1797,9 @@ class Analysis: :rtype: Iterator[FieldAnalysis] """ - # for c in self.classes.values(): - # for f in c.get_fields(): - # yield f - yield from self.fields.values() + for c in self.classes.values(): + for f in c.get_fields(): + yield f def find_classes(self, name=".*", no_external=False): """ From e8945c923a6b8c76fe1b42e548fd55e1d7776038 Mon Sep 17 00:00:00 2001 From: ehrenb Date: Sun, 25 Feb 2024 18:08:11 -0500 Subject: [PATCH 3/6] create 'gets()' function for MethodHIdItem class, make explicit functions for getting FieldIdItems and MethodIdItems vs. 
EncodedFields and EncodedMethods, normalize variable names to be consistent where designs overlap, and create additional 'get_len_xxx()' functions for classes --- androguard/core/analysis/analysis.py | 2 +- androguard/core/dex/__init__.py | 240 ++++++++++++++++++--------- 2 files changed, 163 insertions(+), 79 deletions(-) diff --git a/androguard/core/analysis/analysis.py b/androguard/core/analysis/analysis.py index 24e14bb2..d5742f6c 100644 --- a/androguard/core/analysis/analysis.py +++ b/androguard/core/analysis/analysis.py @@ -1610,7 +1610,7 @@ class Analysis: elif 0x52 <= op_value <= 0x6d: idx_field = instruction.get_ref_kind() field_info = instruction.cm.vm.get_cm_field(idx_field) - field_item = instruction.cm.vm.get_field_descriptor(field_info[0], field_info[2], field_info[1]) + field_item = instruction.cm.vm.get_encoded_field_descriptor(field_info[0], field_info[2], field_info[1]) if not field_item: continue diff --git a/androguard/core/dex/__init__.py b/androguard/core/dex/__init__.py index 9da0c770..1e2edbb6 100644 --- a/androguard/core/dex/__init__.py +++ b/androguard/core/dex/__init__.py @@ -2479,7 +2479,7 @@ class FieldHIdItem: def __init__(self, size, buff, cm): self.offset = buff.tell() - self.elem = [FieldIdItem(buff, cm) for i in range(0, size)] + self.field_id_items = [FieldIdItem(buff, cm) for i in range(0, size)] def set_off(self, off): self.offset = off @@ -2488,30 +2488,30 @@ class FieldHIdItem: return self.offset def gets(self): - return self.elem + return self.field_id_items def get(self, idx): try: - return self.elem[idx] + return self.field_id_items[idx] except IndexError: return FieldIdItemInvalid() def show(self): nb = 0 - for i in self.elem: + for i in self.field_id_items: print(nb, end=' ') i.show() nb = nb + 1 def get_obj(self): - return [i for i in self.elem] + return [i for i in self.field_id_items] def get_raw(self): - return b''.join(i.get_raw() for i in self.elem) + return b''.join(i.get_raw() for i in self.field_id_items) def 
get_length(self): length = 0 - for i in self.elem: + for i in self.field_id_items: length += i.get_length() return length @@ -2658,7 +2658,7 @@ class MethodHIdItem: self.offset = buff.tell() - self.methods = [MethodIdItem(buff, cm) for i in range(0, size)] + self.method_id_items = [MethodIdItem(buff, cm) for i in range(0, size)] def set_off(self, off): self.offset = off @@ -2666,33 +2666,36 @@ class MethodHIdItem: def get_off(self): return self.offset + def gets(self): + return self.method_id_items + def get(self, idx): try: - return self.methods[idx] + return self.method_id_items[idx] except IndexError: return MethodIdItemInvalid() def reload(self): - for i in self.methods: + for i in self.method_id_items: i.reload() def show(self): print("METHOD_ID_ITEM") nb = 0 - for i in self.methods: + for i in self.method_id_items: print(nb, end=' ') i.show() nb = nb + 1 def get_obj(self): - return [i for i in self.methods] + return [i for i in self.method_id_items] def get_raw(self): - return b''.join(i.get_raw() for i in self.methods) + return b''.join(i.get_raw() for i in self.method_id_items) def get_length(self): length = 0 - for i in self.methods: + for i in self.method_id_items: length += i.get_length() return length @@ -3678,7 +3681,7 @@ class ClassDefItem: def get_methods(self): """ - Return all methods of this class + Return all EncodedMethods of this class :rtype: a list of :class:`EncodedMethod` objects """ @@ -3688,7 +3691,7 @@ class ClassDefItem: def get_fields(self): """ - Return all fields of this class + Return all EncodedFields of this class :rtype: a list of :class:`EncodedField` objects """ @@ -7960,6 +7963,14 @@ class DEX: # There is a rare case that the DEX has no classes return [] + def get_len_classes(self): + """ + Return the number of classes + + :rtype: int + """ + return len(self.get_classes()) + def get_class(self, name): """ Return a specific class @@ -7973,41 +7984,23 @@ class DEX: return i return None - def get_method(self, name): - """ - 
Return a list all methods which corresponds to the regexp - - :param name: the name of the method (a python regexp) - - :rtype: a list with all :class:`EncodedMethod` objects - """ - # TODO could use a generator here - prog = re.compile(name) - l = [] - for i in self.get_classes(): - for j in i.get_methods(): - if prog.match(j.get_name()): - l.append(j) - return l - def get_field(self, name): - """ - Return a list all fields which corresponds to the regexp + """get field id item by name :param name: the name of the field (a python regexp) - - :rtype: a list with all :class:`EncodedField` objects + :type name: str + :return: the list of matching :class:`FieldIdItem` objects + :rtype: list """ - # TODO could use a generator here + prog = re.compile(name) l = [] - for i in self.get_classes(): - for j in i.get_fields(): - if prog.match(j.get_name()): - l.append(j) + for i in self.get_fields(): + if prog.match(i.name): + l.append(i) return l - def get_all_fields(self): + def get_fields(self): """ Return a list of field items @@ -8018,7 +8011,31 @@ class DEX: except AttributeError: return [] - def get_fields(self): + def get_len_fields(self): + """ + Return the number of fields + + :rtype: int + """ + return len(self.get_fields()) + + def get_encoded_field(self, name): + """ + Return a list all fields which corresponds to the regexp + + :param name: the name of the field (a python regexp) + + :rtype: a list with all :class:`EncodedField` objects + """ + # TODO could use a generator here + prog = re.compile(name) + l = [] + for i in self.get_encoded_fields(): + if prog.match(i.get_name()): + l.append(i) + return l + + def get_encoded_fields(self): """ Return all field objects @@ -8031,9 +8048,76 @@ class DEX: self.__cache_all_fields.append(j) return self.__cache_all_fields + def get_len_encoded_fields(self): + return len(self.get_encoded_fields()) + + def get_field(self, name): + """get field id item by name + + :param name: the name of the field (a python regexp) + :type 
name: str + :return: the list of matching :class:`FieldIdItem` objects + :rtype: list + """ + prog = re.compile(name) + l = [] + for i in self.get_fields(): + if prog.match(i.name): + l.append(i) + return l + + def get_method(self, name): + """get method id item by name + + :param name: the name of the field (a python regexp) + :type name: str + :return: the list of matching :class:`MethodIdItem` objects + :rtype: list + """ + prog = re.compile(name) + l = [] + for i in self.get_methods(): + if prog.match(i.name): + l.append(i) + return l + def get_methods(self): """ - Return all method objects + Return a list of method items + + :rtype: a list of :class:`MethodIdItem` objects + """ + try: + return self.methods.gets() + except AttributeError: + return [] + + def get_len_methods(self): + """ + Return the number of methods + + :rtype: int + """ + return len(self.get_methods()) + + def get_encoded_method(self, name): + """ + Return a list all encoded methods whose name corresponds to the regexp + + :param name: the name of the method (a python regexp) + + :rtype: a list with all :class:`EncodedMethod` objects + """ + prog = re.compile(name) + l = [] + for i in self.get_encoded_methods(): + if prog.match(i.name): + l.append(i) + return l + + def get_encoded_methods(self): + """ + Return all encoded method objects :rtype: a list of :class:`EncodedMethod` objects """ @@ -8044,17 +8128,17 @@ class DEX: self.__cache_all_methods.append(j) return self.__cache_all_methods - def get_len_methods(self): + def get_len_encoded_methods(self): """ - Return the number of methods + Return the number of encoded methods :rtype: int """ - return len(self.get_methods()) + return len(self.get_encoded_methods()) - def get_method_by_idx(self, idx): + def get_encoded_method_by_idx(self, idx): """ - Return a specific method by using an index + Return a specific encoded method by using an index :param idx: the index of the method :type idx: int @@ -8071,9 +8155,9 @@ class DEX: except KeyError: 
return None - def get_method_descriptor(self, class_name, method_name, descriptor): + def get_encoded_method_descriptor(self, class_name, method_name, descriptor): """ - Return the specific method + Return the specific encoded method given a class name, method name, and descriptor :param class_name: the class name of the method :type class_name: string @@ -8095,9 +8179,9 @@ class DEX: return self.__cache_methods.get(key) - def get_methods_descriptor(self, class_name, method_name): + def get_encoded_methods_class_method(self, class_name, method_name): """ - Return the specific methods of the class + Return the specific encoded methods of the class :param class_name: the class name of the method :type class_name: string @@ -8106,18 +8190,14 @@ class DEX: :rtype: None or a :class:`EncodedMethod` object """ - l = [] - for i in self.get_classes(): - if i.get_name() == class_name: - for j in i.get_methods(): - if j.get_name() == method_name: - l.append(j) + for i in self.get_encoded_methods(): + if i.get_name() == method_name and i.get_class_name() == class_name: + return i + return None - return l - - def get_methods_class(self, class_name): + def get_encoded_methods_class(self, class_name): """ - Return all methods of a specific class + Return all encoded methods of a specific class by class name :param class_name: the class name :type class_name: string @@ -8125,16 +8205,14 @@ class DEX: :rtype: a list with :class:`EncodedMethod` objects """ l = [] - for i in self.get_classes(): - for j in i.get_methods(): - if class_name == j.get_class_name(): - l.append(j) - + for i in self.get_encoded_methods(): + if class_name == i.get_class_name(): + l.append(i) return l - def get_fields_class(self, class_name): + def get_encoded_fields_class(self, class_name): """ - Return all fields of a specific class + Return all encoded fields of a specific class by class name :param class_name: the class name :type class_name: string @@ -8142,16 +8220,14 @@ class DEX: :rtype: a list with 
:class:`EncodedField` objects """ l = [] - for i in self.get_classes(): - for j in i.get_fields(): - if class_name == j.get_class_name(): - l.append(j) - + for i in self.get_encoded_fields(): + if class_name == i.get_class_name(): + l.append(i) return l - def get_field_descriptor(self, class_name, field_name, descriptor): + def get_encoded_field_descriptor(self, class_name, field_name, descriptor): """ - Return the specific field + Return the specific encoded field given a class name, field name, and descriptor :param class_name: the class name of the field :type class_name: string @@ -8185,6 +8261,14 @@ class DEX: """ return [i.get() for i in self.strings] + def get_len_strings(self): + """ + Return the number of strings + + :rtype: int + """ + return len(self.get_strings()) + def get_regex_strings(self, regular_expressions): """ Return all target strings matched the regex From 8fdb6f939dbd415d3168481970956aaa9268ac16 Mon Sep 17 00:00:00 2001 From: ehrenb Date: Sun, 25 Feb 2024 18:12:46 -0500 Subject: [PATCH 4/6] update all tests to use explicit functions for encoded methods/fields. fix failing test mentioned in https://github.com/androguard/androguard/issues/989\#issuecomment-1925512272 so that it assumes testing for wrapped fields. 
make new tests for DEX-class level testing that tests counts of parsed values in the DEX header --- tests/test_analysis.py | 4 ++-- tests/test_dex.py | 40 +++++++++++++++++++++++++++++++++--- tests/test_dexcodeparsing.py | 2 +- tests/test_rename.py | 4 ++-- tests/test_types.py | 2 +- 5 files changed, 43 insertions(+), 9 deletions(-) diff --git a/tests/test_analysis.py b/tests/test_analysis.py index ff74a6dd..997927f5 100644 --- a/tests/test_analysis.py +++ b/tests/test_analysis.py @@ -24,7 +24,7 @@ class AnalysisTest(unittest.TestCase): self.assertEqual(len(list(dx.get_internal_classes())), 1353) # checked by reading the dex header self.assertEqual(len(dx.get_strings()), 1564) self.assertEqual(len(list(dx.get_methods())), 12792) # according to DEX Header 12795 - self.assertEqual(len(list(dx.get_fields())), 3033) # According to DEX Header 4005 + self.assertEqual(len(list(dx.get_fields())), 4578) # According to DEX Header 4005 self.assertEqual(len(list(dx.get_external_classes())), 388) for cls in dx.get_external_classes(): @@ -48,7 +48,7 @@ class AnalysisTest(unittest.TestCase): # find String fields self.assertEqual(len(list(dx.find_fields(classname="^(?!Landroid).*;$", fieldtype=r"Ljava\/lang\/String;"))), - 63) + 95)#63) def testAnalysis(self): h, d, dx = AnalyzeDex(os.path.join(test_dir, "data/APK/AnalysisTest.dex")) diff --git a/tests/test_dex.py b/tests/test_dex.py index c7e1d266..e0a77783 100644 --- a/tests/test_dex.py +++ b/tests/test_dex.py @@ -17,15 +17,49 @@ class MockClassManager(): def get_odex_format(self): return False -class accessflagsTest(unittest.TestCase): +class VMClassTest(unittest.TestCase): @classmethod def setUpClass(cls): test_apk_path = os.path.join(test_dir, 'data/APK/TestActivity.apk') cls.a, cls.d, cls.dx = AnalyzeAPK(test_apk_path) + def testVMClass(self): + """test number of ClassDefItems, StringDataItems, FieldIdItems, and MethodIdItems""" + + num_class_def_items = 0 + num_strings_data_items = 0 + num_field_id_items = 0 + 
num_method_id_items = 0 + + # the below field exists in the fieldIds list, but + # their class doesnt exist, this is bc its loaded at runtime + # 19 [FieldIdItem]: class_idx=0x13 type_idx=0x242 name_idx=0x1099 + # classIdx = 0x13 = 19 + # typeIdx = 0x242 = 578 + # nameIdx = 0x1099 = 4249 + # className = Landroid/app/Notification; + # typeName = [J + # fieldName = vibrate + + # see DEX format spec https://source.android.com/docs/core/runtime/dex-format + # https://reverseengineering.stackexchange.com/questions/21767/dex-file-referenced-type-is-not-defined-in-file + # field ids, type ids, and method ids references + # are not required to be defined in the dex since they can be resolved at runtime via shared library + for vm in self.dx.vms: + num_class_def_items += vm.get_len_classes() # ClassDefItems + num_strings_data_items += vm.get_len_strings() # StringDataItems + num_field_id_items += vm.get_len_fields() # FieldIdItems + num_method_id_items += vm.get_len_methods() # MethodIdItems + + + + self.assertEqual(len(self.dx.vms), 1) + self.assertEqual(num_class_def_items, 340) + self.assertEqual(num_strings_data_items, 4329) + self.assertEqual(num_field_id_items, 865) + self.assertEqual(num_method_id_items, 3602) + def testAccessflags(self): - - class_name_accessflag_map = { 'Ltests/androguard/TestLoops;': { 'access_flag': 0x1, # public diff --git a/tests/test_dexcodeparsing.py b/tests/test_dexcodeparsing.py index f99b1ee2..d68ec896 100644 --- a/tests/test_dexcodeparsing.py +++ b/tests/test_dexcodeparsing.py @@ -24,7 +24,7 @@ class TestDexCodeParsing(unittest.TestCase): dif = Differ() - for m in d.get_methods(): + for m in d.get_encoded_methods(): if not m.get_code(): continue diff --git a/tests/test_rename.py b/tests/test_rename.py index cb0bd760..bc97b5cf 100644 --- a/tests/test_rename.py +++ b/tests/test_rename.py @@ -18,7 +18,7 @@ class RenameTest(unittest.TestCase): # self.d.set_vmanalysis(self.dx) def testMethodRename(self): - meth, = 
self.d.get_method("testDouble") + meth, = self.d.get_encoded_method("testDouble") clas = self.d.get_class(meth.get_class_name()) self.assertEqual(meth.get_name(), "testDouble") self.assertIn(meth.get_name(), [i.name for i in clas.get_methods()]) @@ -28,7 +28,7 @@ class RenameTest(unittest.TestCase): self.assertNotIn("testDouble", [i.name for i in clas.get_methods()]) def testFieldRename(self): - field, = self.d.get_field("FLAG_REGISTER_CONTENT_OBSERVER") + field, = self.d.get_encoded_field("FLAG_REGISTER_CONTENT_OBSERVER") self.assertEqual(field.get_name(), "FLAG_REGISTER_CONTENT_OBSERVER") field.set_name("FLAG_REGISTER_CONTENT_OBSERVER_RENAMED") self.assertEqual(field.get_name(), "FLAG_REGISTER_CONTENT_OBSERVER_RENAMED") diff --git a/tests/test_types.py b/tests/test_types.py index 6191fd53..51cd6bc6 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -158,7 +158,7 @@ class TypesTest(unittest.TestCase): with open(TEST_CASE, "rb") as fd: digest, d, dx = s.addDEX(TEST_CASE, fd.read()) - for method in filter(lambda x: x.full_name in VALUES, d.get_methods()): + for method in filter(lambda x: x.full_name in VALUES, d.get_encoded_methods()): # print("METHOD", method.full_name) for i in filter(lambda x: 'const' in x.get_name(), method.get_instructions()): From 126bac4605031ee94d226e8d198f74636059fa40 Mon Sep 17 00:00:00 2001 From: ehrenb Date: Sat, 9 Mar 2024 15:27:51 -0500 Subject: [PATCH 5/6] ensure .show() works for MethodAnalysis that are External. Add logging when creating new ClassAnalysis instance. Similar to wrapping all fields into FieldAnalysis, also add and wrap all strings into StringAnalysis. Create explicit methods for getting internal/external methods. 
Updated expected outputs for test_analysis, as underlying implementations have changed --- androguard/core/analysis/analysis.py | 51 ++++++++++++++++++++++------ tests/test_analysis.py | 42 ++++++++++++++++++----- tests/test_dex.py | 4 +-- 3 files changed, 75 insertions(+), 22 deletions(-) diff --git a/androguard/core/analysis/analysis.py b/androguard/core/analysis/analysis.py index d5742f6c..5ee3c7d3 100644 --- a/androguard/core/analysis/analysis.py +++ b/androguard/core/analysis/analysis.py @@ -681,7 +681,9 @@ class MethodAnalysis: self.method.get_access_flags_string(), self.method.get_name(), ", ".join(args), ret)) - bytecode.PrettyShow(self.basic_blocks.gets(), self.method.notes) + + if not self.is_external(): + bytecode.PrettyShow(self.basic_blocks.gets(), self.method.notes) def show_xrefs(self): data = "XREFto for %s\n" % self.method @@ -982,6 +984,7 @@ class ClassAnalysis: """ def __init__(self, classobj): + logger.info(f"Adding new ClassAnalysis: {classobj}") # Automatically decide if the class is external or not self.external = isinstance(classobj, ExternalClass) @@ -1014,8 +1017,6 @@ class ClassAnalysis: # Propagate ExternalMethod to ExternalClass self.orig_class.add_method(method_analysis.get_method()) - - @property def implements(self): """ @@ -1123,7 +1124,7 @@ class ClassAnalysis: def add_field(self, field_analysis): """ Add the given field to this analyis. 
- usually only called during Analysis.add and Analysis._resolve_method + usually only called during Analysis.add :param FieldAnalysis field_analysis: """ @@ -1412,7 +1413,7 @@ class Analysis: self.vms = [] # A dict of {classname: ClassAnalysis}, populated on add(vm) self.classes = dict() - # A dict of {string: StringAnalysis}, populated on create_xref() + # A dict of {string: StringAnalysis}, populated on add(vm) and create_xref() self.strings = dict() # A dict of {EncodedMethod: MethodAnalysis}, populated on add(vm) self.methods = dict() @@ -1436,6 +1437,7 @@ class Analysis: :param androguard.core.dex.DEX vm: :class:`androguard.core.dex.DEX` to add to this Analysis """ + self.vms.append(vm) logger.info("Adding DEX file version {}".format(vm.version)) @@ -1443,8 +1445,10 @@ class Analysis: # TODO: This step can easily be multithreaded, as there is no dependecy between the objects at this stage tic = time.time() for i, current_class in enumerate(vm.get_classes()): + # seed ClassAnalysis objects into classes attribute and add as new class self.classes[current_class.get_name()] = ClassAnalysis(current_class) new_class = self.classes[current_class.get_name()] + # Fix up the hidden api annotations (Android 10) hidden_api = vm.get_hidden_api() if hidden_api: @@ -1452,20 +1456,25 @@ class Analysis: new_class.set_restriction_flag(rf) new_class.set_domain_flag(df) - # seed MethodAnalysis objects into new class analysis + # seed MethodAnalysis objects into methods attribute and add to new class analysis for method in current_class.get_methods(): self.methods[method] = MethodAnalysis(vm, method) - new_class.add_method(self.methods[method]) # Store for faster lookup during create_xrefs m_hash = (current_class.get_name(), method.get_name(), str(method.get_descriptor())) self.__method_hashes[m_hash] = self.methods[method] - # seed FieldAnalysis objects into new class analysis + # seed FieldAnalysis objects into to new class analysis + # since we access methods through a class 
property, + # which returns what's within a ClassAnalysis + # we don't have to track it internally in this class for field in current_class.get_fields(): - new_field_analysis = FieldAnalysis(field) - new_class.add_field(new_field_analysis) + new_class.add_field(FieldAnalysis(field)) + + # seed StringAnalysis objects into strings attribute - connect later using xrefs + for string_value in vm.get_strings(): + self.strings[string_value] = StringAnalysis(string_value) logger.info("Added DEX in the analysis took : {:0d}min {:02d}s".format(*divmod(int(time.time() - tic), 60))) @@ -1756,6 +1765,28 @@ class Analysis: if not cls.is_external(): yield cls + def get_internal_methods(self): + """ + Returns all internal methods, that means all methods that are + defined in the given set of :class:`~DEX`. + + :rtype: Iterator[MethodAnalysis] + """ + for m in self.methods.values(): + if not m.is_external(): + yield m + + def get_external_methods(self): + """ + Returns all external methods, that means all methods that are not + defined in the given set of :class:`~DEX`.
+ + :rtype: Iterator[MethodAnalysis] + """ + for m in self.methods.values(): + if m.is_external(): + yield m + def get_strings_analysis(self): """ Returns a dictionary of strings and their corresponding :class:`StringAnalysis` diff --git a/tests/test_analysis.py b/tests/test_analysis.py index 997927f5..b45a1bfe 100644 --- a/tests/test_analysis.py +++ b/tests/test_analysis.py @@ -21,11 +21,28 @@ class AnalysisTest(unittest.TestCase): def testAPK(self): a, d, dx = AnalyzeAPK(os.path.join(test_dir, "data/APK/a2dp.Vol_137.apk")) - self.assertEqual(len(list(dx.get_internal_classes())), 1353) # checked by reading the dex header - self.assertEqual(len(dx.get_strings()), 1564) - self.assertEqual(len(list(dx.get_methods())), 12792) # according to DEX Header 12795 - self.assertEqual(len(list(dx.get_fields())), 4578) # According to DEX Header 4005 - self.assertEqual(len(list(dx.get_external_classes())), 388) + # internal+external classes should sum up to header->headerItem->classIdsSize + self.assertEqual(len(list(dx.get_internal_classes())), 1353) # dex header header->headerItem->classDefsSize + self.assertEqual(len(list(dx.get_external_classes())), 388) # difficult to check, cannot find using JADX + self.assertEqual(len(list(dx.get_classes())), 1741) # dex header header->headerItem->classDefsSize + + self.assertEqual(len(dx.get_strings()), 13523) # dex header header->headerItem->stringsIdsSize + + # don't have a way to discern external vs internal fields currently, + # header->headerItemFieldIdsSize is 4005, but there must be 573 more external added + # so this is difficult to derive. Even JADX seems to disagree with 4005 number? 
+ self.assertEqual(len(list(dx.get_fields())), 4005 + 573) + + # internal+external methods should sum up to header->headerItem->methodIdsSize + self.assertEqual(len(list(dx.get_internal_methods())), 9676) # difficult to check, can use jadx-gui and see summary + self.assertEqual(len(list(dx.get_external_methods())), 3116) # difficult to check + + # TODO: the DEX header says 12795 here, but 9676 + 3116 adds up to 12792 + # JADX corroborates 9676, so I think 3116 is off, and a few unnecessary + # ExternalMethods are added somewhere + self.assertEqual(len(list(dx.get_methods())), 12792) # dex header header->headerItem->methodIdsSize + + for cls in dx.get_external_classes(): self.assertEqual(cls.name[0], 'L') @@ -44,15 +61,23 @@ class AnalysisTest(unittest.TestCase): no_external=True))), 94) # Find url like strings - self.assertEqual(len(list(dx.find_strings(r".*:\/\/.*"))), 15) + self.assertEqual(len(list(dx.find_strings(r".*:\/\/.*"))), 16) # find String fields self.assertEqual(len(list(dx.find_fields(classname="^(?!Landroid).*;$", fieldtype=r"Ljava\/lang\/String;"))), 95)#63) def testAnalysis(self): + import sys h, d, dx = AnalyzeDex(os.path.join(test_dir, "data/APK/AnalysisTest.dex")) + self.assertEqual(len(list(dx.get_internal_classes())), 1) + self.assertEqual(len(list(dx.get_internal_methods())), 4) + self.assertEqual(len(list(dx.get_external_methods())), 4) + self.assertEqual(len(list(dx.get_methods())), 8) + self.assertEqual(len(dx.get_strings()), 21) + self.assertEqual(len(list(dx.get_fields())), 0) + self.assertEqual(h, "4595fc25104f3fcd709163eb70ca476edf116753607ec18f09548968c71910dc") self.assertIsInstance(d, DEX) self.assertIsInstance(dx, analysis.Analysis) @@ -269,7 +294,7 @@ class AnalysisTest(unittest.TestCase): """Tests if String offsets in bytecode are correctly stored""" _, _, dx = AnalyzeDex(os.path.join(test_dir, "data/APK/AnalysisTest.dex")) - self.assertEqual(len(dx.get_strings()), 1) + self.assertEqual(len(dx.get_strings()), 21)
self.assertIsInstance(dx.strings['Hello world'], analysis.StringAnalysis) sa = dx.strings['Hello world'] @@ -282,7 +307,7 @@ class AnalysisTest(unittest.TestCase): """Tests if Field offsets in bytecode are correctly stored""" _, _, dx = AnalyzeDex(os.path.join(test_dir, "data/APK/FieldsTest.dex")) - self.assertEqual(len(dx.get_strings()), 4) + self.assertEqual(len(dx.get_strings()), 20) self.assertIn('hello world', dx.strings.keys()) self.assertIn('sdf', dx.strings.keys()) self.assertIn('hello mars', dx.strings.keys()) @@ -350,6 +375,5 @@ class AnalysisTest(unittest.TestCase): self.assertEqual(class1.restriction_flag, HiddenApiClassDataItem.RestrictionApiFlag.BLACKLIST) self.assertEqual(class1.domain_flag, HiddenApiClassDataItem.DomapiApiFlag.NONE) - if __name__ == '__main__': unittest.main() diff --git a/tests/test_dex.py b/tests/test_dex.py index e0a77783..3bdae2fa 100644 --- a/tests/test_dex.py +++ b/tests/test_dex.py @@ -25,7 +25,7 @@ class VMClassTest(unittest.TestCase): def testVMClass(self): """test number of ClassDefItems, StringDataItems, FieldIdItems, and MethodIdItems""" - + num_class_def_items = 0 num_strings_data_items = 0 num_field_id_items = 0 @@ -51,8 +51,6 @@ class VMClassTest(unittest.TestCase): num_field_id_items += vm.get_len_fields() # FieldIdItems num_method_id_items += vm.get_len_methods() # MethodIdItems - - self.assertEqual(len(self.dx.vms), 1) self.assertEqual(num_class_def_items, 340) self.assertEqual(num_strings_data_items, 4329) From 167ef191d8c6a91e9742d650dc1f09f77df4d7d4 Mon Sep 17 00:00:00 2001 From: ehrenb Date: Sat, 9 Mar 2024 20:19:48 -0500 Subject: [PATCH 6/6] fix comment --- tests/test_analysis.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_analysis.py b/tests/test_analysis.py index b45a1bfe..2ec4435b 100644 --- a/tests/test_analysis.py +++ b/tests/test_analysis.py @@ -21,10 +21,9 @@ class AnalysisTest(unittest.TestCase): def testAPK(self): a, d, dx = AnalyzeAPK(os.path.join(test_dir, 
"data/APK/a2dp.Vol_137.apk")) - # internal+external classes should sum up to header->headerItem->classIdsSize self.assertEqual(len(list(dx.get_internal_classes())), 1353) # dex header header->headerItem->classDefsSize self.assertEqual(len(list(dx.get_external_classes())), 388) # difficult to check, cannot find using JADX - self.assertEqual(len(list(dx.get_classes())), 1741) # dex header header->headerItem->classDefsSize + self.assertEqual(len(list(dx.get_classes())), 1741) # sum of internal and external classes self.assertEqual(len(dx.get_strings()), 13523) # dex header header->headerItem->stringsIdsSize