restore 'androguard cg' functionality for generating call graphs and displaying them with matplotlib. still need to save raw graph formats

This commit is contained in:
ehrenb 2023-12-30 14:44:04 -05:00
parent d3b6ded8c8
commit 36ad687331
5 changed files with 251 additions and 2 deletions

View File

@ -12,6 +12,12 @@ Quick installation:
~~~~
pip install androguard
~~~~
For visualizing callgraphs generated with 'androguard cg', install additional dependencies:
~~~~
apt-get install libxcb-randr0-dev libxcb-xtest0-dev libxcb-xinerama0-dev libxcb-shape0-dev libxcb-xkb-dev
~~~~
> [!IMPORTANT]
> Versions >= 4.0.0 are new releases after a long time, where the project has substantial differences from the previous stable version 3.3.5 from 2019. This means that certain functionalities have been removed. If you notice an issue with your project using the latest version, please open up an [issue](https://github.com/androguard/androguard/issues).

View File

@ -455,6 +455,160 @@ def dump(package_name, modules):
"""
androdump_main(package_name, modules)
@entry_point.command()
@click.argument(
    'file_',
    type=click.Path(exists=True, dir_okay=False, file_okay=True),
    required=True,
)
@click.option(
    '--output', '-o',
    default='callgraph.gml',
    help='Filename of the output file, the extension is used to decide which format to use (default callgraph.gml)',
)
@click.option(
    '--show', '-s',
    default=False,
    is_flag=True,
    help='instead of saving the graph, print it with matplotlib (you might not see anything!)',
)
@click.option(
    '--classname',
    default='.*',
    help='Regex to filter by classname',
)
@click.option(
    '--methodname',
    default='.*',
    help='Regex to filter by methodname',
)
@click.option(
    '--descriptor',
    default='.*',
    help='Regex to filter by descriptor',
)
@click.option(
    '--accessflag',
    default='.*',
    help='Regex to filter by accessflag',
)
@click.option(
    '--no-isolated',
    default=False,
    is_flag=True,
    help='Do not store methods which have no xrefs',
)
def cg(
        file_,
        output,
        show,
        classname,
        methodname,
        descriptor,
        accessflag,
        no_isolated):
    """
    Create a call graph based on the data of Analysis and export it into a graph format.

    Note: writing the graph to the output file is not implemented yet;
    use --show to display the graph with matplotlib.
    """
    # Imported lazily so the other sub-commands do not pay for (or depend on)
    # the analysis and plotting stack.
    from androguard.core.bytecode import FormatClassToJava
    from androguard.misc import AnalyzeAPK
    from androguard.core.analysis.analysis import ExternalMethod
    import matplotlib.pyplot as plt
    import networkx as nx

    # Only the APK object and the Analysis object are needed here.
    a, _, dx = AnalyzeAPK(file_)

    # Every component declared in the manifest is treated as an entry point;
    # the names are converted with FormatClassToJava before being handed to
    # the analysis.
    entry_points = [
        FormatClassToJava(component)
        for component in (a.get_activities() + a.get_providers() +
                          a.get_services() + a.get_receivers())
    ]

    callgraph = dx.get_call_graph(
        classname,
        methodname,
        descriptor,
        accessflag,
        no_isolated,
        entry_points
    )

    if not show:
        # TODO: save the graph, dispatching on the extension of `output`
        # (e.g. gml, gexf, graphml, net) to the matching networkx writer,
        # looking through a trailing .bz2/.gz suffix.
        # Until then, tell the user instead of silently producing nothing.
        click.echo(
            f"Saving the call graph to '{output}' is not implemented yet; "
            "use --show to display it instead.",
            err=True,
        )
        return

    pos = nx.spring_layout(callgraph)

    # External methods are drawn in a different colour than internal ones.
    external = [n for n in callgraph if isinstance(n, ExternalMethod)]
    internal = [n for n in callgraph if not isinstance(n, ExternalMethod)]

    print("Drawing nodes and edges")
    nx.draw_networkx_nodes(
        callgraph,
        pos=pos,
        node_color='r',
        nodelist=internal)
    nx.draw_networkx_nodes(
        callgraph,
        pos=pos,
        node_color='b',
        nodelist=external)
    nx.draw_networkx_edges(
        callgraph,
        pos,
        width=0.5,
        arrows=True)
    nx.draw_networkx_labels(
        callgraph,
        pos=pos,
        font_size=6,
        labels={n: f"{n.get_class_name()} {n.name} {n.descriptor}"
                for n in callgraph.nodes})

    print("Showing")
    plt.draw()
    plt.show()
# Invoke the CLI entry point when this file is executed as a script rather than imported.
if __name__ == '__main__':
entry_point()

View File

@ -9,7 +9,7 @@ import time
from enum import IntEnum
from loguru import logger
import networkx as nx
BasicOPCODES = set()
for i in dex.BRANCH_DEX_OPCODES:
@ -1810,6 +1810,17 @@ class Analysis:
if re.match(classname, cname):
for m in c.get_methods():
z = m.get_method()
# logger.info(z.get_descriptor())
# if 'TestInnerClass' in z.get_descriptor():
# logger.info(z.get_descriptor())
# if isinstance(z, ExternalMethod):
# print('external method')
# print(f'is list: {isinstance(z.get_descriptor(), list)}')
# else:
# print('internal method')
# print(f'is str: {isinstance(z.get_descriptor(), str)}')
# TODO is it even possible that an internal class has
# external methods? Maybe we should check for ExternalClass
# instead...
@ -1853,6 +1864,80 @@ class Analysis:
def __repr__(self):
    """Return a short summary with the number of VMs, classes, methods and strings held."""
    counts = (
        len(self.vms),
        len(self.classes),
        len(self.methods),
        len(self.strings),
    )
    return "<analysis.Analysis VMs: {}, Classes: {}, Methods: {}, Strings: {}>".format(*counts)
def get_call_graph(
        self,
        classname=".*",
        methodname=".*",
        descriptor=".*",
        accessflags=".*",
        no_isolated=False,
        entry_points=None):
    """
    Generate a directed graph based on the methods found by the filters applied.
    The filters are the same as in
    :meth:`~androguard.core.analysis.analysis.Analysis.find_methods`

    A networkx.DiGraph is returned, containing all edges only once!
    That means, if a method calls some method twice or more often, there will
    only be a single connection.

    Every node carries two attributes: `external` (the method is an
    :class:`ExternalMethod`) and `entrypoint` (the method's class name is
    listed in `entry_points`).

    :param classname: regular expression of the classname (default: ".*")
    :param methodname: regular expression of the methodname (default: ".*")
    :param descriptor: regular expression of the descriptor (default: ".*")
    :param accessflags: regular expression of the access flags (default: ".*")
    :param no_isolated: skip matched methods which do not call anything;
        such a method can still show up in the graph as the callee of
        another method (default: False)
    :param entry_points: an iterable of class names that are marked as entry
        point (default: None, meaning no entry points)
    :rtype: networkx.DiGraph
    """
    # Build the lookup once; a set keeps the per-node membership test cheap
    # and avoids sharing a mutable default between calls.
    entry_point_classes = set(entry_points) if entry_points is not None else set()

    def _add_node(G, method):
        """
        Wrapper to add methods to a graph (no-op if the method is already a node)
        """
        if method in G:
            return
        G.add_node(
            method,
            external=isinstance(method, ExternalMethod),
            entrypoint=method.get_class_name() in entry_point_classes,
        )

    CG = nx.DiGraph()

    # Note: If you create the CG from many classes at the same time, the drawing
    # will be a total mess...
    for m in self.find_methods(
            classname=classname,
            methodname=methodname,
            descriptor=descriptor,
            accessflags=accessflags):
        orig_method = m.get_method()
        logger.info("Found Method --> {}".format(orig_method))

        # Fetch the outgoing references once; they are used both for the
        # isolation check and for building the edges.
        xrefs_to = m.get_xref_to()
        if no_isolated and not xrefs_to:
            logger.info("Skipped {}, because it has no xrefs".format(orig_method))
            continue

        _add_node(CG, orig_method)

        for _, callee, _ in xrefs_to:
            _add_node(CG, callee)
            # A DiGraph holds at most one edge per (caller, callee) pair, so
            # adding the same edge again does not create a duplicate.
            # If you need all calls, you probably want to check out MultiDiGraph
            CG.add_edge(orig_method, callee)

    return CG
def create_ipython_exports(self):
"""
.. warning:: this feature is experimental and is currently not enabled by default! Use with caution!

View File

@ -25,6 +25,8 @@ dataset = "*"
frida = "*"
loguru = "*"
apkInspector = ">=1.1.7"
networkx = ">=3.2.1"
PyQt5 = ">=5.15.10"
[tool.setuptools.package_data]
"androguard.core.api_specific_resources" = ["aosp_permissions/*.json", "api_permission_mappings/*.json"]

View File

@ -9,4 +9,6 @@ mutf8
dataset
frida
loguru
apkInspector>=1.1.7
apkInspector>=1.1.7
networkx>=3.2.1
PyQt5>=5.15.10