mirror of
https://github.com/androguard/androguard.git
synced 2024-11-23 05:00:11 +00:00
Switch to mutf8 to decode string
This commit is contained in:
parent
4e0bed55f9
commit
8df1fe850d
@ -1,9 +0,0 @@
|
||||
from androguard.cli.main import (androarsc_main,
|
||||
androaxml_main,
|
||||
androcg_main,
|
||||
androgui_main,
|
||||
androlyze_main,
|
||||
androsign_main,
|
||||
androdis_main,
|
||||
export_apps_to_format,
|
||||
)
|
@ -62,7 +62,11 @@ class DVMBasicBlock:
|
||||
|
||||
self.special_ins = {}
|
||||
|
||||
self.name = mutf8.MUTF8String.join([self.method.get_name(), b'-BB@', hex(self.start).encode()])
|
||||
self.name = ''.join([
|
||||
self.method.get_name(),
|
||||
'-BB@',
|
||||
hex(self.start)
|
||||
])
|
||||
self.exception_analysis = None
|
||||
|
||||
self.notes = []
|
||||
@ -931,7 +935,7 @@ class ExternalMethod:
|
||||
return self.class_name
|
||||
|
||||
def get_descriptor(self):
|
||||
return mutf8.MUTF8String.join(self.descriptor)
|
||||
return self.descriptor
|
||||
|
||||
@property
|
||||
def full_name(self):
|
||||
@ -1517,7 +1521,7 @@ class Analysis:
|
||||
if op_value in [0x1c, 0x22]:
|
||||
idx_type = instruction.get_ref_kind()
|
||||
# type_info is the string like 'Ljava/lang/Object;'
|
||||
type_info = instruction.cm.vm.get_cm_type(idx_type).lstrip(b'[')
|
||||
type_info = instruction.cm.vm.get_cm_type(idx_type).lstrip('[')
|
||||
if type_info[0] != b'L':
|
||||
# Need to make sure, that we get class types and not other types
|
||||
continue
|
||||
@ -1558,7 +1562,7 @@ class Analysis:
|
||||
"Requested IDX {}".format(off, current_method.get_code_off(), idx_meth))
|
||||
continue
|
||||
|
||||
class_info = method_info[0].lstrip(b'[')
|
||||
class_info = method_info[0].lstrip('[')
|
||||
if class_info[0] != b'L':
|
||||
# Need to make sure, that we get class types and not other types
|
||||
# If another type, like int is used, we simply skip it.
|
||||
@ -1631,7 +1635,7 @@ class Analysis:
|
||||
:return:
|
||||
:rtype: MethodAnalysis
|
||||
"""
|
||||
m_hash = (class_name, method_name, mutf8.MUTF8String.join(method_descriptor))
|
||||
m_hash = (class_name, method_name, ''.join(method_descriptor))
|
||||
if m_hash not in self.__method_hashes:
|
||||
# Need to create a new method
|
||||
if class_name not in self.classes:
|
||||
@ -1792,7 +1796,7 @@ class Analysis:
|
||||
:param no_external: Remove external classes from the output (default False)
|
||||
:rtype: Iterator[ClassAnalysis]
|
||||
"""
|
||||
name = bytes(mutf8.MUTF8String.from_str(name))
|
||||
name = bytes(name)
|
||||
for cname, c in self.classes.items():
|
||||
if no_external and isinstance(c.get_vm_class(), ExternalClass):
|
||||
continue
|
||||
@ -1813,9 +1817,9 @@ class Analysis:
|
||||
:param no_external: Remove external method from the output (default False)
|
||||
:rtype: Iterator[MethodAnalysis]
|
||||
"""
|
||||
classname = bytes(mutf8.MUTF8String.from_str(classname))
|
||||
methodname = bytes(mutf8.MUTF8String.from_str(methodname))
|
||||
descriptor = bytes(mutf8.MUTF8String.from_str(descriptor))
|
||||
classname = bytes(classname)
|
||||
methodname = bytes(methodname)
|
||||
descriptor = bytes(descriptor)
|
||||
for cname, c in self.classes.items():
|
||||
if re.match(classname, cname):
|
||||
for m in c.get_methods():
|
||||
@ -1837,7 +1841,7 @@ class Analysis:
|
||||
:param string: regular expression for the string to search for
|
||||
:rtype: Iterator[StringAnalysis]
|
||||
"""
|
||||
string = bytes(mutf8.MUTF8String.from_str(string))
|
||||
string = bytes(string)
|
||||
for s, sa in self.strings.items():
|
||||
if re.match(string, s):
|
||||
yield sa
|
||||
@ -1852,9 +1856,9 @@ class Analysis:
|
||||
:param accessflags: regular expression of the access flags
|
||||
:rtype: Iterator[FieldAnalysis]
|
||||
"""
|
||||
classname = bytes(mutf8.MUTF8String.from_str(classname))
|
||||
fieldname = bytes(mutf8.MUTF8String.from_str(fieldname))
|
||||
fieldtype = bytes(mutf8.MUTF8String.from_str(fieldtype))
|
||||
classname = bytes(classname)
|
||||
fieldname = bytes(fieldname)
|
||||
fieldtype = bytes(fieldtype)
|
||||
for cname, c in self.classes.items():
|
||||
if re.match(classname, cname):
|
||||
for f in c.get_fields():
|
||||
|
@ -807,7 +807,7 @@ class AXMLParser:
|
||||
|
||||
res = self.sb[name]
|
||||
# If the result is a (null) string, we need to look it up.
|
||||
if not res:
|
||||
if not res or res == ":":
|
||||
attr = self.m_resourceIDs[name]
|
||||
if attr in public.SYSTEM_RESOURCES['attributes']['inverse']:
|
||||
res = 'android:' + public.SYSTEM_RESOURCES['attributes']['inverse'][attr]
|
||||
|
@ -1992,9 +1992,9 @@ class StringDataItem:
|
||||
|
||||
def get(self):
|
||||
"""
|
||||
Returns a MUTF8String object
|
||||
Returns a str object
|
||||
"""
|
||||
return mutf8.MUTF8String(self.data)
|
||||
return mutf8.decode(self.data)
|
||||
|
||||
def show(self):
|
||||
bytecode._PrintSubBanner("String Data Item")
|
||||
@ -2245,11 +2245,11 @@ class ProtoIdItem:
|
||||
"""
|
||||
Return the string associated to the parameters_off
|
||||
|
||||
:rtype: MUTF8String
|
||||
:rtype: str
|
||||
"""
|
||||
if self.parameters_off_value is None:
|
||||
params = self.CM.get_type_list(self.parameters_off)
|
||||
self.parameters_off_value = mutf8.MUTF8String(b'(' + b' '.join(params) + b')')
|
||||
self.parameters_off_value = '(' + ' '.join(params) + ')'
|
||||
return self.parameters_off_value
|
||||
|
||||
def show(self):
|
||||
@ -3006,7 +3006,7 @@ class EncodedMethod:
|
||||
if v and len(v) >= 3:
|
||||
self.class_name = v[0]
|
||||
self.name = v[1]
|
||||
self.proto = mutf8.MUTF8String.join(i for i in v[2])
|
||||
self.proto = ''.join(i for i in v[2])
|
||||
else:
|
||||
self.class_name = 'CLASS_NAME_ERROR'
|
||||
self.name = 'NAME_ERROR'
|
||||
@ -3113,7 +3113,7 @@ class EncodedMethod:
|
||||
@property
|
||||
def full_name(self):
|
||||
"""Return class_name + name + descriptor, separated by spaces (no access flags"""
|
||||
return mutf8.MUTF8String.join([self.class_name, self.name, self.get_descriptor()], spacing=b' ')
|
||||
return ''.join([self.class_name, self.name, self.get_descriptor()], spacing=b' ')
|
||||
|
||||
@property
|
||||
def descriptor(self):
|
||||
@ -3691,7 +3691,7 @@ class ClassDefItem:
|
||||
For example, if the class is marked as :code:`@Deprecated`, this will return
|
||||
:code:`['Ljava/lang/Deprecated;']`.
|
||||
|
||||
:rtype: Iterator[mutf8.MUTF8String]
|
||||
:rtype: Iterator[str]
|
||||
"""
|
||||
return [self.CM.get_type(x.get_type_idx()) for x in self._get_annotation_type_ids()]
|
||||
|
||||
@ -3775,7 +3775,7 @@ class ClassDefItem:
|
||||
"""
|
||||
Return the name of this class
|
||||
|
||||
:rtype: MUTF8String
|
||||
:rtype: str
|
||||
"""
|
||||
return self.name
|
||||
|
||||
@ -3783,7 +3783,7 @@ class ClassDefItem:
|
||||
"""
|
||||
Return the name of the super class
|
||||
|
||||
:rtype: MUTF8String
|
||||
:rtype: str
|
||||
"""
|
||||
return self.sname
|
||||
|
||||
@ -3791,7 +3791,7 @@ class ClassDefItem:
|
||||
"""
|
||||
Return the names of the interfaces
|
||||
|
||||
:rtype: List[MUTF8String]
|
||||
:rtype: List[str]
|
||||
"""
|
||||
return self.interfaces
|
||||
|
||||
@ -8024,7 +8024,7 @@ class DEX:
|
||||
:rtype: a list with all :class:`EncodedMethod` objects
|
||||
"""
|
||||
# TODO could use a generator here
|
||||
name = bytes(mutf8.MUTF8String.from_str(name))
|
||||
name = bytes(name)
|
||||
prog = re.compile(name)
|
||||
l = []
|
||||
for i in self.get_classes():
|
||||
@ -8042,7 +8042,7 @@ class DEX:
|
||||
:rtype: a list with all :class:`EncodedField` objects
|
||||
"""
|
||||
# TODO could use a generator here
|
||||
name = bytes(mutf8.MUTF8String.from_str(name))
|
||||
name = bytes(name)
|
||||
prog = re.compile(name)
|
||||
l = []
|
||||
for i in self.get_classes():
|
||||
@ -8258,6 +8258,7 @@ class DEX:
|
||||
"""
|
||||
Export classes/methods/fields' names in the python namespace
|
||||
"""
|
||||
logger.debug("Exporting Python objects")
|
||||
setattr(self, "C", ExportObject())
|
||||
|
||||
for _class in self.get_classes():
|
||||
|
@ -1,227 +1,4 @@
|
||||
def decode(b):
|
||||
size = len(b)
|
||||
ord_array = [None] * size
|
||||
ord_index = 0
|
||||
from mutf8 import decode_modified_utf8, encode_modified_utf8
|
||||
|
||||
b = iter(b)
|
||||
|
||||
for x in b:
|
||||
if x >> 7 == 0:
|
||||
# Single char:
|
||||
ord_array[ord_index] = x & 0x7f
|
||||
elif x >> 5 == 0b110:
|
||||
# 2 byte Multichar
|
||||
b2 = next(b)
|
||||
if b2 >> 6 != 0b10:
|
||||
raise UnicodeDecodeError(
|
||||
"Second byte of 2 byte sequence does not looks right.")
|
||||
|
||||
ord_array[ord_index] = (x & 0x1f) << 6 | b2 & 0x3f
|
||||
elif x >> 4 == 0b1110:
|
||||
# 3 byte Multichar
|
||||
b2 = next(b)
|
||||
b3 = next(b)
|
||||
if b2 >> 6 != 0b10:
|
||||
raise UnicodeDecodeError(
|
||||
"Second byte of 3 byte sequence does not looks right.")
|
||||
if b3 >> 6 != 0b10:
|
||||
raise UnicodeDecodeError(
|
||||
"Third byte of 3 byte sequence does not looks right.")
|
||||
|
||||
ord_array[ord_index] = (x & 0xf) << 12 | (
|
||||
b2 & 0x3f) << 6 | b3 & 0x3f
|
||||
else:
|
||||
raise UnicodeDecodeError("Could not decode byte")
|
||||
ord_index += 1
|
||||
|
||||
chr_array = [""]*size
|
||||
chr_index = 0
|
||||
while chr_index < size:
|
||||
c = ord_array[chr_index]
|
||||
if c is None:
|
||||
break
|
||||
if (c >> 10) == 0b110110:
|
||||
n = None
|
||||
try:
|
||||
n = ord_array[chr_index + 1]
|
||||
except:
|
||||
pass
|
||||
if n and (n >> 10) == 0b110111:
|
||||
chr_array[chr_index] = chr(
|
||||
((c & 0x3ff) << 10 | (n & 0x3ff)) + 0x10000)
|
||||
chr_index += 1
|
||||
else:
|
||||
chr_array[chr_index] = chr(c)
|
||||
else:
|
||||
chr_array[chr_index] = chr(c)
|
||||
chr_index += 1
|
||||
|
||||
return "".join(chr_array)
|
||||
|
||||
|
||||
def encode(s):
|
||||
b = [b""]*len(s)
|
||||
ord_array = [i for i in map(lambda x: ord(x), s)]
|
||||
for x in ord_array:
|
||||
if (x == 0) or ((x <= 0x7ff) and (x >= 0x80)):
|
||||
b1 = ((x & 0x7c0) >> 6 | 0xc0).to_bytes(1, 'big')
|
||||
b2 = ((x & 0x3f) | 0x80).to_bytes(1, 'big')
|
||||
b.append(b1 + b2)
|
||||
elif (x <= 0x7f):
|
||||
b1 = x.to_bytes(1, 'big')
|
||||
b.append(b1)
|
||||
elif (x >= 0x800) and (x <= 0xffff):
|
||||
b1 = ((x & 0xf000) >> 12 | 0xe0).to_bytes(1, 'big')
|
||||
b2 = ((x & 0xfff) >> 6 | 0x80).to_bytes(1, 'big')
|
||||
b3 = ((x & 0x3f) | 0x80).to_bytes(1, 'big')
|
||||
b.append(b1 + b2 + b3)
|
||||
else:
|
||||
a = x - 0x10000
|
||||
s1 = ((a >> 10) | 0xd800)
|
||||
s2 = ((a & 0x3ff) | 0xdc00)
|
||||
b1 = ((s1 & 0xf000) >> 12 | 0xe0).to_bytes(1, 'big')
|
||||
b2 = ((s1 & 0xfff) >> 6 | 0x80).to_bytes(1, 'big')
|
||||
b3 = ((s1 & 0x3f) | 0x80).to_bytes(1, 'big')
|
||||
b4 = ((s2 & 0xf000) >> 12 | 0xe0).to_bytes(1, 'big')
|
||||
b5 = ((s2 & 0xfff) >> 6 | 0x80).to_bytes(1, 'big')
|
||||
b6 = ((s2 & 0x3f) | 0x80).to_bytes(1, 'big')
|
||||
b.append(b1 + b2 + b3 + b4 + b5 + b6)
|
||||
return b"".join(b)
|
||||
|
||||
|
||||
class MUTF8String(bytes):
|
||||
def __new__(cls, b):
|
||||
return bytes.__new__(cls, b)
|
||||
|
||||
def __init__(self, b):
|
||||
self.__decoded = None
|
||||
|
||||
@classmethod
|
||||
def from_str(cls, s):
|
||||
try:
|
||||
c = cls(encode(s))
|
||||
except TypeError as e:
|
||||
try:
|
||||
c = cls(s)
|
||||
except:
|
||||
raise e
|
||||
c.__decoded = s
|
||||
return c
|
||||
|
||||
@classmethod
|
||||
def join(cls, data, spacing=b''):
|
||||
return MUTF8String(spacing.join(data))
|
||||
|
||||
def replace(self, old, new, count=None):
|
||||
if count is None:
|
||||
try:
|
||||
return MUTF8String(bytes.replace(self, old, new))
|
||||
except TypeError:
|
||||
return MUTF8String(bytes.replace(self, encode(old), encode(new)))
|
||||
else:
|
||||
try:
|
||||
return MUTF8String(bytes.replace(self, old, new, count))
|
||||
except TypeError:
|
||||
return MUTF8String(bytes.replace(self, encode(old), encode(new), count))
|
||||
|
||||
def find(self, sub):
|
||||
try:
|
||||
return bytes.find(self, sub)
|
||||
except TypeError:
|
||||
return bytes.find(self, encode(sub))
|
||||
|
||||
def split(self, sep=None, maxsplit=-1):
|
||||
try:
|
||||
return [MUTF8String(i) for i in bytes.split(self, sep, maxsplit)]
|
||||
except TypeError:
|
||||
return [MUTF8String(i) for i in bytes.split(self, encode(sep), maxsplit)]
|
||||
|
||||
def rsplit(self, sep=None, maxsplit=-1):
|
||||
try:
|
||||
return [MUTF8String(i) for i in bytes.rsplit(self, sep, maxsplit)]
|
||||
except TypeError:
|
||||
return [MUTF8String(i) for i in bytes.rsplit(self, encode(sep), maxsplit)]
|
||||
|
||||
def lstrip(self, sub):
|
||||
try:
|
||||
return MUTF8String(bytes.lstrip(self, sub))
|
||||
except TypeError:
|
||||
return MUTF8String(bytes.lstrip(self, encode(sub)))
|
||||
|
||||
def startswith(self, sub):
|
||||
try:
|
||||
return bytes.startswith(self, sub)
|
||||
except TypeError:
|
||||
return bytes.startswith(self, encode(sub))
|
||||
|
||||
def __hash__(self):
|
||||
return bytes.__hash__(self)
|
||||
|
||||
def __add__(self, other):
|
||||
try:
|
||||
return MUTF8String(bytes.__add__(self, other))
|
||||
except TypeError:
|
||||
return MUTF8String(bytes.__add__(self, encode(other)))
|
||||
|
||||
def __getitem__(self, key):
|
||||
item = super(MUTF8String, self).__getitem__(key)
|
||||
if isinstance(item, int):
|
||||
return MUTF8String(item.to_bytes(1, byteorder='big'))
|
||||
else:
|
||||
return MUTF8String(item)
|
||||
|
||||
def __repr__(self):
|
||||
return self.__str__()
|
||||
|
||||
def __str__(self):
|
||||
if not self.__decoded:
|
||||
self.__decoded = decode(self).encode('utf8', errors='backslashreplace').decode('utf8')
|
||||
return self.__decoded
|
||||
|
||||
def __lt__(self, other):
|
||||
if isinstance(other, bytes):
|
||||
return bytes.__lt__(self, other)
|
||||
elif isinstance(other, str):
|
||||
return bytes.__lt__(self, encode(other))
|
||||
else:
|
||||
return NotImplemented
|
||||
|
||||
def __le__(self, other):
|
||||
if isinstance(other, bytes):
|
||||
return bytes.__le__(self, other)
|
||||
elif isinstance(other, str):
|
||||
return bytes.__le__(self, encode(other))
|
||||
else:
|
||||
return NotImplemented
|
||||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, bytes):
|
||||
return bytes.__eq__(self, other)
|
||||
elif isinstance(other, str):
|
||||
return bytes.__eq__(self, encode(other))
|
||||
else:
|
||||
return NotImplemented
|
||||
|
||||
def __ne__(self, other):
|
||||
if isinstance(other, bytes):
|
||||
return bytes.__ne__(self, other)
|
||||
elif isinstance(other, str):
|
||||
return bytes.__ne__(self, encode(other))
|
||||
else:
|
||||
return NotImplemented
|
||||
|
||||
def __gt__(self, other):
|
||||
if isinstance(other, bytes):
|
||||
return bytes.__gt__(self, other)
|
||||
elif isinstance(other, str):
|
||||
return bytes.__gt__(self, encode(other))
|
||||
else:
|
||||
return NotImplemented
|
||||
|
||||
def __ge__(self, other):
|
||||
if isinstance(other, bytes):
|
||||
return bytes.__ge__(self, other)
|
||||
elif isinstance(other, str):
|
||||
return bytes.__ge__(self, encode(other))
|
||||
else:
|
||||
return NotImplemented
|
||||
decode = decode_modified_utf8
|
||||
encode = encode_modified_utf8
|
@ -8,6 +8,7 @@ import hashlib
|
||||
import re
|
||||
import os
|
||||
import warnings
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def get_default_session():
|
||||
@ -38,10 +39,10 @@ def AnalyzeAPK(_file, session=None, raw=False):
|
||||
:param raw: boolean if raw bytes are supplied instead of a filename
|
||||
:rtype: return the :class:`~androguard.core.bytecodes.apk.APK`, list of :class:`~androguard.core.bytecodes.dvm.DalvikVMFormat`, and :class:`~androguard.core.analysis.analysis.Analysis` objects
|
||||
"""
|
||||
log.debug("AnalyzeAPK")
|
||||
logger.debug("AnalyzeAPK")
|
||||
|
||||
if session:
|
||||
log.debug("Using existing session {}".format(session))
|
||||
logger.debug("Using existing session {}".format(session))
|
||||
if raw:
|
||||
data = _file
|
||||
filename = hashlib.md5(_file).hexdigest()
|
||||
@ -53,8 +54,8 @@ def AnalyzeAPK(_file, session=None, raw=False):
|
||||
digest = session.add(filename, data)
|
||||
return session.get_objects_apk(filename, digest)
|
||||
else:
|
||||
log.debug("Analysing without session")
|
||||
a = APK(_file, raw=raw)
|
||||
logger.debug("Analysing without session")
|
||||
a = apk.APK(_file, raw=raw)
|
||||
# FIXME: probably it is not necessary to keep all DalvikVMFormats, as
|
||||
# they are already part of Analysis. But when using sessions, it works
|
||||
# this way...
|
||||
@ -82,7 +83,7 @@ def AnalyzeDex(filename, session=None, raw=False):
|
||||
|
||||
:rtype: return a tuple of (sha256hash, :class:`DalvikVMFormat`, :class:`Analysis`)
|
||||
"""
|
||||
log.debug("AnalyzeDex")
|
||||
logger.debug("AnalyzeDex")
|
||||
|
||||
if not session:
|
||||
session = get_default_session()
|
||||
@ -107,7 +108,7 @@ def AnalyzeODex(filename, session=None, raw=False):
|
||||
|
||||
:rtype: return a tuple of (sha256hash, :class:`DalvikOdexVMFormat`, :class:`Analysis`)
|
||||
"""
|
||||
log.debug("AnalyzeODex")
|
||||
logger.debug("AnalyzeODex")
|
||||
|
||||
if not session:
|
||||
session = get_default_session()
|
||||
@ -133,7 +134,7 @@ def RunDecompiler(d, dx, decompiler_name):
|
||||
:type decompiler: string
|
||||
"""
|
||||
if decompiler_name is not None:
|
||||
log.debug("Decompiler ...")
|
||||
logger.debug("Decompiler ...")
|
||||
decompiler_name = decompiler_name.lower()
|
||||
# TODO put this into the configuration object and make it more dynamic
|
||||
# e.g. detect new decompilers and so on...
|
||||
|
@ -1,5 +1,4 @@
|
||||
# External dependecies
|
||||
from loguru import logger
|
||||
import asn1crypto
|
||||
|
||||
# Stuff that might be useful
|
||||
|
0
androguard_session_2022-07-08_105521.ag
Normal file
0
androguard_session_2022-07-08_105521.ag
Normal file
0
androguard_session_2022-07-08_112803.ag
Normal file
0
androguard_session_2022-07-08_112803.ag
Normal file
86
cli.py
86
cli.py
@ -20,7 +20,6 @@ import androguard
|
||||
from androguard.core.androconf import show_logging
|
||||
from main import (androarsc_main,
|
||||
androaxml_main,
|
||||
androcg_main,
|
||||
export_apps_to_format,
|
||||
androsign_main,
|
||||
androlyze_main,
|
||||
@ -241,91 +240,6 @@ def arsc(input_,
|
||||
locale=locale)
|
||||
|
||||
|
||||
@entry_point.command()
|
||||
@click.option(
|
||||
'--output', '-o',
|
||||
default="callgraph.gml", show_default=True,
|
||||
help='Filename of the output file, the extension is used to decide which '
|
||||
'format to use',
|
||||
)
|
||||
@click.option(
|
||||
'--show', '-s',
|
||||
is_flag=True,
|
||||
help='instead of saving the graph, print it with mathplotlib '
|
||||
'(you might not see anything!)',
|
||||
)
|
||||
@click.option(
|
||||
'--verbose', '-v', is_flag=True,
|
||||
default=False,
|
||||
help='Print more output',
|
||||
)
|
||||
@click.option(
|
||||
'--classname',
|
||||
default='.*', show_default=True,
|
||||
help='Regex to filter by classname',
|
||||
)
|
||||
@click.option(
|
||||
'--methodname',
|
||||
default='.*', show_default=True,
|
||||
help='Regex to filter by methodname',
|
||||
)
|
||||
@click.option(
|
||||
'--descriptor',
|
||||
default='.*', show_default=True,
|
||||
help='Regex to filter by descriptor',
|
||||
)
|
||||
@click.option(
|
||||
'--accessflag',
|
||||
default='.*', show_default=True,
|
||||
help='Regex to filter by accessflags',
|
||||
)
|
||||
@click.option(
|
||||
'--no-isolated/--isolated',
|
||||
default=False,
|
||||
help='Do not store methods which has no xrefs',
|
||||
)
|
||||
@click.argument(
|
||||
'APK',
|
||||
nargs=1,
|
||||
type=click.Path(exists=True,
|
||||
file_okay=True,
|
||||
dir_okay=False,
|
||||
readable=True,
|
||||
allow_dash=False),
|
||||
)
|
||||
def cg(output,
|
||||
show,
|
||||
verbose,
|
||||
classname,
|
||||
methodname,
|
||||
descriptor,
|
||||
accessflag,
|
||||
no_isolated,
|
||||
apk):
|
||||
"""
|
||||
Create a call graph and export it into a graph format.
|
||||
|
||||
The default is to create a file called callgraph.gml in the current
|
||||
directory!
|
||||
|
||||
classnames are found in the type "Lfoo/bar/bla;".
|
||||
|
||||
Example:
|
||||
|
||||
\b
|
||||
$ androguard cg examples/tests/hello-world.apk
|
||||
"""
|
||||
androcg_main(verbose=verbose,
|
||||
APK=apk,
|
||||
classname=classname,
|
||||
methodname=methodname,
|
||||
descriptor=descriptor,
|
||||
accessflag=accessflag,
|
||||
no_isolated=no_isolated,
|
||||
show=show,
|
||||
output=output)
|
||||
|
||||
|
||||
@entry_point.command()
|
||||
@click.option(
|
||||
'--input', '-i', 'input_',
|
||||
|
100
main.py
100
main.py
@ -74,104 +74,6 @@ def androarsc_main(arscobj, outp=None, package=None, typ=None, locale=None):
|
||||
sys.stdout.write(highlight(buff.decode("UTF-8"), get_lexer_by_name("xml"), TerminalFormatter()))
|
||||
|
||||
|
||||
def androcg_main(verbose,
|
||||
APK,
|
||||
classname,
|
||||
methodname,
|
||||
descriptor,
|
||||
accessflag,
|
||||
no_isolated,
|
||||
show,
|
||||
output):
|
||||
from androguard.core.androconf import show_logging
|
||||
from androguard.core.bytecode import FormatClassToJava
|
||||
from androguard.misc import AnalyzeAPK
|
||||
import networkx as nx
|
||||
import logging
|
||||
log = logging.getLogger("androcfg")
|
||||
if verbose:
|
||||
show_logging(logging.INFO)
|
||||
|
||||
a, d, dx = AnalyzeAPK(APK)
|
||||
|
||||
entry_points = map(FormatClassToJava,
|
||||
a.get_activities() + a.get_providers() +
|
||||
a.get_services() + a.get_receivers())
|
||||
entry_points = list(entry_points)
|
||||
|
||||
log.info("Found The following entry points by search AndroidManifest.xml: "
|
||||
"{}".format(entry_points))
|
||||
|
||||
CG = dx.get_call_graph(classname,
|
||||
methodname,
|
||||
descriptor,
|
||||
accessflag,
|
||||
no_isolated,
|
||||
entry_points,
|
||||
)
|
||||
|
||||
def write_yaml(G_to_be_yaml, path_for_yaml_output, **kwds):
|
||||
yaml.dump(G_to_be_yaml, path_for_yaml_output, **kwds)
|
||||
|
||||
write_methods = dict(gml=_write_gml,
|
||||
gexf=nx.write_gexf,
|
||||
gpickle=nx.write_gpickle,
|
||||
graphml=nx.write_graphml,
|
||||
yaml=write_yaml,
|
||||
net=nx.write_pajek,
|
||||
)
|
||||
|
||||
if show:
|
||||
plot(CG)
|
||||
else:
|
||||
writer = output.rsplit(".", 1)[1]
|
||||
if writer in ["bz2", "gz"]:
|
||||
writer = output.rsplit(".", 2)[1]
|
||||
if writer not in write_methods:
|
||||
print("Could not find a method to export files to {}!"
|
||||
.format(writer))
|
||||
sys.exit(1)
|
||||
|
||||
write_methods[writer](CG, output)
|
||||
|
||||
|
||||
def plot(cg):
|
||||
"""
|
||||
Plot the call graph using matplotlib
|
||||
For larger graphs, this should not be used, as it is very slow
|
||||
and probably you can not see anything on it.
|
||||
|
||||
:param cg: A networkx call graph to plot
|
||||
"""
|
||||
import matplotlib.pyplot as plt
|
||||
import networkx as nx
|
||||
pos = nx.spring_layout(cg)
|
||||
|
||||
internal = []
|
||||
external = []
|
||||
|
||||
for n in cg.nodes:
|
||||
if n.is_external():
|
||||
external.append(n)
|
||||
else:
|
||||
internal.append(n)
|
||||
|
||||
nx.draw_networkx_nodes(cg, pos=pos, node_color='r', nodelist=internal)
|
||||
nx.draw_networkx_nodes(cg, pos=pos, node_color='b', nodelist=external)
|
||||
nx.draw_networkx_edges(cg, pos, arrows=True)
|
||||
nx.draw_networkx_labels(cg, pos=pos, labels={x: "{}{}".format(x.class_name, x.name) for x in cg.nodes})
|
||||
plt.draw()
|
||||
plt.show()
|
||||
|
||||
|
||||
def _write_gml(G, path):
|
||||
"""
|
||||
Wrapper around nx.write_gml
|
||||
"""
|
||||
import networkx as nx
|
||||
return nx.write_gml(G, path, stringizer=str)
|
||||
|
||||
|
||||
def export_apps_to_format(filename,
|
||||
s,
|
||||
output,
|
||||
@ -530,4 +432,4 @@ def androdis_main(offset, size, dex_file):
|
||||
i.show(idx)
|
||||
print()
|
||||
|
||||
idx += i.get_length()
|
||||
idx += i.get_length()
|
@ -1,4 +1,3 @@
|
||||
networkx>=2.2
|
||||
pygments>=2.3.1
|
||||
lxml>=4.3.0
|
||||
colorama>=0.4.1
|
||||
@ -8,3 +7,7 @@ click>=7.0
|
||||
pydot>=1.4.1
|
||||
ipython>=5.0.0
|
||||
pyyaml
|
||||
fastapi
|
||||
uvicorn
|
||||
loguru
|
||||
mutf8
|
BIN
tests/data/StringTests.dex
Normal file
BIN
tests/data/StringTests.dex
Normal file
Binary file not shown.
27
tests/data/StringTests.java.txt
Normal file
27
tests/data/StringTests.java.txt
Normal file
@ -0,0 +1,27 @@
|
||||
public class StringTests{
|
||||
public static void main(String... args){
|
||||
|
||||
String a = "this is a quite normal string";
|
||||
String b = "\u0000 \u0001 \u1234";
|
||||
String c = "使用在線工具將字符串翻譯為中文";
|
||||
String d = "перевод строки на русский с помощью онлайн-инструментов";
|
||||
String e = "온라인 도구를 사용하여 문자열을 한국어로 번역";
|
||||
String f = "オンラインツールを使用して文字列を日本語に翻訳";
|
||||
String g = "This is \ud83d\ude4f, an emoji.";
|
||||
String h = "\u2713 check this string";
|
||||
String i = "\uFFFF \u0000 \uFF00";
|
||||
String j = "\u0420\u043e\u0441\u0441\u0438\u044f";
|
||||
|
||||
System.out.println(a);
|
||||
System.out.println(b);
|
||||
System.out.println(c);
|
||||
System.out.println(d);
|
||||
System.out.println(d);
|
||||
System.out.println(f);
|
||||
System.out.println(g);
|
||||
System.out.println(h);
|
||||
System.out.println(i);
|
||||
System.out.println(j);
|
||||
}
|
||||
|
||||
}
|
@ -1,4 +1,9 @@
|
||||
from androguard.core.bytecodes.dvm import DalvikVMFormat
|
||||
import sys
|
||||
|
||||
sys.path.append(".")
|
||||
|
||||
from androguard.core import dex
|
||||
|
||||
from binascii import hexlify
|
||||
import parse_dex
|
||||
import unittest
|
||||
@ -15,7 +20,7 @@ class TestDexCodeParsing(unittest.TestCase):
|
||||
parsed = parse_dex.read_dex(fname)
|
||||
|
||||
with open(fname, "rb") as f:
|
||||
d = DalvikVMFormat(f.read())
|
||||
d = dex.DEX(f.read())
|
||||
|
||||
dif = Differ()
|
||||
|
||||
@ -52,7 +57,7 @@ class TestDexCodeParsing(unittest.TestCase):
|
||||
parsed = parse_dex.read_dex(fname)
|
||||
|
||||
with open(fname, "rb") as f:
|
||||
d = DalvikVMFormat(f.read())
|
||||
d = dex.DEX(f.read())
|
||||
|
||||
cm = d.get_class_manager()
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
import unittest
|
||||
|
||||
import sys
|
||||
from androguard.core.bytecodes.apk import APK
|
||||
from androguard.core.apk import APK
|
||||
|
||||
from androguard import session
|
||||
|
||||
|
@ -3,15 +3,17 @@ import unittest
|
||||
|
||||
import sys
|
||||
|
||||
from androguard.core import mutf8
|
||||
from androguard.core.bytecodes import dvm
|
||||
from androguard.core.analysis import analysis
|
||||
sys.path.append(".")
|
||||
|
||||
from androguard.core import mutf8
|
||||
from androguard.core import dex
|
||||
from androguard.core import analysis
|
||||
|
||||
|
||||
class StringTest(unittest.TestCase):
|
||||
def testDex(self):
|
||||
with open("examples/tests/StringTests.dex", "rb") as fd:
|
||||
d = dvm.DalvikVMFormat(fd.read())
|
||||
with open("tests/data/StringTests.dex", "rb") as fd:
|
||||
d = dex.DEX(fd.read())
|
||||
|
||||
stests = ["this is a quite normal string",
|
||||
"\u0000 \u0001 \u1234",
|
||||
@ -28,18 +30,18 @@ class StringTest(unittest.TestCase):
|
||||
self.assertIn(s, d.get_strings())
|
||||
|
||||
def testMUTF8(self):
|
||||
# self.assertEqual("\x67", mutf8.decode(b"\x67"))
|
||||
self.assertEqual("\x67", mutf8.decode(b"\x67"))
|
||||
# # Null byte
|
||||
# self.assertEqual("\x00", mutf8.decode(b"\xc0\x80"))
|
||||
# self.assertEqual("\uacf0", mutf8.decode(b"\xea\xb3\xb0"))
|
||||
self.assertEqual("\x00", mutf8.decode(b"\xc0\x80"))
|
||||
self.assertEqual("\uacf0", mutf8.decode(b"\xea\xb3\xb0"))
|
||||
# # Surrogates
|
||||
# self.assertEqual("\ud83d\ude4f", mutf8.decode(b"\xed\xa0\xbd\xed\xb9\x8f"))
|
||||
# self.assertEqual("\ud853\udf5c", mutf8.decode(b"\xed\xa1\x93\xed\xbd\x9c"))
|
||||
self.assertEqual("\ud83d\ude4f", mutf8.decode(b"\xed\xa0\xbd\xed\xb9\x8f"))
|
||||
self.assertEqual("\ud853\udf5c", mutf8.decode(b"\xed\xa1\x93\xed\xbd\x9c"))
|
||||
# # Lonely surrogates
|
||||
# self.assertEqual("\ud853", mutf8.decode(b"\xed\xa1\x93"))
|
||||
# self.assertEqual("\udf5c", mutf8.decode(b"\xed\xbd\x9c"))
|
||||
self.assertEqual("\ud853", mutf8.decode(b"\xed\xa1\x93"))
|
||||
self.assertEqual("\udf5c", mutf8.decode(b"\xed\xbd\x9c"))
|
||||
# # Normal ASCII String
|
||||
# self.assertEqual("hello world", mutf8.decode(b"\x68\x65\x6c\x6c\x6f\x20\x77\x6f\x72\x6c\x64"))
|
||||
self.assertEqual("hello world", mutf8.decode(b"\x68\x65\x6c\x6c\x6f\x20\x77\x6f\x72\x6c\x64"))
|
||||
|
||||
# Testing decode
|
||||
|
||||
@ -64,8 +66,5 @@ class StringTest(unittest.TestCase):
|
||||
self.assertEqual(b"\xed\xa1\x93", mutf8.encode("\ud853"))
|
||||
self.assertEqual(b, mutf8.encode("\U00024f5c\U0001f64f\ud83d\uacf0hello world\x00"))
|
||||
|
||||
self.assertEqual(mutf8.MUTF8String(b), mutf8.MUTF8String.from_str("\U00024f5c\U0001f64f\ud83d\uacf0hello world\x00"))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
Loading…
Reference in New Issue
Block a user