From baaa7f81d0c0ea47f67d5eec23af88328bfa4c0d Mon Sep 17 00:00:00 2001 From: root Date: Tue, 16 Jul 2013 19:32:14 +0200 Subject: [PATCH] marshal disassembly improvement --- uncompyle2/__init__.py | 21 +++---- uncompyle2/disas.py | 120 ++++++++++++++++++++++++++++++++++++++-- uncompyle2/magics.py | 22 +++++++- uncompyle2/scanner27.py | 2 +- uncompyle2/walker.py | 2 +- 5 files changed, 146 insertions(+), 21 deletions(-) diff --git a/uncompyle2/__init__.py b/uncompyle2/__init__.py index cad32257..af86852d 100755 --- a/uncompyle2/__init__.py +++ b/uncompyle2/__init__.py @@ -29,6 +29,8 @@ import sys, types, os import walker, verify, magics +import disas as dis +import marshal sys.setrecursionlimit(5000) __all__ = ['uncompyle_file', 'main'] @@ -65,7 +67,6 @@ def _load_module(filename): code_object: code_object from this file ''' - import magics, marshal fp = open(filename, 'rb') magic = fp.read(4) try: @@ -76,7 +77,7 @@ def _load_module(filename): raise ImportError, "This is a Python %s file! Only Python 2.5 to 2.7 files are supported." % version #print version fp.read(4) # timestamp - co = marshal.load(fp) + co = marshal.load(fp) #dis.marshalLoad(fp) fp.close() return version, co @@ -90,7 +91,7 @@ def uncompyle(version, co, out=None, showasm=0, showast=0): # store final output stream for case of error __real_out = out or sys.stdout if co.co_filename: - print >>__real_out, '#Embedded file name: %s' % co.co_filename + print >>__real_out, '# Embedded file name: %s' % co.co_filename # diff scanner if version == 2.7: import scanner27 as scan @@ -101,11 +102,9 @@ def uncompyle(version, co, out=None, showasm=0, showast=0): elif version == 2.5: import scanner25 as scan scanner = scan.Scanner25() - scanner.setShowAsm(showasm, out) tokens, customize = scanner.disassemble(co) - #sys.exit(0) # Build AST from disassembly. walk = walker.Walker(out, scanner, showast=showast) try: @@ -113,7 +112,6 @@ def uncompyle(version, co, out=None, showasm=0, showast=0): except walker.ParserError, e : # parser failed, dump disassembly print >>__real_out, e raise - del tokens # save memory # convert leading '__doc__ = "..." into doc string @@ -185,11 +183,11 @@ def main(in_base, out_base, files, codes, outfile=None, of = outfile tot_files = okay_files = failed_files = verify_failed_files = 0 - for code in codes: - version = sys.version[:3] # "2.5" - with open(code, "r") as f: - co = compile(f.read(), "", "exec") - uncompyle(sys.version[:3], co, sys.stdout, showasm=showasm, showast=showast) + #for code in codes: + # version = sys.version[:3] # "2.5" + # with open(code, "r") as f: + # co = compile(f.read(), "", "exec") + # uncompyle(sys.version[:3], co, sys.stdout, showasm=showasm, showast=showast) for file in files: infile = os.path.join(in_base, file) @@ -223,7 +221,6 @@ def main(in_base, out_base, files, codes, outfile=None, sys.stderr.write("\n# Can't uncompyle %s\n" % infile) import traceback traceback.print_exc() - #raise else: # uncompyle successfull if outfile: outstream.close() diff --git a/uncompyle2/disas.py b/uncompyle2/disas.py index 74f43d2f..fc0a272d 100755 --- a/uncompyle2/disas.py +++ b/uncompyle2/disas.py @@ -2,14 +2,15 @@ import sys import types +from struct import unpack +import marshal, pickle _have_code = (types.MethodType, types.FunctionType, types.CodeType, types.ClassType, type) +internStrings = [] def dis(x=None): """Disassemble classes, methods, functions, or code. - With no argument, disassemble the last traceback. - """ if x is None: distb() @@ -144,9 +145,7 @@ disco = disassemble def findlabels(code): """Detect all offsets in a byte code which are jump targets. - Return the list of offsets. - """ labels = [] n = len(code) @@ -170,9 +169,7 @@ def findlabels(code): def findlinestarts(code): """Find the offsets in a byte code which are start of lines in the source. - Generate pairs (offset, lineno) as described in Python/compile.c. - """ byte_increments = [ord(c) for c in code.co_lnotab[0::2]] line_increments = [ord(c) for c in code.co_lnotab[1::2]] @@ -190,6 +187,117 @@ def findlinestarts(code): if lineno != lastlineno: yield (addr, lineno) +def marshalLoad(fp): + global internStrings + internStrings = [] + return load(fp) + +def load(fp): + """ + Load marshal + """ + global internStrings + + marshalType = fp.read(1) + if marshalType == 'c': + Code = types.CodeType + + co_argcount = unpack('l', fp.read(4))[0] + co_nlocals = unpack('l', fp.read(4))[0] + co_stacksize = unpack('l', fp.read(4))[0] + co_flags = unpack('l', fp.read(4))[0] + co_code = load(fp) + co_consts = load(fp) + co_names = load(fp) + co_varnames = load(fp) + co_freevars = load(fp) + co_cellvars = load(fp) + co_filename = load(fp) + co_name = load(fp) + co_firstlineno = unpack('l', fp.read(4))[0] + co_lnotab = load(fp) + return Code(co_argcount, co_nlocals, co_stacksize, co_flags, co_code, co_consts, co_names,\ + co_varnames, co_filename, co_name, co_firstlineno, co_lnotab, co_freevars, co_cellvars) + + # const type + elif marshalType == '.': + return Ellipsis + elif marshalType == '0': + raise KeyError, marshalType + return None + elif marshalType == 'N': + return None + elif marshalType == 'T': + return True + elif marshalType == 'F': + return False + elif marshalType == 'S': + return StopIteration + # number type + elif marshalType == 'f': + n = fp.read(1) + return float(unpack('d', fp.read(n))[0]) + elif marshalType == 'g': + return float(unpack('d', fp.read(8))[0]) + elif marshalType == 'i': + return int(unpack('l', fp.read(4))[0]) + elif marshalType == 'I': + raise KeyError, marshalType + return None + elif marshalType == 'x': + raise KeyError, marshalType + return None + elif marshalType == 'y': + raise KeyError, marshalType + return None + elif marshalType == 'l': + n = unpack('l', fp.read(4))[0] + if n == 0: + return long(0) + ratio = 2 #2 for 64bit 1 for 32bit + size = abs(n); + d = long(0) + for j in range(0, size): + md = int(unpack('h', fp.read(2))[0]) + d += md << j*15; + if n < 0: + return long(d*-1) + return d + # strings type + elif marshalType == 'R': + refnum = unpack('l', fp.read(4))[0] + return internStrings[refnum] + elif marshalType == 's': + strsize = unpack('l', fp.read(4))[0] + return str(fp.read(strsize)) + elif marshalType == 't': + strsize = unpack('l', fp.read(4))[0] + interned = str(fp.read(strsize)) + internStrings.append(interned) + return interned + elif marshalType == 'u': + strsize = unpack('l', fp.read(4))[0] + return unicode(fp.read(strsize)) + # collection type + elif marshalType == '(': + tuplesize = unpack('l', fp.read(4))[0] + ret = tuple() + while tuplesize > 0: + ret += load(fp), + tuplesize -= 1 + return ret + elif marshalType == '[': + raise KeyError, marshalType + return None + elif marshalType == '{': + raise KeyError, marshalType + return None + elif marshalType in ['<', '>']: + raise KeyError, marshalType + return None + else: + sys.stderr.write("Unkown type %i (hex %x)\n" % (ord(marshalType), ord(marshalType))) + def _test(): """Simple test program to disassemble a file.""" if sys.argv[1:]: diff --git a/uncompyle2/magics.py b/uncompyle2/magics.py index 74177b7a..cd6493c1 100755 --- a/uncompyle2/magics.py +++ b/uncompyle2/magics.py @@ -40,7 +40,27 @@ versions = { # introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE) __build_magic(62191): '2.7', #2.7a0 (introduce SETUP_WITH) __build_magic(62201): '2.7', #2.7a0 (introduce BUILD_SET) - __build_magic(62211): '2.7' #2.7a0 (introduce MAP_ADD and SET_ADD) + __build_magic(62211): '2.7', #2.7a0 (introduce MAP_ADD and SET_ADD) + __build_magic(3000): '3.0', #3.000 + __build_magic(3010): '3.0', #3.000 (removed UNARY_CONVERT) + __build_magic(3020): '3.0', #3.000 (added BUILD_SET) + __build_magic(3030): '3.0', #3.000 (added keyword-only parameters) + __build_magic(3040): '3.0', #3.000 (added signature annotations) + __build_magic(3050): '3.0', #3.000 (print becomes a function) + __build_magic(3060): '3.0', #3.000 (PEP 3115 metaclass syntax) + __build_magic(3061): '3.0', #3.000 (string literals become unicode) + __build_magic(3071): '3.0', #3.000 (PEP 3109 raise changes) + __build_magic(3081): '3.0', #3.000 (PEP 3137 make __file__ and __name__ unicode) + __build_magic(3091): '3.0', #3.000 (kill str8 interning) + __build_magic(3101): '3.0', #3.000 (merge from 2.6a0, see 62151) + __build_magic(3103): '3.0', #3.000 (__file__ points to source file) + __build_magic(3111): '3.0', #3.0a4 (WITH_CLEANUP optimization). + __build_magic(3131): '3.0', #3.0a5 (lexical exception stacking, including POP_EXCEPT) + __build_magic(3141): '3.1', #3.1a0 (optimize list, set and dict comprehensions) + __build_magic(3151): '3.1', #3.1a0 (optimize conditional branches) + __build_magic(3160): '3.2', #3.2a0 (add SETUP_WITH) + __build_magic(3170): '3.2', #3.2a1 (add DUP_TOP_TWO, remove DUP_TOPX and ROT_FOUR) + __build_magic(3180): '3.2', #3.2a2 (add DELETE_DEREF) } magics = __by_version(versions) diff --git a/uncompyle2/scanner27.py b/uncompyle2/scanner27.py index cba4f915..9481cc90 100755 --- a/uncompyle2/scanner27.py +++ b/uncompyle2/scanner27.py @@ -17,7 +17,7 @@ import scanner as scan class Scanner27(scan.Scanner): def __init__(self): - self.Token = scan.Scanner.__init__(self, 2.6) + self.Token = scan.Scanner.__init__(self, 2.7) # check def disassemble(self, co, classname=None): ''' diff --git a/uncompyle2/walker.py b/uncompyle2/walker.py index 81fda68b..1ebb8563 100755 --- a/uncompyle2/walker.py +++ b/uncompyle2/walker.py @@ -1247,7 +1247,7 @@ class Walker(GenericASTTraversal, object): #if node[1][0] not in ('unpack', 'unpack_list'): return '(' + self.traverse(node[1]) + ')' #return self.traverse(node[1]) - raise "Can't find tuple parameter" % name + raise Exception("Can't find tuple parameter " + name) def make_function(self, node, isLambda, nested=1):