Improve Python 1.x decompiling

Still has bugs, but is much better.
This commit is contained in:
rocky 2022-04-30 05:00:49 -04:00
parent 5e1ba2baa1
commit eba0d37d0f
10 changed files with 267 additions and 24 deletions

View File

@ -0,0 +1,8 @@
# Driver script for the "4. Built-in functions" tests: for each of test_b1 and
# test_b2 it prints the module name, calls unload() on it, and then imports it.
# NOTE(review): `unload` is not defined in this file; presumably it is supplied
# by the star import from test_support -- confirm.
from test_support import *
print '4. Built-in functions'
print 'test_b1'
unload('test_b1')
import test_b1
print 'test_b2'
unload('test_b2')
import test_b2

View File

@ -1,4 +1,4 @@
# Copyright (c) 2015-2016, 2818-2021 by Rocky Bernstein
# Copyright (c) 2015-2016, 2018-2022 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 1999 John Aycock
@ -60,11 +60,18 @@ def disco_loop(disasm, queue, real_out):
while len(queue) > 0:
co = queue.popleft()
if co.co_name != "<module>":
print(
"\n# %s line %d of %s"
% (co.co_name, co.co_firstlineno, co.co_filename),
file=real_out,
)
if hasattr(co, "co_firstlineno"):
print(
"\n# %s line %d of %s"
% (co.co_name, co.co_firstlineno, co.co_filename),
file=real_out,
)
else:
print(
"\n# %s of %s"
% (co.co_name, co.co_filename),
file=real_out,
)
tokens, customize = disasm(co)
for t in tokens:
if iscode(t.pattr):

View File

@ -75,6 +75,8 @@ class PythonParser(GenericASTBuilder):
"come_from_loops",
# Python 3.7+
"importlist37",
# Python < 1.4
"args_store",
]
self.collect = frozenset(nt_list)

View File

@ -1,4 +1,4 @@
# Copyright (c) 2018 Rocky Bernstein
# Copyright (c) 2018, 2022 Rocky Bernstein
from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
from uncompyle6.parser import PythonParserSingle
@ -8,11 +8,24 @@ class Python14Parser(Python15Parser):
def p_misc14(self, args):
"""
# Not much here yet, but will probably need to add UNARY_CALL, BINARY_CALL,
# RAISE_EXCEPTION, BUILD_FUNCTION, UNPACK_ARG, UNPACK_VARARG, LOAD_LOCAL,
# SET_FUNC_ARGS, and RESERVE_FAST
# Not much here yet, but will probably need to add UNARY_CALL,
# LOAD_LOCAL, SET_FUNC_ARGS
call ::= expr tuple BINARY_CALL
expr ::= call
kv ::= DUP_TOP expr ROT_TWO LOAD_CONST STORE_SUBSCR
mkfunc ::= LOAD_CODE BUILD_FUNCTION
print_expr_stmt ::= expr PRINT_EXPR
raise_stmt2 ::= expr expr RAISE_EXCEPTION
star_args ::= RESERVE_FAST UNPACK_VARARG_1 args_store
args ::= RESERVE_FAST UNPACK_ARG args_store
stmt ::= print_expr_stmt
args_store ::= STORE_FAST+
stmt ::= args
stmt ::= star_args
# Not strictly needed, but tidies up output
stmt ::= doc_junk
doc_junk ::= LOAD_CONST POP_TOP

View File

@ -428,6 +428,7 @@ class Python26Parser(Python2Parser):
# since the operand can be a relative offset rather than
# an absolute offset.
setup_inst = self.insts[self.offset2inst_index[tokens[first].offset]]
last = min(len(tokens)-1, last)
if self.version <= (2, 2) and tokens[last] == "COME_FROM":
last += 1
return tokens[last-1].off2int() > setup_inst.argval

View File

@ -228,7 +228,8 @@ class Scanner26(scan.Scanner2):
elif op in self.opc.JABS_OPS:
pattr = repr(oparg)
elif op in self.opc.LOCAL_OPS:
pattr = varnames[oparg]
if oparg in varnames:
pattr = varnames[oparg]
elif op in self.opc.COMPARE_OPS:
pattr = self.opc.cmp_op[oparg]
elif op in self.opc.FREE_OPS:

View File

@ -199,6 +199,9 @@ TABLE_DIRECT = {
"BINARY_AND": ("&",),
"BINARY_OR": ("|",),
"BINARY_XOR": ("^",),
"DELETE_FAST": ("%|del %{pattr}\n",),
"DELETE_NAME": ("%|del %{pattr}\n",),
"DELETE_GLOBAL": ("%|del %{pattr}\n",),
"INPLACE_ADD": ("+=",),
"INPLACE_SUBTRACT": ("-=",),
"INPLACE_MULTIPLY": ("*=",),
@ -215,8 +218,6 @@ TABLE_DIRECT = {
"INPLACE_XOR": ("^=",),
# bin_op (formerly "binary_expr") is the Python AST BinOp
"bin_op": ("%c %c %c", 0, (-1, "binary_operator"), (1, "expr")),
"UNARY_POSITIVE": ("+",),
"UNARY_NEGATIVE": ("-",),
"UNARY_INVERT": ("~"),
# unary_op (formerly "unary_expr") is the Python AST UnaryOp
"unary_op": ("%c%c", (1, "unary_operator"), (0, "expr")),
@ -238,9 +239,6 @@ TABLE_DIRECT = {
"LOAD_DEREF": ("%{pattr}",),
"LOAD_LOCALS": ("locals()",),
"LOAD_ASSERT": ("%{pattr}",),
"DELETE_FAST": ("%|del %{pattr}\n",),
"DELETE_NAME": ("%|del %{pattr}\n",),
"DELETE_GLOBAL": ("%|del %{pattr}\n",),
"delete_subscript": (
"%|del %p[%c]\n",
(0, "expr", PRECEDENCE["subscript"]),
@ -264,6 +262,8 @@ TABLE_DIRECT = {
"STORE_NAME": ("%{pattr}",),
"STORE_GLOBAL": ("%{pattr}",),
"STORE_DEREF": ("%{pattr}",),
"UNARY_POSITIVE": ("+",),
"UNARY_NEGATIVE": ("-",),
"unpack": ("%C%,", (1, maxint, ", ")),
# This nonterminal we create on the fly in semantic routines
"unpack_w_parens": ("(%C%,)", (1, maxint, ", ")),

View File

@ -1,4 +1,4 @@
# Copyright (c) 2018-2019, 2021 by Rocky Bernstein
# Copyright (c) 2018-2019, 2021-2022 by Rocky Bernstein
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -17,7 +17,7 @@
"""
from uncompyle6.parsers.treenode import SyntaxTree
from uncompyle6.semantics.consts import INDENT_PER_LEVEL, PRECEDENCE, TABLE_R, TABLE_DIRECT
from uncompyle6.semantics.consts import INDENT_PER_LEVEL, NO_PARENTHESIS_EVER, PRECEDENCE, TABLE_R, TABLE_DIRECT
from uncompyle6.semantics.helper import flatten_list
from uncompyle6.scanners.tok import Token
@ -47,7 +47,7 @@ def customize_for_version(self, is_pypy, version):
if version[:2] >= (3, 7):
def n_call_kw_pypy37(node):
self.template_engine(("%p(", (0, 100)), node)
self.template_engine(("%p(", (0, NO_PARENTHESIS_EVER)), node)
assert node[-1] == "CALL_METHOD_KW"
arg_count = node[-1].attr
kw_names = node[-2]
@ -193,7 +193,26 @@ def customize_for_version(self, is_pypy, version):
self.prune()
self.n_iftrue_stmt24 = n_iftrue_stmt24
else: # version <= 2.3:
elif version <= (1, 4):
TABLE_DIRECT.update(
{
"call": (
"%p(%P)",
(0, "expr", 100), (1,-1,", ")
),
"print_expr_stmt": (
("%|print %c,\n", 0)
),
}
)
# FIXME: figure out how to handle LOAD_FAST
# it uses code.names
# def n_LOAD_FAST(node):
# pass
# self.n_LOAD_FAST = n_LOAD_FAST
else: # 1.0 <= version <= 2.3:
TABLE_DIRECT.update({"if1_stmt": ("%|if 1\n%+%c%-", 5)})
if version <= (2, 1):
TABLE_DIRECT.update(

View File

@ -0,0 +1,191 @@
# Copyright (c) 2015-2022 by Rocky Bernstein
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
All the crazy things we have to do to handle Python functions in Python before 3.0.
The saga of changes continues in 3.0 and above and in other files.
"""
from typing import List, Tuple
from uncompyle6.scanner import Code
from uncompyle6.semantics.parser_error import ParserError
from uncompyle6.parser import ParserError as ParserError2
from uncompyle6.semantics.helper import (
print_docstring,
find_all_globals,
find_globals_and_nonlocals,
find_none,
)
from xdis import iscode
def make_function1(self, node, is_lambda, nested=1, code_node=None):
    """
    Dump function definition, doc string, and function body.
    This code is specialized for Python 1.x.

    Parameters:
      self:      the source walker; this routine uses its ``build_ast``,
                 ``write``, ``println``, ``traverse``, ``gen_source``,
                 ``indent``, ``mod_globs``, ``scanner``, ``currentclass``,
                 ``version`` and ``tolerate_errors`` attributes.
      node:      parse-tree node for the function; its last child must be a
                 token whose kind starts with "BUILD_".
      is_lambda: true when the code should be written as a ``lambda``
                 expression rather than as a parameter list plus body.
      nested:    accepted for signature compatibility; not used here.
      code_node: node whose ``attr`` holds the function's code object.

    Returns None. If parsing the function's code fails, the parser error
    text is written to the output and, unless ``self.tolerate_errors`` is
    set, the error is also saved in ``self.ERROR``.
    """

    def build_param(tree, param_names: List[str]) -> Tuple[bool, List[str]]:
        """Build the formal parameter list from the leading args node.

        The first child of ``tree`` must be an "args" or "star_args" node.
        That child is removed from ``tree`` so that it is not emitted again
        as part of the function body.

        Returns a pair: whether the node was "star_args", and the list of
        parameter names, looked up in ``param_names`` by each store
        instruction's ``attr``.
        """
        args = tree[0]
        del tree[0]
        assert args.kind in ("star_args", "args")
        has_star_arg = args.kind == "star_args"
        args_store = args[2]
        assert args_store == "args_store"
        params = [param_names[arg.attr] for arg in args_store]
        return has_star_arg, params

    # The last child is expected to be a BUILD_... token (e.g. BUILD_FUNCTION).
    assert node[-1].kind.startswith("BUILD_")

    # Default-value handling is not implemented yet; the sketch below is kept
    # as a guide for when it is.
    defparams = []
    # args_node = node[-1]
    # if isinstance(args_node.attr, tuple):
    #     # positional args are after kwargs
    #     defparams = node[1 : args_node.attr[0] + 1]
    #     pos_args, kw_args, annotate_argc = args_node.attr
    # else:
    #     defparams = node[: args_node.attr]
    #     kw_args = 0
    #     pass

    # NOTE: lambda_index is always None for now, so the code object always
    # comes from code_node.
    lambda_index = None

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == "LOAD_LAMBDA"
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    # (currently unused; kept for the default-value handling sketched above)
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    paramnames.reverse()
    defparams.reverse()

    try:
        tree = self.build_ast(
            code._tokens,
            code._customize,
            code,
            is_lambda=is_lambda,
            noneInNames=("None" in code.co_names),
        )
    except (ParserError, ParserError2) as p:
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return

    # Remember the indentation in effect before the body is generated; it is
    # what the docstring is printed with below.
    indent = self.indent

    # build parameters
    has_star_arg, params = build_param(tree, code.co_names)

    if has_star_arg:
        params[-1] = "*" + params[-1]

    # dump parameter list (with default values)
    if is_lambda:
        self.write("lambda ", ", ".join(params))
        # If the last statement is None (which is the
        # same thing as "return None" in a lambda) and the
        # next to last statement is a "yield". Then we want to
        # drop the (return) None since that was just put there
        # to have something to after the yield finishes.
        # FIXME: this is a bit hoaky and not general
        #
        # The parse tree is bound to "tree"; the name "ast" that was used
        # here previously is not defined anywhere in this function.
        if (
            len(tree) > 1
            and self.traverse(tree[-1]) == "None"
            and self.traverse(tree[-2]).strip().startswith("yield")
        ):
            del tree[-1]
            # Now pick out the expr part of the last statement
            last_expr = tree[-1]
            while last_expr.kind != "expr":
                last_expr = last_expr[0]
            tree[-1] = last_expr
    else:
        self.write("(", ", ".join(params))

    # if kw_args > 0:
    #     if not (4 & code.co_flags):
    #         if argc > 0:
    #             self.write(", *, ")
    #         else:
    #             self.write("*, ")
    #         pass
    #     else:
    #         self.write(", ")
    #     for n in node:
    #         if n == "pos_arg":
    #             continue
    #         else:
    #             self.preorder(n)
    #         break
    #     pass
    # if code_has_star_star_arg(code):
    #     if argc > 0:
    #         self.write(", ")
    #     self.write("**%s" % code.co_varnames[argc + kw_pairs])

    if is_lambda:
        self.write(": ")
    else:
        self.println("):")

    if (
        len(code.co_consts) > 0 and code.co_consts[0] is not None and not is_lambda
    ):  # ugly
        # docstring exists, dump it
        print_docstring(self, indent, code.co_consts[0])

    if not is_lambda:
        assert tree == "stmts"

    all_globals = find_all_globals(tree, set())

    # Python 1 doesn't support the "nonlocal" statement, so only the globals
    # half of the result is used.
    declared_globals, _nonlocals = find_globals_and_nonlocals(
        tree, set(), set(), code, self.version
    )

    for g in sorted((all_globals & self.mod_globs) | declared_globals):
        self.println(self.indent, "global ", g)
    self.mod_globs -= all_globals

    has_none = "None" in code.co_names
    rn = has_none and not find_none(tree)
    tree.code = code
    self.gen_source(
        tree, code.co_name, code._customize, is_lambda=is_lambda, returnNone=rn
    )

    code._tokens = None  # save memory
    code._customize = None  # save memory

View File

@ -143,6 +143,7 @@ from uncompyle6.scanner import Code, get_scanner
import uncompyle6.parser as python_parser
from uncompyle6.semantics.check_ast import checker
from uncompyle6.semantics.make_function1 import make_function1
from uncompyle6.semantics.make_function2 import make_function2
from uncompyle6.semantics.make_function3 import make_function3
from uncompyle6.semantics.make_function36 import make_function36
@ -151,9 +152,7 @@ from uncompyle6.semantics.customize import customize_for_version
from uncompyle6.semantics.gencomp import ComprehensionMixin
from uncompyle6.semantics.helper import (
print_docstring,
find_code_node,
find_globals_and_nonlocals,
flatten_list,
)
from uncompyle6.scanners.tok import Token
@ -176,7 +175,6 @@ from uncompyle6.semantics.consts import (
TAB,
TABLE_R,
escape,
minint,
)
@ -539,7 +537,9 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
# Python changes how functions are made so much across versions that we need
# at least 3 different routines, and probably more in the future.
def make_function(self, node, is_lambda, nested=1, code_node=None, annotate=None):
if self.version <= (2, 7):
if self.version <= (1, 2):
make_function1(self, node, is_lambda, nested, code_node)
elif self.version <= (2, 7):
make_function2(self, node, is_lambda, nested, code_node)
elif (3, 0) <= self.version <= (3, 5):
make_function3(self, node, is_lambda, nested, code_node)
@ -994,6 +994,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
result = "(%s)" % result
return result
# return self.traverse(node[1])
return f"({name}"
raise Exception("Can't find tuple parameter " + name)
def build_class(self, code):