python-uncompyle6/uncompyle6/semantics/pysource.py
2024-02-04 12:36:08 -05:00

1439 lines
49 KiB
Python

# Copyright (c) 2015-2024 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 1999 John Aycock
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Creates Python source code from an uncompyle6 parse tree.
The terminal symbols are CPython bytecode instructions. (See the
python documentation under module "dis" for a list of instructions
and what they mean).
Upper levels of the grammar is a more-or-less conventional grammar for
Python.
"""
# The below is a bit long, but still it is somewhat abbreviated.
# See https://github.com/rocky/python-uncompyle6/wiki/Table-driven-semantic-actions.
# for a more complete explanation, nicely marked up and with examples.
#
#
# Semantic action rules for nonterminal symbols can be specified here by
# creating a method prefaced with "n_" for that nonterminal. For
# example, "n_exec_stmt" handles the semantic actions for the
# "exec_stmt" nonterminal symbol. Similarly if a method with the name
# of the nonterminal is suffixed with "_exit" it will be called after
# all of its children are called.
#
# After a while writing methods this way, you'll find many routines which do similar
# sorts of things, and soon you'll find you want a short notation to
# describe rules and not have to create methods at all.
#
# So another other way to specify a semantic rule for a nonterminal is via
# either tables MAP_R, or MAP_DIRECT where the key is the
# nonterminal name.
#
# These dictionaries use a printf-like syntax to direct substitution
# from attributes of the nonterminal and its children..
#
# The rest of the below describes how table-driven semantic actions work
# and gives a list of the format specifiers. The default() and
# template_engine() methods implement most of the below.
#
# We allow for a couple of ways to interact with a node in a tree. So
# step 1 after not seeing a custom method for a nonterminal is to
# determine from what point of view tree-wise the rule is applied.
# In the diagram below, N is a nonterminal name, and K also a nonterminal
# name but the one used as a key in the table.
# we show where those are with respect to each other in the
# parse tree for N.
#
#
# N&K N
# / | ... \ / | ... \
# O O O O O K
#
#
# TABLE_DIRECT TABLE_R
#
# The default table is TABLE_DIRECT mapping By far, most rules used work this way.
#
# The key K is then extracted from the subtree and used to find one
# of the tables, T listed above. The result after applying T[K] is
# a format string and arguments (a la printf()) for the formatting
# engine.
#
# Escapes in the format string are:
#
# %c evaluate/traverse the node recursively. Its argument is a single
# integer or tuple representing a node index.
# If a tuple is given, the first item is the node index while
# the second item is a string giving the node/noterminal name.
# This name will be checked at runtime against the node type.
#
# %p like %c but sets the operator precedence.
# Its argument then is a tuple indicating the node
# index and the precedence value, an integer. If 3 items are given,
# the second item is the nonterminal name and the precedence is given last.
#
# %C evaluate/travers children recursively, with sibling children separated by the
# given string. It needs a 3-tuple: a starting node, the maximum
# value of an end node, and a string to be inserted between sibling children
#
# %, Append ',' if last %C only printed one item. This is mostly for tuples
# on the LHS of an assignment statement since BUILD_TUPLE_n pretty-prints
# other tuples. The specifier takes no arguments
#
# %P same as %C but sets operator precedence. Its argument is a 4-tuple:
# the node low and high indices, the separator, a string the precedence
# value, an integer.
#
# %D Same as `%C` this is for left-recursive lists like kwargs where goes
# to epsilon at the beginning. It needs a 3-tuple: a starting node, the
# maximum value of an end node, and a string to be inserted between
# sibling children. If we were to use `%C` an extra separator with an
# epsilon would appear at the beginning.
#
# %| Insert spaces to the current indentation level. Takes no arguments.
#
# %+ increase current indentation level. Takes no arguments.
#
# %- decrease current indentation level. Takes no arguments.
#
# %{EXPR} Python eval(EXPR) in context of node. Takes no arguments
#
# %[N]{EXPR} Python eval(EXPR) in context of node[N]. Takes no arguments
#
# %[N]{%X} evaluate/recurse on child node[N], using specifier %X.
# %X can be one of the above, e.g. %c, %p, etc. Takes the arguments
# that the specifier uses.
#
# %% literal '%'. Takes no arguments.
#
#
# The '%' may optionally be followed by a number (C) in square
# brackets, which makes the template_engine walk down to N[C] before
# evaluating the escape code.
import sys
from spark_parser import GenericASTTraversal
from xdis import COMPILER_FLAG_BIT, iscode
from xdis.version_info import PYTHON_VERSION_TRIPLE
from uncompyle6.parser import get_python_parser, parse
from uncompyle6.parsers.treenode import SyntaxTree
from uncompyle6.scanner import Code, get_scanner
from uncompyle6.scanners.tok import Token
from uncompyle6.semantics.check_ast import checker
from uncompyle6.semantics.consts import (
ASSIGN_TUPLE_PARAM,
INDENT_PER_LEVEL,
LINE_LENGTH,
MAP,
MAP_DIRECT,
NAME_MODULE,
NONE,
PASS,
PRECEDENCE,
RETURN_LOCALS,
RETURN_NONE,
TAB,
TABLE_R,
escape,
)
from uncompyle6.semantics.customize import customize_for_version
from uncompyle6.semantics.gencomp import ComprehensionMixin
from uncompyle6.semantics.helper import find_globals_and_nonlocals, print_docstring
from uncompyle6.semantics.make_function1 import make_function1
from uncompyle6.semantics.make_function2 import make_function2
from uncompyle6.semantics.make_function3 import make_function3
from uncompyle6.semantics.make_function36 import make_function36
from uncompyle6.semantics.n_actions import NonterminalActions
from uncompyle6.semantics.parser_error import ParserError
from uncompyle6.semantics.transform import TreeTransform, is_docstring
from uncompyle6.show import maybe_show_tree
from uncompyle6.util import better_repr
def unicode(x):
return x
from io import StringIO
PARSER_DEFAULT_DEBUG = {
"rules": False,
"transition": False,
"reduce": False,
"errorstack": "full",
"context": True,
"dups": False,
}
IS_PYPY = "__pypy__" in sys.builtin_module_names
TREE_DEFAULT_DEBUG = {"before": False, "after": False}
DEFAULT_DEBUG_OPTS = {
"asm": False,
"tree": TREE_DEFAULT_DEBUG,
"grammar": dict(PARSER_DEFAULT_DEBUG),
}
class SourceWalkerError(Exception):
def __init__(self, errmsg):
self.errmsg = errmsg
def __str__(self):
return self.errmsg
class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
"""
Class to traverses a Parse Tree of the bytecode instruction built from parsing to produce some sort of source text.
The Parse tree may be turned an Abstract Syntax tree as an intermediate step.
"""
stacked_params = ("f", "indent", "is_lambda", "_globals")
def __init__(
self,
version,
out,
scanner,
showast=TREE_DEFAULT_DEBUG,
debug_parser=PARSER_DEFAULT_DEBUG,
compile_mode="exec",
is_pypy=IS_PYPY,
linestarts={},
tolerate_errors=False,
):
"""`version' is the Python version (a float) of the Python dialect
of both the syntax tree and language we should produce.
`out' is IO-like file pointer to where the output should go. It
would have a getvalue() method.
`scanner' is a method to call when we need to scan tokens. Sometimes
in producing output we will run across further tokens that need
to be scanned.
If `showast' is True, we print the syntax tree.
`compile_mode' is is either 'exec' or 'single'. It is the compile
mode that was used to create the Syntax Tree and specifies a
grammar variant within a Python version to use.
`is_pypy` should be True if the Syntax Tree was generated for PyPy.
`linestarts` is a dictionary of line number to bytecode offset. This
can sometimes assist in determining which kind of source-code construct
to use when there is ambiguity.
"""
GenericASTTraversal.__init__(self, ast=None)
self.scanner = scanner
params = {"f": out, "indent": ""}
self.version = version
self.p = get_python_parser(
version,
debug_parser=dict(debug_parser),
compile_mode=compile_mode,
is_pypy=is_pypy,
)
self.ast_errors = []
self.currentclass = None
self.classes = []
self.debug_parser = dict(debug_parser)
# Initialize p_lambda on demand
self.line_number = 1
self.linemap = {}
self.p_lambda = None
self.params = params
self.param_stack = []
self.ERROR = None
self.prec = 100
self.return_none = False
self.mod_globs = set()
self.showast = showast
self.pending_newlines = 0
self.linestarts = linestarts
self.treeTransform = TreeTransform(version=self.version, show_ast=showast)
# FIXME: have p.insts update in a better way
# modularity is broken here
self.insts = scanner.insts
self.offset2inst_index = scanner.offset2inst_index
# This is in Python 2.6 on. It changes the way
# strings get interpreted. See n_LOAD_CONST
self.FUTURE_UNICODE_LITERALS = False
# Sometimes we may want to continue decompiling when there are errors
# and sometimes not
self.tolerate_errors = tolerate_errors
# If we are in a 3.6+ format string, we may need an
# extra level of parens when seeing a lambda. We also use
# this to understand whether or not to add the "f" prefix.
# When not "None" it is a string of the last nonterminal
# that started the format string
self.in_format_string = None
# hide_internal suppresses displaying the additional instructions that sometimes
# exist in code but were not written in the source code.
# An example is:
# __module__ = __name__
self.hide_internal = True
self.compile_mode = compile_mode
self.name = None
self.version = version
self.is_pypy = is_pypy
customize_for_version(self, is_pypy, version)
return
def maybe_show_tree(self, ast, phase):
if self.showast.get("before", False):
self.println(
"""
---- end before transform
"""
)
if self.showast.get("after", False):
self.println(
"""
---- begin after transform
"""
+ " "
)
if self.showast.get(phase, False):
maybe_show_tree(self, ast)
def str_with_template(self, ast):
stream = sys.stdout
stream.write(self.str_with_template1(ast, "", None))
stream.write("\n")
def str_with_template1(self, ast, indent, sibNum=None) -> str:
rv = str(ast.kind)
if sibNum is not None:
rv = "%2d. %s" % (sibNum, rv)
enumerate_children = False
if len(ast) > 1:
rv += f" ({len(ast)})"
enumerate_children = True
if ast in PRECEDENCE:
rv += f", precedence {PRECEDENCE[ast]}"
mapping = self._get_mapping(ast)
table = mapping[0]
key = ast
for i in mapping[1:]:
key = key[i]
pass
if ast.transformed_by is not None:
if ast.transformed_by is True:
rv += " transformed"
else:
rv += " transformed by %s" % ast.transformed_by
pass
pass
if key.kind in table:
rv += ": %s" % str(table[key.kind])
rv = indent + rv
indent += " "
i = 0
for node in ast:
if hasattr(node, "__repr1__"):
if enumerate_children:
child = self.str_with_template1(node, indent, i)
else:
child = self.str_with_template1(node, indent, None)
else:
inst = node.format(line_prefix="L.")
if inst.startswith("\n"):
# Nuke leading \n
inst = inst[1:]
if enumerate_children:
child = indent + "%2d. %s" % (i, inst)
else:
child = indent + inst
pass
rv += "\n" + child
i += 1
return rv
def indent_if_source_nl(self, line_number: int, indent: int):
if line_number != self.line_number:
self.write("\n" + indent + INDENT_PER_LEVEL[:-1])
return self.line_number
f = property(
lambda s: s.params["f"],
lambda s, x: s.params.__setitem__("f", x),
lambda s: s.params.__delitem__("f"),
None,
)
indent = property(
lambda s: s.params["indent"],
lambda s, x: s.params.__setitem__("indent", x),
lambda s: s.params.__delitem__("indent"),
None,
)
is_lambda = property(
lambda s: s.params["is_lambda"],
lambda s, x: s.params.__setitem__("is_lambda", x),
lambda s: s.params.__delitem__("is_lambda"),
None,
)
_globals = property(
lambda s: s.params["_globals"],
lambda s, x: s.params.__setitem__("_globals", x),
lambda s: s.params.__delitem__("_globals"),
None,
)
def set_pos_info(self, node):
if hasattr(node, "linestart") and node.linestart:
self.line_number = node.linestart
def preorder(self, node=None):
super(SourceWalker, self).preorder(node)
self.set_pos_info(node)
def indent_more(self, indent=TAB):
self.indent += indent
def indent_less(self, indent=TAB):
self.indent = self.indent[: -len(indent)]
def traverse(self, node, indent=None, is_lambda=False):
self.param_stack.append(self.params)
if indent is None:
indent = self.indent
p = self.pending_newlines
self.pending_newlines = 0
self.params = {
"_globals": {},
"_nonlocals": {}, # Python 3 has nonlocal
"f": StringIO(),
"indent": indent,
"is_lambda": is_lambda,
}
self.preorder(node)
self.f.write("\n" * self.pending_newlines)
result = self.f.getvalue()
self.params = self.param_stack.pop()
self.pending_newlines = p
return result
def write(self, *data):
if (len(data) == 0) or (len(data) == 1 and data[0] == ""):
return
out = "".join((str(j) for j in data))
n = 0
for i in out:
if i == "\n":
n += 1
if n == len(out):
self.pending_newlines = max(self.pending_newlines, n)
return
elif n:
self.pending_newlines = max(self.pending_newlines, n)
out = out[n:]
break
else:
break
if self.pending_newlines > 0:
self.f.write("\n" * self.pending_newlines)
self.pending_newlines = 0
for i in out[::-1]:
if i == "\n":
self.pending_newlines += 1
else:
break
if self.pending_newlines:
out = out[: -self.pending_newlines]
self.f.write(out)
def println(self, *data):
if data and not (len(data) == 1 and data[0] == ""):
self.write(*data)
self.pending_newlines = max(self.pending_newlines, 1)
def is_return_none(self, node):
# Is there a better way?
ret = (
node[0] == "return_expr"
and node[0][0] == "expr"
and node[0][0][0] == "LOAD_CONST"
and node[0][0][0].pattr is None
)
if self.version <= (2, 6):
return ret
else:
# FIXME: should the SyntaxTree expression be folded into
# the global RETURN_NONE constant?
return ret or node == SyntaxTree(
"return", [SyntaxTree("return_expr", [NONE]), Token("RETURN_VALUE")]
)
def pp_tuple(self, tup):
"""Pretty print a tuple"""
last_line = self.f.getvalue().split("\n")[-1]
l = len(last_line) + 1
indent = " " * l
self.write("(")
sep = ""
for item in tup:
self.write(sep)
l += len(sep)
s = better_repr(item, self.version)
l += len(s)
self.write(s)
sep = ","
if l > LINE_LENGTH:
l = 0
sep += "\n" + indent
else:
sep += " "
pass
pass
if len(tup) == 1:
self.write(", ")
self.write(")")
# Python changes make function this much that we need at least 3 different routines,
# and probably more in the future.
def make_function(self, node, is_lambda, nested=1, code_node=None, annotate=None):
if self.version <= (1, 2):
make_function1(self, node, is_lambda, nested, code_node)
elif self.version <= (2, 7):
make_function2(self, node, is_lambda, nested, code_node)
elif (3, 0) <= self.version < (3, 6):
make_function3(self, node, is_lambda, nested, code_node)
elif self.version >= (3, 6):
make_function36(self, node, is_lambda, nested, code_node)
def print_super_classes(self, node):
if not (node == "tuple"):
return
n_subclasses = len(node[:-1])
if n_subclasses > 0 or self.version > (2, 4):
# Not an old-style pre-2.2 class
self.write("(")
line_separator = ", "
sep = ""
for elem in node[:-1]:
value = self.traverse(elem)
self.write(sep, value)
sep = line_separator
if n_subclasses > 0 or self.version > (2, 4):
# Not an old-style pre-2.2 class
self.write(")")
def print_super_classes3(self, node):
n = len(node) - 1
if node.kind != "expr":
if node == "kwarg":
self.template_engine(("(%[0]{attr}=%c)", 1), node)
return
kwargs = None
assert node[n].kind.startswith("CALL_FUNCTION")
if node[n].kind.startswith("CALL_FUNCTION_KW"):
if self.is_pypy:
# FIXME: this doesn't handle positional and keyword args
# properly. Need to do something more like that below
# in the non-PYPY 3.6 case.
self.template_engine(("(%[0]{attr}=%c)", 1), node[n - 1])
return
else:
kwargs = node[n - 1].attr
assert isinstance(kwargs, tuple)
i = n - (len(kwargs) + 1)
j = 1 + n - node[n].attr
else:
i = start = n - 2
for i in range(start, 0, -1):
if not node[i].kind in ["expr", "call", "LOAD_CLASSNAME"]:
break
pass
if i == start:
return
i += 2
line_separator = ", "
sep = ""
self.write("(")
if kwargs:
# Last arg is tuple of keyword values: omit
l = n - 1
else:
l = n
if kwargs:
# 3.6+ does this
while j < i:
self.write(sep)
value = self.traverse(node[j])
self.write("%s" % value)
sep = line_separator
j += 1
j = 0
while i < l:
self.write(sep)
value = self.traverse(node[i])
self.write("%s=%s" % (kwargs[j], value))
sep = line_separator
j += 1
i += 1
else:
while i < l:
value = self.traverse(node[i])
i += 1
self.write(sep, value)
sep = line_separator
pass
pass
else:
if node[0] == "LOAD_STR":
return
value = self.traverse(node[0])
self.write("(")
self.write(value)
pass
self.write(")")
def kv_map(self, kv_node, sep, line_number, indent):
first_time = True
for kv in kv_node:
assert kv in ("kv", "kv2", "kv3")
# kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR
# kv2 ::= DUP_TOP expr expr ROT_THREE STORE_SUBSCR
# kv3 ::= expr expr STORE_MAP
# FIXME: DRY this and the above
if kv == "kv":
self.write(sep)
name = self.traverse(kv[-2], indent="")
if first_time:
line_number = self.indent_if_source_nl(line_number, indent)
first_time = False
pass
line_number = self.line_number
self.write(name, ": ")
value = self.traverse(kv[1], indent=self.indent + (len(name) + 2) * " ")
elif kv == "kv2":
self.write(sep)
name = self.traverse(kv[1], indent="")
if first_time:
line_number = self.indent_if_source_nl(line_number, indent)
first_time = False
pass
line_number = self.line_number
self.write(name, ": ")
value = self.traverse(
kv[-3], indent=self.indent + (len(name) + 2) * " "
)
elif kv == "kv3":
self.write(sep)
name = self.traverse(kv[-2], indent="")
if first_time:
line_number = self.indent_if_source_nl(line_number, indent)
first_time = False
pass
line_number = self.line_number
self.write(name, ": ")
line_number = self.line_number
value = self.traverse(kv[0], indent=self.indent + (len(name) + 2) * " ")
pass
self.write(value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + " "
line_number = self.line_number
pass
pass
def template_engine(self, entry, startnode):
"""The format template interpretation engine. See the comment at the
beginning of this module for how we interpret format
specifications such as %c, %C, and so on.
"""
# print("-----")
# print(startnode)
# print(entry[0])
# print('======')
fmt = entry[0]
arg = 1
i = 0
m = escape.search(fmt)
while m:
i = m.end()
self.write(m.group("prefix"))
typ = m.group("type") or "{"
node = startnode
if m.group("child"):
node = node[int(m.group("child"))]
if typ == "%":
self.write("%")
elif typ == "+":
self.line_number += 1
self.indent_more()
elif typ == "-":
self.line_number += 1
self.indent_less()
elif typ == "|":
self.line_number += 1
self.write(self.indent)
# Used mostly on the LHS of an assignment
# BUILD_TUPLE_n is pretty printed and may take care of other uses.
elif typ == ",":
if node.kind in ("unpack", "unpack_w_parens") and node[0].attr == 1:
self.write(",")
elif typ == "c":
index = entry[arg]
if isinstance(index, tuple):
if isinstance(index[1], str):
# if node[index[0]] != index[1]:
# from trepan.api import debug; debug()
assert (
node[index[0]] == index[1]
), "at %s[%d], expected '%s' node; got '%s'" % (
node.kind,
arg,
index[1],
node[index[0]].kind,
)
else:
assert (
node[index[0]] in index[1]
), "at %s[%d], expected to be in '%s' node; got '%s'" % (
node.kind,
arg,
index[1],
node[index[0]].kind,
)
index = index[0]
assert isinstance(
index, int
), "at %s[%d], %s should be int or tuple" % (
node.kind,
arg,
type(index),
)
try:
node[index]
except IndexError:
raise RuntimeError(
f"""
Expanding '{node.kind}' in template '{entry}[{arg}]':
{index} is invalid; has only {len(node)} entries
"""
)
self.preorder(node[index])
arg += 1
elif typ == "p":
p = self.prec
# entry[arg]
tup = entry[arg]
assert isinstance(tup, tuple)
if len(tup) == 3:
(index, nonterm_name, self.prec) = tup
if isinstance(tup[1], str):
assert (
node[index] == nonterm_name
), "at %s[%d], expected '%s' node; got '%s'" % (
node.kind,
arg,
nonterm_name,
node[index].kind,
)
else:
assert (
node[tup[0]] in tup[1]
), "at %s[%d], expected to be in '%s' node; got '%s'" % (
node.kind,
arg,
index[1],
node[index[0]].kind,
)
else:
assert len(tup) == 2
(index, self.prec) = entry[arg]
self.preorder(node[index])
self.prec = p
arg += 1
elif typ == "C":
low, high, sep = entry[arg]
remaining = len(node[low:high])
for subnode in node[low:high]:
self.preorder(subnode)
remaining -= 1
if remaining > 0:
self.write(sep)
pass
pass
arg += 1
elif typ == "D":
low, high, sep = entry[arg]
remaining = len(node[low:high])
for subnode in node[low:high]:
remaining -= 1
if len(subnode) > 0:
self.preorder(subnode)
if remaining > 0:
self.write(sep)
pass
pass
pass
arg += 1
elif typ == "x":
# This code is only used in fragments
assert isinstance(entry[arg], tuple)
arg += 1
elif typ == "P":
p = self.prec
low, high, sep, self.prec = entry[arg]
remaining = len(node[low:high])
# remaining = len(node[low:high])
for subnode in node[low:high]:
self.preorder(subnode)
remaining -= 1
if remaining > 0:
self.write(sep)
self.prec = p
arg += 1
elif typ == "{":
expr = m.group("expr")
# Line mapping stuff
if (
hasattr(node, "linestart")
and node.linestart
and hasattr(node, "current_line_number")
):
self.source_linemap[self.current_line_number] = node.linestart
if expr[0] == "%":
index = entry[arg]
self.template_engine((expr, index), node)
arg += 1
else:
d = node.__dict__
try:
self.write(eval(expr, d, d))
except:
raise
m = escape.search(fmt, i)
self.write(fmt[i:])
def default(self, node):
mapping = self._get_mapping(node)
table = mapping[0]
key = node
for i in mapping[1:]:
key = key[i]
pass
if key.kind in table:
self.template_engine(table[key.kind], node)
self.prune()
def customize(self, customize):
"""
Special handling for opcodes, such as those that take a variable number
of arguments -- we add a new entry for each in TABLE_R.
"""
for k, v in list(customize.items()):
if k in TABLE_R:
continue
op = k[: k.rfind("_")]
if k.startswith("CALL_METHOD"):
# This happens in PyPy and Python 3.7+
TABLE_R[k] = ("%c(%P)", (0, "expr"), (1, -1, ", ", 100))
elif self.version >= (3, 6) and k.startswith("CALL_FUNCTION_KW"):
TABLE_R[k] = ("%c(%P)", (0, "expr"), (1, -1, ", ", 100))
elif op == "CALL_FUNCTION":
TABLE_R[k] = (
"%c(%P)",
(0, "expr"),
(1, -1, ", ", PRECEDENCE["yield"] - 1),
)
elif op in (
"CALL_FUNCTION_VAR",
"CALL_FUNCTION_VAR_KW",
"CALL_FUNCTION_KW",
):
# FIXME: handle everything in customize.
# Right now, some of this is here, and some in that.
if v == 0:
template_str = "%c(%C" # '%C' is a dummy here ...
p2 = (0, 0, None) # because of the None in this
else:
template_str = "%c(%C, "
p2 = (1, -2, ", ")
if op == "CALL_FUNCTION_VAR":
# Python 3.5 only puts optional args (the VAR part)
# the lowest down the stack
if self.version == (3, 5):
if template_str == "%c(%C, ":
entry = ("%c(*%C, %c)", 0, p2, -2)
elif template_str == "%c(%C":
entry = ("%c(*%C)", 0, (1, 100, ""))
elif self.version == (3, 4):
# CALL_FUNCTION_VAR's top element of the stack contains
# the variable argument list
if v == 0:
template_str = "%c(*%c)"
entry = (template_str, 0, -2)
else:
template_str = "%c(%C, *%c)"
entry = (template_str, 0, p2, -2)
else:
template_str += "*%c)"
entry = (template_str, 0, p2, -2)
elif op == "CALL_FUNCTION_KW":
template_str += "**%c)"
entry = (template_str, 0, p2, -2)
elif op == "CALL_FUNCTION_VAR_KW":
template_str += "*%c, **%c)"
# Python 3.5 only puts optional args (the VAR part)
# the lowest down the stack
na = v & 0xFF # positional parameters
if self.version == (3, 5) and na == 0:
if p2[2]:
p2 = (2, -2, ", ")
entry = (template_str, 0, p2, 1, -2)
else:
if p2[2]:
p2 = (1, -3, ", ")
entry = (template_str, 0, p2, -3, -2)
pass
else:
assert False, "Unhandled CALL_FUNCTION %s" % op
TABLE_R[k] = entry
pass
# handled by n_dict:
# if op == 'BUILD_SLICE': TABLE_R[k] = ('%C' , (0,-1,':'))
# handled by n_list:
# if op == 'BUILD_LIST': TABLE_R[k] = ('[%C]' , (0,-1,', '))
# elif op == 'BUILD_TUPLE': TABLE_R[k] = ('(%C%,)', (0,-1,', '))
pass
return
# This code is only for Python 1.x - 2.1 ish!
def get_tuple_parameter(self, ast, name):
"""
If the name of the formal parameter starts with dot,
it's a tuple parameter, like this:
# def MyFunc(xx, (a,b,c), yy):
# print a, b*2, c*42
In byte-code, the whole tuple is assigned to parameter '.1' and
then the tuple gets unpacked to 'a', 'b' and 'c'.
Since identifiers starting with a dot are illegal in Python,
we can search for the byte-code equivalent to '(a,b,c) = .1'
"""
assert ast == "stmts"
for i in range(len(ast)):
# search for an assign-statement
if ast[i] == "sstmt":
node = ast[i][0]
else:
node = ast[i]
if node == "assign" and node[0] == ASSIGN_TUPLE_PARAM(name):
# okay, this assigns '.n' to something
del ast[i]
# walk lhs; this
# returns a tuple of identifiers as used
# within the function definition
assert node[1] == "store"
# if lhs is not a UNPACK_TUPLE (or equiv.),
# add parentheses to make this a tuple
# if node[1][0] not in ('unpack', 'unpack_list'):
result = self.traverse(node[1])
if not (result.startswith("(") and result.endswith(")")):
result = "(%s)" % result
return result
# return self.traverse(node[1])
return f"({name}"
def build_class(self, code):
"""Dump class definition, doc string and class body."""
assert iscode(code)
self.classes.append(self.currentclass)
code = Code(code, self.scanner, self.currentclass)
indent = self.indent
# self.println(indent, '#flags:\t', int(code.co_flags))
ast = self.build_ast(code._tokens, code._customize, code)
# save memory by deleting no-longer-used structures
code._tokens = None
if ast[0] == "sstmt":
ast[0] = ast[0][0]
first_stmt = ast[0]
if ast[0] == "docstring":
self.println(self.traverse(ast[0]))
del ast[0]
first_stmt = ast[0]
if (3, 0) <= self.version <= (3, 3):
try:
if first_stmt == "store_locals":
if self.hide_internal:
del ast[0]
if ast[0] == "sstmt":
ast[0] = ast[0][0]
first_stmt = ast[0]
except Exception:
pass
try:
if first_stmt == NAME_MODULE:
if self.hide_internal:
del ast[0]
first_stmt = ast[0]
pass
except Exception:
pass
have_qualname = False
if len(ast):
if ast[0] == "sstmt":
ast[0] = ast[0][0]
first_stmt = ast[0]
if self.version < (3, 0):
# Should we ditch this in favor of the "else" case?
qualname = ".".join(self.classes)
qual_name_tree = SyntaxTree(
"assign",
[
SyntaxTree("expr", [Token("LOAD_CONST", pattr=qualname)]),
SyntaxTree("store", [Token("STORE_NAME", pattr="__qualname__")]),
],
)
# FIXME: is this right now that we've redone the grammar?
have_qualname = ast[0] == qual_name_tree
else:
# Python 3.4+ has constants like 'cmp_to_key.<locals>.K'
# which are not simple classes like the < 3 case.
try:
if (
first_stmt == "assign"
and first_stmt[0][0] == "LOAD_STR"
and first_stmt[1] == "store"
and first_stmt[1][0] == Token("STORE_NAME", pattr="__qualname__")
):
have_qualname = True
except Exception:
pass
if have_qualname:
if self.hide_internal:
del ast[0]
pass
# if docstring exists, dump it
if code.co_consts and code.co_consts[0] is not None and len(ast) > 0:
do_doc = False
if is_docstring(ast[0], self.version, code.co_consts):
i = 0
do_doc = True
elif len(ast) > 1 and is_docstring(ast[1], self.version, code.co_consts):
i = 1
do_doc = True
if do_doc and self.hide_internal:
try:
# FIXME: Is there an extra [0]?
docstring = ast[i][0][0][0][0].pattr
except Exception:
docstring = code.co_consts[0]
if print_docstring(self, indent, docstring):
self.println()
del ast[i]
# The function defining a class returns locals() in Python somewhere less than
# 3.7.
#
# We don't want this to show up in the source, so remove the node.
if len(ast):
if ast == "stmts" and ast[-1] == "sstmt":
return_locals_parent = ast[-1]
parent_index = 0
else:
return_locals_parent = ast
parent_index = -1
return_locals = return_locals_parent[parent_index]
if return_locals == RETURN_LOCALS:
if self.hide_internal:
del return_locals_parent[parent_index]
pass
pass
# else:
# print stmt[-1]
globals, nonlocals = find_globals_and_nonlocals(
ast, set(), set(), code, self.version
)
# Add "global" declaration statements at the top
# of the function
for g in sorted(globals):
self.println(indent, "global ", g)
for nl in sorted(nonlocals):
self.println(indent, "nonlocal ", nl)
old_name = self.name
self.gen_source(ast, code.co_name, code._customize)
self.name = old_name
# save memory by deleting no-longer-used structures
code._tokens = None
code._customize = None
self.classes.pop(-1)
def gen_source(
self,
ast,
name,
customize,
is_lambda=False,
returnNone=False,
debug_opts=DEFAULT_DEBUG_OPTS,
):
"""convert parse tree to Python source code"""
rn = self.return_none
self.return_none = returnNone
old_name = self.name
self.name = name
self.debug_opts = debug_opts
# if code would be empty, append 'pass'
if len(ast) == 0:
self.println(self.indent, "pass")
else:
self.customize(customize)
self.text = self.traverse(ast, is_lambda=is_lambda)
# In a formatted string using "lambda", we should not add "\n".
# For example in:
# f'{(lambda x:x)("8")!r}'
# Adding a "\n" after "lambda x: x" will give an error message:
# SyntaxError: f-string expression part cannot include a backslash
# So avoid \n after writing text
self.write(self.text)
self.name = old_name
self.return_none = rn
def build_ast(
self,
tokens,
customize,
code,
is_lambda=False,
noneInNames=False,
is_top_level_module=False,
):
# FIXME: DRY with fragments.py
# assert isinstance(tokens[0], Token)
if is_lambda:
for t in tokens:
if t.kind == "RETURN_END_IF":
t.kind = "RETURN_END_IF_LAMBDA"
elif t.kind == "RETURN_VALUE":
t.kind = "RETURN_VALUE_LAMBDA"
tokens.append(Token("LAMBDA_MARKER"))
try:
# FIXME: have p.insts update in a better way
# modularity is broken here
p_insts = self.p.insts
self.p.insts = self.scanner.insts
self.p.offset2inst_index = self.scanner.offset2inst_index
ast = parse(self.p, tokens, customize, code)
self.customize(customize)
self.p.insts = p_insts
except (ParserError, AssertionError) as e:
raise ParserError(e, tokens, self.p.debug["reduce"])
transform_tree = self.treeTransform.transform(ast, code)
self.maybe_show_tree(ast, phase="after")
del ast # Save memory
return transform_tree
# The bytecode for the end of the main routine has a "return
# None". However, you can't issue a "return" statement in
# main. So as the old cigarette slogan goes: I'd rather switch
# (the token stream) than fight (with the grammar to not emit
# "return None").
if self.hide_internal:
if len(tokens) >= 2 and not noneInNames:
if tokens[-1].kind in ("RETURN_VALUE", "RETURN_VALUE_LAMBDA"):
# Python 3.4's classes can add a "return None" which is
# invalid syntax.
if tokens[-2].kind == "LOAD_CONST":
if is_top_level_module or tokens[-2].pattr is None:
del tokens[-2:]
else:
tokens.append(Token("RETURN_LAST"))
else:
tokens.append(Token("RETURN_LAST"))
if len(tokens) == 0:
return PASS
# Build a parse tree from a tokenized and massaged disassembly.
try:
# FIXME: have p.insts update in a better way
# modularity is broken here
p_insts = self.p.insts
self.p.insts = self.scanner.insts
self.p.offset2inst_index = self.scanner.offset2inst_index
self.p.opc = self.scanner.opc
ast = parse(self.p, tokens, customize, code)
self.p.insts = p_insts
except (ParserError, AssertionError) as e:
raise ParserError(e, tokens, self.p.debug["reduce"])
checker(ast, False, self.ast_errors)
self.customize(customize)
transform_tree = self.treeTransform.transform(ast, code)
self.maybe_show_tree(ast, phase="before")
del ast # Save memory
return transform_tree
@classmethod
def _get_mapping(cls, node):
return MAP.get(node, MAP_DIRECT)
def code_deparse(
co,
out=sys.stdout,
version=None,
debug_opts=DEFAULT_DEBUG_OPTS,
code_objects={},
compile_mode="exec",
is_pypy=IS_PYPY,
walker=SourceWalker,
):
"""
ingests and deparses a given code block 'co'. If version is None,
we will use the current Python interpreter version.
"""
assert iscode(co)
if version is None:
version = PYTHON_VERSION_TRIPLE
# store final output stream for case of error
scanner = get_scanner(version, is_pypy=is_pypy, show_asm=debug_opts["asm"])
tokens, customize = scanner.ingest(
co, code_objects=code_objects, show_asm=debug_opts["asm"]
)
debug_parser = debug_opts.get("grammar", dict(PARSER_DEFAULT_DEBUG))
# Build Syntax Tree from disassembly.
linestarts = dict(scanner.opc.findlinestarts(co))
deparsed = walker(
version,
out,
scanner,
showast=debug_opts.get("tree", TREE_DEFAULT_DEBUG),
debug_parser=debug_parser,
compile_mode=compile_mode,
is_pypy=is_pypy,
linestarts=linestarts,
)
is_top_level_module = co.co_name == "<module>"
if compile_mode == "eval":
deparsed.hide_internal = False
deparsed.compile_mode = compile_mode
deparsed.ast = deparsed.build_ast(
tokens,
customize,
co,
is_lambda=(compile_mode == "lambda"),
is_top_level_module=is_top_level_module,
)
# XXX workaround for profiling
if deparsed.ast is None:
return None
# FIXME use a lookup table here.
if compile_mode == "lambda":
expected_start = "lambda_start"
elif compile_mode == "eval":
expected_start = "expr_start"
elif compile_mode == "expr":
expected_start = "expr_start"
elif compile_mode == "exec":
expected_start = "stmts"
elif compile_mode == "single":
# expected_start = "single_start"
expected_start = None
else:
expected_start = None
if expected_start:
assert (
deparsed.ast == expected_start
), f"Should have parsed grammar start to '{expected_start}'; got: {deparsed.ast.kind}"
# save memory
del tokens
deparsed.mod_globs, nonlocals = find_globals_and_nonlocals(
deparsed.ast, set(), set(), co, version
)
assert not nonlocals
# convert leading '__doc__ = "..." into doc string
try:
stmts = deparsed.ast
first_stmt = stmts[0]
if version >= (3, 6):
if first_stmt[0] == "SETUP_ANNOTATIONS":
del stmts[0]
assert stmts[0] == "sstmt"
# Nuke sstmt
first_stmt = stmts[0][0]
pass
pass
if first_stmt == "docstring":
print_docstring(deparsed, "", co.co_consts[0])
del stmts[0]
if stmts[-1] == RETURN_NONE:
stmts.pop() # remove last node
# todo: if empty, add 'pass'
except Exception:
pass
deparsed.FUTURE_UNICODE_LITERALS = (
COMPILER_FLAG_BIT["FUTURE_UNICODE_LITERALS"] & co.co_flags != 0
)
# What we've been waiting for: Generate source from Syntax Tree!
deparsed.gen_source(
deparsed.ast,
name=co.co_name,
customize=customize,
is_lambda=compile_mode == "lambda",
debug_opts=debug_opts,
)
for g in sorted(deparsed.mod_globs):
deparsed.write("# global %s ## Warning: Unused global\n" % g)
if deparsed.ast_errors:
deparsed.write("# NOTE: have internal decompilation grammar errors.\n")
deparsed.write("# Use -T option to show full context.")
for err in deparsed.ast_errors:
deparsed.write(err)
raise SourceWalkerError("Deparsing hit an internal grammar-rule bug")
if deparsed.ERROR:
raise SourceWalkerError("Deparsing stopped due to parse error")
return deparsed
def deparse_code2str(
code,
out=sys.stdout,
version=None,
debug_opts=DEFAULT_DEBUG_OPTS,
code_objects={},
compile_mode="exec",
is_pypy=IS_PYPY,
walker=SourceWalker,
):
"""Return the deparsed text for a Python code object. `out` is where any intermediate
output for assembly or tree output will be sent.
"""
return code_deparse(
code,
out,
version,
debug_opts,
code_objects=code_objects,
compile_mode=compile_mode,
is_pypy=is_pypy,
walker=walker,
).text
if __name__ == "__main__":
def deparse_test(co):
"""This is a docstring"""
s = deparse_code2str(co)
# s = deparse_code2str(co, debug_opts={"asm": "after", "tree": {'before': False, 'after': False}})
print(s)
return
deparse_test(deparse_test.__code__)