From 9a7eb0ad0a969147b8af1349e2deca204f6255a8 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 15 Jan 2023 22:48:07 -0500 Subject: [PATCH] try to be more honest about MAKE_{FUNCTION,CLOSURE} --- uncompyle6/parser.py | 10 ++- uncompyle6/parsers/parse3.py | 148 ++++++++++++++++---------------- uncompyle6/parsers/parse32.py | 2 +- uncompyle6/scanner.py | 23 ++++- uncompyle6/scanners/scanner3.py | 41 ++++----- 5 files changed, 122 insertions(+), 102 deletions(-) diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 3ef8a5a0..e694f944 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -213,10 +213,16 @@ class PythonParser(GenericASTBuilder): raise ParserError(None, -1, self.debug["reduce"]) def get_pos_kw(self, token): - """Return then the number of positional parameters and - represented by the attr field of token""" + """ + Return the number of positional parameters and keyword + parameters represented by the attr (operand) field of + token. + + This appears in CALL_FUNCTION or CALL_METHOD (PyPy) tokens + """ # Low byte indicates number of positional paramters, # high byte number of keyword parameters + assert token.kind.startswith("CALL_FUNCTION") or token.kind.startswith("CALL_METHOD") args_pos = token.attr & 0xFF args_kw = (token.attr >> 8) & 0xFF return args_pos, args_kw diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index bb26495c..e71d3100 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -541,9 +541,9 @@ class Python3Parser(PythonParser): # token found, while this one doesn't. 
if self.version < (3, 6): call_function = self.call_fn_name(call_fn_tok) - args_pos, args_kw = self.get_pos_kw(call_fn_tok) + pos_args_count, kw_args_count = self.get_pos_kw(call_fn_tok) rule = "build_class ::= LOAD_BUILD_CLASS mkfunc %s" "%s" % ( - ("expr " * (args_pos - 1) + ("kwarg " * args_kw)), + ("expr " * (pos_args_count - 1) + ("kwarg " * kw_args_count)), call_function, ) else: @@ -552,10 +552,10 @@ class Python3Parser(PythonParser): if call_function.startswith("CALL_FUNCTION_KW"): self.addRule("classdef ::= build_class_kw store", nop_func) if is_pypy: - args_pos, args_kw = self.get_pos_kw(call_fn_tok) + pos_args_count, kw_args_count = self.get_pos_kw(call_fn_tok) rule = "build_class_kw ::= LOAD_BUILD_CLASS mkfunc %s%s%s" % ( - "expr " * (args_pos - 1), - "kwarg " * (args_kw), + "expr " * (pos_args_count - 1), + "kwarg " * (kw_args_count), call_function, ) else: @@ -581,7 +581,7 @@ class Python3Parser(PythonParser): classdefdeco2 ::= LOAD_BUILD_CLASS mkfunc {expr}^n-1 CALL_FUNCTION_n """ - args_pos, args_kw = self.get_pos_kw(token) + pos_args_count, kw_args_count = self.get_pos_kw(token) # Additional exprs for * and ** args: # 0 if neither @@ -590,7 +590,7 @@ class Python3Parser(PythonParser): # Yes, this computation based on instruction name is a little bit hoaky. nak = (len(opname) - len("CALL_FUNCTION")) // 3 - uniq_param = args_kw + args_pos + uniq_param = kw_args_count + pos_args_count # Note: 3.5+ have subclassed this method; so we don't handle # 'CALL_FUNCTION_VAR' or 'CALL_FUNCTION_EX' here. 
@@ -599,16 +599,16 @@ class Python3Parser(PythonParser): token.kind = self.call_fn_name(token) rule = ( "call ::= expr " - + ("pos_arg " * args_pos) - + ("kwarg " * args_kw) + + ("pos_arg " * pos_args_count) + + ("kwarg " * kw_args_count) + token.kind ) else: token.kind = self.call_fn_name(token) rule = ( "call ::= expr " - + ("pos_arg " * args_pos) - + ("kwarg " * args_kw) + + ("pos_arg " * pos_args_count) + + ("kwarg " * kw_args_count) + "expr " * nak + token.kind ) @@ -616,11 +616,11 @@ class Python3Parser(PythonParser): self.add_unique_rule(rule, token.kind, uniq_param, customize) if "LOAD_BUILD_CLASS" in self.seen_ops: - if next_token == "CALL_FUNCTION" and next_token.attr == 1 and args_pos > 1: + if next_token == "CALL_FUNCTION" and next_token.attr == 1 and pos_args_count > 1: rule = "classdefdeco2 ::= LOAD_BUILD_CLASS mkfunc %s%s_%d" % ( - ("expr " * (args_pos - 1)), + ("expr " * (pos_args_count - 1)), opname, - args_pos, + pos_args_count, ) self.add_unique_rule(rule, token.kind, uniq_param, customize) @@ -955,14 +955,14 @@ class Python3Parser(PythonParser): elif opname_base == "CALL_METHOD": # PyPy and Python 3.7+ only - DRY with parse2 - args_pos, args_kw = self.get_pos_kw(token) + pos_args_count, kw_args_count = self.get_pos_kw(token) # number of apply equiv arguments: nak = (len(opname_base) - len("CALL_METHOD")) // 3 rule = ( "call ::= expr " - + ("pos_arg " * args_pos) - + ("kwarg " * args_kw) + + ("pos_arg " * pos_args_count) + + ("kwarg " * kw_args_count) + "expr " * nak + opname ) @@ -1096,7 +1096,7 @@ class Python3Parser(PythonParser): """ self.addRule(rule, nop_func) - args_pos, args_kw, annotate_args = token.attr + pos_args_count, kw_args_count, annotate_args = token.attr # FIXME: Fold test into add_make_function_rule if self.version < (3, 3): @@ -1105,7 +1105,7 @@ class Python3Parser(PythonParser): j = 2 if self.is_pypy or (i >= j and tokens[i - j] == "LOAD_LAMBDA"): rule_pat = "lambda_body ::= %sload_closure LOAD_LAMBDA %%s%s" % ( - "pos_arg " 
* args_pos, + "pos_arg " * pos_args_count, opname, ) self.add_make_function_rule(rule_pat, opname, token.attr, customize) @@ -1113,7 +1113,7 @@ class Python3Parser(PythonParser): if has_get_iter_call_function1: rule_pat = ( "generator_exp ::= %sload_closure load_genexpr %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * args_pos, opname) + "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule(rule_pat, opname, token.attr, customize) @@ -1129,7 +1129,7 @@ class Python3Parser(PythonParser): rule_pat = ( "listcomp ::= %sload_closure LOAD_LISTCOMP %%s%s expr " "GET_ITER CALL_FUNCTION_1" - % ("pos_arg " * args_pos, opname) + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize @@ -1138,7 +1138,7 @@ class Python3Parser(PythonParser): rule_pat = ( "set_comp ::= %sload_closure LOAD_SETCOMP %%s%s expr " "GET_ITER CALL_FUNCTION_1" - % ("pos_arg " * args_pos, opname) + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize @@ -1149,13 +1149,13 @@ class Python3Parser(PythonParser): self.add_unique_rule( "dict_comp ::= %sload_closure LOAD_DICTCOMP %s " "expr GET_ITER CALL_FUNCTION_1" - % ("pos_arg " * args_pos, opname), + % ("pos_arg " * pos_args_count, opname), opname, token.attr, customize, ) - if args_kw > 0: + if kw_args_count > 0: kwargs_str = "kwargs " else: kwargs_str = "" @@ -1167,34 +1167,34 @@ class Python3Parser(PythonParser): "mkfunc_annotate ::= %s%s%sannotate_tuple load_closure LOAD_CODE %s" % ( kwargs_str, - "pos_arg " * args_pos, - "annotate_arg " * (annotate_args - 1), + "pos_arg " * pos_args_count, + "annotate_arg " * (annotate_args), opname, ) ) else: rule = "mkfunc ::= %s%sload_closure LOAD_CODE %s" % ( kwargs_str, - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, opname, ) self.add_unique_rule(rule, opname, token.attr, customize) - elif (3, 3) <= self.version < (3, 5): + elif (3, 3) <= 
self.version < (3, 6): if annotate_args > 0: rule = ( "mkfunc_annotate ::= %s%s%sannotate_tuple load_closure LOAD_CODE LOAD_STR %s" % ( kwargs_str, - "pos_arg " * args_pos, - "annotate_arg " * (annotate_args - 1), + "pos_arg " * pos_args_count, + "annotate_arg " * (annotate_args), opname, ) ) else: rule = "mkfunc ::= %s%sload_closure LOAD_CODE LOAD_STR %s" % ( kwargs_str, - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, opname, ) self.add_unique_rule(rule, opname, token.attr, customize) @@ -1210,16 +1210,16 @@ class Python3Parser(PythonParser): rule = ( "mkfunc_annotate ::= %s%s%sannotate_tuple load_closure %s %s" % ( - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, kwargs_str, - "annotate_arg " * (annotate_args - 1), + "annotate_arg " * (annotate_args), load_op, opname, ) ) else: rule = "mkfunc ::= %s%s load_closure LOAD_CODE %s %s" % ( - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, kwargs_str, load_op, opname, @@ -1227,16 +1227,16 @@ class Python3Parser(PythonParser): self.add_unique_rule(rule, opname, token.attr, customize) - if args_kw == 0: + if kw_args_count == 0: rule = "mkfunc ::= %sload_closure load_genexpr %s" % ( - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, opname, ) self.add_unique_rule(rule, opname, token.attr, customize) if self.version < (3, 4): rule = "mkfunc ::= %sload_closure LOAD_CODE %s" % ( - "expr " * args_pos, + "expr " * pos_args_count, opname, ) self.add_unique_rule(rule, opname, token.attr, customize) @@ -1247,10 +1247,10 @@ class Python3Parser(PythonParser): if self.version >= (3, 6): # The semantics of MAKE_FUNCTION in 3.6 are totally different from # before. 
- args_pos, args_kw, annotate_args, closure = token.attr - stack_count = args_pos + args_kw + annotate_args + pos_args_count, kw_args_count, annotate_args, closure = token.attr + stack_count = pos_args_count + kw_args_count + annotate_args if closure: - if args_pos: + if pos_args_count: rule = "lambda_body ::= %s%s%s%s" % ( "expr " * stack_count, "load_closure " * closure, @@ -1283,14 +1283,14 @@ class Python3Parser(PythonParser): if has_get_iter_call_function1: rule_pat = ( "generator_exp ::= %sload_genexpr %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * args_pos, opname) + "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize ) rule_pat = ( "generator_exp ::= %sload_closure load_genexpr %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * args_pos, opname) + "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize @@ -1312,7 +1312,7 @@ class Python3Parser(PythonParser): rule_pat = ( "listcomp ::= %sLOAD_LISTCOMP %%s%s expr " "GET_ITER CALL_FUNCTION_1" - % ("expr " * args_pos, opname) + % ("expr " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize @@ -1320,8 +1320,8 @@ class Python3Parser(PythonParser): if self.is_pypy or (i >= 2 and tokens[i - 2] == "LOAD_LAMBDA"): rule_pat = "lambda_body ::= %s%sLOAD_LAMBDA %%s%s" % ( - ("pos_arg " * args_pos), - ("kwarg " * args_kw), + ("pos_arg " * pos_args_count), + ("kwarg " * kw_args_count), opname, ) self.add_make_function_rule( @@ -1330,9 +1330,9 @@ class Python3Parser(PythonParser): continue if self.version < (3, 6): - args_pos, args_kw, annotate_args = token.attr + pos_args_count, kw_args_count, annotate_args = token.attr else: - args_pos, args_kw, annotate_args, closure = token.attr + pos_args_count, kw_args_count, annotate_args, closure = token.attr if self.version < (3, 3): j = 1 @@ -1342,7 
+1342,7 @@ class Python3Parser(PythonParser): if has_get_iter_call_function1: rule_pat = ( "generator_exp ::= %sload_genexpr %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * args_pos, opname) + "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule(rule_pat, opname, token.attr, customize) @@ -1354,7 +1354,7 @@ class Python3Parser(PythonParser): # Todo: For Pypy we need to modify this slightly rule_pat = ( "listcomp ::= %sLOAD_LISTCOMP %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("expr " * args_pos, opname) + "GET_ITER CALL_FUNCTION_1" % ("expr " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize @@ -1363,13 +1363,13 @@ class Python3Parser(PythonParser): # FIXME: Fold test into add_make_function_rule if self.is_pypy or (i >= j and tokens[i - j] == "LOAD_LAMBDA"): rule_pat = "lambda_body ::= %s%sLOAD_LAMBDA %%s%s" % ( - ("pos_arg " * args_pos), - ("kwarg " * args_kw), + ("pos_arg " * pos_args_count), + ("kwarg " * kw_args_count), opname, ) self.add_make_function_rule(rule_pat, opname, token.attr, customize) - if args_kw == 0: + if kw_args_count == 0: kwargs = "no_kwargs" self.add_unique_rule("no_kwargs ::=", opname, token.attr, customize) else: @@ -1379,13 +1379,13 @@ class Python3Parser(PythonParser): # positional args after keyword args rule = "mkfunc ::= %s %s%s%s" % ( kwargs, - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, "LOAD_CODE ", opname, ) self.add_unique_rule(rule, opname, token.attr, customize) rule = "mkfunc ::= %s%s%s" % ( - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, "LOAD_CODE ", opname, ) @@ -1393,14 +1393,14 @@ class Python3Parser(PythonParser): # positional args after keyword args rule = "mkfunc ::= %s %s%s%s" % ( kwargs, - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, "LOAD_CODE LOAD_STR ", opname, ) elif self.version >= (3, 6): # positional args before keyword args rule = "mkfunc ::= %s%s %s%s" % ( - "pos_arg " * args_pos, + 
"pos_arg " * pos_args_count, kwargs, "LOAD_CODE LOAD_STR ", opname, @@ -1408,7 +1408,7 @@ class Python3Parser(PythonParser): elif self.version >= (3, 4): # positional args before keyword args rule = "mkfunc ::= %s%s %s%s" % ( - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, kwargs, "LOAD_CODE LOAD_STR ", opname, @@ -1416,7 +1416,7 @@ class Python3Parser(PythonParser): else: rule = "mkfunc ::= %s%sexpr %s" % ( kwargs, - "pos_arg " * args_pos, + "pos_arg " * pos_args_count, opname, ) self.add_unique_rule(rule, opname, token.attr, customize) @@ -1426,8 +1426,8 @@ class Python3Parser(PythonParser): rule = ( "mkfunc_annotate ::= %s%sannotate_tuple LOAD_CODE LOAD_STR %s" % ( - ("pos_arg " * (args_pos)), - ("call " * (annotate_args - 1)), + ("pos_arg " * pos_args_count), + ("call " * annotate_args), opname, ) ) @@ -1435,8 +1435,8 @@ class Python3Parser(PythonParser): rule = ( "mkfunc_annotate ::= %s%sannotate_tuple LOAD_CODE LOAD_STR %s" % ( - ("pos_arg " * (args_pos)), - ("annotate_arg " * (annotate_args - 1)), + ("pos_arg " * pos_args_count), + ("annotate_arg " * annotate_args), opname, ) ) @@ -1447,21 +1447,21 @@ class Python3Parser(PythonParser): if self.version == (3, 3): # 3.3 puts kwargs before pos_arg pos_kw_tuple = ( - ("kwargs " * args_kw), - ("pos_arg " * (args_pos)), + ("kwargs " * kw_args_count), + ("pos_arg " * pos_args_count), ) else: # 3.4 and 3.5puts pos_arg before kwargs pos_kw_tuple = ( - "pos_arg " * (args_pos), - ("kwargs " * args_kw), + "pos_arg " * (pos_args_count), + ("kwargs " * kw_args_count), ) rule = ( "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE LOAD_STR EXTENDED_ARG %s" % ( pos_kw_tuple[0], pos_kw_tuple[1], - ("call " * (annotate_args - 1)), + ("call " * annotate_args), opname, ) ) @@ -1471,7 +1471,7 @@ class Python3Parser(PythonParser): % ( pos_kw_tuple[0], pos_kw_tuple[1], - ("annotate_arg " * (annotate_args - 1)), + ("annotate_arg " * annotate_args), opname, ) ) @@ -1480,9 +1480,9 @@ class Python3Parser(PythonParser): rule = 
( "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE EXTENDED_ARG %s" % ( - ("kwargs " * args_kw), - ("pos_arg " * (args_pos)), - ("annotate_arg " * (annotate_args - 1)), + ("kwargs " * kw_args_count), + ("pos_arg " * (pos_args_count)), + ("annotate_arg " * annotate_args), opname, ) ) @@ -1490,9 +1490,9 @@ class Python3Parser(PythonParser): rule = ( "mkfunc_annotate ::= %s%s%sannotate_tuple LOAD_CODE EXTENDED_ARG %s" % ( - ("kwargs " * args_kw), - ("pos_arg " * (args_pos)), - ("call " * (annotate_args - 1)), + ("kwargs " * kw_args_count), + ("pos_arg " * pos_args_count), + ("call " * annotate_args), opname, ) ) diff --git a/uncompyle6/parsers/parse32.py b/uncompyle6/parsers/parse32.py index 0c3d4d6d..f0706798 100644 --- a/uncompyle6/parsers/parse32.py +++ b/uncompyle6/parsers/parse32.py @@ -92,7 +92,7 @@ class Python32Parser(Python3Parser): "LOAD_CONST LOAD_CODE EXTENDED_ARG %s" ) % ( ("pos_arg " * args_pos), - ("annotate_arg " * (annotate_args - 1)), + ("annotate_arg " * (annotate_args)), opname, ) self.add_unique_rule(rule, opname, token.attr, customize) diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index b8e621e7..18e885e4 100644 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -21,7 +21,7 @@ scanner/ingestion module. From here we call various version-specific scanners, e.g. for Python 2.7 or 3.4. """ -from typing import Optional +from typing import Optional, Tuple from array import array from collections import namedtuple @@ -600,8 +600,25 @@ class Scanner(object): return self.Token -def parse_fn_counts(argc): - return ((argc & 0xFF), (argc >> 8) & 0xFF, (argc >> 16) & 0x7FFF) +# TODO: after the next xdis release, use from there instead. 
+def parse_fn_counts_30_35(argc: int) -> Tuple[int, int, int]: + """ + In Python 3.0 to 3.5 MAKE_CLOSURE and MAKE_FUNCTION encode + argument counts of positional, default + named, and annotation + arguments using a particular kind of encoding where each + entry is a packed byte value of the lower 24 bits + of ``argc``. The high bits of argc may have come from + an EXTENDED_ARG instruction. Here, we unpack the values + from the ``argc`` int and return a triple of the + positional args, named_args, and annotation args. + """ + annotate_count = (argc >> 16) & 0x7FFF + # For some reason that I don't understand, annotate_args is off by one + # when there is an EXTENDED_ARG instruction from what is documented in + # https://docs.python.org/3.4/library/dis.html#opcode-MAKE_CLOSURE + if annotate_count > 1: + annotate_count -= 1 + return ((argc & 0xFF), (argc >> 8) & 0xFF, annotate_count) def get_scanner(version, is_pypy=False, show_asm=None): diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 0cde31a1..1cf060e0 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2019, 2021-2022 by Rocky Bernstein +# Copyright (c) 2015-2019, 2021-2023 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # @@ -41,7 +41,7 @@ from xdis import iscode, instruction_size, Instruction from xdis.bytecode import _get_const_info from uncompyle6.scanners.tok import Token -from uncompyle6.scanner import parse_fn_counts +from uncompyle6.scanner import parse_fn_counts_30_35 import xdis # Get all the opcodes into globals @@ -363,7 +363,7 @@ class Scanner3(Scanner): ) new_tokens.append( Token( - opname=f"BUILD_DICT_OLDER", + opname="BUILD_DICT_OLDER", attr=t.attr, pattr=t.pattr, offset=t.offset, @@ -623,32 +623,29 @@ class Scanner3(Scanner): flags >>= 1 attr = attr[:4] # remove last value: attr[5] == False else: - pos_args, name_pair_args, annotate_args = 
parse_fn_counts( + pos_args, name_pair_args, annotate_args = parse_fn_counts_30_35( inst.argval - ) + ) - correct_annotate_args = annotate_args - if opname in ("MAKE_CLOSURE", "MAKE_FUNCTION") and ((3, 4) <= self.version < (3, 6)) and annotate_args > 0: - # For some reason that I don't understand, annotate_args is off by one - # when there is an EXENDED_ARG instruction from what is documented in - # https://docs.python.org/3.4/library/dis.html#opcode-MAKE_CLOSURE - # However in parsing rule, we have already adjusted for the one-fewer annotate arg - correct_annotate_args -= 1 + pattr = f"{pos_args} positional, {name_pair_args} keyword only, {annotate_args} annotated" - pattr = "%d positional, %d keyword only, %d annotated" % ( - pos_args, - name_pair_args, - correct_annotate_args, - ) - if name_pair_args > 0: + if name_pair_args > 0 and annotate_args > 0: # FIXME: this should probably be K_ - opname = "%s_N%d" % (opname, name_pair_args) + opname += f"_N{name_pair_args}_A{annotate_args}" pass - if annotate_args > 0: - opname = "%s_A_%d" % (opname, annotate_args) + elif annotate_args > 0: + opname += f"_A_{annotate_args}" pass - opname = "%s_%d" % (opname, pos_args) + elif name_pair_args > 0: + opname += f"_N_{name_pair_args}" + pass + else: + # Rule customization mathics, MAKE_FUNCTION_... + # so make sure to add the "_" + opname += "_0" + attr = (pos_args, name_pair_args, annotate_args) + new_tokens.append( Token( opname=opname,