From 5a83c7c643d6082d5829207864eca4969ee26a38 Mon Sep 17 00:00:00 2001 From: rocky Date: Tue, 19 May 2020 00:53:53 -0400 Subject: [PATCH] Simplify imports again using xdis 4.6.0 --- ...-test.py => simple-decompile-code-test.py} | 2 +- uncompyle6/disas.py | 3 +- uncompyle6/main.py | 3 +- uncompyle6/scanner.py | 6 +- uncompyle6/scanners/scanner3.py | 4 +- uncompyle6/scanners/scanner30.py | 293 +++++++----- uncompyle6/scanners/scanner37base.py | 4 +- uncompyle6/semantics/customize3.py | 3 +- uncompyle6/semantics/customize35.py | 3 +- uncompyle6/semantics/pysource.py | 3 +- uncompyle6/verify.py | 421 +++++++++++------- uncompyle6/version.py | 2 +- 12 files changed, 472 insertions(+), 275 deletions(-) rename test/{simple-uncompyle-code-test.py => simple-decompile-code-test.py} (89%) diff --git a/test/simple-uncompyle-code-test.py b/test/simple-decompile-code-test.py similarity index 89% rename from test/simple-uncompyle-code-test.py rename to test/simple-decompile-code-test.py index 8958ea36..c060f0bc 100755 --- a/test/simple-uncompyle-code-test.py +++ b/test/simple-decompile-code-test.py @@ -3,7 +3,7 @@ from __future__ import print_function from uncompyle6.main import decompile -from xdis.magics import sysinfo2float +from xdis import sysinfo2float import sys, inspect def uncompyle_test(): diff --git a/uncompyle6/disas.py b/uncompyle6/disas.py index 5bd4648e..843fb9a3 100644 --- a/uncompyle6/disas.py +++ b/uncompyle6/disas.py @@ -34,8 +34,7 @@ from __future__ import print_function import sys from collections import deque -from xdis import iscode, load_module -from xdis.load import check_object_path +from xdis import check_object_path, iscode, load_module from uncompyle6.scanner import get_scanner diff --git a/uncompyle6/main.py b/uncompyle6/main.py index c2f71894..62151205 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -16,8 +16,7 @@ from __future__ import print_function import datetime, py_compile, os, subprocess, sys, tempfile from uncompyle6 import verify, IS_PYPY, PYTHON_VERSION -from xdis import iscode -from xdis.magics import sysinfo2float +from xdis import iscode, sysinfo2float from uncompyle6.disas import check_object_path from uncompyle6.semantics import pysource from uncompyle6.parser import ParserError diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index bf767f9d..f5fa1d43 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016, 2018-2019 by Rocky Bernstein +# Copyright (c) 2016, 2018-2020 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -30,9 +30,7 @@ import sys from uncompyle6 import PYTHON3, IS_PYPY from uncompyle6.scanners.tok import Token import xdis -from xdis.bytecode import Bytecode, instruction_size, extended_arg_val, next_offset -from xdis.magics import canonic_python_version -from xdis.util import code2num +from xdis import Bytecode, canonic_python_version, code2num, instruction_size, extended_arg_val, next_offset # The byte code versions we support. # Note: these all have to be floats diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 7d52efe7..4ece378d 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -35,8 +35,8 @@ Finally we save token information. from __future__ import print_function -from xdis import iscode -from xdis.bytecode import instruction_size, _get_const_info +from xdis import iscode, instruction_size +from xdis.bytecode import _get_const_info from uncompyle6.scanner import Token, parse_fn_counts import xdis diff --git a/uncompyle6/scanners/scanner30.py b/uncompyle6/scanners/scanner30.py index ddc8259f..781c6421 100644 --- a/uncompyle6/scanners/scanner30.py +++ b/uncompyle6/scanners/scanner30.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016, 2017 by Rocky Bernstein +# Copyright (c) 2016-2017, 2020 by Rocky Bernstein """ Python 3.0 bytecode scanner/deparser @@ -10,17 +10,19 @@ from __future__ import print_function # bytecode verification, verify(), uses JUMP_OPs from here from xdis.opcodes import opcode_30 as opc -from xdis.bytecode import instruction_size +from xdis import instruction_size import xdis JUMP_TF = frozenset([opc.JUMP_IF_FALSE, opc.JUMP_IF_TRUE]) from uncompyle6.scanners.scanner3 import Scanner3 -class Scanner30(Scanner3): + +class Scanner30(Scanner3): def __init__(self, show_asm=None, is_pypy=False): Scanner3.__init__(self, 3.0, show_asm, is_pypy) return + pass def detect_control_flow(self, offset, targets, inst_index): @@ -35,17 +37,18 @@ class Scanner30(Scanner3): # Detect parent structure parent = self.structs[0] - start = parent['start'] - end = parent['end'] + start = parent["start"] + end = parent["end"] # Pick inner-most parent for our offset for struct in self.structs: - current_start = struct['start'] - current_end = struct['end'] - if ((current_start <= offset < current_end) - and (current_start >= start and current_end <= end)): - start = current_start - end = current_end + current_start = struct["start"] + current_end = struct["end"] + if (current_start <= offset < current_end) and ( + current_start >= start and current_end <= end + ): + start = current_start + end = current_end parent = struct if op == self.opc.SETUP_LOOP: @@ -56,28 +59,35 @@ class Scanner30(Scanner3): start += instruction_size(op, self.opc) target = self.get_target(offset) - end = self.restrict_to_parent(target, parent) + end = self.restrict_to_parent(target, parent) self.setup_loops[target] = offset if target != end: self.fixed_jumps[offset] = end (line_no, next_line_byte) = self.lines[offset] - jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE, - next_line_byte, False) + jump_back = self.last_instr( + start, end, self.opc.JUMP_ABSOLUTE, next_line_byte, False + ) if jump_back: - jump_forward_offset = xdis.next_offset(code[jump_back], self.opc, jump_back) + jump_forward_offset = xdis.next_offset( + code[jump_back], self.opc, jump_back + ) else: jump_forward_offset = None return_val_offset1 = self.prev[self.prev[end]] - if (jump_back and jump_back != self.prev_op[end] - and self.is_jump_forward(jump_forward_offset)): - if (code[self.prev_op[end]] == self.opc.RETURN_VALUE or - (code[self.prev_op[end]] == self.opc.POP_BLOCK - and code[return_val_offset1] == self.opc.RETURN_VALUE)): + if ( + jump_back + and jump_back != self.prev_op[end] + and self.is_jump_forward(jump_forward_offset) + ): + if code[self.prev_op[end]] == self.opc.RETURN_VALUE or ( + code[self.prev_op[end]] == self.opc.POP_BLOCK + and code[return_val_offset1] == self.opc.RETURN_VALUE + ): jump_back = None if not jump_back: # loop suite ends in return @@ -92,56 +102,63 @@ class Scanner30(Scanner3): if code[self.prev_op[next_line_byte]] not in JUMP_TF: if_offset = self.prev[next_line_byte] if if_offset: - loop_type = 'while' + loop_type = "while" self.ignore_if.add(if_offset) else: - loop_type = 'for' + loop_type = "for" target = next_line_byte end = jump_back + 3 else: if self.get_target(jump_back) >= next_line_byte: - jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE, start, False) + jump_back = self.last_instr( + start, end, self.opc.JUMP_ABSOLUTE, start, False + ) jb_inst = self.get_inst(jump_back) jb_next_offset = self.next_offset(jb_inst.opcode, jump_back) if end > jb_next_offset and self.is_jump_forward(end): if self.is_jump_forward(jb_next_offset): - if self.get_target(jump_back+4) == self.get_target(end): - self.fixed_jumps[offset] = jump_back+4 + if self.get_target(jump_back + 4) == self.get_target(end): + self.fixed_jumps[offset] = jump_back + 4 end = jb_next_offset elif target < offset: - self.fixed_jumps[offset] = jump_back+4 + self.fixed_jumps[offset] = jump_back + 4 end = jb_next_offset target = self.get_target(jump_back) if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER): - loop_type = 'for' + loop_type = "for" else: - loop_type = 'while' + loop_type = "while" test = self.prev_op[next_line_byte] if test == offset: - loop_type = 'while 1' + loop_type = "while 1" elif self.code[test] in self.opc.JUMP_OPs: self.ignore_if.add(test) test_target = self.get_target(test) - if test_target > (jump_back+3): + if test_target > (jump_back + 3): jump_back = test_target self.not_continue.add(jump_back) self.loops.append(target) - self.structs.append({'type': loop_type + '-loop', - 'start': target, - 'end': jump_back}) + self.structs.append( + {"type": loop_type + "-loop", "start": target, "end": jump_back} + ) after_jump_offset = xdis.next_offset(code[jump_back], self.opc, jump_back) - if (self.get_inst(after_jump_offset).opname == 'POP_TOP'): - after_jump_offset = xdis.next_offset(code[after_jump_offset], self.opc, - after_jump_offset) + if self.get_inst(after_jump_offset).opname == "POP_TOP": + after_jump_offset = xdis.next_offset( + code[after_jump_offset], self.opc, after_jump_offset + ) if after_jump_offset != end: - self.structs.append({'type': loop_type + '-else', - 'start': after_jump_offset, - 'end': end}) + self.structs.append( + { + "type": loop_type + "-else", + "start": after_jump_offset, + "end": end, + } + ) elif op in self.pop_jump_tf: start = offset + instruction_size(op, self.opc) target = self.get_target(offset) @@ -149,7 +166,7 @@ class Scanner30(Scanner3): prev_op = self.prev_op # Do not let jump to go out of parent struct bounds - if target != rtarget and parent['type'] == 'and/or': + if target != rtarget and parent["type"] == "and/or": self.fixed_jumps[offset] = rtarget return @@ -158,12 +175,15 @@ class Scanner30(Scanner3): # rocky: if we have a conditional jump to the next instruction, then # possibly I am "skipping over" a "pass" or null statement. - if ((code[prev_op[target]] in self.pop_jump_if_pop) and - (target > offset) and prev_op[target] != offset): + if ( + (code[prev_op[target]] in self.pop_jump_if_pop) + and (target > offset) + and prev_op[target] != offset + ): self.fixed_jumps[offset] = prev_op[target] - self.structs.append({'type': 'and/or', - 'start': start, - 'end': prev_op[target]}) + self.structs.append( + {"type": "and/or", "start": start, "end": prev_op[target]} + ) return # The op offset just before the target jump offset is important @@ -176,35 +196,80 @@ class Scanner30(Scanner3): # Search for another JUMP_IF_FALSE targetting the same op, # in current statement, starting from current offset, and filter # everything inside inner 'or' jumps and midline ifs - match = self.rem_or(start, self.next_stmt[offset], - opc.JUMP_IF_FALSE, target) + match = self.rem_or( + start, self.next_stmt[offset], opc.JUMP_IF_FALSE, target + ) # If we still have any offsets in set, start working on it if match: is_jump_forward = self.is_jump_forward(pre_rtarget) - if (is_jump_forward and pre_rtarget not in self.stmts and - self.restrict_to_parent(self.get_target(pre_rtarget), parent) == rtarget): - if (code[prev_op[pre_rtarget]] == self.opc.JUMP_ABSOLUTE - and self.remove_mid_line_ifs([offset]) and - target == self.get_target(prev_op[pre_rtarget]) and - (prev_op[pre_rtarget] not in self.stmts or - self.get_target(prev_op[pre_rtarget]) > prev_op[pre_rtarget]) and - 1 == len(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget], JUMP_TF, target)))): + if ( + is_jump_forward + and pre_rtarget not in self.stmts + and self.restrict_to_parent( + self.get_target(pre_rtarget), parent + ) + == rtarget + ): + if ( + code[prev_op[pre_rtarget]] == self.opc.JUMP_ABSOLUTE + and self.remove_mid_line_ifs([offset]) + and target == self.get_target(prev_op[pre_rtarget]) + and ( + prev_op[pre_rtarget] not in self.stmts + or self.get_target(prev_op[pre_rtarget]) + > prev_op[pre_rtarget] + ) + and 1 + == len( + self.remove_mid_line_ifs( + self.rem_or( + start, prev_op[pre_rtarget], JUMP_TF, target + ) + ) + ) + ): pass - elif (code[prev_op[pre_rtarget]] == self.opc.RETURN_VALUE - and self.remove_mid_line_ifs([offset]) and - 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget], - JUMP_TF, target))) | - set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget], - (opc.JUMP_IF_FALSE, - opc.JUMP_IF_TRUE, - opc.JUMP_ABSOLUTE), - pre_rtarget, True)))))): + elif ( + code[prev_op[pre_rtarget]] == self.opc.RETURN_VALUE + and self.remove_mid_line_ifs([offset]) + and 1 + == ( + len( + set( + self.remove_mid_line_ifs( + self.rem_or( + start, + prev_op[pre_rtarget], + JUMP_TF, + target, + ) + ) + ) + | set( + self.remove_mid_line_ifs( + self.rem_or( + start, + prev_op[pre_rtarget], + ( + opc.JUMP_IF_FALSE, + opc.JUMP_IF_TRUE, + opc.JUMP_ABSOLUTE, + ), + pre_rtarget, + True, + ) + ) + ) + ) + ) + ): pass else: fix = None - jump_ifs = self.inst_matches(start, self.next_stmt[offset], - opc.JUMP_IF_FALSE) + jump_ifs = self.inst_matches( + start, self.next_stmt[offset], opc.JUMP_IF_FALSE + ) last_jump_good = True for j in jump_ifs: if target == self.get_target(j): @@ -226,14 +291,19 @@ class Scanner30(Scanner3): pass elif self.is_jump_forward(next) and target == self.get_target(next): if code[prev_op[next]] == opc.JUMP_IF_FALSE: - if (code[next] == self.opc.JUMP_FORWARD + if ( + code[next] == self.opc.JUMP_FORWARD or target != rtarget - or code[prev_op[pre_rtarget]] not in - (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE)): + or code[prev_op[pre_rtarget]] + not in (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE) + ): self.fixed_jumps[offset] = prev_op[next] return - elif (code[next] == self.opc.JUMP_ABSOLUTE and self.is_jump_forward(target) and - self.get_target(target) == self.get_target(next)): + elif ( + code[next] == self.opc.JUMP_ABSOLUTE + and self.is_jump_forward(target) + and self.get_target(target) == self.get_target(next) + ): self.fixed_jumps[offset] = prev_op[next] return @@ -241,13 +311,17 @@ class Scanner30(Scanner3): if offset in self.ignore_if: return - if (code[pre_rtarget] == self.opc.JUMP_ABSOLUTE and - pre_rtarget in self.stmts and - pre_rtarget != offset and - prev_op[pre_rtarget] != offset and - not (code[rtarget] == self.opc.JUMP_ABSOLUTE and - code[rtarget+3] == self.opc.POP_BLOCK and - code[prev_op[pre_rtarget]] != self.opc.JUMP_ABSOLUTE)): + if ( + code[pre_rtarget] == self.opc.JUMP_ABSOLUTE + and pre_rtarget in self.stmts + and pre_rtarget != offset + and prev_op[pre_rtarget] != offset + and not ( + code[rtarget] == self.opc.JUMP_ABSOLUTE + and code[rtarget + 3] == self.opc.POP_BLOCK + and code[prev_op[pre_rtarget]] != self.opc.JUMP_ABSOLUTE + ) + ): rtarget = pre_rtarget # Does the "jump if" jump beyond a jump op? @@ -268,16 +342,17 @@ class Scanner30(Scanner3): if_end = self.get_target(pre_rtarget, 0) # If the jump target is back, we are looping - if (if_end < pre_rtarget and - (code[prev_op[if_end]] == self.opc.SETUP_LOOP)): - if (if_end > start): + if if_end < pre_rtarget and ( + code[prev_op[if_end]] == self.opc.SETUP_LOOP + ): + if if_end > start: return end = self.restrict_to_parent(if_end, parent) - self.structs.append({'type': 'if-then', - 'start': start, - 'end': pre_rtarget}) + self.structs.append( + {"type": "if-then", "start": start, "end": pre_rtarget} + ) self.not_continue.add(pre_rtarget) # if rtarget < end and ( @@ -291,20 +366,17 @@ class Scanner30(Scanner3): # self.else_start[rtarget] = end elif self.is_jump_back(pre_rtarget, 0): if_end = rtarget - self.structs.append({'type': 'if-then', - 'start': start, - 'end': pre_rtarget}) + self.structs.append( + {"type": "if-then", "start": start, "end": pre_rtarget} + ) self.not_continue.add(pre_rtarget) - elif code[pre_rtarget] in (self.opc.RETURN_VALUE, - self.opc.BREAK_LOOP): - self.structs.append({'type': 'if-then', - 'start': start, - 'end': rtarget}) + elif code[pre_rtarget] in (self.opc.RETURN_VALUE, self.opc.BREAK_LOOP): + self.structs.append({"type": "if-then", "start": start, "end": rtarget}) # It is important to distingish if this return is inside some sort # except block return jump_prev = prev_op[offset] if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP: - if self.opc.cmp_op[code[jump_prev+1]] == 'exception-match': + if self.opc.cmp_op[code[jump_prev + 1]] == "exception-match": return if self.version >= 3.5: # Python 3.5 may remove as dead code a JUMP @@ -332,7 +404,10 @@ class Scanner30(Scanner3): if code[next_op] == self.opc.POP_TOP: next_op = rtarget for block in self.structs: - if block['type'] == 'while-loop' and block['end'] == next_op: + if ( + block["type"] == "while-loop" + and block["end"] == next_op + ): return next_op += instruction_size(self.code[next_op], self.opc) if code[next_op] == self.opc.POP_BLOCK: @@ -342,20 +417,21 @@ class Scanner30(Scanner3): self.fixed_jumps[offset] = rtarget self.not_continue.add(pre_rtarget) - elif op == self.opc.SETUP_EXCEPT: target = self.get_target(offset) - end = self.restrict_to_parent(target, parent) + end = self.restrict_to_parent(target, parent) self.fixed_jumps[offset] = end elif op == self.opc.SETUP_FINALLY: target = self.get_target(offset) - end = self.restrict_to_parent(target, parent) + end = self.restrict_to_parent(target, parent) self.fixed_jumps[offset] = end elif op in self.jump_if_pop: target = self.get_target(offset) if target > offset: - unop_target = self.last_instr(offset, target, self.opc.JUMP_FORWARD, target) - if unop_target and code[unop_target+3] != self.opc.ROT_TWO: + unop_target = self.last_instr( + offset, target, self.opc.JUMP_FORWARD, target + ) + if unop_target and code[unop_target + 3] != self.opc.ROT_TWO: self.fixed_jumps[offset] = unop_target else: self.fixed_jumps[offset] = self.restrict_to_parent(target, parent) @@ -366,8 +442,11 @@ class Scanner30(Scanner3): # misclassified as RETURN_END_IF. Handle that here. # In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF if op == self.opc.RETURN_VALUE: - if (offset+1 < len(code) and code[offset+1] == self.opc.JUMP_ABSOLUTE and - offset in self.return_end_ifs): + if ( + offset + 1 < len(code) + and code[offset + 1] == self.opc.JUMP_ABSOLUTE + and offset in self.return_end_ifs + ): self.return_end_ifs.remove(offset) pass pass @@ -377,8 +456,10 @@ class Scanner30(Scanner3): # then RETURN_VALUE is not RETURN_END_IF rtarget = self.get_target(offset) rtarget_prev = self.prev[rtarget] - if (code[rtarget_prev] == self.opc.RETURN_VALUE and - rtarget_prev in self.return_end_ifs): + if ( + code[rtarget_prev] == self.opc.RETURN_VALUE + and rtarget_prev in self.return_end_ifs + ): i = rtarget_prev while i != offset: if code[i] in [opc.JUMP_FORWARD, opc.JUMP_ABSOLUTE]: @@ -388,15 +469,17 @@ class Scanner30(Scanner3): pass return + if __name__ == "__main__": from uncompyle6 import PYTHON_VERSION + if PYTHON_VERSION == 3.0: import inspect + co = inspect.currentframe().f_code tokens, customize = Scanner30().ingest(co) for t in tokens: print(t) pass else: - print("Need to be Python 3.0 to demo; I am %s." % - PYTHON_VERSION) + print("Need to be Python 3.0 to demo; I am %s." % PYTHON_VERSION) diff --git a/uncompyle6/scanners/scanner37base.py b/uncompyle6/scanners/scanner37base.py index 4485d621..3478c6d3 100644 --- a/uncompyle6/scanners/scanner37base.py +++ b/uncompyle6/scanners/scanner37base.py @@ -29,8 +29,8 @@ For example: Finally we save token information. """ -from xdis import iscode -from xdis.bytecode import instruction_size, _get_const_info, Instruction +from xdis import iscode, instruction_size, Instruction +from xdis.bytecode import _get_const_info from uncompyle6.scanner import Token import xdis diff --git a/uncompyle6/semantics/customize3.py b/uncompyle6/semantics/customize3.py index 9c29e838..0a683888 100644 --- a/uncompyle6/semantics/customize3.py +++ b/uncompyle6/semantics/customize3.py @@ -17,9 +17,8 @@ """ from uncompyle6.semantics.consts import TABLE_DIRECT -from xdis.util import co_flags_is_async -from xdis import iscode +from xdis import co_flags_is_async, iscode from uncompyle6.scanner import Code from uncompyle6.semantics.helper import ( find_code_node, diff --git a/uncompyle6/semantics/customize35.py b/uncompyle6/semantics/customize35.py index 0205527b..04c13aff 100644 --- a/uncompyle6/semantics/customize35.py +++ b/uncompyle6/semantics/customize35.py @@ -15,8 +15,7 @@ """Isolate Python 3.5 version-specific semantic actions here. """ -from xdis import iscode -from xdis.util import co_flags_is_async +from xdis import co_flags_is_async, iscode from uncompyle6.semantics.consts import ( INDENT_PER_LEVEL, PRECEDENCE, diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 8b25338f..f2ca9b5b 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -135,8 +135,7 @@ import sys IS_PYPY = "__pypy__" in sys.builtin_module_names PYTHON3 = sys.version_info >= (3, 0) -from xdis import iscode -from xdis.util import COMPILER_FLAG_BIT +from xdis import iscode, COMPILER_FLAG_BIT from uncompyle6.parser import get_python_parser from uncompyle6.parsers.treenode import SyntaxTree diff --git a/uncompyle6/verify.py b/uncompyle6/verify.py index 49be9f77..6d24e5d6 100755 --- a/uncompyle6/verify.py +++ b/uncompyle6/verify.py @@ -25,10 +25,9 @@ import xdis.std as dis from subprocess import call import uncompyle6 -from uncompyle6.scanner import (Token as ScannerToken, get_scanner) +from uncompyle6.scanner import Token as ScannerToken, get_scanner from uncompyle6 import PYTHON3 -from xdis import iscode, load_file, load_module, pretty_code_flags -from xdis.magics import PYTHON_MAGIC_INT +from xdis import iscode, load_file, load_module, pretty_code_flags, PYTHON_MAGIC_INT # FIXME: DRY if PYTHON3: @@ -41,63 +40,77 @@ else: def code_equal(a, b): return a.co_code == b.co_code + BIN_OP_FUNCS = { -'BINARY_POWER': operator.pow, -'BINARY_MULTIPLY': operator.mul, -'BINARY_DIVIDE': truediv, -'BINARY_FLOOR_DIVIDE': operator.floordiv, -'BINARY_TRUE_DIVIDE': operator.truediv, -'BINARY_MODULO' : operator.mod, -'BINARY_ADD': operator.add, -'BINARY_SUBRACT': operator.sub, -'BINARY_LSHIFT': operator.lshift, -'BINARY_RSHIFT': operator.rshift, -'BINARY_AND': operator.and_, -'BINARY_XOR': operator.xor, -'BINARY_OR': operator.or_, + "BINARY_POWER": operator.pow, + "BINARY_MULTIPLY": operator.mul, + "BINARY_DIVIDE": truediv, + "BINARY_FLOOR_DIVIDE": operator.floordiv, + "BINARY_TRUE_DIVIDE": operator.truediv, + "BINARY_MODULO": operator.mod, + "BINARY_ADD": operator.add, + "BINARY_SUBRACT": operator.sub, + "BINARY_LSHIFT": operator.lshift, + "BINARY_RSHIFT": operator.rshift, + "BINARY_AND": operator.and_, + "BINARY_XOR": operator.xor, + "BINARY_OR": operator.or_, } JUMP_OPS = None # --- exceptions --- + class VerifyCmpError(Exception): pass + class CmpErrorConsts(VerifyCmpError): """Exception to be raised when consts differ.""" + def __init__(self, name, index): self.name = name self.index = index def __str__(self): - return 'Compare Error within Consts of %s at index %i' % \ - (repr(self.name), self.index) + return "Compare Error within Consts of %s at index %i" % ( + repr(self.name), + self.index, + ) + class CmpErrorConstsType(VerifyCmpError): """Exception to be raised when consts differ.""" + def __init__(self, name, index): self.name = name self.index = index def __str__(self): - return 'Consts type differ in %s at index %i' % \ - (repr(self.name), self.index) + return "Consts type differ in %s at index %i" % (repr(self.name), self.index) + class CmpErrorConstsLen(VerifyCmpError): """Exception to be raised when length of co_consts differs.""" + def __init__(self, name, consts1, consts2): self.name = name self.consts = (consts1, consts2) def __str__(self): - return 'Consts length differs in %s:\n\n%i:\t%s\n\n%i:\t%s\n\n' % \ - (repr(self.name), - len(self.consts[0]), repr(self.consts[0]), - len(self.consts[1]), repr(self.consts[1])) + return "Consts length differs in %s:\n\n%i:\t%s\n\n%i:\t%s\n\n" % ( + repr(self.name), + len(self.consts[0]), + repr(self.consts[0]), + len(self.consts[1]), + repr(self.consts[1]), + ) + class CmpErrorCode(VerifyCmpError): """Exception to be raised when code differs.""" + def __init__(self, name, index, token1, token2, tokens1, tokens2): self.name = name self.index = index @@ -106,57 +119,74 @@ class CmpErrorCode(VerifyCmpError): self.tokens = [tokens1, tokens2] def __str__(self): - s = reduce(lambda s, t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]), - list(map(lambda a, b: (a, b), - self.tokens[0], - self.tokens[1])), - 'Code differs in %s\n' % str(self.name)) - return ('Code differs in %s at offset %s [%s] != [%s]\n\n' % - (repr(self.name), self.index, - repr(self.token1), repr(self.token2))) + s + s = reduce( + lambda s, t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]), + list(map(lambda a, b: (a, b), self.tokens[0], self.tokens[1])), + "Code differs in %s\n" % str(self.name), + ) + return ( + "Code differs in %s at offset %s [%s] != [%s]\n\n" + % (repr(self.name), self.index, repr(self.token1), repr(self.token2)) + ) + s + class CmpErrorCodeLen(VerifyCmpError): """Exception to be raised when code length differs.""" + def __init__(self, name, tokens1, tokens2): self.name = name self.tokens = [tokens1, tokens2] def __str__(self): - return reduce(lambda s, t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]), - list(map(lambda a, b: (a, b), - self.tokens[0], - self.tokens[1])), - 'Code len differs in %s\n' % str(self.name)) + return reduce( + lambda s, t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]), + list(map(lambda a, b: (a, b), self.tokens[0], self.tokens[1])), + "Code len differs in %s\n" % str(self.name), + ) + class CmpErrorMember(VerifyCmpError): """Exception to be raised when other members differ.""" + def __init__(self, name, member, data1, data2): self.name = name self.member = member self.data = (data1, data2) def __str__(self): - return 'Member %s differs in %s:\n\t%s\n\t%s\n' % \ - (repr(self.member), repr(self.name), - repr(self.data[0]), repr(self.data[1])) + return "Member %s differs in %s:\n\t%s\n\t%s\n" % ( + repr(self.member), + repr(self.name), + repr(self.data[0]), + repr(self.data[1]), + ) + # --- compare --- # these members are ignored -__IGNORE_CODE_MEMBERS__ = ['co_filename', 'co_firstlineno', 'co_lnotab', 'co_stacksize', 'co_names'] +__IGNORE_CODE_MEMBERS__ = [ + "co_filename", + "co_firstlineno", + "co_lnotab", + "co_stacksize", + "co_names", +] -def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, - name=''): + +def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, name=""): """ Compare two code-objects. This is the main part of this module. """ # print code_obj1, type(code_obj2) - assert iscode(code_obj1), \ - "cmp_code_object first object type is %s, not code" % type(code_obj1) - assert iscode(code_obj2), \ - "cmp_code_object second object type is %s, not code" % type(code_obj2) + assert iscode( + code_obj1 + ), "cmp_code_object first object type is %s, not code" % type(code_obj1) + assert iscode( + code_obj2 + ), "cmp_code_object second object type is %s, not code" % type(code_obj2) # print dir(code_obj1) if isinstance(code_obj1, object): # new style classes (Python 2.2) @@ -168,11 +198,12 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, assert dir(code_obj2) == code_obj2.__members__ assert code_obj1.__members__ == code_obj2.__members__ - if name == '__main__': + if name == "__main__": name = code_obj1.co_name else: - name = '%s.%s' % (name, code_obj1.co_name) - if name == '.?': name = '__main__' + name = "%s.%s" % (name, code_obj1.co_name) + if name == ".?": + name = "__main__" if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2): # use the new style code-classes' __cmp__ method, which @@ -184,22 +215,22 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, pass if isinstance(code_obj1, object): - members = [x for x in dir(code_obj1) if x.startswith('co_')] + members = [x for x in dir(code_obj1) if x.startswith("co_")] else: members = dir(code_obj1) members.sort() # ; members.reverse() tokens1 = None for member in members: - if member in __IGNORE_CODE_MEMBERS__ or verify != 'verify': + if member in __IGNORE_CODE_MEMBERS__ or verify != "verify": pass - elif member == 'co_code': - if verify != 'strong': + elif member == "co_code": + if verify != "strong": continue scanner = get_scanner(version, is_pypy, show_asm=False) global JUMP_OPS - JUMP_OPS = list(scan.JUMP_OPS) + ['JUMP_BACK'] + JUMP_OPS = list(scan.JUMP_OPS) + ["JUMP_BACK"] # use changed Token class # We (re)set this here to save exception handling, @@ -208,25 +239,29 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, try: # ingest both code-objects tokens1, customize = scanner.ingest(code_obj1) - del customize # save memory + del customize # save memory tokens2, customize = scanner.ingest(code_obj2) - del customize # save memory + del customize # save memory finally: - scanner.resetTokenClass() # restore Token class + scanner.resetTokenClass() # restore Token class targets1 = dis.findlabels(code_obj1.co_code) - tokens1 = [t for t in tokens1 if t.kind != 'COME_FROM'] - tokens2 = [t for t in tokens2 if t.kind != 'COME_FROM'] + tokens1 = [t for t in tokens1 if t.kind != "COME_FROM"] + tokens2 = [t for t in tokens2 if t.kind != "COME_FROM"] - i1 = 0; i2 = 0 - offset_map = {}; check_jumps = {} + i1 = 0 + i2 = 0 + offset_map = {} + check_jumps = {} while i1 < len(tokens1): if i2 >= len(tokens2): - if len(tokens1) == len(tokens2) + 2 \ - and tokens1[-1].kind == 'RETURN_VALUE' \ - and tokens1[-2].kind == 'LOAD_CONST' \ - and tokens1[-2].pattr is None \ - and tokens1[-3].kind == 'RETURN_VALUE': + if ( + len(tokens1) == len(tokens2) + 2 + and tokens1[-1].kind == "RETURN_VALUE" + and tokens1[-2].kind == "LOAD_CONST" + and tokens1[-2].pattr is None + and tokens1[-3].kind == "RETURN_VALUE" + ): break else: raise CmpErrorCodeLen(name, tokens1, tokens2) @@ -235,87 +270,144 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, for idx1, idx2, offset2 in check_jumps.get(tokens1[i1].offset, []): if offset2 != tokens2[i2].offset: - raise CmpErrorCode(name, tokens1[idx1].offset, tokens1[idx1], - tokens2[idx2], tokens1, tokens2) + raise CmpErrorCode( + name, + tokens1[idx1].offset, + tokens1[idx1], + tokens2[idx2], + tokens1, + tokens2, + ) if tokens1[i1].kind != tokens2[i2].kind: - if tokens1[i1].kind == 'LOAD_CONST' == tokens2[i2].kind: + if tokens1[i1].kind == "LOAD_CONST" == tokens2[i2].kind: i = 1 - while tokens1[i1+i].kind == 'LOAD_CONST': + while tokens1[i1 + i].kind == "LOAD_CONST": i += 1 - if tokens1[i1+i].kind.startswith(('BUILD_TUPLE', 'BUILD_LIST')) \ - and i == int(tokens1[i1+i].kind.split('_')[-1]): - t = tuple([ elem.pattr for elem in tokens1[i1:i1+i] ]) + if tokens1[i1 + i].kind.startswith( + ("BUILD_TUPLE", "BUILD_LIST") + ) and i == int(tokens1[i1 + i].kind.split("_")[-1]): + t = tuple([elem.pattr for elem in tokens1[i1 : i1 + i]]) if t != tokens2[i2].pattr: - raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1], - tokens2[i2], tokens1, tokens2) + raise CmpErrorCode( + name, + tokens1[i1].offset, + tokens1[i1], + tokens2[i2], + tokens1, + tokens2, + ) i1 += i + 1 i2 += 1 continue - elif i == 2 and tokens1[i1+i].kind == 'ROT_TWO' and tokens2[i2+1].kind == 'UNPACK_SEQUENCE_2': + elif ( + i == 2 + and tokens1[i1 + i].kind == "ROT_TWO" + and tokens2[i2 + 1].kind == "UNPACK_SEQUENCE_2" + ): i1 += 3 i2 += 2 continue - elif i == 2 and tokens1[i1+i].kind in BIN_OP_FUNCS: - f = BIN_OP_FUNCS[tokens1[i1+i].kind] - if f(tokens1[i1].pattr, tokens1[i1+1].pattr) == tokens2[i2].pattr: + elif i == 2 and tokens1[i1 + i].kind in BIN_OP_FUNCS: + f = BIN_OP_FUNCS[tokens1[i1 + i].kind] + if ( + f(tokens1[i1].pattr, tokens1[i1 + 1].pattr) + == tokens2[i2].pattr + ): i1 += 3 i2 += 1 continue - elif tokens1[i1].kind == 'UNARY_NOT': - if tokens2[i2].kind == 'POP_JUMP_IF_TRUE': - if tokens1[i1+1].kind == 'POP_JUMP_IF_FALSE': + elif tokens1[i1].kind == "UNARY_NOT": + if tokens2[i2].kind == "POP_JUMP_IF_TRUE": + if tokens1[i1 + 1].kind == "POP_JUMP_IF_FALSE": i1 += 2 i2 += 1 continue - elif tokens2[i2].kind == 'POP_JUMP_IF_FALSE': - if tokens1[i1+1].kind == 'POP_JUMP_IF_TRUE': + elif tokens2[i2].kind == "POP_JUMP_IF_FALSE": + if tokens1[i1 + 1].kind == "POP_JUMP_IF_TRUE": i1 += 2 i2 += 1 continue - elif tokens1[i1].kind in ('JUMP_FORWARD', 'JUMP_BACK') \ - and tokens1[i1-1].kind == 'RETURN_VALUE' \ - and tokens2[i2-1].kind in ('RETURN_VALUE', 'RETURN_END_IF') \ - and int(tokens1[i1].offset) not in targets1: + elif ( + tokens1[i1].kind in ("JUMP_FORWARD", "JUMP_BACK") + and tokens1[i1 - 1].kind == "RETURN_VALUE" + and tokens2[i2 - 1].kind in ("RETURN_VALUE", "RETURN_END_IF") + and int(tokens1[i1].offset) not in targets1 + ): i1 += 1 continue - elif tokens1[i1].kind == 'JUMP_BACK' and tokens2[i2].kind == 'CONTINUE': + elif ( + tokens1[i1].kind == "JUMP_BACK" + and tokens2[i2].kind == "CONTINUE" + ): # FIXME: should make sure that offset is inside loop, not outside of it i1 += 2 i2 += 2 continue - elif tokens1[i1].kind == 'JUMP_FORWARD' and tokens2[i2].kind == 'JUMP_BACK' \ - and tokens1[i1+1].kind == 'JUMP_BACK' and tokens2[i2+1].kind == 'JUMP_BACK' \ - and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3: - if int(tokens1[i1].pattr) == int(tokens1[i1+1].offset): + elif ( + tokens1[i1].kind == "JUMP_FORWARD" + and tokens2[i2].kind == "JUMP_BACK" + and tokens1[i1 + 1].kind == "JUMP_BACK" + and tokens2[i2 + 1].kind == "JUMP_BACK" + and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3 + ): + if int(tokens1[i1].pattr) == int(tokens1[i1 + 1].offset): i1 += 2 i2 += 2 continue - elif tokens1[i1].kind == 'LOAD_NAME' and tokens2[i2].kind == 'LOAD_CONST' \ - and tokens1[i1].pattr == 'None' and tokens2[i2].pattr is None: + elif ( + tokens1[i1].kind == "LOAD_NAME" + and tokens2[i2].kind == "LOAD_CONST" + and tokens1[i1].pattr == "None" + and tokens2[i2].pattr is None + ): pass - elif tokens1[i1].kind == 'LOAD_GLOBAL' and tokens2[i2].kind == 'LOAD_NAME' \ - and tokens1[i1].pattr == tokens2[i2].pattr: + elif ( + tokens1[i1].kind == "LOAD_GLOBAL" + and tokens2[i2].kind == "LOAD_NAME" + and tokens1[i1].pattr == tokens2[i2].pattr + ): pass - elif tokens1[i1].kind == 'LOAD_ASSERT' and tokens2[i2].kind == 'LOAD_NAME' \ - and tokens1[i1].pattr == tokens2[i2].pattr: + elif ( + tokens1[i1].kind == "LOAD_ASSERT" + and tokens2[i2].kind == "LOAD_NAME" + and tokens1[i1].pattr == tokens2[i2].pattr + ): pass - elif (tokens1[i1].kind == 'RETURN_VALUE' and - tokens2[i2].kind == 'RETURN_END_IF'): + elif ( + tokens1[i1].kind == "RETURN_VALUE" + and tokens2[i2].kind == "RETURN_END_IF" + ): pass - elif (tokens1[i1].kind == 'BUILD_TUPLE_0' and - tokens2[i2].pattr == ()): + elif ( + tokens1[i1].kind == "BUILD_TUPLE_0" and tokens2[i2].pattr == () + ): pass else: - raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1], - tokens2[i2], tokens1, tokens2) - elif tokens1[i1].kind in JUMP_OPS and tokens1[i1].pattr != tokens2[i2].pattr: - if tokens1[i1].kind == 'JUMP_BACK': + raise CmpErrorCode( + name, + tokens1[i1].offset, + tokens1[i1], + tokens2[i2], + tokens1, + tokens2, + ) + elif ( + tokens1[i1].kind in JUMP_OPS + and tokens1[i1].pattr != tokens2[i2].pattr + ): + if tokens1[i1].kind == "JUMP_BACK": dest1 = int(tokens1[i1].pattr) dest2 = int(tokens2[i2].pattr) if offset_map[dest1] != dest2: - raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1], - tokens2[i2], tokens1, tokens2) + raise CmpErrorCode( + name, + tokens1[i1].offset, + tokens1[i1], + tokens2[i2], + tokens1, + tokens2, + ) else: # import pdb; pdb.set_trace() try: @@ -329,71 +421,84 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, i1 += 1 i2 += 1 - del tokens1, tokens2 # save memory - elif member == 'co_consts': + del tokens1, tokens2 # save memory + elif member == "co_consts": # partial optimization can make the co_consts look different, # so we'll just compare the code consts - codes1 = ( c for c in code_obj1.co_consts if hasattr(c, 'co_consts') ) - codes2 = ( c for c in code_obj2.co_consts if hasattr(c, 'co_consts') ) + codes1 = (c for c in code_obj1.co_consts if hasattr(c, "co_consts")) + codes2 = (c for c in code_obj2.co_consts if hasattr(c, "co_consts")) for c1, c2 in zip(codes1, codes2): - cmp_code_objects(version, is_pypy, c1, c2, verify, - name=name) - elif member == 'co_flags': + cmp_code_objects(version, is_pypy, c1, c2, verify, name=name) + elif member == "co_flags": flags1 = code_obj1.co_flags flags2 = code_obj2.co_flags if is_pypy: # For PYPY for now we don't care about PYPY_SOURCE_IS_UTF8: flags2 &= ~0x0100 # PYPY_SOURCE_IS_UTF8 # We also don't care about COROUTINE or GENERATOR for now - flags1 &= ~0x000000a0 - flags2 &= ~0x000000a0 + flags1 &= ~0x000000A0 + flags2 &= ~0x000000A0 if flags1 != flags2: - raise CmpErrorMember(name, 'co_flags', - pretty_code_flags(flags1), - pretty_code_flags(flags2)) + raise CmpErrorMember( + name, + "co_flags", + pretty_code_flags(flags1), + pretty_code_flags(flags2), + ) else: # all other members must be equal if getattr(code_obj1, member) != getattr(code_obj2, member): - raise CmpErrorMember(name, member, - getattr(code_obj1, member), - getattr(code_obj2, member)) + raise CmpErrorMember( + name, member, getattr(code_obj1, member), getattr(code_obj2, member) + ) + class Token(ScannerToken): """Token class with changed semantics for 'cmp()'.""" + def __cmp__(self, o): - t = self.kind # shortcut - if t == 'BUILD_TUPLE_0' and o.kind == 'LOAD_CONST' and o.pattr == (): + t = self.kind # shortcut + if t == "BUILD_TUPLE_0" and o.kind == "LOAD_CONST" and o.pattr == (): return 0 - if t == 'COME_FROM' == o.kind: + if t == "COME_FROM" == o.kind: return 0 - if t == 'PRINT_ITEM_CONT' and o.kind == 'PRINT_ITEM': + if t == "PRINT_ITEM_CONT" and o.kind == "PRINT_ITEM": return 0 - if t == 'RETURN_VALUE' and o.kind == 'RETURN_END_IF': + if t == "RETURN_VALUE" and o.kind == "RETURN_END_IF": return 0 - if t == 'JUMP_IF_FALSE_OR_POP' and o.kind == 'POP_JUMP_IF_FALSE': + if t == "JUMP_IF_FALSE_OR_POP" and o.kind == "POP_JUMP_IF_FALSE": return 0 if JUMP_OPS and t in JUMP_OPS: # ignore offset return t == o.kind - return (t == o.kind) or self.pattr == o.pattr + return (t == o.kind) or self.pattr == o.pattr def __repr__(self): - return '%s %s (%s)' % (str(self.kind), str(self.attr), - repr(self.pattr)) + return "%s %s (%s)" % (str(self.kind), str(self.attr), repr(self.pattr)) def __str__(self): - return '%s\t%-17s %r' % (self.offset, self.kind, self.pattr) + return "%s\t%-17s %r" % (self.offset, self.kind, self.pattr) + def compare_code_with_srcfile(pyc_filename, src_filename, verify): """Compare a .pyc with a source code file. If everything is okay, None is returned. Otherwise a string message describing the mismatch is returned. """ - (version, timestamp, magic_int, code_obj1, is_pypy, - source_size, sip_hash) = load_module(pyc_filename) + ( + version, + timestamp, + magic_int, + code_obj1, + is_pypy, + source_size, + sip_hash, + ) = load_module(pyc_filename) if magic_int != PYTHON_MAGIC_INT: - msg = ("Can't compare code - Python is running with magic %s, but code is magic %s " - % (PYTHON_MAGIC_INT, magic_int)) + msg = ( + "Can't compare code - Python is running with magic %s, but code is magic %s " + % (PYTHON_MAGIC_INT, magic_int) + ) return msg try: code_obj2 = load_file(src_filename) @@ -401,7 +506,7 @@ def compare_code_with_srcfile(pyc_filename, src_filename, verify): # src_filename can be the first of a group sometimes return str(e).replace(src_filename, pyc_filename) cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify) - if verify == 'verify-run': + if verify == "verify-run": try: retcode = call("%s %s" % (sys.executable, src_filename), shell=True) if retcode != 0: @@ -412,19 +517,35 @@ def compare_code_with_srcfile(pyc_filename, src_filename, verify): pass return None + def compare_files(pyc_filename1, pyc_filename2, verify): """Compare two .pyc files.""" - (version1, timestamp, magic_int1, code_obj1, is_pypy, - source_size, sip_hash) = uncompyle6.load_module(pyc_filename1) - (version2, timestamp, magic_int2, code_obj2, is_pypy, - source_size, sip_hash) = uncompyle6.load_module(pyc_filename2) - if (magic_int1 != magic_int2) and verify == 'verify': - verify = 'weak_verify' + ( + version1, + timestamp, + magic_int1, + code_obj1, + is_pypy, + source_size, + sip_hash, + ) = uncompyle6.load_module(pyc_filename1) + ( + version2, + timestamp, + magic_int2, + code_obj2, + is_pypy, + source_size, + sip_hash, + ) = uncompyle6.load_module(pyc_filename2) + if (magic_int1 != magic_int2) and verify == "verify": + verify = "weak_verify" cmp_code_objects(version1, is_pypy, code_obj1, code_obj2, verify) -if __name__ == '__main__': - t1 = Token('LOAD_CONST', None, 'code_object _expandLang', 52) - t2 = Token('LOAD_CONST', -421, 'code_object _expandLang', 55) + +if __name__ == "__main__": + t1 = Token("LOAD_CONST", None, "code_object _expandLang", 52) + t2 = Token("LOAD_CONST", -421, "code_object _expandLang", 55) print(repr(t1)) print(repr(t2)) - print(t1.kind == t2.kind, t1.attr == t2.attr) + print(t1.kind == t2.kind, t1.attr == t2.attr) diff --git a/uncompyle6/version.py b/uncompyle6/version.py index 537c8d3d..41ef8334 100644 --- a/uncompyle6/version.py +++ b/uncompyle6/version.py @@ -12,4 +12,4 @@ # along with this program. If not, see . # This file is suitable for sourcing inside POSIX shell as # well as importing into Python -VERSION="3.6.7" # noqa +VERSION="3.7.0" # noqa