Name phases "disassembly" and "tokenization"

This commit is contained in:
rocky 2024-03-14 15:31:53 -04:00
parent b88af23406
commit bec88e4aaa
3 changed files with 25 additions and 20 deletions

View File

@@ -1,4 +1,4 @@
-# Copyright (c) 2015-2023 by Rocky Bernstein
+# Copyright (c) 2015-2024 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
#
@ -36,13 +36,13 @@ Finally we save token information.
from __future__ import print_function
from copy import copy
from xdis import code2num, iscode, op_has_argument, instruction_size
from xdis.bytecode import _get_const_info
from uncompyle6.scanner import Scanner, Token
from sys import intern
from xdis import code2num, instruction_size, iscode, op_has_argument
from xdis.bytecode import _get_const_info
from uncompyle6.scanner import Scanner, Token
class Scanner2(Scanner):
def __init__(self, version, show_asm=None, is_pypy=False):
@ -206,7 +206,7 @@ class Scanner2(Scanner):
bytecode = self.build_instructions(co)
if show_asm in ("both", "before"):
-print("\n# ---- before tokenization:")
+print("\n# ---- disassembly:")
bytecode.disassemble_bytes(
co.co_code,
varnames=co.co_varnames,
@ -235,7 +235,6 @@ class Scanner2(Scanner):
# 'LOAD_ASSERT' is used in assert statements.
self.load_asserts = set()
for i in self.op_range(0, codelen):
# We need to detect the difference between:
# raise AssertionError
# and
@ -327,9 +326,14 @@ class Scanner2(Scanner):
"BUILD_SET",
):
t = Token(
op_name, oparg, pattr, offset,
op_name,
oparg,
pattr,
offset,
self.linestarts.get(offset, None),
op, has_arg, self.opc
op,
has_arg,
self.opc,
)
collection_type = op_name.split("_")[1]
next_tokens = self.bound_collection_from_tokens(
@ -490,7 +494,7 @@ class Scanner2(Scanner):
pass
if show_asm in ("both", "after"):
-print("\n# ---- after tokenization:")
+print("\n# ---- tokenization:")
for t in new_tokens:
print(t.format(line_prefix=""))
print()
@ -540,14 +544,17 @@ class Scanner2(Scanner):
for s in stmt_list:
if code[s] == self.opc.JUMP_ABSOLUTE and s not in pass_stmts:
target = self.get_target(s)
-if target > s or (self.lines and self.lines[last_stmt].l_no == self.lines[s].l_no):
+if target > s or (
+self.lines and self.lines[last_stmt].l_no == self.lines[s].l_no
+):
stmts.remove(s)
continue
j = self.prev[s]
while code[j] == self.opc.JUMP_ABSOLUTE:
j = self.prev[j]
if (
self.version >= (2, 3) and self.opname_for_offset(j) == "LIST_APPEND"
self.version >= (2, 3)
and self.opname_for_offset(j) == "LIST_APPEND"
): # list comprehension
stmts.remove(s)
continue
@ -924,7 +931,6 @@ class Scanner2(Scanner):
# Is it an "and" inside an "if" or "while" block
if op == self.opc.PJIF:
# Search for other POP_JUMP_IF_...'s targeting the
# same target, of the current POP_JUMP_... instruction,
# starting from current offset, and filter everything inside inner 'or'
@ -1116,7 +1122,6 @@ class Scanner2(Scanner):
# Is this a loop and not an "if" statement?
if (if_end < pre_rtarget) and (pre[if_end] in self.setup_loop_targets):
if if_end > start:
return
else:

View File

@@ -1,4 +1,4 @@
-# Copyright (c) 2015-2017, 2021-2022 by Rocky Bernstein
+# Copyright (c) 2015-2017, 2021-2022, 2024 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
#
@ -80,7 +80,7 @@ class Scanner26(scan.Scanner2):
# show_asm = 'after'
if show_asm in ("both", "before"):
-print("\n# ---- before tokenization:")
+print("\n# ---- disassembly:")
for instr in bytecode.get_instructions(co):
print(instr.disassemble(self.opc))
@ -346,7 +346,7 @@ class Scanner26(scan.Scanner2):
pass
if show_asm in ("both", "after"):
-print("\n# ---- after tokenization:")
+print("\n# ---- tokenization:")
for t in tokens:
print(t.format(line_prefix=""))
print()

View File

@ -418,7 +418,7 @@ class Scanner3(Scanner):
# show_asm = 'both'
if show_asm in ("both", "before"):
-print("\n# ---- before tokenization:")
+print("\n# ---- disassembly:")
bytecode.disassemble_bytes(
co.co_code,
varnames=co.co_varnames,
@ -788,7 +788,7 @@ class Scanner3(Scanner):
pass
if show_asm in ("both", "after"):
-print("\n# ---- after tokenization:")
+print("\n# ---- tokenization:")
for t in new_tokens:
print(t.format(line_prefix=""))
print()