mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2024-11-23 13:09:49 +00:00
547 lines
19 KiB
Python
Executable File
547 lines
19 KiB
Python
Executable File
#
|
|
# (C) Copyright 2015-2018, 2020-2021 by Rocky Bernstein
|
|
# (C) Copyright 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
"""
|
|
byte-code verification
|
|
"""
|
|
|
|
from __future__ import print_function
|
|
|
|
import operator, sys
|
|
import xdis.std as dis
|
|
from subprocess import call
|
|
|
|
import uncompyle6
|
|
from uncompyle6.scanner import Token as ScannerToken, get_scanner
|
|
from xdis import iscode, load_file, load_module, pretty_code_flags, PYTHON_MAGIC_INT
|
|
|
|
truediv = operator.truediv
|
|
from functools import reduce
|
|
|
|
|
|
def code_equal(a, b):
|
|
return a.co_code == b.co_code
|
|
|
|
|
|
BIN_OP_FUNCS = {
|
|
"BINARY_POWER": operator.pow,
|
|
"BINARY_MULTIPLY": operator.mul,
|
|
"BINARY_DIVIDE": truediv,
|
|
"BINARY_FLOOR_DIVIDE": operator.floordiv,
|
|
"BINARY_TRUE_DIVIDE": operator.truediv,
|
|
"BINARY_MODULO": operator.mod,
|
|
"BINARY_ADD": operator.add,
|
|
"BINARY_SUBRACT": operator.sub,
|
|
"BINARY_LSHIFT": operator.lshift,
|
|
"BINARY_RSHIFT": operator.rshift,
|
|
"BINARY_AND": operator.and_,
|
|
"BINARY_XOR": operator.xor,
|
|
"BINARY_OR": operator.or_,
|
|
}
|
|
|
|
JUMP_OPS = None
|
|
|
|
# --- exceptions ---
|
|
|
|
|
|
class VerifyCmpError(Exception):
|
|
pass
|
|
|
|
|
|
class CmpErrorConsts(VerifyCmpError):
|
|
"""Exception to be raised when consts differ."""
|
|
|
|
def __init__(self, name, index):
|
|
self.name = name
|
|
self.index = index
|
|
|
|
def __str__(self):
|
|
return "Compare Error within Consts of %s at index %i" % (
|
|
repr(self.name),
|
|
self.index,
|
|
)
|
|
|
|
|
|
class CmpErrorConstsType(VerifyCmpError):
|
|
"""Exception to be raised when consts differ."""
|
|
|
|
def __init__(self, name, index):
|
|
self.name = name
|
|
self.index = index
|
|
|
|
def __str__(self):
|
|
return "Consts type differ in %s at index %i" % (repr(self.name), self.index)
|
|
|
|
|
|
class CmpErrorConstsLen(VerifyCmpError):
|
|
"""Exception to be raised when length of co_consts differs."""
|
|
|
|
def __init__(self, name, consts1, consts2):
|
|
self.name = name
|
|
self.consts = (consts1, consts2)
|
|
|
|
def __str__(self):
|
|
return "Consts length differs in %s:\n\n%i:\t%s\n\n%i:\t%s\n\n" % (
|
|
repr(self.name),
|
|
len(self.consts[0]),
|
|
repr(self.consts[0]),
|
|
len(self.consts[1]),
|
|
repr(self.consts[1]),
|
|
)
|
|
|
|
|
|
class CmpErrorCode(VerifyCmpError):
|
|
"""Exception to be raised when code differs."""
|
|
|
|
def __init__(self, name, index, token1, token2, tokens1, tokens2):
|
|
self.name = name
|
|
self.index = index
|
|
self.token1 = token1
|
|
self.token2 = token2
|
|
self.tokens = [tokens1, tokens2]
|
|
|
|
def __str__(self):
|
|
s = reduce(
|
|
lambda s, t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]),
|
|
list(map(lambda a, b: (a, b), self.tokens[0], self.tokens[1])),
|
|
"Code differs in %s\n" % str(self.name),
|
|
)
|
|
return (
|
|
"Code differs in %s at offset %s [%s] != [%s]\n\n"
|
|
% (repr(self.name), self.index, repr(self.token1), repr(self.token2))
|
|
) + s
|
|
|
|
|
|
class CmpErrorCodeLen(VerifyCmpError):
|
|
"""Exception to be raised when code length differs."""
|
|
|
|
def __init__(self, name, tokens1, tokens2):
|
|
self.name = name
|
|
self.tokens = [tokens1, tokens2]
|
|
|
|
def __str__(self):
|
|
return reduce(
|
|
lambda s, t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]),
|
|
list(map(lambda a, b: (a, b), self.tokens[0], self.tokens[1])),
|
|
"Code len differs in %s\n" % str(self.name),
|
|
)
|
|
|
|
|
|
class CmpErrorMember(VerifyCmpError):
|
|
"""Exception to be raised when other members differ."""
|
|
|
|
def __init__(self, name, member, data1, data2):
|
|
self.name = name
|
|
self.member = member
|
|
self.data = (data1, data2)
|
|
|
|
def __str__(self):
|
|
return "Member %s differs in %s:\n\t%s\n\t%s\n" % (
|
|
repr(self.member),
|
|
repr(self.name),
|
|
repr(self.data[0]),
|
|
repr(self.data[1]),
|
|
)
|
|
|
|
|
|
# --- compare ---
|
|
|
|
# these members are ignored
|
|
__IGNORE_CODE_MEMBERS__ = [
|
|
"co_filename",
|
|
"co_firstlineno",
|
|
"co_lnotab",
|
|
"co_stacksize",
|
|
"co_names",
|
|
]
|
|
|
|
|
|
def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, name=""):
|
|
"""
|
|
Compare two code-objects.
|
|
|
|
This is the main part of this module.
|
|
"""
|
|
# print code_obj1, type(code_obj2)
|
|
assert iscode(
|
|
code_obj1
|
|
), "cmp_code_object first object type is %s, not code" % type(code_obj1)
|
|
assert iscode(
|
|
code_obj2
|
|
), "cmp_code_object second object type is %s, not code" % type(code_obj2)
|
|
# print dir(code_obj1)
|
|
if isinstance(code_obj1, object):
|
|
# new style classes (Python 2.2)
|
|
# assume _both_ code objects to be new stle classes
|
|
assert dir(code_obj1) == dir(code_obj2)
|
|
else:
|
|
# old style classes
|
|
assert dir(code_obj1) == code_obj1.__members__
|
|
assert dir(code_obj2) == code_obj2.__members__
|
|
assert code_obj1.__members__ == code_obj2.__members__
|
|
|
|
if name == "__main__":
|
|
name = code_obj1.co_name
|
|
else:
|
|
name = "%s.%s" % (name, code_obj1.co_name)
|
|
if name == ".?":
|
|
name = "__main__"
|
|
|
|
if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2):
|
|
# use the new style code-classes' __cmp__ method, which
|
|
# should be faster and more sophisticated
|
|
# if this compare fails, we use the old routine to
|
|
# find out, what exactly is nor equal
|
|
# if this compare succeds, simply return
|
|
# return
|
|
pass
|
|
|
|
if isinstance(code_obj1, object):
|
|
members = [x for x in dir(code_obj1) if x.startswith("co_")]
|
|
else:
|
|
members = dir(code_obj1)
|
|
members.sort() # ; members.reverse()
|
|
|
|
tokens1 = None
|
|
for member in members:
|
|
if member in __IGNORE_CODE_MEMBERS__ or verify != "verify":
|
|
pass
|
|
elif member == "co_code":
|
|
if verify != "strong":
|
|
continue
|
|
scanner = get_scanner(version, is_pypy, show_asm=False)
|
|
|
|
global JUMP_OPS
|
|
JUMP_OPS = list(JUMP_OPS) + ["JUMP_BACK"]
|
|
|
|
# use changed Token class
|
|
# We (re)set this here to save exception handling,
|
|
# which would get confusing.
|
|
scanner.setTokenClass(Token)
|
|
try:
|
|
# ingest both code-objects
|
|
tokens1, customize = scanner.ingest(code_obj1)
|
|
del customize # save memory
|
|
tokens2, customize = scanner.ingest(code_obj2)
|
|
del customize # save memory
|
|
finally:
|
|
scanner.resetTokenClass() # restore Token class
|
|
|
|
targets1 = dis.findlabels(code_obj1.co_code)
|
|
tokens1 = [t for t in tokens1 if t.kind != "COME_FROM"]
|
|
tokens2 = [t for t in tokens2 if t.kind != "COME_FROM"]
|
|
|
|
i1 = 0
|
|
i2 = 0
|
|
offset_map = {}
|
|
check_jumps = {}
|
|
while i1 < len(tokens1):
|
|
if i2 >= len(tokens2):
|
|
if (
|
|
len(tokens1) == len(tokens2) + 2
|
|
and tokens1[-1].kind == "RETURN_VALUE"
|
|
and tokens1[-2].kind == "LOAD_CONST"
|
|
and tokens1[-2].pattr is None
|
|
and tokens1[-3].kind == "RETURN_VALUE"
|
|
):
|
|
break
|
|
else:
|
|
raise CmpErrorCodeLen(name, tokens1, tokens2)
|
|
|
|
offset_map[tokens1[i1].offset] = tokens2[i2].offset
|
|
|
|
for idx1, idx2, offset2 in check_jumps.get(tokens1[i1].offset, []):
|
|
if offset2 != tokens2[i2].offset:
|
|
raise CmpErrorCode(
|
|
name,
|
|
tokens1[idx1].offset,
|
|
tokens1[idx1],
|
|
tokens2[idx2],
|
|
tokens1,
|
|
tokens2,
|
|
)
|
|
|
|
if tokens1[i1].kind != tokens2[i2].kind:
|
|
if tokens1[i1].kind == "LOAD_CONST" == tokens2[i2].kind:
|
|
i = 1
|
|
while tokens1[i1 + i].kind == "LOAD_CONST":
|
|
i += 1
|
|
if tokens1[i1 + i].kind.startswith(
|
|
("BUILD_TUPLE", "BUILD_LIST")
|
|
) and i == int(tokens1[i1 + i].kind.split("_")[-1]):
|
|
t = tuple([elem.pattr for elem in tokens1[i1 : i1 + i]])
|
|
if t != tokens2[i2].pattr:
|
|
raise CmpErrorCode(
|
|
name,
|
|
tokens1[i1].offset,
|
|
tokens1[i1],
|
|
tokens2[i2],
|
|
tokens1,
|
|
tokens2,
|
|
)
|
|
i1 += i + 1
|
|
i2 += 1
|
|
continue
|
|
elif (
|
|
i == 2
|
|
and tokens1[i1 + i].kind == "ROT_TWO"
|
|
and tokens2[i2 + 1].kind == "UNPACK_SEQUENCE_2"
|
|
):
|
|
i1 += 3
|
|
i2 += 2
|
|
continue
|
|
elif i == 2 and tokens1[i1 + i].kind in BIN_OP_FUNCS:
|
|
f = BIN_OP_FUNCS[tokens1[i1 + i].kind]
|
|
if (
|
|
f(tokens1[i1].pattr, tokens1[i1 + 1].pattr)
|
|
== tokens2[i2].pattr
|
|
):
|
|
i1 += 3
|
|
i2 += 1
|
|
continue
|
|
elif tokens1[i1].kind == "UNARY_NOT":
|
|
if tokens2[i2].kind == "POP_JUMP_IF_TRUE":
|
|
if tokens1[i1 + 1].kind == "POP_JUMP_IF_FALSE":
|
|
i1 += 2
|
|
i2 += 1
|
|
continue
|
|
elif tokens2[i2].kind == "POP_JUMP_IF_FALSE":
|
|
if tokens1[i1 + 1].kind == "POP_JUMP_IF_TRUE":
|
|
i1 += 2
|
|
i2 += 1
|
|
continue
|
|
elif (
|
|
tokens1[i1].kind in ("JUMP_FORWARD", "JUMP_BACK")
|
|
and tokens1[i1 - 1].kind == "RETURN_VALUE"
|
|
and tokens2[i2 - 1].kind in ("RETURN_VALUE", "RETURN_END_IF")
|
|
and int(tokens1[i1].offset) not in targets1
|
|
):
|
|
i1 += 1
|
|
continue
|
|
elif (
|
|
tokens1[i1].kind == "JUMP_BACK"
|
|
and tokens2[i2].kind == "CONTINUE"
|
|
):
|
|
# FIXME: should make sure that offset is inside loop, not outside of it
|
|
i1 += 2
|
|
i2 += 2
|
|
continue
|
|
elif (
|
|
tokens1[i1].kind == "JUMP_FORWARD"
|
|
and tokens2[i2].kind == "JUMP_BACK"
|
|
and tokens1[i1 + 1].kind == "JUMP_BACK"
|
|
and tokens2[i2 + 1].kind == "JUMP_BACK"
|
|
and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3
|
|
):
|
|
if int(tokens1[i1].pattr) == int(tokens1[i1 + 1].offset):
|
|
i1 += 2
|
|
i2 += 2
|
|
continue
|
|
elif (
|
|
tokens1[i1].kind == "LOAD_NAME"
|
|
and tokens2[i2].kind == "LOAD_CONST"
|
|
and tokens1[i1].pattr == "None"
|
|
and tokens2[i2].pattr is None
|
|
):
|
|
pass
|
|
elif (
|
|
tokens1[i1].kind == "LOAD_GLOBAL"
|
|
and tokens2[i2].kind == "LOAD_NAME"
|
|
and tokens1[i1].pattr == tokens2[i2].pattr
|
|
):
|
|
pass
|
|
elif (
|
|
tokens1[i1].kind == "LOAD_ASSERT"
|
|
and tokens2[i2].kind == "LOAD_NAME"
|
|
and tokens1[i1].pattr == tokens2[i2].pattr
|
|
):
|
|
pass
|
|
elif (
|
|
tokens1[i1].kind == "RETURN_VALUE"
|
|
and tokens2[i2].kind == "RETURN_END_IF"
|
|
):
|
|
pass
|
|
elif (
|
|
tokens1[i1].kind == "BUILD_TUPLE_0" and tokens2[i2].pattr == ()
|
|
):
|
|
pass
|
|
else:
|
|
raise CmpErrorCode(
|
|
name,
|
|
tokens1[i1].offset,
|
|
tokens1[i1],
|
|
tokens2[i2],
|
|
tokens1,
|
|
tokens2,
|
|
)
|
|
elif (
|
|
tokens1[i1].kind in JUMP_OPS
|
|
and tokens1[i1].pattr != tokens2[i2].pattr
|
|
):
|
|
if tokens1[i1].kind == "JUMP_BACK":
|
|
dest1 = int(tokens1[i1].pattr)
|
|
dest2 = int(tokens2[i2].pattr)
|
|
if offset_map[dest1] != dest2:
|
|
raise CmpErrorCode(
|
|
name,
|
|
tokens1[i1].offset,
|
|
tokens1[i1],
|
|
tokens2[i2],
|
|
tokens1,
|
|
tokens2,
|
|
)
|
|
else:
|
|
# import pdb; pdb.set_trace()
|
|
try:
|
|
dest1 = int(tokens1[i1].pattr)
|
|
if dest1 in check_jumps:
|
|
check_jumps[dest1].append((i1, i2, dest2))
|
|
else:
|
|
check_jumps[dest1] = [(i1, i2, dest2)]
|
|
except:
|
|
pass
|
|
|
|
i1 += 1
|
|
i2 += 1
|
|
del tokens1, tokens2 # save memory
|
|
elif member == "co_consts":
|
|
# partial optimization can make the co_consts look different,
|
|
# so we'll just compare the code consts
|
|
codes1 = (c for c in code_obj1.co_consts if hasattr(c, "co_consts"))
|
|
codes2 = (c for c in code_obj2.co_consts if hasattr(c, "co_consts"))
|
|
|
|
for c1, c2 in zip(codes1, codes2):
|
|
cmp_code_objects(version, is_pypy, c1, c2, verify, name=name)
|
|
elif member == "co_flags":
|
|
flags1 = code_obj1.co_flags
|
|
flags2 = code_obj2.co_flags
|
|
if is_pypy:
|
|
# For PYPY for now we don't care about PYPY_SOURCE_IS_UTF8:
|
|
flags2 &= ~0x0100 # PYPY_SOURCE_IS_UTF8
|
|
# We also don't care about COROUTINE or GENERATOR for now
|
|
flags1 &= ~0x000000A0
|
|
flags2 &= ~0x000000A0
|
|
if flags1 != flags2:
|
|
raise CmpErrorMember(
|
|
name,
|
|
"co_flags",
|
|
pretty_code_flags(flags1),
|
|
pretty_code_flags(flags2),
|
|
)
|
|
else:
|
|
# all other members must be equal
|
|
if getattr(code_obj1, member) != getattr(code_obj2, member):
|
|
raise CmpErrorMember(
|
|
name, member, getattr(code_obj1, member), getattr(code_obj2, member)
|
|
)
|
|
|
|
|
|
class Token(ScannerToken):
|
|
"""Token class with changed semantics for 'cmp()'."""
|
|
|
|
def __cmp__(self, o):
|
|
t = self.kind # shortcut
|
|
if t == "BUILD_TUPLE_0" and o.kind == "LOAD_CONST" and o.pattr == ():
|
|
return 0
|
|
if t == "COME_FROM" == o.kind:
|
|
return 0
|
|
if t == "PRINT_ITEM_CONT" and o.kind == "PRINT_ITEM":
|
|
return 0
|
|
if t == "RETURN_VALUE" and o.kind == "RETURN_END_IF":
|
|
return 0
|
|
if t == "JUMP_IF_FALSE_OR_POP" and o.kind == "POP_JUMP_IF_FALSE":
|
|
return 0
|
|
if JUMP_OPS and t in JUMP_OPS:
|
|
# ignore offset
|
|
return t == o.kind
|
|
return (t == o.kind) or self.pattr == o.pattr
|
|
|
|
def __repr__(self):
|
|
return "%s %s (%s)" % (str(self.kind), str(self.attr), repr(self.pattr))
|
|
|
|
def __str__(self):
|
|
return "%s\t%-17s %r" % (self.offset, self.kind, self.pattr)
|
|
|
|
|
|
def compare_code_with_srcfile(pyc_filename, src_filename, verify):
|
|
"""Compare a .pyc with a source code file. If everything is okay, None
|
|
is returned. Otherwise a string message describing the mismatch is returned.
|
|
"""
|
|
(
|
|
version,
|
|
timestamp,
|
|
magic_int,
|
|
code_obj1,
|
|
is_pypy,
|
|
source_size,
|
|
sip_hash,
|
|
) = load_module(pyc_filename)
|
|
if magic_int != PYTHON_MAGIC_INT:
|
|
msg = (
|
|
"Can't compare code - Python is running with magic %s, but code is magic %s "
|
|
% (PYTHON_MAGIC_INT, magic_int)
|
|
)
|
|
return msg
|
|
try:
|
|
code_obj2 = load_file(src_filename)
|
|
except SyntaxError as e:
|
|
# src_filename can be the first of a group sometimes
|
|
return str(e).replace(src_filename, pyc_filename)
|
|
cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify)
|
|
if verify == "verify-run":
|
|
try:
|
|
retcode = call("%s %s" % (sys.executable, src_filename), shell=True)
|
|
if retcode != 0:
|
|
return "Child was terminated by signal %d" % retcode
|
|
pass
|
|
except OSError as e:
|
|
return "Execution failed: %s" % e
|
|
pass
|
|
return None
|
|
|
|
|
|
def compare_files(pyc_filename1, pyc_filename2, verify):
|
|
"""Compare two .pyc files."""
|
|
(
|
|
version1,
|
|
timestamp,
|
|
magic_int1,
|
|
code_obj1,
|
|
is_pypy,
|
|
source_size,
|
|
sip_hash,
|
|
) = uncompyle6.load_module(pyc_filename1)
|
|
(
|
|
version2,
|
|
timestamp,
|
|
magic_int2,
|
|
code_obj2,
|
|
is_pypy,
|
|
source_size,
|
|
sip_hash,
|
|
) = uncompyle6.load_module(pyc_filename2)
|
|
if (magic_int1 != magic_int2) and verify == "verify":
|
|
verify = "weak_verify"
|
|
cmp_code_objects(version1, is_pypy, code_obj1, code_obj2, verify)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
t1 = Token("LOAD_CONST", None, "code_object _expandLang", 52)
|
|
t2 = Token("LOAD_CONST", -421, "code_object _expandLang", 55)
|
|
print(repr(t1))
|
|
print(repr(t2))
|
|
print(t1.kind == t2.kind, t1.attr == t2.attr)
|