mirror of
https://github.com/isledecomp/isle.git
synced 2024-11-26 23:10:35 +00:00
Python Linting and Code Formatting (#298)
* Create common print_diff function * Add pylint and black * Fix linting, move classes to utils * Add black/pylint to github actions * Fix linting * Move Bin and SymInfo into their own files * Split out format * Tidy up workdlows and pip, add readme * Lint tests, add tests to readme
This commit is contained in:
parent
fb0d1ccb62
commit
b14116cc93
5
.github/workflows/build.yml
vendored
5
.github/workflows/build.yml
vendored
@ -70,15 +70,14 @@ jobs:
|
||||
path: legobin
|
||||
key: legobin
|
||||
|
||||
- name: Build isledecomp library
|
||||
- name: Install python packages
|
||||
shell: bash
|
||||
run: |
|
||||
pip install tools/isledecomp
|
||||
pip install -r tools/requirements.txt
|
||||
|
||||
- name: Summarize Accuracy
|
||||
shell: bash
|
||||
run: |
|
||||
pip install -r tools/reccmp/requirements.txt
|
||||
python3 tools/reccmp/reccmp.py -S ISLEPROGRESS.SVG --svg-icon tools/reccmp/isle.png -H ISLEPROGRESS.HTML legobin/ISLE.EXE build/ISLE.EXE build/ISLE.PDB . | tee ISLEPROGRESS.TXT
|
||||
python3 tools/reccmp/reccmp.py -S LEGO1PROGRESS.SVG -T 1929 --svg-icon tools/reccmp/lego1.png -H LEGO1PROGRESS.HTML legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB . | tee LEGO1PROGRESS.TXT
|
||||
|
||||
|
17
.github/workflows/format.yml
vendored
17
.github/workflows/format.yml
vendored
@ -20,3 +20,20 @@ jobs:
|
||||
LEGO1/realtime/*.cpp LEGO1/realtime/*.h \
|
||||
LEGO1/tgl/*.h \
|
||||
LEGO1/viewmanager/*.cpp LEGO1/viewmanager/*.h
|
||||
|
||||
python-format:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Install python libraries
|
||||
shell: bash
|
||||
run: |
|
||||
pip install black pylint pytest -r tools/requirements.txt
|
||||
|
||||
- name: Run pylint and black
|
||||
shell: bash
|
||||
run: |
|
||||
pylint tools --ignore=build
|
||||
black --check tools
|
||||
|
5
.github/workflows/order.yml
vendored
5
.github/workflows/order.yml
vendored
@ -9,12 +9,11 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Build isledecomp library
|
||||
- name: Install python libraries
|
||||
run: |
|
||||
pip install tools/isledecomp
|
||||
pip install -r tools/requirements.txt
|
||||
|
||||
- name: Run checkorder.py
|
||||
run: |
|
||||
pip install -r tools/checkorder/requirements.txt
|
||||
python3 tools/checkorder/checkorder.py --verbose --enforce ISLE
|
||||
python3 tools/checkorder/checkorder.py --verbose --enforce LEGO1
|
||||
|
4
.gitignore
vendored
4
.gitignore
vendored
@ -16,6 +16,6 @@ ISLE.EXE
|
||||
LEGO1.DLL
|
||||
build/
|
||||
*.swp
|
||||
LEGO1PROGRESS.HTML
|
||||
LEGO1PROGRESS.SVG
|
||||
LEGO1PROGRESS.*
|
||||
ISLEPROGRESS.*
|
||||
*.pyc
|
||||
|
635
.pylintrc
Normal file
635
.pylintrc
Normal file
@ -0,0 +1,635 @@
|
||||
[MAIN]
|
||||
|
||||
# Analyse import fallback blocks. This can be used to support both Python 2 and
|
||||
# 3 compatible code, which means that the block might have code that exists
|
||||
# only in one or another interpreter, leading to false positives when analysed.
|
||||
analyse-fallback-blocks=no
|
||||
|
||||
# Clear in-memory caches upon conclusion of linting. Useful if running pylint
|
||||
# in a server-like mode.
|
||||
clear-cache-post-run=no
|
||||
|
||||
# Load and enable all available extensions. Use --list-extensions to see a list
|
||||
# all available extensions.
|
||||
#enable-all-extensions=
|
||||
|
||||
# In error mode, messages with a category besides ERROR or FATAL are
|
||||
# suppressed, and no reports are done by default. Error mode is compatible with
|
||||
# disabling specific errors.
|
||||
#errors-only=
|
||||
|
||||
# Always return a 0 (non-error) status code, even if lint errors are found.
|
||||
# This is primarily useful in continuous integration scripts.
|
||||
#exit-zero=
|
||||
|
||||
# A comma-separated list of package or module names from where C extensions may
|
||||
# be loaded. Extensions are loading into the active Python interpreter and may
|
||||
# run arbitrary code.
|
||||
extension-pkg-allow-list=
|
||||
|
||||
# A comma-separated list of package or module names from where C extensions may
|
||||
# be loaded. Extensions are loading into the active Python interpreter and may
|
||||
# run arbitrary code. (This is an alternative name to extension-pkg-allow-list
|
||||
# for backward compatibility.)
|
||||
extension-pkg-whitelist=
|
||||
|
||||
# Return non-zero exit code if any of these messages/categories are detected,
|
||||
# even if score is above --fail-under value. Syntax same as enable. Messages
|
||||
# specified are enabled, while categories only check already-enabled messages.
|
||||
fail-on=
|
||||
|
||||
# Specify a score threshold under which the program will exit with error.
|
||||
fail-under=10
|
||||
|
||||
# Interpret the stdin as a python script, whose filename needs to be passed as
|
||||
# the module_or_package argument.
|
||||
#from-stdin=
|
||||
|
||||
# Files or directories to be skipped. They should be base names, not paths.
|
||||
ignore=CVS
|
||||
|
||||
# Add files or directories matching the regular expressions patterns to the
|
||||
# ignore-list. The regex matches against paths and can be in Posix or Windows
|
||||
# format. Because '\\' represents the directory delimiter on Windows systems,
|
||||
# it can't be used as an escape character.
|
||||
ignore-paths=
|
||||
|
||||
# Files or directories matching the regular expression patterns are skipped.
|
||||
# The regex matches against base names, not paths. The default value ignores
|
||||
# Emacs file locks
|
||||
ignore-patterns=^\.#
|
||||
|
||||
# List of module names for which member attributes should not be checked
|
||||
# (useful for modules/projects where namespaces are manipulated during runtime
|
||||
# and thus existing member attributes cannot be deduced by static analysis). It
|
||||
# supports qualified module names, as well as Unix pattern matching.
|
||||
ignored-modules=
|
||||
|
||||
# Python code to execute, usually for sys.path manipulation such as
|
||||
# pygtk.require().
|
||||
#init-hook=
|
||||
|
||||
# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
|
||||
# number of processors available to use, and will cap the count on Windows to
|
||||
# avoid hangs.
|
||||
jobs=1
|
||||
|
||||
# Control the amount of potential inferred values when inferring a single
|
||||
# object. This can help the performance when dealing with large functions or
|
||||
# complex, nested conditions.
|
||||
limit-inference-results=100
|
||||
|
||||
# List of plugins (as comma separated values of python module names) to load,
|
||||
# usually to register additional checkers.
|
||||
load-plugins=
|
||||
|
||||
# Pickle collected data for later comparisons.
|
||||
persistent=yes
|
||||
|
||||
# Minimum Python version to use for version dependent checks. Will default to
|
||||
# the version used to run pylint.
|
||||
py-version=3.11
|
||||
|
||||
# Discover python modules and packages in the file system subtree.
|
||||
recursive=no
|
||||
|
||||
# Add paths to the list of the source roots. Supports globbing patterns. The
|
||||
# source root is an absolute path or a path relative to the current working
|
||||
# directory used to determine a package namespace for modules located under the
|
||||
# source root.
|
||||
source-roots=
|
||||
|
||||
# When enabled, pylint would attempt to guess common misconfiguration and emit
|
||||
# user-friendly hints instead of false-positive error messages.
|
||||
suggestion-mode=yes
|
||||
|
||||
# Allow loading of arbitrary C extensions. Extensions are imported into the
|
||||
# active Python interpreter and may run arbitrary code.
|
||||
unsafe-load-any-extension=no
|
||||
|
||||
# In verbose mode, extra non-checker-related info will be displayed.
|
||||
#verbose=
|
||||
|
||||
|
||||
[BASIC]
|
||||
|
||||
# Naming style matching correct argument names.
|
||||
argument-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct argument names. Overrides argument-
|
||||
# naming-style. If left empty, argument names will be checked with the set
|
||||
# naming style.
|
||||
#argument-rgx=
|
||||
|
||||
# Naming style matching correct attribute names.
|
||||
attr-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct attribute names. Overrides attr-naming-
|
||||
# style. If left empty, attribute names will be checked with the set naming
|
||||
# style.
|
||||
#attr-rgx=
|
||||
|
||||
# Bad variable names which should always be refused, separated by a comma.
|
||||
bad-names=foo,
|
||||
bar,
|
||||
baz,
|
||||
toto,
|
||||
tutu,
|
||||
tata
|
||||
|
||||
# Bad variable names regexes, separated by a comma. If names match any regex,
|
||||
# they will always be refused
|
||||
bad-names-rgxs=
|
||||
|
||||
# Naming style matching correct class attribute names.
|
||||
class-attribute-naming-style=any
|
||||
|
||||
# Regular expression matching correct class attribute names. Overrides class-
|
||||
# attribute-naming-style. If left empty, class attribute names will be checked
|
||||
# with the set naming style.
|
||||
#class-attribute-rgx=
|
||||
|
||||
# Naming style matching correct class constant names.
|
||||
class-const-naming-style=UPPER_CASE
|
||||
|
||||
# Regular expression matching correct class constant names. Overrides class-
|
||||
# const-naming-style. If left empty, class constant names will be checked with
|
||||
# the set naming style.
|
||||
#class-const-rgx=
|
||||
|
||||
# Naming style matching correct class names.
|
||||
class-naming-style=PascalCase
|
||||
|
||||
# Regular expression matching correct class names. Overrides class-naming-
|
||||
# style. If left empty, class names will be checked with the set naming style.
|
||||
#class-rgx=
|
||||
|
||||
# Naming style matching correct constant names.
|
||||
const-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct constant names. Overrides const-naming-
|
||||
# style. If left empty, constant names will be checked with the set naming
|
||||
# style.
|
||||
#const-rgx=
|
||||
|
||||
# Minimum line length for functions/classes that require docstrings, shorter
|
||||
# ones are exempt.
|
||||
docstring-min-length=-1
|
||||
|
||||
# Naming style matching correct function names.
|
||||
function-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct function names. Overrides function-
|
||||
# naming-style. If left empty, function names will be checked with the set
|
||||
# naming style.
|
||||
#function-rgx=
|
||||
|
||||
# Good variable names which should always be accepted, separated by a comma.
|
||||
good-names=i,
|
||||
j,
|
||||
k,
|
||||
ex,
|
||||
Run,
|
||||
_
|
||||
|
||||
# Good variable names regexes, separated by a comma. If names match any regex,
|
||||
# they will always be accepted
|
||||
good-names-rgxs=
|
||||
|
||||
# Include a hint for the correct naming format with invalid-name.
|
||||
include-naming-hint=no
|
||||
|
||||
# Naming style matching correct inline iteration names.
|
||||
inlinevar-naming-style=any
|
||||
|
||||
# Regular expression matching correct inline iteration names. Overrides
|
||||
# inlinevar-naming-style. If left empty, inline iteration names will be checked
|
||||
# with the set naming style.
|
||||
#inlinevar-rgx=
|
||||
|
||||
# Naming style matching correct method names.
|
||||
method-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct method names. Overrides method-naming-
|
||||
# style. If left empty, method names will be checked with the set naming style.
|
||||
#method-rgx=
|
||||
|
||||
# Naming style matching correct module names.
|
||||
module-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct module names. Overrides module-naming-
|
||||
# style. If left empty, module names will be checked with the set naming style.
|
||||
#module-rgx=
|
||||
|
||||
# Colon-delimited sets of names that determine each other's naming style when
|
||||
# the name regexes allow several styles.
|
||||
name-group=
|
||||
|
||||
# Regular expression which should only match function or class names that do
|
||||
# not require a docstring.
|
||||
no-docstring-rgx=^_
|
||||
|
||||
# List of decorators that produce properties, such as abc.abstractproperty. Add
|
||||
# to this list to register other decorators that produce valid properties.
|
||||
# These decorators are taken in consideration only for invalid-name.
|
||||
property-classes=abc.abstractproperty
|
||||
|
||||
# Regular expression matching correct type alias names. If left empty, type
|
||||
# alias names will be checked with the set naming style.
|
||||
#typealias-rgx=
|
||||
|
||||
# Regular expression matching correct type variable names. If left empty, type
|
||||
# variable names will be checked with the set naming style.
|
||||
#typevar-rgx=
|
||||
|
||||
# Naming style matching correct variable names.
|
||||
variable-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct variable names. Overrides variable-
|
||||
# naming-style. If left empty, variable names will be checked with the set
|
||||
# naming style.
|
||||
#variable-rgx=
|
||||
|
||||
|
||||
[CLASSES]
|
||||
|
||||
# Warn about protected attribute access inside special methods
|
||||
check-protected-access-in-special-methods=no
|
||||
|
||||
# List of method names used to declare (i.e. assign) instance attributes.
|
||||
defining-attr-methods=__init__,
|
||||
__new__,
|
||||
setUp,
|
||||
asyncSetUp,
|
||||
__post_init__
|
||||
|
||||
# List of member names, which should be excluded from the protected access
|
||||
# warning.
|
||||
exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit
|
||||
|
||||
# List of valid names for the first argument in a class method.
|
||||
valid-classmethod-first-arg=cls
|
||||
|
||||
# List of valid names for the first argument in a metaclass class method.
|
||||
valid-metaclass-classmethod-first-arg=mcs
|
||||
|
||||
|
||||
[DESIGN]
|
||||
|
||||
# List of regular expressions of class ancestor names to ignore when counting
|
||||
# public methods (see R0903)
|
||||
exclude-too-few-public-methods=
|
||||
|
||||
# List of qualified class names to ignore when counting class parents (see
|
||||
# R0901)
|
||||
ignored-parents=
|
||||
|
||||
# Maximum number of arguments for function / method.
|
||||
max-args=6
|
||||
|
||||
# Maximum number of attributes for a class (see R0902).
|
||||
max-attributes=7
|
||||
|
||||
# Maximum number of boolean expressions in an if statement (see R0916).
|
||||
max-bool-expr=5
|
||||
|
||||
# Maximum number of branch for function / method body.
|
||||
max-branches=30
|
||||
|
||||
# Maximum number of locals for function / method body.
|
||||
max-locals=30
|
||||
|
||||
# Maximum number of parents for a class (see R0901).
|
||||
max-parents=7
|
||||
|
||||
# Maximum number of public methods for a class (see R0904).
|
||||
max-public-methods=20
|
||||
|
||||
# Maximum number of return / yield for function / method body.
|
||||
max-returns=6
|
||||
|
||||
# Maximum number of statements in function / method body.
|
||||
max-statements=75
|
||||
|
||||
# Minimum number of public methods for a class (see R0903).
|
||||
min-public-methods=0
|
||||
|
||||
|
||||
[EXCEPTIONS]
|
||||
|
||||
# Exceptions that will emit a warning when caught.
|
||||
overgeneral-exceptions=builtins.BaseException,builtins.Exception
|
||||
|
||||
|
||||
[FORMAT]
|
||||
|
||||
# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
|
||||
expected-line-ending-format=
|
||||
|
||||
# Regexp for a line that is allowed to be longer than the limit.
|
||||
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
|
||||
|
||||
# Number of spaces of indent required inside a hanging or continued line.
|
||||
indent-after-paren=2
|
||||
|
||||
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
|
||||
# tab).
|
||||
indent-string=' '
|
||||
|
||||
# Maximum number of characters on a single line.
|
||||
max-line-length=200
|
||||
|
||||
# Maximum number of lines in a module.
|
||||
max-module-lines=1000
|
||||
|
||||
# Allow the body of a class to be on the same line as the declaration if body
|
||||
# contains single statement.
|
||||
single-line-class-stmt=no
|
||||
|
||||
# Allow the body of an if to be on the same line as the test if there is no
|
||||
# else.
|
||||
single-line-if-stmt=no
|
||||
|
||||
|
||||
[IMPORTS]
|
||||
|
||||
# List of modules that can be imported at any level, not just the top level
|
||||
# one.
|
||||
allow-any-import-level=
|
||||
|
||||
# Allow explicit reexports by alias from a package __init__.
|
||||
allow-reexport-from-package=no
|
||||
|
||||
# Allow wildcard imports from modules that define __all__.
|
||||
allow-wildcard-with-all=no
|
||||
|
||||
# Deprecated modules which should not be used, separated by a comma.
|
||||
deprecated-modules=
|
||||
|
||||
# Output a graph (.gv or any supported image format) of external dependencies
|
||||
# to the given file (report RP0402 must not be disabled).
|
||||
ext-import-graph=
|
||||
|
||||
# Output a graph (.gv or any supported image format) of all (i.e. internal and
|
||||
# external) dependencies to the given file (report RP0402 must not be
|
||||
# disabled).
|
||||
import-graph=
|
||||
|
||||
# Output a graph (.gv or any supported image format) of internal dependencies
|
||||
# to the given file (report RP0402 must not be disabled).
|
||||
int-import-graph=
|
||||
|
||||
# Force import order to recognize a module as part of the standard
|
||||
# compatibility libraries.
|
||||
known-standard-library=
|
||||
|
||||
# Force import order to recognize a module as part of a third party library.
|
||||
known-third-party=enchant
|
||||
|
||||
# Couples of modules and preferred modules, separated by a comma.
|
||||
preferred-modules=
|
||||
|
||||
|
||||
[LOGGING]
|
||||
|
||||
# The type of string formatting that logging methods do. `old` means using %
|
||||
# formatting, `new` is for `{}` formatting.
|
||||
logging-format-style=old
|
||||
|
||||
# Logging modules to check that the string format arguments are in logging
|
||||
# function parameter format.
|
||||
logging-modules=logging
|
||||
|
||||
|
||||
[MESSAGES CONTROL]
|
||||
|
||||
# Only show warnings with the listed confidence levels. Leave empty to show
|
||||
# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE,
|
||||
# UNDEFINED.
|
||||
confidence=HIGH,
|
||||
CONTROL_FLOW,
|
||||
INFERENCE,
|
||||
INFERENCE_FAILURE,
|
||||
UNDEFINED
|
||||
|
||||
# Disable the message, report, category or checker with the given id(s). You
|
||||
# can either give multiple identifiers separated by comma (,) or put this
|
||||
# option multiple times (only on the command line, not in the configuration
|
||||
# file where it should appear only once). You can also use "--disable=all" to
|
||||
# disable everything first and then re-enable specific checks. For example, if
|
||||
# you want to run only the similarities checker, you can use "--disable=all
|
||||
# --enable=similarities". If you want to run only the classes checker, but have
|
||||
# no Warning level messages displayed, use "--disable=all --enable=classes
|
||||
# --disable=W".
|
||||
disable=raw-checker-failed,
|
||||
bad-inline-option,
|
||||
locally-disabled,
|
||||
file-ignored,
|
||||
suppressed-message,
|
||||
useless-suppression,
|
||||
deprecated-pragma,
|
||||
use-symbolic-message-instead,
|
||||
missing-class-docstring,
|
||||
missing-function-docstring,
|
||||
missing-module-docstring,
|
||||
fixme
|
||||
|
||||
# Enable the message, report, category or checker with the given id(s). You can
|
||||
# either give multiple identifier separated by comma (,) or put this option
|
||||
# multiple time (only on the command line, not in the configuration file where
|
||||
# it should appear only once). See also the "--disable" option for examples.
|
||||
enable=c-extension-no-member
|
||||
|
||||
|
||||
[METHOD_ARGS]
|
||||
|
||||
# List of qualified names (i.e., library.method) which require a timeout
|
||||
# parameter e.g. 'requests.api.get,requests.api.post'
|
||||
timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request
|
||||
|
||||
|
||||
[MISCELLANEOUS]
|
||||
|
||||
# List of note tags to take in consideration, separated by a comma.
|
||||
notes=FIXME,
|
||||
XXX,
|
||||
TODO
|
||||
|
||||
# Regular expression of note tags to take in consideration.
|
||||
notes-rgx=
|
||||
|
||||
|
||||
[REFACTORING]
|
||||
|
||||
# Maximum number of nested blocks for function / method body
|
||||
max-nested-blocks=5
|
||||
|
||||
# Complete name of functions that never returns. When checking for
|
||||
# inconsistent-return-statements if a never returning function is called then
|
||||
# it will be considered as an explicit return statement and no message will be
|
||||
# printed.
|
||||
never-returning-functions=sys.exit,argparse.parse_error
|
||||
|
||||
|
||||
[REPORTS]
|
||||
|
||||
# Python expression which should return a score less than or equal to 10. You
|
||||
# have access to the variables 'fatal', 'error', 'warning', 'refactor',
|
||||
# 'convention', and 'info' which contain the number of messages in each
|
||||
# category, as well as 'statement' which is the total number of statements
|
||||
# analyzed. This score is used by the global evaluation report (RP0004).
|
||||
evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10))
|
||||
|
||||
# Template used to display messages. This is a python new-style format string
|
||||
# used to format the message information. See doc for all details.
|
||||
msg-template=
|
||||
|
||||
# Set the output format. Available formats are text, parseable, colorized, json
|
||||
# and msvs (visual studio). You can also give a reporter class, e.g.
|
||||
# mypackage.mymodule.MyReporterClass.
|
||||
#output-format=
|
||||
|
||||
# Tells whether to display a full report or only the messages.
|
||||
reports=no
|
||||
|
||||
# Activate the evaluation score.
|
||||
score=yes
|
||||
|
||||
|
||||
[SIMILARITIES]
|
||||
|
||||
# Comments are removed from the similarity computation
|
||||
ignore-comments=yes
|
||||
|
||||
# Docstrings are removed from the similarity computation
|
||||
ignore-docstrings=yes
|
||||
|
||||
# Imports are removed from the similarity computation
|
||||
ignore-imports=yes
|
||||
|
||||
# Signatures are removed from the similarity computation
|
||||
ignore-signatures=yes
|
||||
|
||||
# Minimum lines number of a similarity.
|
||||
min-similarity-lines=4
|
||||
|
||||
|
||||
[SPELLING]
|
||||
|
||||
# Limits count of emitted suggestions for spelling mistakes.
|
||||
max-spelling-suggestions=4
|
||||
|
||||
# Spelling dictionary name. No available dictionaries : You need to install
|
||||
# both the python package and the system dependency for enchant to work..
|
||||
spelling-dict=
|
||||
|
||||
# List of comma separated words that should be considered directives if they
|
||||
# appear at the beginning of a comment and should not be checked.
|
||||
spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:
|
||||
|
||||
# List of comma separated words that should not be checked.
|
||||
spelling-ignore-words=
|
||||
|
||||
# A path to a file that contains the private dictionary; one word per line.
|
||||
spelling-private-dict-file=
|
||||
|
||||
# Tells whether to store unknown words to the private dictionary (see the
|
||||
# --spelling-private-dict-file option) instead of raising a message.
|
||||
spelling-store-unknown-words=no
|
||||
|
||||
|
||||
[STRING]
|
||||
|
||||
# This flag controls whether inconsistent-quotes generates a warning when the
|
||||
# character used as a quote delimiter is used inconsistently within a module.
|
||||
check-quote-consistency=no
|
||||
|
||||
# This flag controls whether the implicit-str-concat should generate a warning
|
||||
# on implicit string concatenation in sequences defined over several lines.
|
||||
check-str-concat-over-line-jumps=no
|
||||
|
||||
|
||||
[TYPECHECK]
|
||||
|
||||
# List of decorators that produce context managers, such as
|
||||
# contextlib.contextmanager. Add to this list to register other decorators that
|
||||
# produce valid context managers.
|
||||
contextmanager-decorators=contextlib.contextmanager
|
||||
|
||||
# List of members which are set dynamically and missed by pylint inference
|
||||
# system, and so shouldn't trigger E1101 when accessed. Python regular
|
||||
# expressions are accepted.
|
||||
generated-members=
|
||||
|
||||
# Tells whether to warn about missing members when the owner of the attribute
|
||||
# is inferred to be None.
|
||||
ignore-none=yes
|
||||
|
||||
# This flag controls whether pylint should warn about no-member and similar
|
||||
# checks whenever an opaque object is returned when inferring. The inference
|
||||
# can return multiple potential results while evaluating a Python object, but
|
||||
# some branches might not be evaluated, which results in partial inference. In
|
||||
# that case, it might be useful to still emit no-member and other checks for
|
||||
# the rest of the inferred objects.
|
||||
ignore-on-opaque-inference=yes
|
||||
|
||||
# List of symbolic message names to ignore for Mixin members.
|
||||
ignored-checks-for-mixins=no-member,
|
||||
not-async-context-manager,
|
||||
not-context-manager,
|
||||
attribute-defined-outside-init
|
||||
|
||||
# List of class names for which member attributes should not be checked (useful
|
||||
# for classes with dynamically set attributes). This supports the use of
|
||||
# qualified names.
|
||||
ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace
|
||||
|
||||
# Show a hint with possible names when a member name was not found. The aspect
|
||||
# of finding the hint is based on edit distance.
|
||||
missing-member-hint=yes
|
||||
|
||||
# The minimum edit distance a name should have in order to be considered a
|
||||
# similar match for a missing member name.
|
||||
missing-member-hint-distance=1
|
||||
|
||||
# The total number of similar names that should be taken in consideration when
|
||||
# showing a hint for a missing member.
|
||||
missing-member-max-choices=1
|
||||
|
||||
# Regex pattern to define which classes are considered mixins.
|
||||
mixin-class-rgx=.*[Mm]ixin
|
||||
|
||||
# List of decorators that change the signature of a decorated function.
|
||||
signature-mutators=
|
||||
|
||||
|
||||
[VARIABLES]
|
||||
|
||||
# List of additional names supposed to be defined in builtins. Remember that
|
||||
# you should avoid defining new builtins when possible.
|
||||
additional-builtins=
|
||||
|
||||
# Tells whether unused global variables should be treated as a violation.
|
||||
allow-global-unused-variables=yes
|
||||
|
||||
# List of names allowed to shadow builtins
|
||||
allowed-redefined-builtins=
|
||||
|
||||
# List of strings which can identify a callback function by name. A callback
|
||||
# name must start or end with one of those strings.
|
||||
callbacks=cb_,
|
||||
_cb
|
||||
|
||||
# A regular expression matching the name of dummy variables (i.e. expected to
|
||||
# not be used).
|
||||
dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
|
||||
|
||||
# Argument names that match this expression will be ignored.
|
||||
ignored-argument-names=_.*|^ignored_|^unused_
|
||||
|
||||
# Tells whether we should check for unused import in __init__ files.
|
||||
init-import=no
|
||||
|
||||
# List of qualified module names which can have objects that can redefine
|
||||
# builtins.
|
||||
redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
|
2
pyproject.toml
Normal file
2
pyproject.toml
Normal file
@ -0,0 +1,2 @@
|
||||
[flake8]
|
||||
max-line-length = 120
|
50
tools/README.md
Normal file
50
tools/README.md
Normal file
@ -0,0 +1,50 @@
|
||||
# LEGO Island Decompilation Tools
|
||||
|
||||
These are a set of Python tools for helping with the decomp project
|
||||
|
||||
## Installing
|
||||
Use pip to install the required packages:
|
||||
|
||||
```
|
||||
pip install -r tools/requirements.txt
|
||||
```
|
||||
|
||||
## reccmp
|
||||
This is a script to compare the original EXE or DLL with a recpmpiled EXE or DLL, provided a .PDB file
|
||||
|
||||
## verexp
|
||||
This verifies exports by comparing the exports of an original DLL and the recompiled DLL
|
||||
|
||||
## checkorder
|
||||
This checks the order of C++ source and header files to make sure the functions are in order
|
||||
|
||||
## isledecomp
|
||||
This is a library that is used by rhe above scripts. it has a collection of useful classes and functions
|
||||
|
||||
### Testing
|
||||
`isledecomp` has a small suite of tests. Install pylint and run it, passing in the directory:
|
||||
|
||||
```
|
||||
pip install pytest
|
||||
pytest tools/isledecomp/tests/
|
||||
```
|
||||
|
||||
## Development
|
||||
In order to keep the code clean and consistent, we use `pylint` and `black`:
|
||||
|
||||
```
|
||||
pip install black pylint
|
||||
```
|
||||
### To run pylint (ignores build and virtualenv):
|
||||
```
|
||||
pylint tools/ --ignore=build,bin,lib
|
||||
```
|
||||
|
||||
### To check code formatting without rewriting files:
|
||||
```
|
||||
black --check tools/
|
||||
```
|
||||
### To apply code formatting:
|
||||
```
|
||||
black tools/
|
||||
```
|
@ -1,14 +1,9 @@
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
from isledecomp.dir import (
|
||||
walk_source_dir,
|
||||
is_file_cpp
|
||||
)
|
||||
from isledecomp.dir import walk_source_dir, is_file_cpp
|
||||
from isledecomp.parser import find_code_blocks
|
||||
from isledecomp.parser.util import (
|
||||
is_exact_offset_comment
|
||||
)
|
||||
from isledecomp.parser.util import is_exact_offset_comment
|
||||
|
||||
|
||||
def sig_truncate(sig: str) -> str:
|
||||
@ -21,12 +16,14 @@ def check_file(filename: str, verbose: bool = False) -> bool:
|
||||
"""Open and read the given file, then check whether the code blocks
|
||||
are in order. If verbose, print each block."""
|
||||
|
||||
with open(filename, 'r') as f:
|
||||
with open(filename, "r", encoding="utf-8") as f:
|
||||
code_blocks = find_code_blocks(f)
|
||||
|
||||
bad_comments = [(block.start_line, block.offset_comment)
|
||||
bad_comments = [
|
||||
(block.start_line, block.offset_comment)
|
||||
for block in code_blocks
|
||||
if not is_exact_offset_comment(block.offset_comment)]
|
||||
if not is_exact_offset_comment(block.offset_comment)
|
||||
]
|
||||
|
||||
just_offsets = [block.offset for block in code_blocks]
|
||||
sorted_offsets = sorted(just_offsets)
|
||||
@ -35,8 +32,7 @@ def check_file(filename: str, verbose: bool = False) -> bool:
|
||||
# If we detect inexact comments, don't print anything unless we are
|
||||
# in verbose mode. If the file is out of order, we always print the
|
||||
# file name.
|
||||
should_report = ((len(bad_comments) > 0 and verbose)
|
||||
or file_out_of_order)
|
||||
should_report = (len(bad_comments) > 0 and verbose) or file_out_of_order
|
||||
|
||||
if not should_report and not file_out_of_order:
|
||||
return False
|
||||
@ -49,19 +45,21 @@ def check_file(filename: str, verbose: bool = False) -> bool:
|
||||
prev_offset = 0
|
||||
|
||||
for block in code_blocks:
|
||||
msg = ' '.join([
|
||||
' ' if block.offset > prev_offset else '!',
|
||||
f'{block.offset:08x}',
|
||||
f'{block.end_line - block.start_line:4} lines',
|
||||
f'{order_lookup[block.offset]:3}',
|
||||
' ',
|
||||
msg = " ".join(
|
||||
[
|
||||
" " if block.offset > prev_offset else "!",
|
||||
f"{block.offset:08x}",
|
||||
f"{block.end_line - block.start_line:4} lines",
|
||||
f"{order_lookup[block.offset]:3}",
|
||||
" ",
|
||||
sig_truncate(block.signature),
|
||||
])
|
||||
]
|
||||
)
|
||||
print(msg)
|
||||
prev_offset = block.offset
|
||||
|
||||
for (line_no, line) in bad_comments:
|
||||
print(f'* line {line_no:3} bad offset comment ({line})')
|
||||
for line_no, line in bad_comments:
|
||||
print(f"* line {line_no:3} bad offset comment ({line})")
|
||||
|
||||
print()
|
||||
|
||||
@ -69,15 +67,25 @@ def check_file(filename: str, verbose: bool = False) -> bool:
|
||||
|
||||
|
||||
def parse_args(test_args: list | None = None) -> dict:
|
||||
p = argparse.ArgumentParser()
|
||||
p.add_argument('target', help='The file or directory to check.')
|
||||
p.add_argument('--enforce', action=argparse.BooleanOptionalAction,
|
||||
p = argparse.ArgumentParser(
|
||||
description="Checks the source files to make sure the function offset comments are in order",
|
||||
)
|
||||
p.add_argument("target", help="The file or directory to check.")
|
||||
p.add_argument(
|
||||
"--enforce",
|
||||
action=argparse.BooleanOptionalAction,
|
||||
default=False,
|
||||
help='Fail with error code if target is out of order.')
|
||||
p.add_argument('--verbose', action=argparse.BooleanOptionalAction,
|
||||
help="Fail with error code if target is out of order.",
|
||||
)
|
||||
p.add_argument(
|
||||
"--verbose",
|
||||
action=argparse.BooleanOptionalAction,
|
||||
default=False,
|
||||
help=('Display each code block in the file and show '
|
||||
'where each consecutive run of blocks is broken.'))
|
||||
help=(
|
||||
"Display each code block in the file and show "
|
||||
"where each consecutive run of blocks is broken."
|
||||
),
|
||||
)
|
||||
|
||||
if test_args is None:
|
||||
args = p.parse_args()
|
||||
@ -90,31 +98,33 @@ def parse_args(test_args: list | None = None) -> dict:
|
||||
def main():
|
||||
args = parse_args()
|
||||
|
||||
if os.path.isdir(args['target']):
|
||||
files_to_check = list(walk_source_dir(args['target']))
|
||||
elif os.path.isfile(args['target']) and is_file_cpp(args['target']):
|
||||
files_to_check = [args['target']]
|
||||
if os.path.isdir(args["target"]):
|
||||
files_to_check = list(walk_source_dir(args["target"]))
|
||||
elif os.path.isfile(args["target"]) and is_file_cpp(args["target"]):
|
||||
files_to_check = [args["target"]]
|
||||
else:
|
||||
sys.exit('Invalid target')
|
||||
sys.exit("Invalid target")
|
||||
|
||||
files_out_of_order = 0
|
||||
|
||||
for file in files_to_check:
|
||||
is_jumbled = check_file(file, args['verbose'])
|
||||
is_jumbled = check_file(file, args["verbose"])
|
||||
if is_jumbled:
|
||||
files_out_of_order += 1
|
||||
|
||||
if files_out_of_order > 0:
|
||||
error_message = ' '.join([
|
||||
error_message = " ".join(
|
||||
[
|
||||
str(files_out_of_order),
|
||||
'files are' if files_out_of_order > 1 else 'file is',
|
||||
'out of order'
|
||||
])
|
||||
"files are" if files_out_of_order > 1 else "file is",
|
||||
"out of order",
|
||||
]
|
||||
)
|
||||
print(error_message)
|
||||
|
||||
if files_out_of_order > 0 and args['enforce']:
|
||||
if files_out_of_order > 0 and args["enforce"]:
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
@ -1 +0,0 @@
|
||||
isledecomp
|
@ -0,0 +1,5 @@
|
||||
from .bin import *
|
||||
from .dir import *
|
||||
from .parser import *
|
||||
from .syminfo import *
|
||||
from .utils import *
|
47
tools/isledecomp/isledecomp/bin.py
Normal file
47
tools/isledecomp/isledecomp/bin.py
Normal file
@ -0,0 +1,47 @@
|
||||
import struct
|
||||
|
||||
|
||||
# Declare a class that can automatically convert virtual executable addresses
|
||||
# to file addresses
|
||||
class Bin:
|
||||
def __init__(self, filename, logger):
|
||||
self.logger = logger
|
||||
self.logger.debug('Parsing headers of "%s"... ', filename)
|
||||
self.filename = filename
|
||||
self.file = None
|
||||
self.imagebase = None
|
||||
self.textvirt = None
|
||||
self.textraw = None
|
||||
|
||||
def __enter__(self):
|
||||
self.logger.debug(f"Bin {self.filename} Enter")
|
||||
self.file = open(self.filename, "rb")
|
||||
|
||||
# HACK: Strictly, we should be parsing the header, but we know where
|
||||
# everything is in these two files so we just jump straight there
|
||||
|
||||
# Read ImageBase
|
||||
self.file.seek(0xB4)
|
||||
(self.imagebase,) = struct.unpack("<i", self.file.read(4))
|
||||
|
||||
# Read .text VirtualAddress
|
||||
self.file.seek(0x184)
|
||||
(self.textvirt,) = struct.unpack("<i", self.file.read(4))
|
||||
|
||||
# Read .text PointerToRawData
|
||||
self.file.seek(0x18C)
|
||||
(self.textraw,) = struct.unpack("<i", self.file.read(4))
|
||||
self.logger.debug("... Parsing finished")
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_value, exc_traceback):
|
||||
self.logger.debug(f"Bin {self.filename} Exit")
|
||||
if self.file:
|
||||
self.file.close()
|
||||
|
||||
def get_addr(self, virt):
|
||||
return virt - self.imagebase - self.textvirt + self.textraw
|
||||
|
||||
def read(self, offset, size):
|
||||
self.file.seek(self.get_addr(offset))
|
||||
return self.file.read(size)
|
@ -1,10 +1,48 @@
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import Iterator
|
||||
|
||||
|
||||
class WinePathConverter:
|
||||
def __init__(self, unix_cwd):
|
||||
self.unix_cwd = unix_cwd
|
||||
self.win_cwd = self._call_winepath_unix2win(self.unix_cwd)
|
||||
|
||||
def get_wine_path(self, unix_fn: str) -> str:
|
||||
if unix_fn.startswith("./"):
|
||||
return self.win_cwd + "\\" + unix_fn[2:].replace("/", "\\")
|
||||
if unix_fn.startswith(self.unix_cwd):
|
||||
return (
|
||||
self.win_cwd
|
||||
+ "\\"
|
||||
+ unix_fn.removeprefix(self.unix_cwd).replace("/", "\\").lstrip("\\")
|
||||
)
|
||||
return self._call_winepath_unix2win(unix_fn)
|
||||
|
||||
def get_unix_path(self, win_fn: str) -> str:
|
||||
if win_fn.startswith(".\\") or win_fn.startswith("./"):
|
||||
return self.unix_cwd + "/" + win_fn[2:].replace("\\", "/")
|
||||
if win_fn.startswith(self.win_cwd):
|
||||
return (
|
||||
self.unix_cwd
|
||||
+ "/"
|
||||
+ win_fn.removeprefix(self.win_cwd).replace("\\", "/")
|
||||
)
|
||||
return self._call_winepath_win2unix(win_fn)
|
||||
|
||||
@staticmethod
|
||||
def _call_winepath_unix2win(fn: str) -> str:
|
||||
return subprocess.check_output(["winepath", "-w", fn], text=True).strip()
|
||||
|
||||
@staticmethod
|
||||
def _call_winepath_win2unix(fn: str) -> str:
|
||||
return subprocess.check_output(["winepath", fn], text=True).strip()
|
||||
|
||||
|
||||
def is_file_cpp(filename: str) -> bool:
|
||||
(basefile, ext) = os.path.splitext(filename)
|
||||
return ext.lower() in ('.h', '.cpp')
|
||||
(_, ext) = os.path.splitext(filename)
|
||||
return ext.lower() in (".h", ".cpp")
|
||||
|
||||
|
||||
def walk_source_dir(source: str, recursive: bool = True) -> Iterator[str]:
|
||||
@ -12,10 +50,14 @@ def walk_source_dir(source: str, recursive: bool = True) -> Iterator[str]:
|
||||
any C++ files found."""
|
||||
|
||||
source = os.path.abspath(source)
|
||||
for subdir, dirs, files in os.walk(source):
|
||||
for subdir, _, files in os.walk(source):
|
||||
for file in files:
|
||||
if is_file_cpp(file):
|
||||
yield os.path.join(subdir, file)
|
||||
|
||||
if not recursive:
|
||||
break
|
||||
|
||||
|
||||
def get_file_in_script_dir(fn):
|
||||
return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)
|
||||
|
@ -7,7 +7,6 @@ from .util import (
|
||||
OffsetMatch,
|
||||
is_blank_or_comment,
|
||||
match_offset_comment,
|
||||
is_exact_offset_comment,
|
||||
get_template_function_name,
|
||||
remove_trailing_comment,
|
||||
distinct_by_module,
|
||||
@ -51,14 +50,16 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
||||
# Our list of offset marks could have duplicates on
|
||||
# module name, so we'll eliminate those now.
|
||||
for offset_match in distinct_by_module(offset_matches):
|
||||
block = CodeBlock(offset=offset_match.address,
|
||||
block = CodeBlock(
|
||||
offset=offset_match.address,
|
||||
signature=function_sig,
|
||||
start_line=start_line,
|
||||
end_line=end_line,
|
||||
offset_comment=offset_match.comment,
|
||||
module=offset_match.module,
|
||||
is_template=offset_match.is_template,
|
||||
is_stub=offset_match.is_stub)
|
||||
is_stub=offset_match.is_stub,
|
||||
)
|
||||
blocks.append(block)
|
||||
offset_matches = []
|
||||
state = ReaderState.WANT_OFFSET
|
||||
@ -66,15 +67,18 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
||||
if can_seek:
|
||||
line_no += 1
|
||||
line = stream.readline()
|
||||
if line == '':
|
||||
if line == "":
|
||||
break
|
||||
|
||||
new_match = match_offset_comment(line)
|
||||
if new_match is not None:
|
||||
# We will allow multiple offsets if we have just begun
|
||||
# the code block, but not after we hit the curly brace.
|
||||
if state in (ReaderState.WANT_OFFSET, ReaderState.IN_TEMPLATE,
|
||||
ReaderState.WANT_SIG):
|
||||
if state in (
|
||||
ReaderState.WANT_OFFSET,
|
||||
ReaderState.IN_TEMPLATE,
|
||||
ReaderState.WANT_SIG,
|
||||
):
|
||||
# If we detected an offset marker unexpectedly,
|
||||
# we are handling it here so we can continue seeking.
|
||||
can_seek = True
|
||||
@ -116,11 +120,10 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
||||
# same line. clang-format should prevent this (BraceWrapping)
|
||||
# but it is easy to detect.
|
||||
# If the entire function is on one line, handle that too.
|
||||
if function_sig.endswith('{'):
|
||||
if function_sig.endswith("{"):
|
||||
start_line = line_no
|
||||
state = ReaderState.IN_FUNC
|
||||
elif (function_sig.endswith('}') or
|
||||
function_sig.endswith('};')):
|
||||
elif function_sig.endswith("}") or function_sig.endswith("};"):
|
||||
start_line = line_no
|
||||
end_line = line_no
|
||||
state = ReaderState.FUNCTION_DONE
|
||||
@ -128,14 +131,14 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
||||
state = ReaderState.WANT_CURLY
|
||||
|
||||
elif state == ReaderState.WANT_CURLY:
|
||||
if line.strip() == '{':
|
||||
if line.strip() == "{":
|
||||
start_line = line_no
|
||||
state = ReaderState.IN_FUNC
|
||||
|
||||
elif state == ReaderState.IN_FUNC:
|
||||
# Naive but reasonable assumption that functions will end with
|
||||
# a curly brace on its own line with no prepended spaces.
|
||||
if line.startswith('}'):
|
||||
if line.startswith("}"):
|
||||
end_line = line_no
|
||||
state = ReaderState.FUNCTION_DONE
|
||||
|
||||
|
@ -5,34 +5,49 @@ from typing import List
|
||||
from collections import namedtuple
|
||||
|
||||
|
||||
CodeBlock = namedtuple('CodeBlock',
|
||||
['offset', 'signature', 'start_line', 'end_line',
|
||||
'offset_comment', 'module', 'is_template', 'is_stub'])
|
||||
CodeBlock = namedtuple(
|
||||
"CodeBlock",
|
||||
[
|
||||
"offset",
|
||||
"signature",
|
||||
"start_line",
|
||||
"end_line",
|
||||
"offset_comment",
|
||||
"module",
|
||||
"is_template",
|
||||
"is_stub",
|
||||
],
|
||||
)
|
||||
|
||||
OffsetMatch = namedtuple('OffsetMatch', ['module', 'address', 'is_template',
|
||||
'is_stub', 'comment'])
|
||||
OffsetMatch = namedtuple(
|
||||
"OffsetMatch", ["module", "address", "is_template", "is_stub", "comment"]
|
||||
)
|
||||
|
||||
# This has not been formally established, but considering that "STUB"
|
||||
# is a temporary state for a function, we assume it will appear last,
|
||||
# after any other modifiers (i.e. TEMPLATE)
|
||||
|
||||
# To match a reasonable variance of formatting for the offset comment
|
||||
offsetCommentRegex = re.compile(r'\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?', # nopep8
|
||||
flags=re.I)
|
||||
offsetCommentRegex = re.compile(
|
||||
r"\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?", # nopep8
|
||||
flags=re.I,
|
||||
)
|
||||
|
||||
# To match the exact syntax (text upper case, hex lower case, with spaces)
|
||||
# that is used in most places
|
||||
offsetCommentExactRegex = re.compile(r'^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$') # nopep8
|
||||
offsetCommentExactRegex = re.compile(
|
||||
r"^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$"
|
||||
) # nopep8
|
||||
|
||||
|
||||
# The goal here is to just read whatever is on the next line, so some
|
||||
# flexibility in the formatting seems OK
|
||||
templateCommentRegex = re.compile(r'\s*//\s+(.*)')
|
||||
templateCommentRegex = re.compile(r"\s*//\s+(.*)")
|
||||
|
||||
|
||||
# To remove any comment (//) or block comment (/*) and its leading spaces
|
||||
# from the end of a code line
|
||||
trailingCommentRegex = re.compile(r'(\s*(?://|/\*).*)$')
|
||||
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
|
||||
|
||||
|
||||
def get_template_function_name(line: str) -> str:
|
||||
@ -47,7 +62,7 @@ def get_template_function_name(line: str) -> str:
|
||||
|
||||
|
||||
def remove_trailing_comment(line: str) -> str:
|
||||
return trailingCommentRegex.sub('', line)
|
||||
return trailingCommentRegex.sub("", line)
|
||||
|
||||
|
||||
def is_blank_or_comment(line: str) -> bool:
|
||||
@ -55,10 +70,12 @@ def is_blank_or_comment(line: str) -> bool:
|
||||
There could be blank lines or other comments before the
|
||||
function signature, and we want to skip those."""
|
||||
line_strip = line.strip()
|
||||
return (len(line_strip) == 0
|
||||
or line_strip.startswith('//')
|
||||
or line_strip.startswith('/*')
|
||||
or line_strip.endswith('*/'))
|
||||
return (
|
||||
len(line_strip) == 0
|
||||
or line_strip.startswith("//")
|
||||
or line_strip.startswith("/*")
|
||||
or line_strip.endswith("*/")
|
||||
)
|
||||
|
||||
|
||||
def is_exact_offset_comment(line: str) -> bool:
|
||||
@ -72,11 +89,13 @@ def match_offset_comment(line: str) -> OffsetMatch | None:
|
||||
if match is None:
|
||||
return None
|
||||
|
||||
return OffsetMatch(module=match.group(1),
|
||||
return OffsetMatch(
|
||||
module=match.group(1),
|
||||
address=int(match.group(2), 16),
|
||||
is_template=match.group(3) is not None,
|
||||
is_stub=match.group(4) is not None,
|
||||
comment=line.strip())
|
||||
comment=line.strip(),
|
||||
)
|
||||
|
||||
|
||||
def distinct_by_module(offsets: List) -> List:
|
||||
|
138
tools/isledecomp/isledecomp/syminfo.py
Normal file
138
tools/isledecomp/isledecomp/syminfo.py
Normal file
@ -0,0 +1,138 @@
|
||||
import os
|
||||
import subprocess
|
||||
from .utils import get_file_in_script_dir
|
||||
|
||||
|
||||
class RecompiledInfo:
|
||||
addr = None
|
||||
size = None
|
||||
name = None
|
||||
start = None
|
||||
|
||||
|
||||
# Declare a class that parses the output of cvdump for fast access later
|
||||
class SymInfo:
|
||||
funcs = {}
|
||||
lines = {}
|
||||
names = {}
|
||||
|
||||
def __init__(self, pdb, sym_recompfile, sym_logger, sym_wine_path_converter=None):
|
||||
self.logger = sym_logger
|
||||
call = [get_file_in_script_dir("cvdump.exe"), "-l", "-s"]
|
||||
|
||||
if sym_wine_path_converter:
|
||||
# Run cvdump through wine and convert path to Windows-friendly wine path
|
||||
call.insert(0, "wine")
|
||||
call.append(sym_wine_path_converter.get_wine_path(pdb))
|
||||
else:
|
||||
call.append(pdb)
|
||||
|
||||
self.logger.info("Parsing %s ...", pdb)
|
||||
self.logger.debug("Command = %s", call)
|
||||
line_dump = subprocess.check_output(call).decode("utf-8").split("\r\n")
|
||||
|
||||
current_section = None
|
||||
|
||||
self.logger.debug("Parsing output of cvdump.exe ...")
|
||||
|
||||
for i, line in enumerate(line_dump):
|
||||
if line.startswith("***"):
|
||||
current_section = line[4:]
|
||||
|
||||
if current_section == "SYMBOLS" and "S_GPROC32" in line:
|
||||
sym_addr = int(line[26:34], 16)
|
||||
|
||||
info = RecompiledInfo()
|
||||
info.addr = (
|
||||
sym_addr + sym_recompfile.imagebase + sym_recompfile.textvirt
|
||||
)
|
||||
|
||||
use_dbg_offs = False
|
||||
if use_dbg_offs:
|
||||
debug_offs = line_dump[i + 2]
|
||||
debug_start = int(debug_offs[22:30], 16)
|
||||
debug_end = int(debug_offs[43:], 16)
|
||||
|
||||
info.start = debug_start
|
||||
info.size = debug_end - debug_start
|
||||
else:
|
||||
info.start = 0
|
||||
info.size = int(line[41:49], 16)
|
||||
|
||||
info.name = line[77:]
|
||||
|
||||
self.names[info.name] = info
|
||||
self.funcs[sym_addr] = info
|
||||
elif (
|
||||
current_section == "LINES"
|
||||
and line.startswith(" ")
|
||||
and not line.startswith(" ")
|
||||
):
|
||||
sourcepath = line.split()[0]
|
||||
|
||||
if sym_wine_path_converter:
|
||||
# Convert filename to Unix path for file compare
|
||||
sourcepath = sym_wine_path_converter.get_unix_path(sourcepath)
|
||||
|
||||
if sourcepath not in self.lines:
|
||||
self.lines[sourcepath] = {}
|
||||
|
||||
j = i + 2
|
||||
while True:
|
||||
ll = line_dump[j].split()
|
||||
if len(ll) == 0:
|
||||
break
|
||||
|
||||
k = 0
|
||||
while k < len(ll):
|
||||
linenum = int(ll[k + 0])
|
||||
address = int(ll[k + 1], 16)
|
||||
if linenum not in self.lines[sourcepath]:
|
||||
self.lines[sourcepath][linenum] = address
|
||||
k += 2
|
||||
|
||||
j += 1
|
||||
|
||||
self.logger.debug("... Parsing output of cvdump.exe finished")
|
||||
|
||||
def get_recompiled_address(self, filename, line):
|
||||
recompiled_addr = None
|
||||
|
||||
self.logger.debug("Looking for %s:%s", filename, line)
|
||||
filename_basename = os.path.basename(filename).lower()
|
||||
|
||||
for fn in self.lines:
|
||||
# Sometimes a PDB is compiled with a relative path while we always have
|
||||
# an absolute path. Therefore we must
|
||||
try:
|
||||
if os.path.basename(
|
||||
fn
|
||||
).lower() == filename_basename and os.path.samefile(fn, filename):
|
||||
filename = fn
|
||||
break
|
||||
except FileNotFoundError:
|
||||
continue
|
||||
|
||||
if filename in self.lines and line in self.lines[filename]:
|
||||
recompiled_addr = self.lines[filename][line]
|
||||
|
||||
if recompiled_addr in self.funcs:
|
||||
return self.funcs[recompiled_addr]
|
||||
self.logger.error(
|
||||
"Failed to find function symbol with address: %x", recompiled_addr
|
||||
)
|
||||
return None
|
||||
self.logger.error(
|
||||
"Failed to find function symbol with filename and line: %s:%s",
|
||||
filename,
|
||||
line,
|
||||
)
|
||||
return None
|
||||
|
||||
def get_recompiled_address_from_name(self, name):
|
||||
self.logger.debug("Looking for %s", name)
|
||||
|
||||
if name in self.names:
|
||||
return self.names[name]
|
||||
self.logger.error("Failed to find function symbol with name: %s", name)
|
||||
return None
|
42
tools/isledecomp/isledecomp/utils.py
Normal file
42
tools/isledecomp/isledecomp/utils.py
Normal file
@ -0,0 +1,42 @@
|
||||
import os
|
||||
import sys
|
||||
import colorama
|
||||
|
||||
|
||||
def print_diff(udiff, plain):
|
||||
has_diff = False
|
||||
for line in udiff:
|
||||
has_diff = True
|
||||
color = ""
|
||||
if line.startswith("++") or line.startswith("@@") or line.startswith("--"):
|
||||
# Skip unneeded parts of the diff for the brief view
|
||||
continue
|
||||
# Work out color if we are printing color
|
||||
if not plain:
|
||||
if line.startswith("+"):
|
||||
color = colorama.Fore.GREEN
|
||||
elif line.startswith("-"):
|
||||
color = colorama.Fore.RED
|
||||
print(color + line)
|
||||
# Reset color if we're printing in color
|
||||
if not plain:
|
||||
print(colorama.Style.RESET_ALL, end="")
|
||||
return has_diff
|
||||
|
||||
|
||||
def get_file_in_script_dir(fn):
|
||||
return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)
|
||||
|
||||
|
||||
class OffsetPlaceholderGenerator:
|
||||
def __init__(self):
|
||||
self.counter = 0
|
||||
self.replacements = {}
|
||||
|
||||
def get(self, replace_addr):
|
||||
if replace_addr in self.replacements:
|
||||
return self.replacements[replace_addr]
|
||||
self.counter += 1
|
||||
replacement = f"<OFFSET{self.counter}>"
|
||||
self.replacements[replace_addr] = replacement
|
||||
return replacement
|
@ -1,9 +1,9 @@
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
setup(
|
||||
name='isledecomp',
|
||||
version='0.1.0',
|
||||
description='Python tools for the isledecomp project',
|
||||
name="isledecomp",
|
||||
version="0.1.0",
|
||||
description="Python tools for the isledecomp project",
|
||||
packages=find_packages(),
|
||||
tests_require=['pytest'],
|
||||
tests_require=["pytest"],
|
||||
)
|
||||
|
@ -1,17 +1,16 @@
|
||||
import os
|
||||
import pytest
|
||||
from typing import List, TextIO
|
||||
from isledecomp.parser import find_code_blocks
|
||||
from isledecomp.parser.util import CodeBlock
|
||||
|
||||
SAMPLE_DIR = os.path.join(os.path.dirname(__file__), 'samples')
|
||||
SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples")
|
||||
|
||||
|
||||
def sample_file(filename: str) -> TextIO:
|
||||
"""Wrapper for opening the samples from the directory that does not
|
||||
depend on the cwd where we run the test"""
|
||||
full_path = os.path.join(SAMPLE_DIR, filename)
|
||||
return open(full_path, 'r')
|
||||
return open(full_path, "r", encoding="utf-8")
|
||||
|
||||
|
||||
def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool:
|
||||
@ -25,7 +24,7 @@ def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool:
|
||||
|
||||
def test_sanity():
|
||||
"""Read a very basic file"""
|
||||
with sample_file('basic_file.cpp') as f:
|
||||
with sample_file("basic_file.cpp") as f:
|
||||
blocks = find_code_blocks(f)
|
||||
|
||||
assert len(blocks) == 3
|
||||
@ -39,7 +38,7 @@ def test_sanity():
|
||||
def test_oneline():
|
||||
"""(Assuming clang-format permits this) This sample has a function
|
||||
on a single line. This will test the end-of-function detection"""
|
||||
with sample_file('oneline_function.cpp') as f:
|
||||
with sample_file("oneline_function.cpp") as f:
|
||||
blocks = find_code_blocks(f)
|
||||
|
||||
assert len(blocks) == 2
|
||||
@ -49,7 +48,7 @@ def test_oneline():
|
||||
|
||||
def test_missing_offset():
|
||||
"""What if the function doesn't have an offset comment?"""
|
||||
with sample_file('missing_offset.cpp') as f:
|
||||
with sample_file("missing_offset.cpp") as f:
|
||||
blocks = find_code_blocks(f)
|
||||
|
||||
# TODO: For now, the function without the offset will just be ignored.
|
||||
@ -62,7 +61,7 @@ def test_jumbled_case():
|
||||
"""The parser just reports what it sees. It is the responsibility of
|
||||
the downstream tools to do something about a jumbled file.
|
||||
Just verify that we are reading it correctly."""
|
||||
with sample_file('out_of_order.cpp') as f:
|
||||
with sample_file("out_of_order.cpp") as f:
|
||||
blocks = find_code_blocks(f)
|
||||
|
||||
assert len(blocks) == 3
|
||||
@ -70,7 +69,7 @@ def test_jumbled_case():
|
||||
|
||||
|
||||
def test_bad_file():
|
||||
with sample_file('poorly_formatted.cpp') as f:
|
||||
with sample_file("poorly_formatted.cpp") as f:
|
||||
blocks = find_code_blocks(f)
|
||||
|
||||
assert len(blocks) == 3
|
||||
@ -78,7 +77,7 @@ def test_bad_file():
|
||||
|
||||
def test_indented():
|
||||
"""Offsets for functions inside of a class will probably be indented."""
|
||||
with sample_file('basic_class.cpp') as f:
|
||||
with sample_file("basic_class.cpp") as f:
|
||||
blocks = find_code_blocks(f)
|
||||
|
||||
# TODO: We don't properly detect the end of these functions
|
||||
@ -87,17 +86,17 @@ def test_indented():
|
||||
# all the functions that are there.
|
||||
|
||||
assert len(blocks) == 2
|
||||
assert blocks[0].offset == int('0x12345678', 16)
|
||||
assert blocks[0].offset == int("0x12345678", 16)
|
||||
assert blocks[0].start_line == 15
|
||||
# assert blocks[0].end_line == 18
|
||||
|
||||
assert blocks[1].offset == int('0xdeadbeef', 16)
|
||||
assert blocks[1].offset == int("0xdeadbeef", 16)
|
||||
assert blocks[1].start_line == 22
|
||||
# assert blocks[1].end_line == 24
|
||||
|
||||
|
||||
def test_inline():
|
||||
with sample_file('inline.cpp') as f:
|
||||
with sample_file("inline.cpp") as f:
|
||||
blocks = find_code_blocks(f)
|
||||
|
||||
assert len(blocks) == 2
|
||||
@ -110,19 +109,19 @@ def test_multiple_offsets():
|
||||
"""If multiple offset marks appear before for a code block, take them
|
||||
all but ensure module name (case-insensitive) is distinct.
|
||||
Use first module occurrence in case of duplicates."""
|
||||
with sample_file('multiple_offsets.cpp') as f:
|
||||
with sample_file("multiple_offsets.cpp") as f:
|
||||
blocks = find_code_blocks(f)
|
||||
|
||||
assert len(blocks) == 4
|
||||
assert blocks[0].module == 'TEST'
|
||||
assert blocks[0].module == "TEST"
|
||||
assert blocks[0].start_line == 9
|
||||
|
||||
assert blocks[1].module == 'HELLO'
|
||||
assert blocks[1].module == "HELLO"
|
||||
assert blocks[1].start_line == 9
|
||||
|
||||
# Duplicate modules are ignored
|
||||
assert blocks[2].start_line == 16
|
||||
assert blocks[2].offset == 0x2345
|
||||
|
||||
assert blocks[3].module == 'TEST'
|
||||
assert blocks[3].module == "TEST"
|
||||
assert blocks[3].offset == 0x2002
|
||||
|
@ -1,6 +1,6 @@
|
||||
import pytest
|
||||
from collections import namedtuple
|
||||
from typing import List
|
||||
import pytest
|
||||
from isledecomp.parser.util import (
|
||||
is_blank_or_comment,
|
||||
match_offset_comment,
|
||||
@ -10,21 +10,20 @@ from isledecomp.parser.util import (
|
||||
|
||||
|
||||
blank_or_comment_param = [
|
||||
(True, ''),
|
||||
(True, '\t'),
|
||||
(True, ' '),
|
||||
(False, '\tint abc=123;'),
|
||||
(True, '// OFFSET: LEGO1 0xdeadbeef'),
|
||||
(True, ' /* Block comment beginning'),
|
||||
(True, 'Block comment ending */ '),
|
||||
|
||||
(True, ""),
|
||||
(True, "\t"),
|
||||
(True, " "),
|
||||
(False, "\tint abc=123;"),
|
||||
(True, "// OFFSET: LEGO1 0xdeadbeef"),
|
||||
(True, " /* Block comment beginning"),
|
||||
(True, "Block comment ending */ "),
|
||||
# TODO: does clang-format have anything to say about these cases?
|
||||
(False, 'x++; // Comment folows'),
|
||||
(False, 'x++; /* Block comment begins'),
|
||||
(False, "x++; // Comment folows"),
|
||||
(False, "x++; /* Block comment begins"),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize('expected, line', blank_or_comment_param)
|
||||
@pytest.mark.parametrize("expected, line", blank_or_comment_param)
|
||||
def test_is_blank_or_comment(line: str, expected: bool):
|
||||
assert is_blank_or_comment(line) is expected
|
||||
|
||||
@ -32,82 +31,73 @@ def test_is_blank_or_comment(line: str, expected: bool):
|
||||
offset_comment_samples = [
|
||||
# (can_parse: bool, exact_match: bool, line: str)
|
||||
# Should match both expected modules with optional STUB marker
|
||||
(True, True, '// OFFSET: LEGO1 0xdeadbeef'),
|
||||
(True, True, '// OFFSET: LEGO1 0xdeadbeef STUB'),
|
||||
(True, True, '// OFFSET: ISLE 0x12345678'),
|
||||
(True, True, '// OFFSET: ISLE 0x12345678 STUB'),
|
||||
|
||||
(True, True, "// OFFSET: LEGO1 0xdeadbeef"),
|
||||
(True, True, "// OFFSET: LEGO1 0xdeadbeef STUB"),
|
||||
(True, True, "// OFFSET: ISLE 0x12345678"),
|
||||
(True, True, "// OFFSET: ISLE 0x12345678 STUB"),
|
||||
# No trailing spaces allowed
|
||||
(True, False, '// OFFSET: LEGO1 0xdeadbeef '),
|
||||
(True, False, '// OFFSET: LEGO1 0xdeadbeef STUB '),
|
||||
|
||||
(True, False, "// OFFSET: LEGO1 0xdeadbeef "),
|
||||
(True, False, "// OFFSET: LEGO1 0xdeadbeef STUB "),
|
||||
# Must have exactly one space between elements
|
||||
(True, False, '//OFFSET: ISLE 0xdeadbeef'),
|
||||
(True, False, '// OFFSET:ISLE 0xdeadbeef'),
|
||||
(True, False, '// OFFSET: ISLE 0xdeadbeef'),
|
||||
(True, False, '// OFFSET: ISLE 0xdeadbeef'),
|
||||
(True, False, '// OFFSET: ISLE 0xdeadbeef'),
|
||||
(True, False, '// OFFSET: ISLE 0xdeadbeef STUB'),
|
||||
|
||||
(True, False, "//OFFSET: ISLE 0xdeadbeef"),
|
||||
(True, False, "// OFFSET:ISLE 0xdeadbeef"),
|
||||
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
|
||||
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
|
||||
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
|
||||
(True, False, "// OFFSET: ISLE 0xdeadbeef STUB"),
|
||||
# Must have 0x prefix for hex number
|
||||
(True, False, '// OFFSET: ISLE deadbeef'),
|
||||
|
||||
(True, False, "// OFFSET: ISLE deadbeef"),
|
||||
# Offset, module name, and STUB must be uppercase
|
||||
(True, False, '// offset: ISLE 0xdeadbeef'),
|
||||
(True, False, '// offset: isle 0xdeadbeef'),
|
||||
(True, False, '// OFFSET: LEGO1 0xdeadbeef stub'),
|
||||
|
||||
(True, False, "// offset: ISLE 0xdeadbeef"),
|
||||
(True, False, "// offset: isle 0xdeadbeef"),
|
||||
(True, False, "// OFFSET: LEGO1 0xdeadbeef stub"),
|
||||
# Hex string must be lowercase
|
||||
(True, False, '// OFFSET: ISLE 0xDEADBEEF'),
|
||||
|
||||
(True, False, "// OFFSET: ISLE 0xDEADBEEF"),
|
||||
# TODO: How flexible should we be with matching the module name?
|
||||
(True, True, '// OFFSET: OMNI 0x12345678'),
|
||||
(True, True, '// OFFSET: LEG01 0x12345678'),
|
||||
(True, False, '// OFFSET: hello 0x12345678'),
|
||||
|
||||
(True, True, "// OFFSET: OMNI 0x12345678"),
|
||||
(True, True, "// OFFSET: LEG01 0x12345678"),
|
||||
(True, False, "// OFFSET: hello 0x12345678"),
|
||||
# Not close enough to match
|
||||
(False, False, '// OFFSET: ISLE0x12345678'),
|
||||
(False, False, '// OFFSET: 0x12345678'),
|
||||
(False, False, '// LEGO1: 0x12345678'),
|
||||
|
||||
(False, False, "// OFFSET: ISLE0x12345678"),
|
||||
(False, False, "// OFFSET: 0x12345678"),
|
||||
(False, False, "// LEGO1: 0x12345678"),
|
||||
# Hex string shorter than 8 characters
|
||||
(True, True, '// OFFSET: LEGO1 0x1234'),
|
||||
|
||||
(True, True, "// OFFSET: LEGO1 0x1234"),
|
||||
# TODO: These match but shouldn't.
|
||||
# (False, False, '// OFFSET: LEGO1 0'),
|
||||
# (False, False, '// OFFSET: LEGO1 0x'),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize('match, exact, line', offset_comment_samples)
|
||||
def test_offset_match(line: str, match: bool, exact):
|
||||
@pytest.mark.parametrize("match, _, line", offset_comment_samples)
|
||||
def test_offset_match(line: str, match: bool, _):
|
||||
did_match = match_offset_comment(line) is not None
|
||||
assert did_match is match
|
||||
|
||||
|
||||
@pytest.mark.parametrize('match, exact, line', offset_comment_samples)
|
||||
def test_exact_offset_comment(line: str, exact: bool, match):
|
||||
@pytest.mark.parametrize("_, exact, line", offset_comment_samples)
|
||||
def test_exact_offset_comment(line: str, exact: bool, _):
|
||||
assert is_exact_offset_comment(line) is exact
|
||||
|
||||
|
||||
# Helper for the next test: cut down version of OffsetMatch
|
||||
MiniOfs = namedtuple('MiniOfs', ['module', 'value'])
|
||||
MiniOfs = namedtuple("MiniOfs", ["module", "value"])
|
||||
|
||||
distinct_by_module_samples = [
|
||||
# empty set
|
||||
([], []),
|
||||
# same module name
|
||||
([MiniOfs('TEST', 123), MiniOfs('TEST', 555)],
|
||||
[MiniOfs('TEST', 123)]),
|
||||
([MiniOfs("TEST", 123), MiniOfs("TEST", 555)], [MiniOfs("TEST", 123)]),
|
||||
# same module name, case-insensitive
|
||||
([MiniOfs('test', 123), MiniOfs('TEST', 555)],
|
||||
[MiniOfs('test', 123)]),
|
||||
([MiniOfs("test", 123), MiniOfs("TEST", 555)], [MiniOfs("test", 123)]),
|
||||
# duplicates, non-consecutive
|
||||
([MiniOfs('test', 123), MiniOfs('abc', 111), MiniOfs('TEST', 555)],
|
||||
[MiniOfs('test', 123), MiniOfs('abc', 111)]),
|
||||
(
|
||||
[MiniOfs("test", 123), MiniOfs("abc", 111), MiniOfs("TEST", 555)],
|
||||
[MiniOfs("test", 123), MiniOfs("abc", 111)],
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize('sample, expected', distinct_by_module_samples)
|
||||
@pytest.mark.parametrize("sample, expected", distinct_by_module_samples)
|
||||
def test_distinct_by_module(sample: List[MiniOfs], expected: List[MiniOfs]):
|
||||
assert distinct_by_module(sample) == expected
|
||||
|
@ -2,284 +2,51 @@
|
||||
|
||||
import argparse
|
||||
import base64
|
||||
from capstone import *
|
||||
import difflib
|
||||
import struct
|
||||
import subprocess
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import colorama
|
||||
import json
|
||||
import re
|
||||
from isledecomp.dir import walk_source_dir
|
||||
from isledecomp.parser import find_code_blocks
|
||||
|
||||
from isledecomp import (
|
||||
Bin,
|
||||
find_code_blocks,
|
||||
get_file_in_script_dir,
|
||||
OffsetPlaceholderGenerator,
|
||||
print_diff,
|
||||
SymInfo,
|
||||
walk_source_dir,
|
||||
WinePathConverter,
|
||||
)
|
||||
|
||||
from capstone import Cs, CS_ARCH_X86, CS_MODE_32
|
||||
import colorama
|
||||
from pystache import Renderer
|
||||
|
||||
parser = argparse.ArgumentParser(allow_abbrev=False,
|
||||
description='Recompilation Compare: compare an original EXE with a recompiled EXE + PDB.')
|
||||
parser.add_argument('original', metavar='original-binary', help='The original binary')
|
||||
parser.add_argument('recompiled', metavar='recompiled-binary', help='The recompiled binary')
|
||||
parser.add_argument('pdb', metavar='recompiled-pdb', help='The PDB of the recompiled binary')
|
||||
parser.add_argument('decomp_dir', metavar='decomp-dir', help='The decompiled source tree')
|
||||
parser.add_argument('--total', '-T', metavar='<count>', help='Total number of expected functions (improves total accuracy statistic)')
|
||||
parser.add_argument('--verbose', '-v', metavar='<offset>', help='Print assembly diff for specific function (original file\'s offset)')
|
||||
parser.add_argument('--html', '-H', metavar='<file>', help='Generate searchable HTML summary of status and diffs')
|
||||
parser.add_argument('--no-color', '-n', action='store_true', help='Do not color the output')
|
||||
parser.add_argument('--svg', '-S', metavar='<file>', help='Generate SVG graphic of progress')
|
||||
parser.add_argument('--svg-icon', metavar='icon', help='Icon to use in SVG (PNG)')
|
||||
parser.add_argument('--print-rec-addr', action='store_true', help='Print addresses of recompiled functions too')
|
||||
REGISTER_LIST = set(
|
||||
[
|
||||
"ax",
|
||||
"bp",
|
||||
"bx",
|
||||
"cx",
|
||||
"di",
|
||||
"dx",
|
||||
"eax",
|
||||
"ebp",
|
||||
"ebx",
|
||||
"ecx",
|
||||
"edi",
|
||||
"edx",
|
||||
"esi",
|
||||
"esp",
|
||||
"si",
|
||||
"sp",
|
||||
]
|
||||
)
|
||||
WORDS = re.compile(r"\w+")
|
||||
|
||||
parser.set_defaults(loglevel=logging.INFO)
|
||||
parser.add_argument('--debug', action='store_const', const=logging.DEBUG, dest='loglevel', help='Print script debug information')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
logging.basicConfig(level=args.loglevel, format='[%(levelname)s] %(message)s')
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
colorama.init()
|
||||
|
||||
verbose = None
|
||||
found_verbose_target = False
|
||||
if args.verbose:
|
||||
try:
|
||||
verbose = int(args.verbose, 16)
|
||||
except ValueError:
|
||||
parser.error('invalid verbose argument')
|
||||
html_path = args.html
|
||||
|
||||
plain = args.no_color
|
||||
|
||||
original = args.original
|
||||
if not os.path.isfile(original):
|
||||
parser.error(f'Original binary {original} does not exist')
|
||||
|
||||
recomp = args.recompiled
|
||||
if not os.path.isfile(recomp):
|
||||
parser.error(f'Recompiled binary {recomp} does not exist')
|
||||
|
||||
syms = args.pdb
|
||||
if not os.path.isfile(syms):
|
||||
parser.error(f'Symbols PDB {syms} does not exist')
|
||||
|
||||
source = args.decomp_dir
|
||||
if not os.path.isdir(source):
|
||||
parser.error(f'Source directory {source} does not exist')
|
||||
|
||||
svg = args.svg
|
||||
|
||||
# Declare a class that can automatically convert virtual executable addresses
|
||||
# to file addresses
|
||||
class Bin:
|
||||
def __init__(self, filename):
|
||||
logger.debug(f'Parsing headers of "{filename}"... ')
|
||||
self.file = open(filename, 'rb')
|
||||
|
||||
#HACK: Strictly, we should be parsing the header, but we know where
|
||||
# everything is in these two files so we just jump straight there
|
||||
|
||||
# Read ImageBase
|
||||
self.file.seek(0xB4)
|
||||
self.imagebase, = struct.unpack('<i', self.file.read(4))
|
||||
|
||||
# Read .text VirtualAddress
|
||||
self.file.seek(0x184)
|
||||
self.textvirt, = struct.unpack('<i', self.file.read(4))
|
||||
|
||||
# Read .text PointerToRawData
|
||||
self.file.seek(0x18C)
|
||||
self.textraw, = struct.unpack('<i', self.file.read(4))
|
||||
logger.debug('... Parsing finished')
|
||||
|
||||
def __del__(self):
|
||||
if self.file:
|
||||
self.file.close()
|
||||
|
||||
def get_addr(self, virt):
|
||||
return virt - self.imagebase - self.textvirt + self.textraw
|
||||
|
||||
def read(self, offset, size):
|
||||
self.file.seek(self.get_addr(offset))
|
||||
return self.file.read(size)
|
||||
|
||||
class RecompiledInfo:
|
||||
def __init__(self):
|
||||
self.addr = None
|
||||
self.size = None
|
||||
self.name = None
|
||||
self.start = None
|
||||
|
||||
class WinePathConverter:
|
||||
def __init__(self, unix_cwd):
|
||||
self.unix_cwd = unix_cwd
|
||||
self.win_cwd = self._call_winepath_unix2win(self.unix_cwd)
|
||||
|
||||
def get_wine_path(self, unix_fn: str) -> str:
|
||||
if unix_fn.startswith('./'):
|
||||
return self.win_cwd + '\\' + unix_fn[2:].replace('/', '\\')
|
||||
if unix_fn.startswith(self.unix_cwd):
|
||||
return self.win_cwd + '\\' + unix_fn.removeprefix(self.unix_cwd).replace('/', '\\').lstrip('\\')
|
||||
return self._call_winepath_unix2win(unix_fn)
|
||||
|
||||
def get_unix_path(self, win_fn: str) -> str:
|
||||
if win_fn.startswith('.\\') or win_fn.startswith('./'):
|
||||
return self.unix_cwd + '/' + win_fn[2:].replace('\\', '/')
|
||||
if win_fn.startswith(self.win_cwd):
|
||||
return self.unix_cwd + '/' + win_fn.removeprefix(self.win_cwd).replace('\\', '/')
|
||||
return self._call_winepath_win2unix(win_fn)
|
||||
|
||||
@staticmethod
|
||||
def _call_winepath_unix2win(fn: str) -> str:
|
||||
return subprocess.check_output(['winepath', '-w', fn], text=True).strip()
|
||||
|
||||
@staticmethod
|
||||
def _call_winepath_win2unix(fn: str) -> str:
|
||||
return subprocess.check_output(['winepath', fn], text=True).strip()
|
||||
|
||||
def get_file_in_script_dir(fn):
|
||||
return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)
|
||||
|
||||
# Declare a class that parses the output of cvdump for fast access later
|
||||
class SymInfo:
|
||||
funcs = {}
|
||||
lines = {}
|
||||
names = {}
|
||||
|
||||
def __init__(self, pdb, file, wine_path_converter):
|
||||
call = [get_file_in_script_dir('cvdump.exe'), '-l', '-s']
|
||||
|
||||
if wine_path_converter:
|
||||
# Run cvdump through wine and convert path to Windows-friendly wine path
|
||||
call.insert(0, 'wine')
|
||||
call.append(wine_path_converter.get_wine_path(pdb))
|
||||
else:
|
||||
call.append(pdb)
|
||||
|
||||
logger.info(f'Parsing {pdb} ...')
|
||||
logger.debug(f'Command = {call}')
|
||||
line_dump = subprocess.check_output(call).decode('utf-8').split('\r\n')
|
||||
|
||||
current_section = None
|
||||
|
||||
logger.debug('Parsing output of cvdump.exe ...')
|
||||
|
||||
for i, line in enumerate(line_dump):
|
||||
if line.startswith('***'):
|
||||
current_section = line[4:]
|
||||
|
||||
if current_section == 'SYMBOLS' and 'S_GPROC32' in line:
|
||||
addr = int(line[26:34], 16)
|
||||
|
||||
info = RecompiledInfo()
|
||||
info.addr = addr + recompfile.imagebase + recompfile.textvirt
|
||||
|
||||
use_dbg_offs = False
|
||||
if use_dbg_offs:
|
||||
debug_offs = line_dump[i + 2]
|
||||
debug_start = int(debug_offs[22:30], 16)
|
||||
debug_end = int(debug_offs[43:], 16)
|
||||
|
||||
info.start = debug_start
|
||||
info.size = debug_end - debug_start
|
||||
else:
|
||||
info.start = 0
|
||||
info.size = int(line[41:49], 16)
|
||||
|
||||
info.name = line[77:]
|
||||
|
||||
self.names[info.name] = info
|
||||
self.funcs[addr] = info
|
||||
elif current_section == 'LINES' and line.startswith(' ') and not line.startswith(' '):
|
||||
sourcepath = line.split()[0]
|
||||
|
||||
if wine_path_converter:
|
||||
# Convert filename to Unix path for file compare
|
||||
sourcepath = wine_path_converter.get_unix_path(sourcepath)
|
||||
|
||||
if sourcepath not in self.lines:
|
||||
self.lines[sourcepath] = {}
|
||||
|
||||
j = i + 2
|
||||
while True:
|
||||
ll = line_dump[j].split()
|
||||
if len(ll) == 0:
|
||||
break
|
||||
|
||||
k = 0
|
||||
while k < len(ll):
|
||||
linenum = int(ll[k + 0])
|
||||
address = int(ll[k + 1], 16)
|
||||
if linenum not in self.lines[sourcepath]:
|
||||
self.lines[sourcepath][linenum] = address
|
||||
k += 2
|
||||
|
||||
j += 1
|
||||
|
||||
logger.debug('... Parsing output of cvdump.exe finished')
|
||||
|
||||
def get_recompiled_address(self, filename, line):
|
||||
addr = None
|
||||
found = False
|
||||
|
||||
logger.debug(f'Looking for {filename}:{line}')
|
||||
filename_basename = os.path.basename(filename).lower()
|
||||
|
||||
for fn in self.lines:
|
||||
# Sometimes a PDB is compiled with a relative path while we always have
|
||||
# an absolute path. Therefore we must
|
||||
try:
|
||||
if (os.path.basename(fn).lower() == filename_basename and
|
||||
os.path.samefile(fn, filename)):
|
||||
filename = fn
|
||||
break
|
||||
except FileNotFoundError as e:
|
||||
continue
|
||||
|
||||
if filename in self.lines and line in self.lines[fn]:
|
||||
addr = self.lines[fn][line]
|
||||
|
||||
if addr in self.funcs:
|
||||
return self.funcs[addr]
|
||||
else:
|
||||
logger.error(f'Failed to find function symbol with address: 0x{addr:x}')
|
||||
else:
|
||||
logger.error(f'Failed to find function symbol with filename and line: {filename}:{line}')
|
||||
|
||||
def get_recompiled_address_from_name(self, name):
|
||||
logger.debug('Looking for %s', name)
|
||||
|
||||
if name in self.names:
|
||||
return self.names[name]
|
||||
else:
|
||||
logger.error(f'Failed to find function symbol with name: {name}')
|
||||
|
||||
wine_path_converter = None
|
||||
if os.name != 'nt':
|
||||
wine_path_converter = WinePathConverter(source)
|
||||
origfile = Bin(original)
|
||||
recompfile = Bin(recomp)
|
||||
syminfo = SymInfo(syms, recompfile, wine_path_converter)
|
||||
|
||||
print()
|
||||
|
||||
md = Cs(CS_ARCH_X86, CS_MODE_32)
|
||||
|
||||
class OffsetPlaceholderGenerator:
|
||||
def __init__(self):
|
||||
self.counter = 0
|
||||
self.replacements = {}
|
||||
|
||||
def get(self, addr):
|
||||
if addr in self.replacements:
|
||||
return self.replacements[addr]
|
||||
else:
|
||||
self.counter += 1
|
||||
replacement = f'<OFFSET{self.counter}>'
|
||||
self.replacements[addr] = replacement
|
||||
return replacement
|
||||
|
||||
def sanitize(file, placeholderGenerator, mnemonic, op_str):
|
||||
def sanitize(file, placeholder_generator, mnemonic, op_str):
|
||||
op_str_is_number = False
|
||||
try:
|
||||
int(op_str, 16)
|
||||
@ -287,76 +54,61 @@ def sanitize(file, placeholderGenerator, mnemonic, op_str):
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if (mnemonic == 'call' or mnemonic == 'jmp') and op_str_is_number:
|
||||
if (mnemonic in ["call", "jmp"]) and op_str_is_number:
|
||||
# Filter out "calls" because the offsets we're not currently trying to
|
||||
# match offsets. As long as there's a call in the right place, it's
|
||||
# probably accurate.
|
||||
op_str = placeholderGenerator.get(int(op_str, 16))
|
||||
op_str = placeholder_generator.get(int(op_str, 16))
|
||||
else:
|
||||
|
||||
def filter_out_ptr(ptype, op_str):
|
||||
try:
|
||||
ptrstr = ptype + ' ptr ['
|
||||
ptrstr = ptype + " ptr ["
|
||||
start = op_str.index(ptrstr) + len(ptrstr)
|
||||
end = op_str.index(']', start)
|
||||
end = op_str.index("]", start)
|
||||
|
||||
# This will throw ValueError if not hex
|
||||
inttest = int(op_str[start:end], 16)
|
||||
|
||||
return op_str[0:start] + placeholderGenerator.get(inttest) + op_str[end:]
|
||||
return (
|
||||
op_str[0:start] + placeholder_generator.get(inttest) + op_str[end:]
|
||||
)
|
||||
except ValueError:
|
||||
return op_str
|
||||
|
||||
# Filter out dword ptrs where the pointer is to an offset
|
||||
op_str = filter_out_ptr('dword', op_str)
|
||||
op_str = filter_out_ptr('word', op_str)
|
||||
op_str = filter_out_ptr('byte', op_str)
|
||||
op_str = filter_out_ptr("dword", op_str)
|
||||
op_str = filter_out_ptr("word", op_str)
|
||||
op_str = filter_out_ptr("byte", op_str)
|
||||
|
||||
# Use heuristics to filter out any args that look like offsets
|
||||
words = op_str.split(' ')
|
||||
words = op_str.split(" ")
|
||||
for i, word in enumerate(words):
|
||||
try:
|
||||
inttest = int(word, 16)
|
||||
if inttest >= file.imagebase + file.textvirt:
|
||||
words[i] = placeholderGenerator.get(inttest)
|
||||
words[i] = placeholder_generator.get(inttest)
|
||||
except ValueError:
|
||||
pass
|
||||
op_str = ' '.join(words)
|
||||
op_str = " ".join(words)
|
||||
|
||||
return mnemonic, op_str
|
||||
|
||||
def parse_asm(file, addr, size):
|
||||
|
||||
def parse_asm(disassembler, file, asm_addr, size):
|
||||
asm = []
|
||||
data = file.read(addr, size)
|
||||
placeholderGenerator = OffsetPlaceholderGenerator()
|
||||
for i in md.disasm(data, 0):
|
||||
data = file.read(asm_addr, size)
|
||||
placeholder_generator = OffsetPlaceholderGenerator()
|
||||
for i in disassembler.disasm(data, 0):
|
||||
# Use heuristics to disregard some differences that aren't representative
|
||||
# of the accuracy of a function (e.g. global offsets)
|
||||
mnemonic, op_str = sanitize(file, placeholderGenerator, i.mnemonic, i.op_str)
|
||||
mnemonic, op_str = sanitize(file, placeholder_generator, i.mnemonic, i.op_str)
|
||||
if op_str is None:
|
||||
asm.append(mnemonic)
|
||||
else:
|
||||
asm.append(f'{mnemonic} {op_str}')
|
||||
asm.append(f"{mnemonic} {op_str}")
|
||||
return asm
|
||||
|
||||
REGISTER_LIST = set([
|
||||
'ax',
|
||||
'bp',
|
||||
'bx',
|
||||
'cx',
|
||||
'di',
|
||||
'dx',
|
||||
'eax',
|
||||
'ebp',
|
||||
'ebx',
|
||||
'ecx',
|
||||
'edi',
|
||||
'edx',
|
||||
'esi',
|
||||
'esp',
|
||||
'si',
|
||||
'sp',
|
||||
])
|
||||
WORDS = re.compile(r'\w+')
|
||||
|
||||
def get_registers(line: str):
|
||||
to_replace = []
|
||||
@ -367,8 +119,15 @@ def get_registers(line: str):
|
||||
to_replace.append((reg, match.start()))
|
||||
return to_replace
|
||||
|
||||
def replace_register(lines: list[str], start_line: int, reg: str, replacement: str) -> list[str]:
|
||||
return [line.replace(reg, replacement) if i >= start_line else line for i, line in enumerate(lines)]
|
||||
|
||||
def replace_register(
|
||||
lines: list[str], start_line: int, reg: str, replacement: str
|
||||
) -> list[str]:
|
||||
return [
|
||||
line.replace(reg, replacement) if i >= start_line else line
|
||||
for i, line in enumerate(lines)
|
||||
]
|
||||
|
||||
|
||||
# Is it possible to make new_asm the same as original_asm by swapping registers?
|
||||
def can_resolve_register_differences(original_asm, new_asm):
|
||||
@ -382,20 +141,19 @@ def can_resolve_register_differences(original_asm, new_asm):
|
||||
return False
|
||||
|
||||
# Look for the mismatching lines
|
||||
for i in range(len(original_asm)):
|
||||
for i, original_line in enumerate(original_asm):
|
||||
new_line = new_asm[i]
|
||||
original_line = original_asm[i]
|
||||
if new_line != original_line:
|
||||
# Find all the registers to replace
|
||||
to_replace = get_registers(original_line)
|
||||
|
||||
for j in range(len(to_replace)):
|
||||
(reg, reg_index) = to_replace[j]
|
||||
replacing_reg = new_line[reg_index:reg_index + len(reg)]
|
||||
for replace in to_replace:
|
||||
(reg, reg_index) = replace
|
||||
replacing_reg = new_line[reg_index : reg_index + len(reg)]
|
||||
if replacing_reg in REGISTER_LIST:
|
||||
if replacing_reg != reg:
|
||||
# Do a three-way swap replacing in all the subsequent lines
|
||||
temp_reg = '&' * len(reg)
|
||||
temp_reg = "&" * len(reg)
|
||||
new_asm = replace_register(new_asm, i, replacing_reg, temp_reg)
|
||||
new_asm = replace_register(new_asm, i, reg, replacing_reg)
|
||||
new_asm = replace_register(new_asm, i, temp_reg, reg)
|
||||
@ -403,21 +161,160 @@ def can_resolve_register_differences(original_asm, new_asm):
|
||||
# No replacement to do, different code, bail out
|
||||
return False
|
||||
# Check if the lines are now the same
|
||||
for i in range(len(original_asm)):
|
||||
if new_asm[i] != original_asm[i]:
|
||||
for i, original_line in enumerate(original_asm):
|
||||
if new_asm[i] != original_line:
|
||||
return False
|
||||
return True
|
||||
|
||||
function_count = 0
|
||||
total_accuracy = 0
|
||||
total_effective_accuracy = 0
|
||||
htmlinsert = []
|
||||
|
||||
# Generate basename of original file, used in locating OFFSET lines
|
||||
basename = os.path.basename(os.path.splitext(original)[0])
|
||||
def gen_html(html_file, data):
|
||||
output_data = Renderer().render_path(
|
||||
get_file_in_script_dir("template.html"), {"data": data}
|
||||
)
|
||||
|
||||
for srcfilename in walk_source_dir(source):
|
||||
with open(srcfilename, 'r') as srcfile:
|
||||
with open(html_file, "w", encoding="utf-8") as htmlfile:
|
||||
htmlfile.write(output_data)
|
||||
|
||||
|
||||
def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_accuracy):
|
||||
icon_data = None
|
||||
if icon:
|
||||
with open(icon, "rb") as iconfile:
|
||||
icon_data = base64.b64encode(iconfile.read()).decode("utf-8")
|
||||
|
||||
total_statistic = raw_accuracy / total_funcs
|
||||
full_percentbar_width = 127.18422
|
||||
output_data = Renderer().render_path(
|
||||
get_file_in_script_dir("template.svg"),
|
||||
{
|
||||
"name": name_svg,
|
||||
"icon": icon_data,
|
||||
"implemented": f"{(svg_implemented_funcs / total_funcs * 100):.2f}% ({svg_implemented_funcs}/{total_funcs})",
|
||||
"accuracy": f"{(raw_accuracy / svg_implemented_funcs * 100):.2f}%",
|
||||
"progbar": total_statistic * full_percentbar_width,
|
||||
"percent": f"{(total_statistic * 100):.2f}%",
|
||||
},
|
||||
)
|
||||
with open(svg_file, "w", encoding="utf-8") as svgfile:
|
||||
svgfile.write(output_data)
|
||||
|
||||
|
||||
# Do the actual work
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(
|
||||
allow_abbrev=False,
|
||||
description="Recompilation Compare: compare an original EXE with a recompiled EXE + PDB.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"original", metavar="original-binary", help="The original binary"
|
||||
)
|
||||
parser.add_argument(
|
||||
"recompiled", metavar="recompiled-binary", help="The recompiled binary"
|
||||
)
|
||||
parser.add_argument(
|
||||
"pdb", metavar="recompiled-pdb", help="The PDB of the recompiled binary"
|
||||
)
|
||||
parser.add_argument(
|
||||
"decomp_dir", metavar="decomp-dir", help="The decompiled source tree"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--total",
|
||||
"-T",
|
||||
metavar="<count>",
|
||||
help="Total number of expected functions (improves total accuracy statistic)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--verbose",
|
||||
"-v",
|
||||
metavar="<offset>",
|
||||
help="Print assembly diff for specific function (original file's offset)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--html",
|
||||
"-H",
|
||||
metavar="<file>",
|
||||
help="Generate searchable HTML summary of status and diffs",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-color", "-n", action="store_true", help="Do not color the output"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--svg", "-S", metavar="<file>", help="Generate SVG graphic of progress"
|
||||
)
|
||||
parser.add_argument("--svg-icon", metavar="icon", help="Icon to use in SVG (PNG)")
|
||||
parser.add_argument(
|
||||
"--print-rec-addr",
|
||||
action="store_true",
|
||||
help="Print addresses of recompiled functions too",
|
||||
)
|
||||
|
||||
parser.set_defaults(loglevel=logging.INFO)
|
||||
parser.add_argument(
|
||||
"--debug",
|
||||
action="store_const",
|
||||
const=logging.DEBUG,
|
||||
dest="loglevel",
|
||||
help="Print script debug information",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
logging.basicConfig(level=args.loglevel, format="[%(levelname)s] %(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
colorama.init()
|
||||
|
||||
verbose = None
|
||||
found_verbose_target = False
|
||||
if args.verbose:
|
||||
try:
|
||||
verbose = int(args.verbose, 16)
|
||||
except ValueError:
|
||||
parser.error("invalid verbose argument")
|
||||
html_path = args.html
|
||||
|
||||
plain = args.no_color
|
||||
|
||||
original = args.original
|
||||
if not os.path.isfile(original):
|
||||
parser.error(f"Original binary {original} does not exist")
|
||||
|
||||
recomp = args.recompiled
|
||||
if not os.path.isfile(recomp):
|
||||
parser.error(f"Recompiled binary {recomp} does not exist")
|
||||
|
||||
syms = args.pdb
|
||||
if not os.path.isfile(syms):
|
||||
parser.error(f"Symbols PDB {syms} does not exist")
|
||||
|
||||
source = args.decomp_dir
|
||||
if not os.path.isdir(source):
|
||||
parser.error(f"Source directory {source} does not exist")
|
||||
|
||||
svg = args.svg
|
||||
|
||||
wine_path_converter = None
|
||||
if os.name != "nt":
|
||||
wine_path_converter = WinePathConverter(source)
|
||||
with Bin(original, logger) as origfile, Bin(recomp, logger) as recompfile:
|
||||
syminfo = SymInfo(
|
||||
syms, recompfile, logger, sym_wine_path_converter=wine_path_converter
|
||||
)
|
||||
|
||||
print()
|
||||
|
||||
capstone_disassembler = Cs(CS_ARCH_X86, CS_MODE_32)
|
||||
|
||||
function_count = 0
|
||||
total_accuracy = 0
|
||||
total_effective_accuracy = 0
|
||||
htmlinsert = []
|
||||
|
||||
# Generate basename of original file, used in locating OFFSET lines
|
||||
basename = os.path.basename(os.path.splitext(original)[0])
|
||||
|
||||
for srcfilename in walk_source_dir(source):
|
||||
with open(srcfilename, "r", encoding="utf-8") as srcfile:
|
||||
blocks = find_code_blocks(srcfile)
|
||||
|
||||
for block in blocks:
|
||||
@ -440,7 +337,9 @@ for srcfilename in walk_source_dir(source):
|
||||
if not recinfo:
|
||||
continue
|
||||
else:
|
||||
recinfo = syminfo.get_recompiled_address(srcfilename, block.start_line)
|
||||
recinfo = syminfo.get_recompiled_address(
|
||||
srcfilename, block.start_line
|
||||
)
|
||||
if not recinfo:
|
||||
continue
|
||||
|
||||
@ -449,8 +348,18 @@ for srcfilename in walk_source_dir(source):
|
||||
ratio = 0.0
|
||||
effective_ratio = 0.0
|
||||
if recinfo.size:
|
||||
origasm = parse_asm(origfile, addr + recinfo.start, recinfo.size)
|
||||
recompasm = parse_asm(recompfile, recinfo.addr + recinfo.start, recinfo.size)
|
||||
origasm = parse_asm(
|
||||
capstone_disassembler,
|
||||
origfile,
|
||||
addr + recinfo.start,
|
||||
recinfo.size,
|
||||
)
|
||||
recompasm = parse_asm(
|
||||
capstone_disassembler,
|
||||
recompfile,
|
||||
recinfo.addr + recinfo.start,
|
||||
recinfo.size,
|
||||
)
|
||||
|
||||
diff = difflib.SequenceMatcher(None, origasm, recompasm)
|
||||
ratio = diff.ratio()
|
||||
@ -464,28 +373,40 @@ for srcfilename in walk_source_dir(source):
|
||||
else:
|
||||
ratio = 0
|
||||
|
||||
percenttext = f'{(effective_ratio * 100):.2f}%'
|
||||
percenttext = f"{(effective_ratio * 100):.2f}%"
|
||||
if not plain:
|
||||
if effective_ratio == 1.0:
|
||||
percenttext = colorama.Fore.GREEN + percenttext + colorama.Style.RESET_ALL
|
||||
percenttext = (
|
||||
colorama.Fore.GREEN + percenttext + colorama.Style.RESET_ALL
|
||||
)
|
||||
elif effective_ratio > 0.8:
|
||||
percenttext = colorama.Fore.YELLOW + percenttext + colorama.Style.RESET_ALL
|
||||
percenttext = (
|
||||
colorama.Fore.YELLOW
|
||||
+ percenttext
|
||||
+ colorama.Style.RESET_ALL
|
||||
)
|
||||
else:
|
||||
percenttext = colorama.Fore.RED + percenttext + colorama.Style.RESET_ALL
|
||||
percenttext = (
|
||||
colorama.Fore.RED + percenttext + colorama.Style.RESET_ALL
|
||||
)
|
||||
|
||||
if effective_ratio == 1.0 and ratio != 1.0:
|
||||
if plain:
|
||||
percenttext += '*'
|
||||
percenttext += "*"
|
||||
else:
|
||||
percenttext += colorama.Fore.RED + '*' + colorama.Style.RESET_ALL
|
||||
percenttext += (
|
||||
colorama.Fore.RED + "*" + colorama.Style.RESET_ALL
|
||||
)
|
||||
|
||||
if args.print_rec_addr:
|
||||
addrs = f'0x{addr:x} / 0x{recinfo.addr:x}'
|
||||
addrs = f"0x{addr:x} / 0x{recinfo.addr:x}"
|
||||
else:
|
||||
addrs = hex(addr)
|
||||
|
||||
if not verbose:
|
||||
print(f' {recinfo.name} ({addrs}) is {percenttext} similar to the original')
|
||||
print(
|
||||
f" {recinfo.name} ({addrs}) is {percenttext} similar to the original"
|
||||
)
|
||||
|
||||
function_count += 1
|
||||
total_accuracy += ratio
|
||||
@ -497,81 +418,48 @@ for srcfilename in walk_source_dir(source):
|
||||
# If verbose, print the diff for that function to the output
|
||||
if verbose:
|
||||
if effective_ratio == 1.0:
|
||||
ok_text = 'OK!' if plain else (colorama.Fore.GREEN + '✨ OK! ✨' + colorama.Style.RESET_ALL)
|
||||
ok_text = (
|
||||
"OK!"
|
||||
if plain
|
||||
else (
|
||||
colorama.Fore.GREEN
|
||||
+ "✨ OK! ✨"
|
||||
+ colorama.Style.RESET_ALL
|
||||
)
|
||||
)
|
||||
if ratio == 1.0:
|
||||
print(f'{addrs}: {recinfo.name} 100% match.\n\n{ok_text}\n\n')
|
||||
print(
|
||||
f"{addrs}: {recinfo.name} 100% match.\n\n{ok_text}\n\n"
|
||||
)
|
||||
else:
|
||||
print(f'{addrs}: {recinfo.name} Effective 100%% match. (Differs in register allocation only)\n\n{ok_text} (still differs in register allocation)\n\n')
|
||||
print(
|
||||
f"{addrs}: {recinfo.name} Effective 100%% match. (Differs in register allocation only)\n\n{ok_text} (still differs in register allocation)\n\n"
|
||||
)
|
||||
else:
|
||||
for line in udiff:
|
||||
if line.startswith('++') or line.startswith('@@') or line.startswith('--'):
|
||||
# Skip unneeded parts of the diff for the brief view
|
||||
pass
|
||||
elif line.startswith('+'):
|
||||
if plain:
|
||||
print(line)
|
||||
else:
|
||||
print(colorama.Fore.GREEN + line)
|
||||
elif line.startswith('-'):
|
||||
if plain:
|
||||
print(line)
|
||||
else:
|
||||
print(colorama.Fore.RED + line)
|
||||
else:
|
||||
print(line)
|
||||
if not plain:
|
||||
print(colorama.Style.RESET_ALL, end='')
|
||||
print_diff(udiff, plain)
|
||||
|
||||
print(f'\n{recinfo.name} is only {percenttext} similar to the original, diff above')
|
||||
print(
|
||||
f"\n{recinfo.name} is only {percenttext} similar to the original, diff above"
|
||||
)
|
||||
|
||||
# If html, record the diffs to an HTML file
|
||||
if html_path:
|
||||
htmlinsert.append({"address": f"0x{addr:x}",
|
||||
htmlinsert.append(
|
||||
{
|
||||
"address": f"0x{addr:x}",
|
||||
"name": recinfo.name,
|
||||
"matching": effective_ratio,
|
||||
"diff": '\n'.join(udiff)})
|
||||
|
||||
|
||||
def gen_html(html_file, data):
|
||||
output_data = Renderer().render_path(get_file_in_script_dir('template.html'),
|
||||
{
|
||||
"data": data,
|
||||
"diff": "\n".join(udiff),
|
||||
}
|
||||
)
|
||||
|
||||
with open(html_file, 'w') as htmlfile:
|
||||
htmlfile.write(output_data)
|
||||
|
||||
|
||||
def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_accuracy):
|
||||
icon_data = None
|
||||
if icon:
|
||||
with open(icon, 'rb') as iconfile:
|
||||
icon_data = base64.b64encode(iconfile.read()).decode('utf-8')
|
||||
|
||||
total_statistic = raw_accuracy / total_funcs
|
||||
full_percentbar_width = 127.18422
|
||||
output_data = Renderer().render_path(get_file_in_script_dir('template.svg'),
|
||||
{
|
||||
"name": name_svg,
|
||||
"icon": icon_data,
|
||||
"implemented": f'{(svg_implemented_funcs / total_funcs * 100):.2f}% ({svg_implemented_funcs}/{total_funcs})',
|
||||
"accuracy": f'{(raw_accuracy / svg_implemented_funcs * 100):.2f}%',
|
||||
"progbar": total_statistic * full_percentbar_width,
|
||||
"percent": f'{(total_statistic * 100):.2f}%',
|
||||
}
|
||||
)
|
||||
with open(svg_file, 'w') as svgfile:
|
||||
svgfile.write(output_data)
|
||||
|
||||
|
||||
if html_path:
|
||||
if html_path:
|
||||
gen_html(html_path, json.dumps(htmlinsert))
|
||||
|
||||
if verbose:
|
||||
if verbose:
|
||||
if not found_verbose_target:
|
||||
print(f'Failed to find the function with address 0x{verbose:x}')
|
||||
else:
|
||||
print(f"Failed to find the function with address 0x{verbose:x}")
|
||||
else:
|
||||
implemented_funcs = function_count
|
||||
|
||||
if args.total:
|
||||
@ -580,7 +468,16 @@ else:
|
||||
if function_count > 0:
|
||||
effective_accuracy = total_effective_accuracy / function_count * 100
|
||||
actual_accuracy = total_accuracy / function_count * 100
|
||||
print(f'\nTotal effective accuracy {effective_accuracy:.2f}% across {function_count} functions ({actual_accuracy:.2f}% actual accuracy)')
|
||||
print(
|
||||
f"\nTotal effective accuracy {effective_accuracy:.2f}% across {function_count} functions ({actual_accuracy:.2f}% actual accuracy)"
|
||||
)
|
||||
|
||||
if svg:
|
||||
gen_svg(svg, os.path.basename(original), args.svg_icon, implemented_funcs, function_count, total_effective_accuracy)
|
||||
gen_svg(
|
||||
svg,
|
||||
os.path.basename(original),
|
||||
args.svg_icon,
|
||||
implemented_funcs,
|
||||
function_count,
|
||||
total_effective_accuracy,
|
||||
)
|
||||
|
@ -1,3 +1,4 @@
|
||||
tools/isledecomp
|
||||
capstone
|
||||
colorama
|
||||
isledecomp
|
@ -1,76 +1,68 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import colorama
|
||||
import difflib
|
||||
import subprocess
|
||||
import os
|
||||
import sys
|
||||
|
||||
parser = argparse.ArgumentParser(allow_abbrev=False,
|
||||
description='Verify Exports: Compare the exports of two DLLs.')
|
||||
parser.add_argument('original', metavar='original-binary', help='The original binary')
|
||||
parser.add_argument('recompiled', metavar='recompiled-binary', help='The recompiled binary')
|
||||
parser.add_argument('--no-color', '-n', action='store_true', help='Do not color the output')
|
||||
from isledecomp.utils import print_diff
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
allow_abbrev=False, description="Verify Exports: Compare the exports of two DLLs."
|
||||
)
|
||||
parser.add_argument("original", metavar="original-binary", help="The original binary")
|
||||
parser.add_argument(
|
||||
"recompiled", metavar="recompiled-binary", help="The recompiled binary"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-color", "-n", action="store_true", help="Do not color the output"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not os.path.isfile(args.original):
|
||||
parser.error(f'Original binary file {args.original} does not exist')
|
||||
parser.error(f"Original binary file {args.original} does not exist")
|
||||
|
||||
if not os.path.isfile(args.recompiled):
|
||||
parser.error(f'Recompiled binary {args.recompiled} does not exist')
|
||||
parser.error(f"Recompiled binary {args.recompiled} does not exist")
|
||||
|
||||
|
||||
def get_file_in_script_dir(fn):
|
||||
return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)
|
||||
|
||||
def get_exports(file):
|
||||
call = [get_file_in_script_dir('DUMPBIN.EXE'), '/EXPORTS']
|
||||
|
||||
if os.name != 'nt':
|
||||
call.insert(0, 'wine')
|
||||
file = subprocess.check_output(['winepath', '-w', file]).decode('utf-8').strip()
|
||||
def get_exports(file):
|
||||
call = [get_file_in_script_dir("DUMPBIN.EXE"), "/EXPORTS"]
|
||||
|
||||
if os.name != "nt":
|
||||
call.insert(0, "wine")
|
||||
file = subprocess.check_output(["winepath", "-w", file]).decode("utf-8").strip()
|
||||
|
||||
call.append(file)
|
||||
|
||||
raw = subprocess.check_output(call).decode('utf-8').split('\r\n')
|
||||
raw = subprocess.check_output(call).decode("utf-8").split("\r\n")
|
||||
exports = []
|
||||
|
||||
start = False
|
||||
|
||||
for line in raw:
|
||||
if not start:
|
||||
if line == ' ordinal hint name':
|
||||
if line == " ordinal hint name":
|
||||
start = True
|
||||
else:
|
||||
if line:
|
||||
exports.append(line[27:line.rindex(' (')])
|
||||
exports.append(line[27 : line.rindex(" (")])
|
||||
elif exports:
|
||||
break
|
||||
|
||||
return exports
|
||||
|
||||
|
||||
og_exp = get_exports(args.original)
|
||||
re_exp = get_exports(args.recompiled)
|
||||
|
||||
udiff = difflib.unified_diff(og_exp, re_exp)
|
||||
has_diff = False
|
||||
|
||||
for line in udiff:
|
||||
has_diff = True
|
||||
color = ''
|
||||
if line.startswith('++') or line.startswith('@@') or line.startswith('--'):
|
||||
# Skip unneeded parts of the diff for the brief view
|
||||
continue
|
||||
# Work out color if we are printing color
|
||||
if not args.no_color:
|
||||
if line.startswith('+'):
|
||||
color = colorama.Fore.GREEN
|
||||
elif line.startswith('-'):
|
||||
color = colorama.Fore.RED
|
||||
print(color + line)
|
||||
# Reset color if we're printing in color
|
||||
if not args.no_color:
|
||||
print(colorama.Style.RESET_ALL, end='')
|
||||
has_diff = print_diff(udiff, args.no_color)
|
||||
|
||||
sys.exit(1 if has_diff else 0)
|
||||
|
Loading…
Reference in New Issue
Block a user