mirror of
https://github.com/Xeeynamo/sotn-decomp.git
synced 2024-10-07 02:24:01 +00:00
Add script to automatically decompile functions
This commit is contained in:
parent
869ce3b771
commit
09b412f592
4
Makefile
4
Makefile
@ -224,10 +224,6 @@ extract_st%: require-tools
|
||||
$(SPLAT) $(CONFIG_DIR)/splat.st$*.yaml
|
||||
$(CONFIG_DIR)/generated.symbols.%.txt:
|
||||
|
||||
decompile: $(M2C_APP)
|
||||
$(M2CTX) $(SOURCE)
|
||||
$(M2C_APP) $(M2C_ARGS) --target mipsel-gcc-c --context ctx.c $(FUNC) $(ASSEMBLY)
|
||||
|
||||
context:
|
||||
$(M2CTX) $(SOURCE)
|
||||
@echo ctx.c has been updated.
|
||||
|
12
README.md
12
README.md
@ -52,17 +52,15 @@ Some non-matching functions are present in the source preprocessed by the macro
|
||||
1. Run `make clean extract all expected` at least once
|
||||
1. After setup and build, choose an overlay (eg. `ST/WRP`)
|
||||
1. Look for one of those functions which hasn't been successfully decompiled yet (e.g. `INCLUDE_ASM("asm/st/wrp/nonmatchings/6FD0", func_801873A0);`)
|
||||
1. Look for its assembly file (eg. `asm/st/wrp/nonmatchings/6FD0/func_801873A0.s`)
|
||||
1. Run `SOURCE=src/st/wrp/6FD0.c ASSEMBLY=asm/st/wrp/nonmatchings/6FD0/func_801873A0.s make decompile` to dump the decompiled code on the console
|
||||
1. Replace the `INCLUDE_ASM(...);` you targeted with the console output content
|
||||
1. Invoke `python3 ./tools/asm-differ/diff.py -mwo --overlay st/wrp func_801873A0`
|
||||
|
||||
You will probably have some differences from your compiled code to the original; keep refactoring the code and move variables around until you have a 100% match.
|
||||
1. Run `./tools/decompile.py func_801873A0` to decompile the function in the C source code where the function is supposed to be located
|
||||
1. If the function does not compile, try addressing the compilation errors until `make` compiles
|
||||
1. If the function does not match, invoke `python3 ./tools/asm-differ/diff.py -mwo --overlay st/wrp func_801873A0` and refactor the code until it matches
|
||||
1. If the function matches, try refactoring to keep the code clean while checking if the function still matches once in a while
|
||||
|
||||
There are a few tricks to make the process more streamlined:
|
||||
|
||||
1. Use [decomp.me](https://decomp.me/) with PSY-Q 4.0. Be aware that the repo is using GCC 2.6.x, so decomp.me will sometimes give a slightly different output.
|
||||
1. The “context” section of decomp.me, is provided by the cmd `SOURCE=src/dra/42398.c make context` as mentioned in the how to decompile.
|
||||
1. The “context” section of decomp.me is provided by the command `SOURCE=src/st/wrp/6FD0.c make context`, as mentioned in the how-to-decompile steps.
|
||||
1. Use [decomp-permuter](https://github.com/simonlindholm/decomp-permuter) to solve some mismatches.
|
||||
1. Use [this](https://github.com/mkst/sssv/wiki/Jump-Tables) and [this](https://github.com/pmret/papermario/wiki/GCC-2.8.1-Tips-and-Tricks) guide to understand how some compiler patterns work.
|
||||
1. Use the `#ifndef NON_MATCHING` if your code is logically equivalent but you cannot yet fully match it.
|
||||
|
219
tools/decompile.py
Executable file
219
tools/decompile.py
Executable file
@ -0,0 +1,219 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import argparse
|
||||
import io
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
from contextlib import redirect_stdout
|
||||
from enum import Enum
|
||||
import m2ctx
|
||||
import m2c.src.main as m2c
|
||||
|
||||
|
||||
# gets the root directory of the project
|
||||
# the way it works is that it looks for the directory 'src'
|
||||
def get_root_dir(start_dir=None):
    """Return the project root: the nearest directory that contains `src`.

    The search begins at `start_dir` and climbs one parent at a time.

    Args:
        start_dir: directory where the search starts. Defaults to the
            directory that holds this script.

    Returns:
        The absolute, normalized path of the first directory (starting
        directory included) that has a `src` sub-directory.

    Raises:
        FileNotFoundError: if the filesystem root is reached without
            finding such a directory.
    """
    if start_dir is None:
        start_dir = os.path.dirname(os.path.realpath(__file__))

    current = os.path.abspath(start_dir)
    while True:
        # The candidate has to be tested relative to `current`; testing the
        # bare name "src" would resolve it against the working directory.
        if os.path.isdir(os.path.join(current, "src")):
            return current

        parent = os.path.dirname(current)
        if parent == current:
            # dirname() of the filesystem root is the root itself: give up
            # instead of climbing forever.
            raise FileNotFoundError(
                f"no 'src' directory found in '{start_dir}' or its parents")
        current = parent
|
||||
|
||||
|
||||
def get_all_c_files(src_dir):
    """Return the path of every `.c` file found below `src_dir`.

    Args:
        src_dir: directory that is walked recursively.

    Returns:
        A sorted list of paths, each one built by joining the walked
        directory with the file name. The list is empty when `src_dir`
        holds no `.c` file or does not exist.
    """
    c_files = [
        os.path.join(folder, file_name)
        for folder, _subdirs, file_names in os.walk(src_dir)
        for file_name in file_names
        if file_name.endswith(".c")
    ]
    # os.walk() order depends on the filesystem; sort so that callers get
    # the same list on every machine.
    c_files.sort()
    return c_files
|
||||
|
||||
|
||||
# global variables
|
||||
# Project locations, resolved once when the module is loaded.
root_dir = get_root_dir()
asm_dir, src_dir = (
    os.path.join(root_dir, folder) for folder in ("asm", "src"))
# every C source file of the project, used to map an asm file to its source
src_files = get_all_c_files(src_dir)
|
||||
|
||||
|
||||
class NonMatchingFunc(object):
    """A function that is still assembly, plus the C file it belongs to.

    Everything is derived from the location of the `.s` file, which must
    look like `<...>/<overlay>/nonmatchings/<text_offset>/<name>.s`.

    Attributes set by the constructor:
        asm_path: the path that was passed in, untouched.
        name: file name of `asm_path` without its extension.
        overlay_name: path component right before `nonmatchings`.
        text_offset: path component right after `nonmatchings`.
        src_path: the only candidate source file whose path ends with
            `/<overlay_name>/<text_offset>.c`.
    """

    def __init__(self, nonmatching_path, source_files=None):
        """Parse `nonmatching_path` and locate the matching C source file.

        Args:
            nonmatching_path: path of the function's `.s` file.
            source_files: candidate C source paths. Defaults to the
                module-level `src_files` list.

        Raises:
            ValueError: if the path has no usable `nonmatchings` component,
                or if zero or several candidate C files correspond to it.
        """
        if source_files is None:
            source_files = src_files

        # Compare on "/" so that paths produced with the platform separator
        # are understood on every OS.
        parts = nonmatching_path.replace(os.sep, "/").split("/")
        if "nonmatchings" not in parts:
            raise ValueError(
                f"'{nonmatching_path}' is not inside a 'nonmatchings' folder")
        marker = parts.index("nonmatchings")
        # marker == 0 would make `marker - 1` wrap around to the file name
        if marker == 0 or marker + 1 >= len(parts):
            raise ValueError(
                f"cannot infer overlay and text offset from "
                f"'{nonmatching_path}'")

        self.asm_path = nonmatching_path
        self.name = os.path.splitext(os.path.basename(nonmatching_path))[0]
        self.overlay_name = parts[marker - 1]
        self.text_offset = parts[marker + 1]

        assumed_path = f"/{self.overlay_name}/{self.text_offset}.c"
        c_paths = [
            src for src in source_files
            if src.replace(os.sep, "/").endswith(assumed_path)
        ]
        # An explicit error instead of `assert`: assertions are stripped
        # when Python runs with -O.
        if len(c_paths) != 1:
            raise ValueError(
                f"expected exactly one source file ending with "
                f"'{assumed_path}', found {len(c_paths)}")
        self.src_path = c_paths[0]
|
||||
|
||||
|
||||
def get_nonmatching_functions(base_path, func_name) -> list:
    """Find the assembly files named `<func_name>.s` below `base_path`.

    Only directories whose path contains a `/nonmatchings/` segment are
    considered.

    Args:
        base_path: directory that is walked recursively.
        func_name: function name, without the `.s` extension.

    Returns:
        A list with one `NonMatchingFunc` per file found; empty when the
        function has no assembly file in a qualifying directory.
    """
    wanted = f"{func_name}.s"
    function_list = []
    for folder, subdirs, file_names in os.walk(base_path):
        # Sorting in place makes os.walk() visit sub-directories in a stable
        # order, so the first result is the same on every machine.
        subdirs.sort()
        # Compare on "/" so the filter also works where os.sep is "\".
        if "/nonmatchings/" not in folder.replace(os.sep, "/"):
            continue
        if wanted in file_names:
            function_list.append(
                NonMatchingFunc(os.path.join(folder, wanted)))
    return function_list
|
||||
|
||||
|
||||
def get_c_context(src_file) -> str:
    """Return what `m2ctx.import_c_file` produces for `src_file`."""
    context = m2ctx.import_c_file(src_file)
    return context
|
||||
|
||||
|
||||
def decompile(func: NonMatchingFunc, ctx_str: str):
    """Run m2c on the function's assembly and return what it prints.

    Args:
        func: the function to decompile; only `func.asm_path` is read.
        ctx_str: C context text, handed to m2c through a temporary `.c`
            file passed as `--context`.

    Returns:
        Everything m2c wrote to standard output, as one string.
    """
    with tempfile.NamedTemporaryFile(
            mode="w", encoding="utf-8", suffix=".c") as tmp_ctx:
        # write() emits the text in a single call; writelines() on a str
        # would iterate over it one character at a time.
        tmp_ctx.write(ctx_str)
        # m2c opens the file by name, so the content must be on disk first
        tmp_ctx.flush()

        options = m2c.parse_flags([
            "-P", "4",
            "--pointer-style", "left",
            "--target", "mipsel-gcc-c",
            "--context", tmp_ctx.name,
            func.asm_path,
        ])

        # m2c prints its result; capture it instead of showing it
        captured = io.StringIO()
        with redirect_stdout(captured):
            m2c.run(options)
        return captured.getvalue()
|
||||
|
||||
|
||||
def guess_unknown_type(dec: str) -> str:
    """Replace leading `?` type placeholders with a guessed C type.

    Each line of `dec` is checked against a fixed list of prefixes; the
    placeholder in a matching prefix is swapped for a concrete type tagged
    with a `/*?*/` comment. Only the prefix is rewritten, the rest of the
    line is left alone. Lines that match no prefix are kept unchanged.

    Args:
        dec: decompiled text, possibly spanning several lines.

    Returns:
        The rewritten text, where every line ends with a newline.
    """
    # (prefix to look for, text that replaces it)
    rewrites = (
        ("? func_", "/*?*/ void func_"),
        ("extern ? D_", "extern /*?*/s32 D_"),
        ("extern ?* D_", "extern /*?*/u8* D_"),
    )

    fixed_lines = []
    for line in dec.splitlines():
        for prefix, replacement in rewrites:
            if line.startswith(prefix):
                line = replacement + line[len(prefix):]
                break
        fixed_lines.append(line + "\n")
    return "".join(fixed_lines)
|
||||
|
||||
|
||||
class InjectRes(Enum):
    """Possible outcomes of injecting decompiled code into a source file."""

    # the overlay built and `make check` passed
    SUCCESS = 0
    # no INCLUDE_ASM line for the function was found in the source file
    NOT_INJECTED = 1
    # building the overlay failed
    NOT_COMPILABLE = 2
    # the overlay built but `make check` failed
    NON_MATCHING = 3
    # fallback value for anything else
    UNKNOWN_ERROR = -1
|
||||
|
||||
|
||||
# check if the overlay can be compiled
|
||||
def check_injected_code(overlay_name=None) -> InjectRes:
    """Build an overlay, then run `make check`, and report how far it got.

    Both commands run in `root_dir` with their output captured.

    Args:
        overlay_name: make target to build. When omitted, the overlay of
            the module-level `func` (assigned by the script entry point)
            is used, which is what callers passing no argument rely on.

    Returns:
        InjectRes.NOT_COMPILABLE if `make <overlay_name>` fails,
        InjectRes.NON_MATCHING if it succeeds but `make check` fails,
        InjectRes.SUCCESS if both succeed.
    """
    if overlay_name is None:
        overlay_name = func.overlay_name

    def make_succeeds(target):
        # An argument list avoids going through a shell to run make.
        completed = subprocess.run(
            ["make", target],
            cwd=root_dir,
            check=False,
            capture_output=True)
        return completed.returncode == 0

    if not make_succeeds(overlay_name):
        return InjectRes.NOT_COMPILABLE
    # the code compiles; `make check` tells whether it also matches
    if not make_succeeds("check"):
        return InjectRes.NON_MATCHING
    return InjectRes.SUCCESS
|
||||
|
||||
|
||||
def inject_decompiled_function_into_file(func: NonMatchingFunc, dec: str) -> InjectRes:
    """Replace the function's `INCLUDE_ASM(...)` line with decompiled code.

    The source file `func.src_path` is rewritten with `dec` in place of the
    `INCLUDE_ASM(` line that names the function, then checked with
    `check_injected_code()`. If the check does not succeed, the file is
    rewritten again with the original `INCLUDE_ASM` line kept for regular
    builds and `dec` compiled only when `NON_MATCHING` is defined.

    Args:
        func: the function being injected; `name` and `src_path` are read.
        dec: the decompiled C code, expected to end with a newline.

    Returns:
        InjectRes.NOT_INJECTED if no matching `INCLUDE_ASM(` line exists
        (the file is left untouched), otherwise the result of
        `check_injected_code()`.

    Raises:
        Any exception raised while writing or checking; the source file is
        restored to its original content before the exception propagates.
    """
    import re

    with open(func.src_path) as file:
        original_lines = file.readlines()
    lines = [line.rstrip() for line in original_lines]

    # Match the name as a whole word so that e.g. `func_A` does not also
    # select the INCLUDE_ASM line of `func_AB`.
    name_pattern = re.compile(rf"\b{re.escape(func.name)}\b")

    def is_target(line):
        return (line.startswith("INCLUDE_ASM(")
                and name_pattern.search(line) is not None)

    def render(replace_target):
        return "".join(
            replace_target(line) if is_target(line) else line + "\n"
            for line in lines)

    def guard_with_non_matching(line):
        # Regular builds (NON_MATCHING undefined) must keep using the
        # original assembly; the decompiled code is opt-in.
        return ("#ifndef NON_MATCHING\n"
                + line + "\n"
                + "#else\n"
                + dec
                + "#endif\n")

    if not any(is_target(line) for line in lines):
        # nothing to replace: do not touch the file at all
        return InjectRes.NOT_INJECTED

    # This portion of code NEEDS to be resilient: if anything goes wrong
    # after the file has been overwritten, the original source code would
    # otherwise be lost.
    try:
        # first attempt: assume the function matches
        with open(func.src_path, "w") as file:
            file.write(render(lambda line: dec))

        result = check_injected_code()
        if result == InjectRes.SUCCESS:
            return result

        with open(func.src_path, "w") as file:
            file.write(render(guard_with_non_matching))
        return result
    except BaseException:
        # BaseException so that Ctrl-C during the build restores the file too
        with open(func.src_path, "w") as file:
            file.writelines(original_lines)
        raise
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="automatically decompiles a function")
|
||||
parser.add_argument("function", help="function name to decompile")
|
||||
|
||||
args = parser.parse_args()
|
||||
if __name__ == "__main__":
|
||||
funcs = get_nonmatching_functions(asm_dir, args.function)
|
||||
if len(funcs) == 0:
|
||||
print(f"function {args.function} not found or already decompiled")
|
||||
|
||||
func = funcs[0]
|
||||
# print(f"func: {func.name}")
|
||||
# print(f"overlay: {func.overlay_name}")
|
||||
# print(f"text: {func.text_offset}")
|
||||
# print(f"asm: {func.asm_path}")
|
||||
# print(f"src: {func.src_path}")
|
||||
|
||||
ctx = get_c_context(func.src_path)
|
||||
dec = decompile(func, ctx)
|
||||
decres = guess_unknown_type(dec)
|
||||
match inject_decompiled_function_into_file(func, decres):
|
||||
case InjectRes.SUCCESS:
|
||||
print(f"function '{func.name}' decompiled successfully!")
|
||||
case InjectRes.NON_MATCHING:
|
||||
print(f"function '{func.name}' decompiled but not matching")
|
||||
case InjectRes.NOT_COMPILABLE:
|
||||
print(f"function '{func.name}' decompiled but cannot be compiled")
|
||||
case InjectRes.NOT_INJECTED:
|
||||
print(f"function '{func.name}' might already be decompiled")
|
||||
case _:
|
||||
print("unhandled error!")
|
@ -6,8 +6,18 @@ import sys
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
script_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
root_dir = os.path.abspath(os.path.join(script_dir, ".."))
|
||||
|
||||
def get_root_dir(start_dir=None):
    """Return the project root: the nearest directory that contains `src`.

    The search begins at `start_dir` and climbs one parent at a time.

    Args:
        start_dir: directory where the search starts. Defaults to the
            directory that holds this script.

    Returns:
        The absolute, normalized path of the first directory (starting
        directory included) that has a `src` sub-directory.

    Raises:
        FileNotFoundError: if the filesystem root is reached without
            finding such a directory.
    """
    if start_dir is None:
        start_dir = os.path.dirname(os.path.realpath(__file__))

    current = os.path.abspath(start_dir)
    while True:
        # The candidate has to be tested relative to `current`; testing the
        # bare name "src" would resolve it against the working directory.
        if os.path.isdir(os.path.join(current, "src")):
            return current

        parent = os.path.dirname(current)
        if parent == current:
            # dirname() of the filesystem root is the root itself: give up
            # instead of climbing forever.
            raise FileNotFoundError(
                f"no 'src' directory found in '{start_dir}' or its parents")
        current = parent
|
||||
|
||||
|
||||
root_dir = get_root_dir()
# get_root_dir() returns a normalized path without a trailing separator, so
# plain concatenation would produce "<root>src/". Join the components
# instead; the empty last component keeps the trailing separator.
src_dir = os.path.join(root_dir, "src", "")
|
||||
|
||||
# Project-specific
|
||||
@ -24,24 +34,31 @@ CPP_FLAGS = [
|
||||
"-DM2CTX",
|
||||
]
|
||||
|
||||
def import_c_file(in_file) -> str:
|
||||
in_file = os.path.relpath(in_file, root_dir)
|
||||
|
||||
def import_c_file(src_file) -> str:
|
||||
in_file = src_file
|
||||
if not src_file.startswith(root_dir):
|
||||
in_file = os.path.relpath(src_file, root_dir)
|
||||
|
||||
cpp_command = ["gcc", "-E", "-P", "-dM", *CPP_FLAGS, in_file]
|
||||
cpp_command2 = ["gcc", "-E", "-P", *CPP_FLAGS, in_file]
|
||||
|
||||
with tempfile.NamedTemporaryFile(suffix=".c") as tmp:
|
||||
stock_macros = subprocess.check_output(["gcc", "-E", "-P", "-dM", tmp.name], cwd=root_dir, encoding="utf-8")
|
||||
stock_macros = subprocess.check_output(
|
||||
["gcc", "-E", "-P", "-dM", tmp.name], cwd=root_dir, encoding="utf-8")
|
||||
|
||||
out_text = ""
|
||||
try:
|
||||
out_text += subprocess.check_output(cpp_command, cwd=root_dir, encoding="utf-8")
|
||||
out_text += subprocess.check_output(cpp_command2, cwd=root_dir, encoding="utf-8")
|
||||
out_text += subprocess.check_output(cpp_command,
|
||||
cwd=root_dir, encoding="utf-8")
|
||||
out_text += subprocess.check_output(cpp_command2,
|
||||
cwd=root_dir, encoding="utf-8")
|
||||
except subprocess.CalledProcessError:
|
||||
print(
|
||||
"Failed to preprocess input file, when running command:\n"
|
||||
+ ' '.join(cpp_command),
|
||||
file=sys.stderr,
|
||||
)
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
if not out_text:
|
||||
@ -52,6 +69,7 @@ def import_c_file(in_file) -> str:
|
||||
out_text = out_text.replace(line + "\n", "")
|
||||
return out_text
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="""Create a context file which can be used for mips_to_c"""
|
||||
|
Loading…
Reference in New Issue
Block a user