Add script to automatically decompile functions

This commit is contained in:
Luciano Ciccariello 2022-12-24 20:01:37 +00:00
parent 869ce3b771
commit 09b412f592
4 changed files with 250 additions and 19 deletions

View File

@ -224,10 +224,6 @@ extract_st%: require-tools
$(SPLAT) $(CONFIG_DIR)/splat.st$*.yaml
$(CONFIG_DIR)/generated.symbols.%.txt:
decompile: $(M2C_APP)
$(M2CTX) $(SOURCE)
$(M2C_APP) $(M2C_ARGS) --target mipsel-gcc-c --context ctx.c $(FUNC) $(ASSEMBLY)
context:
$(M2CTX) $(SOURCE)
@echo ctx.c has been updated.

View File

@ -52,17 +52,15 @@ Some non-matching functions are present in the source preprocessed by the macro
1. Run `make clean extract all expected` at least once
1. After setup and build, choose an overlay (eg. `ST/WRP`)
1. Look for one of those functions which hasn't successfully decompiled yet (eg. `INCLUDE_ASM("asm/st/wrp/nonmatchings/6FD0", func_801873A0);`)
1. Look for its assembly file (eg. `asm/st/wrp/nonmatchings/6FD0/func_801873A0.s`)
1. Run `SOURCE=src/st/wrp/6FD0.c ASSEMBLY=asm/st/wrp/nonmatchings/6FD0/func_801873A0.s make decompile` to dump the decompiled code on the console
1. Replace the `INCLUDE_ASM(...);` you targeted with the console output content
1. Invoke `python3 ./tools/asm-differ/diff.py -mwo --overlay st/wrp func_801873A0`
You will probably have some differences from your compiled code to the original; keep refactoring the code and move variables around until you have a 100% match.
1. Run `./tools/decompile.py func_801873A0` to decompile the function and inject the result into the C source file where the function belongs
1. If the function does not compile, try addressing the compilation errors until `make` compiles
1. If the function does not match, invoke `python3 ./tools/asm-differ/diff.py -mwo --overlay st/wrp func_801873A0` and refactor the code until it matches
1. If the function matches, try refactoring to keep the code clean while checking if the function still matches once in a while
There are a few tricks to make the process more streamlined:
1. Use [decomp.me](https://decomp.me/) with PSY-Q 4.0. Be aware that the repo is using GCC 2.6.x, so decomp.me will sometimes give a slightly different output.
1. The “context” section of decomp.me, is provided by the cmd `SOURCE=src/dra/42398.c make context` as mentioned in the how to decompile.
1. The “context” section of decomp.me is provided by the command `SOURCE=src/st/wrp/6FD0.c make context`, as mentioned in the how-to-decompile steps.
1. Use [decomp-permuter](https://github.com/simonlindholm/decomp-permuter) to solve some mismatches.
1. Use [this](https://github.com/mkst/sssv/wiki/Jump-Tables) and [this](https://github.com/pmret/papermario/wiki/GCC-2.8.1-Tips-and-Tricks) guide to understand how some compiler patterns work.
1. Use the `#ifndef NON_MATCHING` if your code is logically equivalent but you cannot yet fully match it.

219
tools/decompile.py Executable file
View File

@ -0,0 +1,219 @@
#!/usr/bin/python3
import argparse
import io
import os
import subprocess
import tempfile
from contextlib import redirect_stdout
from enum import Enum
import m2ctx
import m2c.src.main as m2c
# gets the root directory of the project
# the way it works is that it looks for the directory 'src'
def get_root_dir():
    """Return the normalized path of the project root directory.

    The root is the closest ancestor of this script's directory (the script
    directory itself included) that directly contains a ``src`` directory.

    Raises:
        FileNotFoundError: if the filesystem root is reached without finding
            a directory that contains ``src``.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    candidate = script_dir
    while True:
        # Test the entry relative to the candidate directory, not to the
        # current working directory, so the lookup does not depend on where
        # the script is launched from.
        if os.path.isdir(os.path.join(candidate, "src")):
            return os.path.normpath(candidate)
        parent = os.path.dirname(candidate)
        if parent == candidate:
            # reached the filesystem root: stop instead of looping forever
            raise FileNotFoundError(
                f"no parent directory of '{script_dir}' contains 'src'")
        candidate = parent
def get_all_c_files(src_dir):
    """Collect the path of every ``.c`` file found under ``src_dir``.

    The directory tree is walked recursively; each returned path is the
    walked directory joined with the file name.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(src_dir)
        for name in names
        if name.endswith(".c")
    ]
# global variables
# project root, as located by get_root_dir()
root_dir = get_root_dir()
# directory walked by get_nonmatching_functions() to find the .s files
asm_dir = os.path.join(root_dir, "asm")
# directory holding the C sources
src_dir = os.path.join(root_dir, "src")
# every .c file found under src_dir, collected once when the module loads
src_files = get_all_c_files(src_dir)
class NonMatchingFunc(object):
    """A function still stored as assembly, plus the C file it belongs to.

    Everything is derived from the path of the function's ``.s`` file, which
    is expected to look like
    ``<...>/<overlay>/nonmatchings/<text offset>/<name>.s``:

    - ``asm_path``: the path given to the constructor
    - ``name``: file name of the assembly file without its extension
    - ``overlay_name``: path component right before ``nonmatchings``
    - ``text_offset``: path component right after ``nonmatchings``
    - ``src_path``: the only entry of ``src_files`` that ends with
      ``/<overlay_name>/<text_offset>.c``

    Raises:
        ValueError: if the path has no ``nonmatchings`` component, or nothing
            precedes or follows it.
        LookupError: if zero or more than one C source file matches.
    """

    def __init__(self, nonmatching_path):
        parts = nonmatching_path.split("/")
        # list.index raises ValueError when the marker is missing
        marker = parts.index("nonmatchings")
        if marker == 0 or marker + 1 >= len(parts):
            # without this check parts[marker - 1] would silently wrap around
            # to the last component of the path
            raise ValueError(
                f"unexpected nonmatching path layout: '{nonmatching_path}'")

        self.asm_path = nonmatching_path
        self.name = os.path.splitext(os.path.basename(nonmatching_path))[0]
        self.overlay_name = parts[marker - 1]
        self.text_offset = parts[marker + 1]

        assumed_path = f"/{self.overlay_name}/{self.text_offset}.c"
        c_paths = [src for src in src_files if src.endswith(assumed_path)]
        # explicit check instead of `assert`, which is removed under `-O`
        if len(c_paths) != 1:
            raise LookupError(
                f"expected exactly one source file ending with "
                f"'{assumed_path}', found {len(c_paths)}")
        self.src_path = c_paths[0]
def get_nonmatching_functions(base_path, func_name) -> list:
    """Find the assembly files named ``<func_name>.s`` below ``base_path``.

    Only directories whose path contains ``/nonmatchings/`` are considered.
    Every hit is wrapped in a ``NonMatchingFunc`` built from its full path.
    """
    wanted = f"{func_name}.s"
    matches = []
    for folder, _subdirs, names in os.walk(base_path):
        if "/nonmatchings/" not in folder:
            continue
        matches.extend(
            NonMatchingFunc(os.path.join(folder, name))
            for name in names
            if name == wanted
        )
    return matches
def get_c_context(src_file) -> str:
    """Return the context of ``src_file`` as built by ``m2ctx.import_c_file``."""
    context = m2ctx.import_c_file(src_file)
    return context
def decompile(func: "NonMatchingFunc", ctx_str: str) -> str:
    """Run m2c on the assembly of ``func`` and return what it prints.

    Args:
        func: the function to decompile; only ``func.asm_path`` is read.
        ctx_str: C context handed to m2c through its ``--context`` option.

    Returns:
        Everything m2c wrote to standard output while it ran.
    """
    with tempfile.NamedTemporaryFile(
            mode="w", encoding="utf-8", suffix=".c") as tmp_ctx:
        # write() is the right call for a single string: writelines() would
        # iterate over it one character at a time
        tmp_ctx.write(ctx_str)
        # m2c opens the file by name, so the content has to be on disk
        tmp_ctx.flush()
        options = m2c.parse_flags([
            "-P", "4",
            "--pointer-style", "left",
            "--target", "mipsel-gcc-c",
            "--context", tmp_ctx.name,
            func.asm_path,
        ])
        # m2c prints its result, so capture stdout while it runs
        with redirect_stdout(io.StringIO()) as captured:
            m2c.run(options)
        return captured.getvalue()
def guess_unknown_type(dec: str) -> str:
    """Replace the ``?`` type placeholders of some declarations with a guess.

    Lines starting with ``? func`` become ``void`` functions, lines starting
    with ``extern ? D_`` become ``s32`` and lines starting with
    ``extern ?* D_`` become ``u8*``; each guess is tagged with ``/*?*/``.
    Any other line is kept as it is. Every line of the result, the last one
    included, ends with a newline.
    """
    # (prefix that selects the rule, text to replace, replacement text)
    rules = (
        ("? func", "? func_", "/*?*/ void func_"),
        ("extern ? D_", "extern ? D_", "extern /*?*/s32 D_"),
        ("extern ?* D_", "extern ?* D_", "extern /*?*/u8* D_"),
    )

    def patch(line: str) -> str:
        # only the first rule whose prefix matches is applied
        for prefix, unknown, guess in rules:
            if line.startswith(prefix):
                return line.replace(unknown, guess)
        return line

    return "".join(patch(line) + "\n" for line in dec.splitlines())
class InjectRes(Enum):
    """Outcome of injecting a decompiled function into its C source file."""

    # the overlay builds and `make check` passes
    SUCCESS = 0
    # the source file has no INCLUDE_ASM line for the function
    NOT_INJECTED = 1
    # the overlay does not build with the decompiled code
    NOT_COMPILABLE = 2
    # the overlay builds but `make check` fails
    NON_MATCHING = 3
    UNKNOWN_ERROR = -1


# check if the overlay can be compiled
def check_injected_code(func: "NonMatchingFunc | None" = None) -> InjectRes:
    """Build the overlay of ``func`` and verify the result.

    Args:
        func: the injected function; only ``func.overlay_name`` is read. When
            omitted, the module-level ``func`` set by the script entry point
            is used, which is how this function originally found it.

    Returns:
        ``NOT_COMPILABLE`` if ``make <overlay>`` fails, ``NON_MATCHING`` if
        the build works but ``make check`` fails, ``SUCCESS`` otherwise.

    Raises:
        ValueError: if no function is given and none is set at module level.
    """
    if func is None:
        func = globals().get("func")
        if func is None:
            raise ValueError(
                "check_injected_code() needs the function that was injected")

    # argument lists instead of shell strings: nothing gets re-parsed by a
    # shell, whatever the overlay name contains
    compile_result = subprocess.run(
        ["make", func.overlay_name],
        cwd=root_dir,
        check=False,
        capture_output=True)
    if compile_result.returncode != 0:
        return InjectRes.NOT_COMPILABLE

    # good news, the code was compilable
    # now checking for the checksum...
    check_result = subprocess.run(
        ["make", "check"],
        cwd=root_dir,
        check=False,
        capture_output=True)
    if check_result.returncode != 0:
        return InjectRes.NON_MATCHING

    # decompilation successful! There is nothing else to do
    return InjectRes.SUCCESS


def _is_include_asm_for(line: str, func_name: str) -> bool:
    """Tell whether ``line`` is the INCLUDE_ASM statement of ``func_name``."""
    import re

    if not line.startswith("INCLUDE_ASM("):
        return False
    # match the whole identifier, so that a name which is the prefix of a
    # longer function name does not select the wrong line
    return re.search(rf"\b{re.escape(func_name)}\b", line) is not None


def _write_source(path, lines, func_name, render_match) -> None:
    """Write ``lines`` to ``path``, replacing the INCLUDE_ASM of ``func_name``.

    ``render_match`` receives the matching line and returns its replacement.
    """
    chunks = []
    for line in lines:
        if _is_include_asm_for(line, func_name):
            chunks.append(render_match(line))
        else:
            chunks.append(line + "\n")
    with open(path, "w") as file:
        file.write("".join(chunks))


def inject_decompiled_function_into_file(
        func: "NonMatchingFunc", dec: str) -> InjectRes:
    """Replace the INCLUDE_ASM line of ``func`` with the decompiled code.

    The code is first injected as it is. If the build does not succeed and
    match, the injection is redone with the INCLUDE_ASM line kept under
    ``#ifndef NON_MATCHING`` and the decompiled code under ``#else``, so the
    default build still uses the original assembly.

    Args:
        func: the function to inject; ``src_path`` and ``name`` are read here.
        dec: the decompiled C code.

    Returns:
        ``NOT_INJECTED`` when the source file has no INCLUDE_ASM line for the
        function (the file is left untouched), otherwise the result of
        ``check_injected_code``.
    """
    with open(func.src_path) as file:
        original_lines = file.readlines()
    lines = [line.rstrip() for line in original_lines]

    if not any(_is_include_asm_for(line, func.name) for line in lines):
        # nothing to replace: do not rewrite the file at all
        return InjectRes.NOT_INJECTED

    if dec and not dec.endswith("\n"):
        # keep the directive that follows the code on its own line
        dec += "\n"

    # this portion of code NEEDS to be resilient; if anything goes wrong
    # after the first write (an interruption of the build included) the
    # original content of the source file is written back unchanged.
    try:
        # assume function matches
        _write_source(func.src_path, lines, func.name, lambda _line: dec)
        result = check_injected_code(func)
        if result == InjectRes.SUCCESS:
            return result

        def guarded(line: str) -> str:
            return (
                "#ifndef NON_MATCHING\n"
                + line + "\n"
                + "#else\n"
                + dec
                + "#endif\n"
            )

        _write_source(func.src_path, lines, func.name, guarded)
        return result
    except BaseException:
        with open(func.src_path, "w") as file:
            file.writelines(original_lines)
        raise
parser = argparse.ArgumentParser(
description="automatically decompiles a function")
parser.add_argument("function", help="function name to decompile")
args = parser.parse_args()
if __name__ == "__main__":
funcs = get_nonmatching_functions(asm_dir, args.function)
if len(funcs) == 0:
print(f"function {args.function} not found or already decompiled")
func = funcs[0]
# print(f"func: {func.name}")
# print(f"overlay: {func.overlay_name}")
# print(f"text: {func.text_offset}")
# print(f"asm: {func.asm_path}")
# print(f"src: {func.src_path}")
ctx = get_c_context(func.src_path)
dec = decompile(func, ctx)
decres = guess_unknown_type(dec)
match inject_decompiled_function_into_file(func, decres):
case InjectRes.SUCCESS:
print(f"function '{func.name}' decompiled successfully!")
case InjectRes.NON_MATCHING:
print(f"function '{func.name}' decompiled but not matching")
case InjectRes.NOT_COMPILABLE:
print(f"function '{func.name}' decompiled but cannot be compiled")
case InjectRes.NOT_INJECTED:
print(f"function '{func.name}' might already be decompiled")
case _:
print("unhandled error!")

View File

@ -6,8 +6,18 @@ import sys
import subprocess
import tempfile
script_dir = os.path.dirname(os.path.realpath(__file__))
root_dir = os.path.abspath(os.path.join(script_dir, ".."))
def get_root_dir():
    """Return the normalized path of the project root directory.

    The root is the closest ancestor of this script's directory (the script
    directory itself included) that directly contains a ``src`` directory.

    Raises:
        FileNotFoundError: if the filesystem root is reached without finding
            a directory that contains ``src``.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    candidate = script_dir
    while True:
        # Test the entry relative to the candidate directory, not to the
        # current working directory, so the lookup does not depend on where
        # the script is launched from.
        if os.path.isdir(os.path.join(candidate, "src")):
            return os.path.normpath(candidate)
        parent = os.path.dirname(candidate)
        if parent == candidate:
            # reached the filesystem root: stop instead of looping forever
            raise FileNotFoundError(
                f"no parent directory of '{script_dir}' contains 'src'")
        candidate = parent
# project root, as located by get_root_dir()
root_dir = get_root_dir()
# NOTE(review): get_root_dir() returns an os.path.normpath() result, which
# normally has no trailing separator, so this concatenation appears to give
# "<root>src/" rather than "<root>/src/" - confirm how src_dir is used.
src_dir = root_dir + "src/"
# Project-specific
@ -24,24 +34,31 @@ CPP_FLAGS = [
"-DM2CTX",
]
def import_c_file(in_file) -> str:
in_file = os.path.relpath(in_file, root_dir)
def import_c_file(src_file) -> str:
in_file = src_file
if not src_file.startswith(root_dir):
in_file = os.path.relpath(src_file, root_dir)
cpp_command = ["gcc", "-E", "-P", "-dM", *CPP_FLAGS, in_file]
cpp_command2 = ["gcc", "-E", "-P", *CPP_FLAGS, in_file]
with tempfile.NamedTemporaryFile(suffix=".c") as tmp:
stock_macros = subprocess.check_output(["gcc", "-E", "-P", "-dM", tmp.name], cwd=root_dir, encoding="utf-8")
stock_macros = subprocess.check_output(
["gcc", "-E", "-P", "-dM", tmp.name], cwd=root_dir, encoding="utf-8")
out_text = ""
try:
out_text += subprocess.check_output(cpp_command, cwd=root_dir, encoding="utf-8")
out_text += subprocess.check_output(cpp_command2, cwd=root_dir, encoding="utf-8")
out_text += subprocess.check_output(cpp_command,
cwd=root_dir, encoding="utf-8")
out_text += subprocess.check_output(cpp_command2,
cwd=root_dir, encoding="utf-8")
except subprocess.CalledProcessError:
print(
"Failed to preprocess input file, when running command:\n"
+ ' '.join(cpp_command),
file=sys.stderr,
)
)
sys.exit(1)
if not out_text:
@ -52,6 +69,7 @@ def import_c_file(in_file) -> str:
out_text = out_text.replace(line + "\n", "")
return out_text
def main():
parser = argparse.ArgumentParser(
description="""Create a context file which can be used for mips_to_c"""