Allow make-config to cross-reference symbols from data and bss (#1650)
Some checks failed
Format code / format (push) Failing after 0s
Build C code / extract-assets (push) Failing after 0s
Build Saturn version / build-and-test-saturn (push) Has been skipped
Build C code / build-linux (push) Has been skipped
Build C code / build-macos (push) Has been skipped
Build C code / build-windows (push) Has been skipped
Build C code / build-linux-lle (push) Has been skipped
Build Saturn version / function-finder-saturn (push) Has been skipped
Build Debug Module tool / build (push) Failing after 0s
Build PSX and PSP version / build-and-test (pspeu, hd) (push) Has been skipped
Build PSX and PSP version / build-and-test (pspeu, pspeu) (push) Has been skipped
Build PSX and PSP version / build-and-test (us, us) (push) Has been skipped
Build PSX and PSP version / generate-progress-report (pspeu, hd) (push) Has been skipped
Build PSX and PSP version / generate-progress-report (pspeu, pspeu) (push) Has been skipped
Build PSX and PSP version / generate-progress-report (us, us) (push) Has been skipped
Build PSX and PSP version / generate-duplicates-report (us, us) (push) Has been skipped
Build PSX and PSP version / generate-duplicates-report-psp (pspeu, pspeu) (push) Has been skipped

Improve `make-config.py` to internally use `symbols.py cross` and
cross-reference symbols from duplicate functions. This makes it possible
to add even more symbols to new overlays with a third pass.

This was one of my biggest pain points when I was de-duplicating the
boss `MAR` overlay: I was not able to directly copy & paste duplicate
functions, or include shared ones, without having to individually fix the
symbols coming from `.data` and `.bss`. This is now automated.

```
$ python3 ./tools/make-config.py ric --version hd
✔ generating psx splat config
✔ splitting config/splat.hd.ric.yaml
✔ adjusting files at src/ric
✔ disassembling matched functions
✔ finding duplicates across overlays
✔ adding cross-referenced function names
✔ renamed 141 functions, splitting again
✔ cross-referencing 141 functions
✔ adding cross-referenced symbol names
✔ renamed 145 data/bss symbols, splitting again
```
This commit is contained in:
Luciano Ciccariello 2024-09-21 20:59:55 +01:00 committed by GitHub
parent f48d23d3b8
commit 148691da5a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 159 additions and 32 deletions

View File

@ -440,21 +440,20 @@ force_extract:
rm -rf src/
mv src_tmp src
# Rewrites symbol list from a successful build
force_symbols:
$(PYTHON) ./tools/symbols.py map build/us/dra.map --no-default > config/symbols.us.dra.txt
$(PYTHON) ./tools/symbols.py map build/us/ric.map --no-default > config/symbols.us.ric.txt
$(PYTHON) ./tools/symbols.py map build/us/stcen.map --no-default > config/symbols.us.stcen.txt
$(PYTHON) ./tools/symbols.py map build/us/stdre.map --no-default > config/symbols.us.stdre.txt
$(PYTHON) ./tools/symbols.py map build/us/stno3.map --no-default > config/symbols.us.stno3.txt
$(PYTHON) ./tools/symbols.py map build/us/stnp3.map --no-default > config/symbols.us.stnp3.txt
$(PYTHON) ./tools/symbols.py map build/us/stnz0.map --no-default > config/symbols.us.stnz0.txt
$(PYTHON) ./tools/symbols.py map build/us/stsel.map --no-default > config/symbols.us.stsel.txt
$(PYTHON) ./tools/symbols.py map build/us/stst0.map --no-default > config/symbols.us.stst0.txt
$(PYTHON) ./tools/symbols.py map build/us/stwrp.map --no-default > config/symbols.us.stwrp.txt
$(PYTHON) ./tools/symbols.py map build/us/strwrp.map --no-default > config/symbols.us.strwrp.txt
$(PYTHON) ./tools/symbols.py map build/us/bomar.map --no-default > config/symbols.us.bomar.txt
$(PYTHON) ./tools/symbols.py map build/us/tt_000.map --no-default > config/symbols.us.tt_000.txt
force_symbols: ##@ Extract a full list of symbols from a successful build
$(PYTHON) ./tools/symbols.py elf build/us/dra.elf > config/symbols.us.dra.txt
$(PYTHON) ./tools/symbols.py elf build/us/ric.elf > config/symbols.us.ric.txt
$(PYTHON) ./tools/symbols.py elf build/us/stcen.elf > config/symbols.us.stcen.txt
$(PYTHON) ./tools/symbols.py elf build/us/stdre.elf > config/symbols.us.stdre.txt
$(PYTHON) ./tools/symbols.py elf build/us/stno3.elf > config/symbols.us.stno3.txt
$(PYTHON) ./tools/symbols.py elf build/us/stnp3.elf > config/symbols.us.stnp3.txt
$(PYTHON) ./tools/symbols.py elf build/us/stnz0.elf > config/symbols.us.stnz0.txt
$(PYTHON) ./tools/symbols.py elf build/us/stsel.elf > config/symbols.us.stsel.txt
$(PYTHON) ./tools/symbols.py elf build/us/stst0.elf > config/symbols.us.stst0.txt
$(PYTHON) ./tools/symbols.py elf build/us/stwrp.elf > config/symbols.us.stwrp.txt
$(PYTHON) ./tools/symbols.py elf build/us/strwrp.elf > config/symbols.us.strwrp.txt
$(PYTHON) ./tools/symbols.py elf build/us/bomar.elf > config/symbols.us.bomar.txt
$(PYTHON) ./tools/symbols.py elf build/us/tt_000.elf > config/symbols.us.tt_000.txt
context: ##@ create a context for decomp.me. Set the SOURCE variable prior to calling this target
$(M2CTX) $(SOURCE)

View File

@ -16,7 +16,7 @@ import sys
import threading
import time
from symbols import sort_symbols_from_file
from symbols import get_non_matching_symbols, sort_symbols_from_file
parser = argparse.ArgumentParser(
description="Make files inside config/ for a PSP overlay"
@ -321,6 +321,18 @@ def make_ovl_path(ovl_name: str, version: str) -> str:
omgpanic(f"'{ovl_name}' not recognized for '{version}' version")
def make_dst_path(ovl_name: str) -> str:
    """
    Return the overlay path relative to the source/asm root.

    Weapons, servants, bosses and stages are placed under the `weapon/`,
    `servant/`, `boss/` and `st/` sub-folders respectively. Any other
    overlay name is returned unchanged.
    """
    # Evaluated top to bottom: the first predicate that accepts the overlay
    # decides the folder.
    folder_by_kind = (
        (is_weapon, "weapon"),
        (is_servant, "servant"),
        (is_boss, "boss"),
        (is_stage, "st"),
    )
    for belongs_to_kind, folder in folder_by_kind:
        if belongs_to_kind(ovl_name):
            return f"{folder}/{ovl_name}"
    return ovl_name
##### SPLAT CONFIG UTILITIES
@ -366,21 +378,12 @@ def get_splat_config(
ver: str,
name: str,
):
filename = input.split("/")[-1]
if is_weapon(name):
path_stuff = f"weapon/{name}"
file_stuff = name
elif is_servant(name):
path_stuff = f"servant/{name}"
file_stuff = name
elif is_boss(name):
path_stuff = f"boss/{name}"
path_stuff = make_dst_path(name)
if is_boss(name):
file_stuff = f"bo{name}"
elif is_stage(name):
path_stuff = f"st/{name}"
file_stuff = f"st{name}"
else:
path_stuff = name
file_stuff = name
platform = "psx"
@ -967,6 +970,59 @@ def hydrate_psx_duplicate_symbols(splat_config, ovl_name: str, version: str):
return found
def hydrate_psx_cross_ref_symbols(splat_config, ovl_name: str, version: str) -> int:
    """
    Cross-reference data/bss symbol names from an already decompiled overlay.

    This is the in-process equivalent of `symbols.py cross <matching>
    <nonmatching>`: every function found both in the reference overlay's
    `matchings` folder and in this overlay's `nonmatchings` folder is handed
    to `get_non_matching_symbols`, and every symbol it reports is registered
    with `add_symbol`.

    Returns the number of symbols handed to `add_symbol`. When no reference
    overlay can be chosen a warning is emitted and 0 is returned, so callers
    can always compare the result against an integer.
    """
    if version != "us":
        # assume the equivalent overlay in the US version is already decompiled
        right_matchings_path = f"asm/us/{make_dst_path(ovl_name)}/matchings"
    elif is_stage(ovl_name) or is_boss(ovl_name):
        # pick NZ0 as the most complete overlay to cross-reference symbols
        right_matchings_path = f"asm/us/{make_dst_path('nz0')}/matchings"
    else:
        yowarning(
            f"cannot find a similar overlay to {version}/{ovl_name} to cross-reference"
        )
        # nothing was cross-referenced; a bare `return` would give the caller
        # None and make its `found > 0` check raise TypeError
        return 0

    left_nonmatchings_path = os.path.join(get_asm_path(splat_config), "nonmatchings")
    # file name -> full path for each non-matching file of this overlay;
    # names starting with `D_` are skipped (presumably data, not functions)
    left_funcs_by_name = {}
    for func_path in list_all_files(left_nonmatchings_path):
        file_name = os.path.basename(func_path)
        if file_name.startswith("D_"):
            continue
        left_funcs_by_name[file_name] = func_path

    # the functions to cross-reference need to exist on both the overlays to compare
    func_paths_to_cross_reference = {}
    for func_path in list_all_files(right_matchings_path):
        file_name = os.path.basename(func_path)
        if file_name in left_funcs_by_name:
            func_paths_to_cross_reference[file_name] = func_path

    spinner_start(f"cross-referencing {len(func_paths_to_cross_reference)} functions")
    syms = dict()
    for func_name, match_func_path in func_paths_to_cross_reference.items():
        cross_func_path = left_funcs_by_name[func_name]
        with open(match_func_path, "r") as asm_ref_file, open(
            cross_func_path, "r"
        ) as asm_cross_file:
            err, new_syms = get_non_matching_symbols(
                asm_ref_file.readlines(), asm_cross_file.readlines()
            )
        if err != "ok":
            # the two functions could not be compared; nothing to learn here
            continue
        # later functions override earlier ones on a name clash
        syms.update(new_syms)

    spinner_start("adding cross-referenced symbol names")
    for sym, value in syms.items():
        add_symbol(splat_config, version, sym, value)
    return len(syms)
def assert_sotn_decomp_cwd():
"""
Ensure the tool is running from the sotn-decomp root directory.
@ -1020,7 +1076,12 @@ def make_config(ovl_name: str, version: str):
else:
found = hydrate_psx_duplicate_symbols(splat_config, ovl_name, version)
if found > 0:
spinner_start(f"cross-referenced {found} symbols, splitting again")
spinner_start(f"renamed {found} functions, splitting again")
shutil.rmtree(get_asm_path(splat_config))
split(splat_config_path, False)
found = hydrate_psx_cross_ref_symbols(splat_config, ovl_name, version)
if found > 0:
spinner_start(f"renamed {found} data/bss symbols, splitting again")
shutil.rmtree(get_asm_path(splat_config))
split(splat_config_path, False)
spinner_stop(True) # done 🫡

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
import argparse
import subprocess
import mapfile_parser
import os
from pathlib import Path
@ -54,6 +55,21 @@ map_parser.add_argument(
help="Do not include Splat default symbols that starts with D_ or func_",
)
# `elf` sub-command: prints the symbols found in an elf file.
elf_parser = subparsers.add_parser(
"elf",
description="Print the list of symbols from an elf file",
)
# Positional argument: path of the elf file to read.
elf_parser.add_argument(
"elf_file_name",
help="The elf file to extract the symbols from",
)
# Optional flag (False unless given) to leave out names starting with `D_` or `func_`.
elf_parser.add_argument(
"--no-default",
required=False,
action="store_true",
help="Do not include Splat default symbols that starts with D_ or func_",
)
def is_splat_symbol_name(name):
return (
@ -214,12 +230,12 @@ def get_non_matching_symbols(asm_ref, asm_cross):
return imm
return imm - 0x10000
syms = dict()
ref_line_count = len(asm_ref)
cross_line_count = len(asm_cross)
if ref_line_count != cross_line_count:
return "fail", []
return "fail", syms
syms = dict()
prev_instr_hi = False
cross_off = 0
for i in range(0, ref_line_count):
@ -232,9 +248,9 @@ def get_non_matching_symbols(asm_ref, asm_cross):
if tokens_ref == tokens_cross:
continue # if tokens are identical, skip and continue
if tokens_ref == None or tokens_cross == None:
return "fail", [] # token mis-match, functions are different
return "fail", syms # token mis-match, functions are different
if is_value_equal(tokens_ref, tokens_cross, "OP") == False:
return "fail", [] # if op code is not the same, functions are different
return "fail", syms # if op code is not the same, functions are different
if is_value_equal(tokens_ref, tokens_cross, "SYM") == True:
continue # if a symbol is found and it is the same then continue
if "SYM" not in tokens_ref:
@ -372,6 +388,55 @@ def print_map_symbols(map_file_name, no_default):
print(f"{syms[vram]} = 0x{vram:08X}; // allow_duplicated:True")
def get_elf_symbols(elf_file_name) -> dict:
    """
    Return the symbols of an elf file as a `name -> address` dictionary.

    The symbols are read by running `nm` on the file. The following entries
    are left out of the result:
    - names starting with `LM` or `__pad`
    - names starting with `_` and ending with `_c`
    - names ending with `_END`, `_START` or `_VRAM`
    - symbols reported by `nm` with kind `A`
    - lines without an address, kind and name (e.g. undefined symbols,
      which `nm` prints with a blank address column)

    Raises `RuntimeError` if `nm` exits with a non-zero status, instead of
    silently returning an empty dictionary.
    """
    result = subprocess.run(
        ["nm", elf_file_name],
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=dict(os.environ),
    )
    if result.returncode != 0:
        reason = result.stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(f"nm failed on '{elf_file_name}': {reason}")

    ignored_prefixes = ("LM", "__pad")
    ignored_suffixes = ("_END", "_START", "_VRAM")
    symbols = dict()
    for line in result.stdout.decode("utf-8").splitlines():
        # split on any run of whitespace: splitting on a single space breaks
        # on lines padded with blanks in place of the address
        fields = line.split()
        if len(fields) != 3:
            continue
        off, kind, name = fields
        if kind == "A":
            continue
        if name.startswith(ignored_prefixes) or name.endswith(ignored_suffixes):
            continue
        if name.startswith("_") and name.endswith("_c"):
            continue
        symbols[name] = int(off, base=16)
    return symbols
def print_elf_symbols(elf_file_name, no_default):
    """
    Print the symbols of an elf file, one per line, sorted by address.

    Each line has the form `NAME = 0xADDRESS; // allow_duplicated:True`.
    When `no_default` is true, names starting with `func_` or `D_` are not
    printed.
    """
    # get_elf_symbols already runs `nm`; there is no need to spawn it here too
    symbols = get_elf_symbols(elf_file_name)
    for name, offset in sorted(symbols.items(), key=lambda item: item[1]):
        if no_default and name.startswith(("func_", "D_")):
            continue
        print(f"{name} = 0x{offset:08X}; // allow_duplicated:True")
if __name__ == "__main__":
args = parser.parse_args()
if args.version == None:
@ -386,3 +451,5 @@ if __name__ == "__main__":
remove_orphans_from_config(args.config_yaml)
elif args.command == "map":
print_map_symbols(args.map_file_name, args.no_default)
elif args.command == "elf":
print_elf_symbols(args.elf_file_name, args.no_default)