Add script to print block numbers for BSS ordering (#1707)

* Write script to print block numbers for BSS ordering

* Rename to ido_block_numbers.py

* Update ido_block_numbers.py for reencode.sh
This commit is contained in:
cadmic 2024-03-01 13:40:27 -08:00 committed by GitHub
parent bdee3d33b4
commit ca45c543f6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

302
tools/ido_block_numbers.py Executable file
View File

@ -0,0 +1,302 @@
#!/usr/bin/env python3
# SPDX-FileCopyrightText: © 2024 ZeldaRET
# SPDX-License-Identifier: CC0-1.0
# IDO symbol table parser for BSS ordering debugging. The compiler will assign
# "block numbers" or "dense numbers" to symbols in order as it encounters them
# in the source file, and the BSS section is sorted by this block number mod 256.
# This script dumps the compiler-generated symbol table so you can see which
# block numbers are assigned to each symbol.
#
# Resources:
# https://hackmd.io/@Roman971/BJ2DOyhBa
# https://github.com/decompals/ultralib/blob/main/tools/mdebug.py
# https://www.cs.unibo.it/~solmi/teaching/arch_2002-2003/AssemblyLanguageProgDoc.pdf
# https://github.com/decompals/IDO/blob/main/IDO_7.1/dist/compiler_eoe/usr/include/sym.h
import argparse
import itertools
from pathlib import Path
import platform
import struct
import subprocess
import sys
class Header:
SIZE = 0x60
def __init__(self, data):
(
self.magic,
self.vstamp,
self.ilineMax,
self.cbLine,
self.cbLineOffset,
self.idnMax,
self.cbDnOffset,
self.ipdMax,
self.cbPdOffset,
self.isymMax,
self.cbSymOffset,
self.ioptMax,
self.cbOptOffset,
self.iauxMax,
self.cbAuxOffset,
self.issMax,
self.cbSsOffset,
self.issExtMax,
self.cbSsExtOffset,
self.ifdMax,
self.cbFdOffset,
self.crfd,
self.cbRfdOffset,
self.iextMax,
self.cbExtOffset,
) = struct.unpack(">2H23I", data)
class FileDescriptor:
SIZE = 0x48
def __init__(self, data):
(
self.adr,
self.rss,
self.issBase,
self.cbSs,
self.isymBase,
self.csym,
self.ilineBase,
self.cline,
self.ioptBase,
self.copt,
self.ipdFirst,
self.cpd,
self.iauxBase,
self.caux,
self.rfdBase,
self.crfd,
self.flags,
self.cbLineOffset,
self.cbLine,
) = struct.unpack(">10I2H7I", data)
class Symbol:
SIZE = 0xC
def __init__(self, data):
(
self.iss,
self.value,
self.flags,
) = struct.unpack(">3I", data)
def symbol_type(self):
symbol_types = {
0: "nil",
1: "global",
2: "static",
3: "param",
4: "local",
5: "label",
6: "proc",
7: "block",
8: "end",
9: "member",
10: "typedef",
11: "file",
14: "staticproc",
15: "constant",
26: "struct",
27: "union",
28: "enum",
34: "indirect",
}
return symbol_types[self.flags >> 26]
def symbol_storage_class(self):
symbol_storage_classes = {
0: "nil",
1: "text",
2: "data",
3: "bss",
4: "register",
5: "abs",
6: "undefined",
8: "bits",
9: "dbx",
10: "regimage",
11: "info",
}
return symbol_storage_classes[(self.flags >> 21) & 0x1F]
class ExternalSymbol:
SIZE = 0x10
def __init__(self, data):
(
self.flags,
self.ifd,
) = struct.unpack(">2H", data[0:4])
self.asym = Symbol(data[4:])
def read_entry(data, base, offset, size):
start = base + offset * size
return data[start : start + size]
def read_string(data, start):
size = 0
while data[start + size] != 0:
size += 1
return data[start : start + size].decode("ascii")
def print_symbol_table(data):
header = Header(data[0 : Header.SIZE])
print(f"block [mod 256]: linkage type class name")
# File descriptors
fds = []
for i in range(header.ifdMax):
fds.append(
FileDescriptor(read_entry(data, header.cbFdOffset, i, FileDescriptor.SIZE))
)
# Symbol identifiers ("dense numbers")
for i in range(header.idnMax):
ifd, isym = struct.unpack(">II", read_entry(data, header.cbDnOffset, i, 8))
if isym == 0xFFFFF:
# TODO: is this always a string?
extern = False
sym_name = ""
st = "string"
sc = ""
else:
extern = ifd == 0x7FFFFFFF
if extern:
ext = ExternalSymbol(
read_entry(data, header.cbExtOffset, isym, ExternalSymbol.SIZE)
)
sym = ext.asym
sym_name = read_string(data, header.cbSsExtOffset + sym.iss)
else:
fd = fds[ifd]
sym = Symbol(
read_entry(
data, header.cbSymOffset, fd.isymBase + isym, Symbol.SIZE
)
)
sym_name = read_string(data, header.cbSsOffset + fd.issBase + sym.iss)
st = sym.symbol_type()
sc = sym.symbol_storage_class()
print(
f'{i:>9} [{i%256:>3}]: {"extern" if extern else "":<7} {st:<10} {sc:<9} {sym_name:<40}'
)
def find_compiler_command_line(filename, oot_version):
is_macos = platform.system() == "Darwin"
make = "gmake" if is_macos else "make"
make_command_line = [
make,
"--always-make",
"--dry-run",
f"VERSION={oot_version}",
]
print(f"Running {make} to find compiler command line ...", file=sys.stderr)
make_output = (
subprocess.check_output(make_command_line).decode("utf-8").splitlines()
)
found = 0
for line in make_output:
parts = line.split()
if "-o" in parts and str(filename) in parts:
makefile_command_line = parts
found += 1
if found != 1:
print(
f"Could not determine compiler command line for {filename}", file=sys.stderr
)
sys.exit(1)
# Assume command line is of the form:
# tools/reencode.sh [COMPILER] [COMPILER_ARGS]
compiler_command_line = makefile_command_line[1:]
print(f'Command line: {" ".join(compiler_command_line)}', file=sys.stderr)
return compiler_command_line
def generate_symbol_table(command_line):
# Find source file in compiler arguments
source_file = None
args = []
for arg in command_line:
if arg.endswith(".c"):
source_file = Path(arg)
else:
args.append(arg)
if source_file is None:
raise Exception("No source file found")
source_contents = source_file.read_text()
stem = "reencode_tmp"
input_file = Path(f"{stem}.c")
symbol_table_file = Path(f"{stem}.T")
ucode_file = Path(f"{stem}.B")
try:
# Write temporary file with #line directive to simulate asm-processor
with open(input_file, "w") as f:
f.write('#line 1 "{}"\n'.format(source_file))
f.write(source_contents)
# Invoke compiler
# -Hf stops compilation after cfe so we can inspect the symbol table
subprocess.run(args + ["-Hf", input_file], check=True)
# Read symbol table
return symbol_table_file.read_bytes()
finally:
# Cleanup
input_file.unlink(missing_ok=True)
symbol_table_file.unlink(missing_ok=True)
ucode_file.unlink(missing_ok=True)
def main():
parser = argparse.ArgumentParser(
description="Dump IDO symbol table for debugging BSS ordering"
)
parser.add_argument("filename", metavar="FILE", type=Path, help="C source file")
parser.add_argument(
"-v",
"--oot-version",
type=str,
default="gc-eu-mq-dbg",
help="OOT version (default: gc-eu-mq-dbg)",
)
args = parser.parse_args()
command_line = find_compiler_command_line(args.filename, args.oot_version)
data = generate_symbol_table(command_line)
print_symbol_table(data)
if __name__ == "__main__":
main()