Automate fixing BSS ordering (#2009)

* Automate fixing BSS ordering

* Typo

* Some cleanups

* Move pragma check after printing BSS info

* Some proofreading

* multiprocessing, require version, some colors

* Tweak output

* Black + mypy

* Move logging and sys.exit out of helper functions

* Use stdout instead of stderr in fix_bss.py

* Add suggestion to conflicting offsets error

Co-authored-by: Dragorn421 <Dragorn421@users.noreply.github.com>

* Remove var = list[T]()

* Improve error handling

Co-authored-by: Dragorn421 <Dragorn421@users.noreply.github.com>

* Add error if no pointers to BSS

* Add comment about process_file_worker

* Only print updates if stdout is a tty

* Use new binary-search-esque candidate generation algorithm

Co-authored-by: Dragorn421 <Dragorn421@users.noreply.github.com>

* Add Wikipedia link

* More comment tweaks

---------

Co-authored-by: Dragorn421 <Dragorn421@users.noreply.github.com>
cadmic 2024-08-02 17:31:15 -07:00 committed by GitHub
parent 078e21f6c6
commit 0da402b9de
21 changed files with 1071 additions and 352 deletions


@ -378,7 +378,7 @@ $(BUILD_DIR)/src/code/jpegdecoder.o: CC := $(CC_OLD)
ifeq ($(PERMUTER),) # permuter + preprocess.py misbehaves, permuter doesn't care about rodata diffs or bss ordering so just don't use it in that case
# Handle encoding (UTF-8 -> EUC-JP) and custom pragmas
$(BUILD_DIR)/src/%.o: CC := $(PYTHON) tools/preprocess.py $(CC)
$(BUILD_DIR)/src/%.o: CC := $(PYTHON) tools/preprocess.py -v $(VERSION) -- $(CC)
endif
else


@ -1,10 +1,7 @@
#include "global.h"
#include "terminal.h"
// For retail BSS ordering, the block number of sMainThread must be 0 or
// just above (the exact upper bound depends on the block numbers assigned to
// extern variables declared in headers).
#pragma increment_block_number 60
#pragma increment_block_number "gc-eu:64 gc-eu-mq:64"
OSThread sMainThread;
STACK(sMainStack, 0x900);


@ -44,14 +44,7 @@
#include "terminal.h"
#include "alloca.h"
// For retail BSS ordering, the block number of sFaultInstance must be 0 or
// just above (the exact upper bound depends on the block numbers assigned to
// extern variables declared in headers).
#if OOT_DEBUG
#pragma increment_block_number 0
#else
#pragma increment_block_number 20
#endif
#pragma increment_block_number "gc-eu:64 gc-eu-mq:64 gc-eu-mq-dbg:0"
void FaultDrawer_Init(void);
void FaultDrawer_SetOsSyncPrintfEnabled(u32 enabled);


@ -7,9 +7,7 @@ s32 gScreenWidth = SCREEN_WIDTH;
s32 gScreenHeight = SCREEN_HEIGHT;
u32 gSystemHeapSize = 0;
// For retail BSS ordering, the block number of gIrqMgr must be greater than the
// the block numbers assigned to extern variables above (declared in variables.h).
#pragma increment_block_number 220
#pragma increment_block_number "gc-eu:0 gc-eu-mq:0"
PreNmiBuff* gAppNmiBufferPtr;
Scheduler gScheduler;


@ -5,9 +5,7 @@
#include "macros.h"
#include "sys_math3d.h"
// For retail BSS ordering, the block number of cbf in Math3D_CylVsCylOverlapCenterDist
// must be 0.
#pragma increment_block_number 108
#pragma increment_block_number "gc-eu:108 gc-eu-mq:108"
s32 Math3D_LineVsLineClosestTwoPoints(Vec3f* lineAPointA, Vec3f* lineAPointB, Vec3f* lineBPointA, Vec3f* lineBPointB,
Vec3f* lineAClosestToB, Vec3f* lineBClosestToA);


@ -8,8 +8,7 @@
#include "assets/objects/gameplay_dangeon_keep/gameplay_dangeon_keep.h"
#include "assets/objects/object_bdoor/object_bdoor.h"
// For retail BSS ordering, the block number of sCurCeilingPoly
// must be between 2 and 243 inclusive.
#pragma increment_block_number "gc-eu:0 gc-eu-mq:0"
static CollisionPoly* sCurCeilingPoly;
static s32 sCurCeilingBgId;
@ -1906,7 +1905,7 @@ s32 func_8002F9EC(PlayState* play, Actor* actor, CollisionPoly* poly, s32 bgId,
return false;
}
#pragma increment_block_number 22
#pragma increment_block_number "gc-eu:22 gc-eu-mq:22"
// Local data used for Farore's Wind light (stored in BSS)
LightInfo D_8015BC00;


@ -4,9 +4,7 @@
#include "terminal.h"
#include "overlays/actors/ovl_En_Horse/z_en_horse.h"
// For retail BSS ordering, the block number of D_8015BD7C
// must be between 88 and 123 inclusive.
#pragma increment_block_number 30
#pragma increment_block_number "gc-eu:0 gc-eu-mq:0"
s16 Camera_RequestSettingImpl(Camera* camera, s16 requestedSetting, s16 flags);
s32 Camera_RequestModeImpl(Camera* camera, s16 requestedMode, u8 forceModeChange);
@ -3632,7 +3630,7 @@ s32 Camera_KeepOn3(Camera* camera) {
return 1;
}
#pragma increment_block_number 100
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
s32 Camera_KeepOn4(Camera* camera) {
static Vec3f D_8015BD50;


@ -12,9 +12,7 @@ typedef s32 (*ColChkLineFunc)(PlayState*, CollisionCheckContext*, Collider*, Vec
#define SAC_ENABLE (1 << 0)
// For retail BSS ordering, the block number of sparkInit in CollisionCheck_BlueBlood
// must be between 183 and 255 inclusive.
#pragma increment_block_number 50
#pragma increment_block_number "gc-eu:64 gc-eu-mq:64"
#if OOT_DEBUG
/**
@ -2695,7 +2693,7 @@ typedef enum {
/* 2 */ MASSTYPE_NORMAL
} ColChkMassType;
#pragma increment_block_number 253
#pragma increment_block_number "gc-eu:252 gc-eu-mq:252"
/**
* Get mass type. Immovable colliders cannot be pushed, while heavy colliders can only be pushed by heavy and immovable


@ -1,9 +1,6 @@
#include "global.h"
// For retail BSS ordering, the block number of D_8015FA88 must be 0 or
// just above (the exact upper bound depends on the block numbers assigned to
// extern variables declared in headers).
#pragma increment_block_number 60
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
ALIGNED(16) SaveContext gSaveContext;
u32 D_8015FA88;


@ -120,9 +120,7 @@ u16 gCamAtSplinePointsAppliedFrame;
u16 gCamEyePointAppliedFrame;
u16 gCamAtPointAppliedFrame;
// For retail BSS ordering, the block number of sReturnToCamId must be greater
// than that of gCamAtPointAppliedFrame (declared in variables.h).
#pragma increment_block_number 180
#pragma increment_block_number "gc-eu:0 gc-eu-mq:0"
// Cam ID to return to when a scripted cutscene is finished
s16 sReturnToCamId;


@ -1,10 +1,7 @@
#include "global.h"
#include "terminal.h"
// For retail BSS ordering, the block number of sKaleidoScopeUpdateFunc must be 0 or
// just above (the exact upper bound depends on the block numbers assigned to
// extern variables declared in headers).
#pragma increment_block_number 60
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
void (*sKaleidoScopeUpdateFunc)(PlayState* play);
void (*sKaleidoScopeDrawFunc)(PlayState* play);


@ -7,10 +7,7 @@
#include "assets/objects/gameplay_keep/gameplay_keep.h"
#include "assets/objects/gameplay_field_keep/gameplay_field_keep.h"
// For retail BSS ordering, the block number of sLensFlareUnused must be lower
// than the extern variables declared in the header (e.g. gLightningStrike)
// while the block number of sNGameOverLightNode must be higher.
#pragma increment_block_number 80
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
typedef enum {
/* 0x00 */ LIGHTNING_BOLT_START,
@ -215,10 +212,7 @@ s16 sLightningFlashAlpha;
s16 sSunDepthTestX;
s16 sSunDepthTestY;
// These variables could be moved farther down in the file to reduce the amount
// of block number padding here, but currently this causes BSS ordering issues
// for debug.
#pragma increment_block_number 217
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
LightNode* sNGameOverLightNode;
LightInfo sNGameOverLightInfo;


@ -100,21 +100,19 @@ static ColliderCylinderInit sLightBallCylinderInit = {
static u8 D_808E4C58[] = { 0, 12, 10, 12, 14, 16, 12, 14, 16, 12, 14, 16, 12, 14, 16, 10, 16, 14 };
static Vec3f sZeroVec = { 0.0f, 0.0f, 0.0f };
// For retail BSS ordering, the block number of sGanondorf must be 0 or just above.
// TODO: There's probably a way to do this with less padding by spreading the variables out and moving
// data around. It would be easier if we had more options for controlling BSS ordering in debug.
#pragma increment_block_number 50
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
static EnGanonMant* sCape;
#pragma increment_block_number 200
// TODO: There's probably a way to match BSS ordering with less padding by spreading the variables out and moving
// data around. It would be easier if we had more options for controlling BSS ordering in debug.
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
static s32 sSeed1;
static s32 sSeed2;
static s32 sSeed3;
#pragma increment_block_number 200
#pragma increment_block_number "gc-eu:192 gc-eu-mq:192"
static BossGanon* sGanondorf;


@ -53,7 +53,7 @@ ActorProfile En_Wonder_Item_Profile = {
/**/ NULL,
};
#pragma increment_block_number 1
#pragma increment_block_number "gc-eu:0 gc-eu-mq:0"
static Vec3f sTagPointsFree[9];
static Vec3f sTagPointsOrdered[9];


@ -14,8 +14,7 @@
#include "assets/scenes/dungeons/ice_doukutu/ice_doukutu_scene.h"
#include "terminal.h"
// For retail BSS ordering, the block number of sSfxPos
// must be between 0 and 213 inclusive.
#pragma increment_block_number "gc-eu:0 gc-eu-mq:0"
#define FLAGS ACTOR_FLAG_4
@ -1396,7 +1395,7 @@ void func_80B3F3D8(void) {
Sfx_PlaySfxCentered2(NA_SE_PL_SKIP);
}
#pragma increment_block_number 20
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
void EnXc_PlayDiveSFX(Vec3f* src, PlayState* play) {
static Vec3f D_80B42DA0;


@ -11,8 +11,7 @@
#include "ichain.h"
#include "terminal.h"
// For retail BSS ordering, the block number of sStreamSfxProjectedPos must be 0.
#pragma increment_block_number 206
#pragma increment_block_number "gc-eu:206 gc-eu-mq:206"
#define FLAGS ACTOR_FLAG_4


@ -354,22 +354,19 @@ void Player_Action_CsAction(Player* this, PlayState* play);
// .bss part 1
// For retail BSS ordering, the block number of sDogSpawnPos in Player_Update
// must be between 0 and 53 inclusive.
// TODO: There's probably a way to do this with less padding by spreading the variables out and moving
// data around. It would be easier if we had more options for controlling BSS ordering in debug.
#pragma increment_block_number 30
#pragma increment_block_number "gc-eu:0 gc-eu-mq:0"
static s32 D_80858AA0;
#pragma increment_block_number 250
// TODO: There's probably a way to match BSS ordering with less padding by spreading the variables out and moving
// data around. It would be easier if we had more options for controlling BSS ordering in debug.
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
static s32 D_80858AA4;
static Vec3f sInteractWallCheckResult;
static Input* sControlInput;
#pragma increment_block_number 50
#pragma increment_block_number "gc-eu:192 gc-eu-mq:192"
// .data


@ -1,234 +0,0 @@
#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2024 zeldaret
# SPDX-License-Identifier: CC0-1.0
from __future__ import annotations
import argparse
import dataclasses
import enum
from pathlib import Path
import sys
from typing import BinaryIO
import elftools.elf.elffile
import mapfile_parser.mapfile
@dataclasses.dataclass
class Reloc:
name: str
offset_32: int | None
offset_hi16: int | None
offset_lo16: int | None
addend: int
@dataclasses.dataclass
class Pointer:
name: str
addend: int
base_value: int
build_value: int
def read_relocs(object_path: Path, section_name: str) -> list[Reloc]:
with open(object_path, "rb") as f:
elffile = elftools.elf.elffile.ELFFile(f)
symtab = elffile.get_section_by_name(".symtab")
data = elffile.get_section_by_name(section_name).data()
reloc_section = elffile.get_section_by_name(f".rel{section_name}")
if reloc_section is None:
return []
relocs = []
offset_hi16 = 0
for reloc in reloc_section.iter_relocations():
reloc_offset = reloc.entry["r_offset"]
reloc_type = reloc.entry["r_info_type"]
reloc_name = symtab.get_symbol(reloc.entry["r_info_sym"]).name
if reloc_type == 2: # R_MIPS_32
offset_32 = reloc_offset
addend = int.from_bytes(
data[reloc_offset : reloc_offset + 4], "big", signed=True
)
relocs.append(Reloc(reloc_name, offset_32, None, None, addend))
elif reloc_type == 4: # R_MIPS_26
pass
elif reloc_type == 5: # R_MIPS_HI16
offset_hi16 = reloc_offset
elif reloc_type == 6: # R_MIPS_LO16
offset_lo16 = reloc_offset
addend_hi16 = int.from_bytes(
data[offset_hi16 + 2 : offset_hi16 + 4], "big", signed=False
)
addend_lo16 = int.from_bytes(
data[offset_lo16 + 2 : offset_lo16 + 4], "big", signed=True
)
addend = (addend_hi16 << 16) + addend_lo16
relocs.append(Reloc(reloc_name, None, offset_hi16, offset_lo16, addend))
else:
raise NotImplementedError(f"Unsupported relocation type: {reloc_type}")
return relocs
def read_u32(f: BinaryIO, offset: int) -> int:
f.seek(offset)
return int.from_bytes(f.read(4), "big")
def read_u16(f: BinaryIO, offset: int) -> int:
f.seek(offset)
return int.from_bytes(f.read(2), "big")
def read_s16(f: BinaryIO, offset: int) -> int:
f.seek(offset)
return int.from_bytes(f.read(2), "big", signed=True)
def main():
parser = argparse.ArgumentParser(
description="Report bss reorderings between the baserom and the current build "
"by parsing relocations from the built object files and comparing their final values "
"between the baserom and the current build. "
"Assumes that the only differences are due to ordering and that the text sections of the "
"ROMS are not shifted."
)
parser.add_argument(
"--oot-version",
"-v",
type=str,
default="gc-eu-mq-dbg",
help="OOT version (default: gc-eu-mq-dbg)",
)
parser.add_argument(
"--segment",
type=str,
help="ROM segment to check, e.g. 'boot', 'code', or 'ovl_player_actor' (default: all)",
)
parser.add_argument(
"--all-sections",
action="store_true",
help="Check ordering for all section types, not just .bss",
)
args = parser.parse_args()
version = args.oot_version
mapfile = mapfile_parser.mapfile.MapFile()
mapfile.readMapFile(f"build/{version}/oot-{version}.map")
# Segments built from source code (filtering out assets)
source_code_segments = []
for mapfile_segment in mapfile:
if (
args.segment
and mapfile_segment.name != f"..{args.segment}"
and mapfile_segment.name != f"..{args.segment}.bss"
):
continue
if not (
mapfile_segment.name.startswith("..boot")
or mapfile_segment.name.startswith("..code")
or mapfile_segment.name.startswith("..buffers")
or mapfile_segment.name.startswith("..ovl_")
):
continue
source_code_segments.append(mapfile_segment)
base = open(f"baseroms/{version}/baserom-decompressed.z64", "rb")
build = open(f"build/{version}/oot-{version}.z64", "rb")
# Find all pointers with different values
pointers = []
for mapfile_segment in source_code_segments:
for file in mapfile_segment:
if not str(file.filepath).endswith(".o"):
continue
if file.sectionType == ".bss":
continue
for reloc in read_relocs(file.filepath, file.sectionType):
if reloc.offset_32 is not None:
base_value = read_u32(base, file.vrom + reloc.offset_32)
build_value = read_u32(build, file.vrom + reloc.offset_32)
elif reloc.offset_hi16 is not None and reloc.offset_lo16 is not None:
if (
read_u16(base, file.vrom + reloc.offset_hi16)
!= read_u16(build, file.vrom + reloc.offset_hi16)
) or (
read_u16(base, file.vrom + reloc.offset_lo16)
!= read_u16(build, file.vrom + reloc.offset_lo16)
):
print(
f"Error: Reference to {reloc.name} in {file.filepath} is in a shifted (or non-matching even ignoring relocs) portion of the ROM.\n"
"Please ensure that the only differences between the baserom and the current build are due to data ordering.",
file=sys.stderr,
)
sys.exit(1)
base_value = (
read_u16(base, file.vrom + reloc.offset_hi16 + 2) << 16
) + read_s16(base, file.vrom + reloc.offset_lo16 + 2)
build_value = (
read_u16(build, file.vrom + reloc.offset_hi16 + 2) << 16
) + read_s16(build, file.vrom + reloc.offset_lo16 + 2)
else:
assert False, "Invalid relocation"
pointers.append(
Pointer(reloc.name, reloc.addend, base_value, build_value)
)
# Remove duplicates and sort by baserom address
pointers = list({p.base_value: p for p in pointers}.values())
pointers.sort(key=lambda p: p.base_value)
# Go through sections and report differences
for mapfile_segment in source_code_segments:
for file in mapfile_segment:
if not args.all_sections and not file.sectionType == ".bss":
continue
pointers_in_section = [
p
for p in pointers
if file.vram <= p.build_value < file.vram + file.size
]
if not pointers_in_section:
continue
# Try to detect if the section is shifted by comparing the lowest
# address among any pointer into the section between base and build
base_min_address = min(p.base_value for p in pointers_in_section)
build_min_address = min(p.build_value for p in pointers_in_section)
section_shift = build_min_address - base_min_address
if all(
p.build_value == p.base_value + section_shift
for p in pointers_in_section
):
continue
print(f"{file.filepath} {file.sectionType} is reordered:")
for i, p in enumerate(pointers_in_section):
if p.addend > 0:
addend_str = f"+0x{p.addend:X}"
elif p.addend < 0:
addend_str = f"-0x{-p.addend:X}"
else:
addend_str = ""
if i > 0 and p.build_value < pointers_in_section[i - 1].build_value:
print(" --------------------") # BSS wraps around
print(
f" {p.base_value:08X} -> {p.build_value:08X} {p.name}{addend_str}"
)
if __name__ == "__main__":
main()

tools/fix_bss.py (new executable file, 705 lines)

@ -0,0 +1,705 @@
#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2024 zeldaret
# SPDX-License-Identifier: CC0-1.0
from __future__ import annotations
import argparse
from collections import Counter
import colorama
from dataclasses import dataclass
import io
import itertools
import multiprocessing
import multiprocessing.pool
from pathlib import Path
import re
import shlex
import sys
import time
from typing import BinaryIO, Iterator
from ido_block_numbers import (
generate_make_log,
find_compiler_command_line,
run_cfe,
SymbolTableEntry,
UcodeOp,
)
import elftools.elf.elffile
import mapfile_parser.mapfile
def read_u32(f: BinaryIO, offset: int) -> int:
f.seek(offset)
return int.from_bytes(f.read(4), "big")
def read_u16(f: BinaryIO, offset: int) -> int:
f.seek(offset)
return int.from_bytes(f.read(2), "big")
def read_s16(f: BinaryIO, offset: int) -> int:
f.seek(offset)
return int.from_bytes(f.read(2), "big", signed=True)
class FixBssException(Exception):
pass
@dataclass
class Reloc:
name: str
offset_32: int | None
offset_hi16: int | None
offset_lo16: int | None
addend: int
@dataclass
class Pointer:
name: str
addend: int
base_value: int
build_value: int
# Read relocations from an ELF file section
def read_relocs(object_path: Path, section_name: str) -> list[Reloc]:
with open(object_path, "rb") as f:
elffile = elftools.elf.elffile.ELFFile(f)
symtab = elffile.get_section_by_name(".symtab")
data = elffile.get_section_by_name(section_name).data()
reloc_section = elffile.get_section_by_name(f".rel{section_name}")
if reloc_section is None:
return []
relocs = []
offset_hi16 = 0
for reloc in reloc_section.iter_relocations():
reloc_offset = reloc.entry["r_offset"]
reloc_type = reloc.entry["r_info_type"]
reloc_name = symtab.get_symbol(reloc.entry["r_info_sym"]).name
if reloc_type == 2: # R_MIPS_32
offset_32 = reloc_offset
addend = int.from_bytes(
data[reloc_offset : reloc_offset + 4], "big", signed=True
)
relocs.append(Reloc(reloc_name, offset_32, None, None, addend))
elif reloc_type == 4: # R_MIPS_26
pass
elif reloc_type == 5: # R_MIPS_HI16
offset_hi16 = reloc_offset
elif reloc_type == 6: # R_MIPS_LO16
offset_lo16 = reloc_offset
addend_hi16 = int.from_bytes(
data[offset_hi16 + 2 : offset_hi16 + 4], "big", signed=False
)
addend_lo16 = int.from_bytes(
data[offset_lo16 + 2 : offset_lo16 + 4], "big", signed=True
)
addend = (addend_hi16 << 16) + addend_lo16
relocs.append(Reloc(reloc_name, None, offset_hi16, offset_lo16, addend))
else:
raise NotImplementedError(f"Unsupported relocation type: {reloc_type}")
return relocs
def get_file_pointers(
file: mapfile_parser.mapfile.File,
base: BinaryIO,
build: BinaryIO,
) -> list[Pointer]:
pointers = []
# TODO: open each ELF file only once instead of once per section?
for reloc in read_relocs(file.filepath, file.sectionType):
if reloc.offset_32 is not None:
base_value = read_u32(base, file.vrom + reloc.offset_32)
build_value = read_u32(build, file.vrom + reloc.offset_32)
elif reloc.offset_hi16 is not None and reloc.offset_lo16 is not None:
if (
read_u16(base, file.vrom + reloc.offset_hi16)
!= read_u16(build, file.vrom + reloc.offset_hi16)
) or (
read_u16(base, file.vrom + reloc.offset_lo16)
!= read_u16(build, file.vrom + reloc.offset_lo16)
):
raise FixBssException(
f"Reference to {reloc.name} in {file.filepath} is in a shifted or non-matching portion of the ROM.\n"
"Please ensure that the only differences between the baserom and the current build are due to BSS ordering."
)
base_value = (
read_u16(base, file.vrom + reloc.offset_hi16 + 2) << 16
) + read_s16(base, file.vrom + reloc.offset_lo16 + 2)
build_value = (
read_u16(build, file.vrom + reloc.offset_hi16 + 2) << 16
) + read_s16(build, file.vrom + reloc.offset_lo16 + 2)
else:
assert False, "Invalid relocation"
pointers.append(Pointer(reloc.name, reloc.addend, base_value, build_value))
return pointers
base = None
build = None
def get_file_pointers_worker_init(version: str):
global base
global build
base = open(f"baseroms/{version}/baserom-decompressed.z64", "rb")
build = open(f"build/{version}/oot-{version}.z64", "rb")
def get_file_pointers_worker(file: mapfile_parser.mapfile.File) -> list[Pointer]:
assert base is not None
assert build is not None
return get_file_pointers(file, base, build)
# Compare pointers between the baserom and the current build, returning a dictionary from
# C files to a list of pointers into their BSS sections
def compare_pointers(version: str) -> dict[Path, list[Pointer]]:
mapfile_path = Path(f"build/{version}/oot-{version}.map")
if not mapfile_path.exists():
raise FixBssException(f"Could not open {mapfile_path}")
mapfile = mapfile_parser.mapfile.MapFile()
mapfile.readMapFile(mapfile_path)
# Segments built from source code (filtering out assets)
source_code_segments = []
for mapfile_segment in mapfile:
if not (
mapfile_segment.name.startswith("..boot")
or mapfile_segment.name.startswith("..code")
or mapfile_segment.name.startswith("..buffers")
or mapfile_segment.name.startswith("..ovl_")
):
continue
source_code_segments.append(mapfile_segment)
# Find all pointers with different values
if not sys.stdout.isatty():
print(f"Comparing pointers between baserom and build ...")
pointers = []
file_results = []
with multiprocessing.Pool(
initializer=get_file_pointers_worker_init,
initargs=(version,),
) as p:
for mapfile_segment in source_code_segments:
for file in mapfile_segment:
if not str(file.filepath).endswith(".o"):
continue
if file.sectionType == ".bss":
continue
file_result = p.apply_async(get_file_pointers_worker, (file,))
file_results.append(file_result)
# Report progress and wait until all files are done
num_files = len(file_results)
while True:
time.sleep(0.010)
num_files_done = sum(file_result.ready() for file_result in file_results)
if sys.stdout.isatty():
print(
f"Comparing pointers between baserom and build ... {num_files_done:>{len(f'{num_files}')}}/{num_files}",
end="\r",
)
if num_files_done == num_files:
break
if sys.stdout.isatty():
print("")
# Collect results and check for errors
for file_result in file_results:
try:
pointers.extend(file_result.get())
except FixBssException as e:
print(f"{colorama.Fore.RED}Error: {str(e)}{colorama.Fore.RESET}")
sys.exit(1)
# Remove duplicates and sort by baserom address
pointers = list({p.base_value: p for p in pointers}.values())
pointers.sort(key=lambda p: p.base_value)
# Go through sections and collect differences
pointers_by_file = {}
for mapfile_segment in source_code_segments:
for file in mapfile_segment:
if not file.sectionType == ".bss":
continue
pointers_in_section = [
p
for p in pointers
if file.vram <= p.build_value < file.vram + file.size
]
if not pointers_in_section:
continue
c_file = file.filepath.relative_to(f"build/{version}").with_suffix(".c")
pointers_by_file[c_file] = pointers_in_section
return pointers_by_file
@dataclass
class Pragma:
line_number: int
block_number: int
amount: int
# A BSS variable in the source code
@dataclass
class BssVariable:
block_number: int
name: str
size: int
align: int
# A BSS variable with its offset in the compiled .bss section
@dataclass
class BssSymbol:
name: str
offset: int
size: int
align: int
INCREMENT_BLOCK_NUMBER_RE = re.compile(r"increment_block_number_(\d+)_(\d+)")
# Find increment_block_number pragmas by parsing the symbol names generated by preprocess.py.
# This is pretty ugly but it seems more reliable than trying to determine the line numbers of
# BSS variables in the C file.
def find_pragmas(symbol_table: list[SymbolTableEntry]) -> list[Pragma]:
# Keep track of first block number and count for each line number
first_block_number = {}
amounts: Counter[int] = Counter()
for block_number, entry in enumerate(symbol_table):
if match := INCREMENT_BLOCK_NUMBER_RE.match(entry.name):
line_number = int(match.group(1))
if line_number not in first_block_number:
first_block_number[line_number] = block_number
amounts[line_number] += 1
pragmas = []
for line_number, block_number in sorted(first_block_number.items()):
pragmas.append(Pragma(line_number, block_number, amounts[line_number]))
return pragmas
# Find all BSS variables from IDO's symbol table and U-Code output.
def find_bss_variables(
symbol_table: list[SymbolTableEntry], ucode: list[UcodeOp]
) -> list[BssVariable]:
bss_variables = []
init_block_numbers = set(op.i1 for op in ucode if op.opcode_name == "init")
last_function_name = None
for op in ucode:
# gsym: file-level global symbol
# lsym: file-level static symbol
# fsym: function-level static symbol
if op.opcode_name in ("gsym", "lsym", "fsym"):
block_number = op.i1
if block_number in init_block_numbers:
continue # not BSS
name = symbol_table[block_number].name
if op.opcode_name == "fsym":
name = f"{last_function_name}::{name}"
size = op.args[0]
align = 1 << op.lexlev
# TODO: IDO seems to automatically align anything with size 8 or more to
# an 8-byte boundary in BSS. Is this correct?
if size >= 8:
align = 8
bss_variables.append(BssVariable(block_number, name, size, align))
elif op.opcode_name == "ent":
last_function_name = symbol_table[op.i1].name
bss_variables.sort(key=lambda var: var.block_number)
return bss_variables
# Predict offsets of BSS variables in the build.
def predict_bss_ordering(variables: list[BssVariable]) -> list[BssSymbol]:
bss_symbols = []
offset = 0
# Sort by block number mod 256 (for ties, the original order is preserved)
for var in sorted(variables, key=lambda var: var.block_number % 256):
size = var.size
align = var.align
offset = (offset + align - 1) & ~(align - 1)
bss_symbols.append(BssSymbol(var.name, offset, size, align))
offset += size
return bss_symbols
# Match up BSS variables between the baserom and the build using the pointers from relocations.
# Note that we may not be able to match all variables if a variable is not referenced by any pointer.
def determine_base_bss_ordering(
build_bss_symbols: list[BssSymbol], pointers: list[Pointer]
) -> list[BssSymbol]:
# Assume that the lowest address is the start of the BSS section
base_section_start = min(p.base_value for p in pointers)
build_section_start = min(p.build_value for p in pointers)
found_symbols: dict[str, BssSymbol] = {}
for p in pointers:
base_offset = p.base_value - base_section_start
build_offset = p.build_value - build_section_start
new_symbol = None
new_offset = 0
for symbol in build_bss_symbols:
if (
symbol.offset <= build_offset
and build_offset < symbol.offset + symbol.size
):
new_symbol = symbol
new_offset = base_offset - (build_offset - symbol.offset)
break
if new_symbol is None:
if p.addend > 0:
addend_str = f"+0x{p.addend:X}"
elif p.addend < 0:
addend_str = f"-0x{-p.addend:X}"
else:
addend_str = ""
raise FixBssException(
f"Could not find BSS symbol for pointer {p.name}{addend_str} "
f"(base address 0x{p.base_value:08X}, build address 0x{p.build_value:08X})"
)
if new_symbol.name in found_symbols:
# Sanity check that offsets agree
existing_offset = found_symbols[new_symbol.name].offset
if new_offset != existing_offset:
raise FixBssException(
f"BSS symbol {new_symbol.name} found at conflicting offsets in this baserom "
f"(0x{existing_offset:04X} and 0x{new_offset:04X}). Is the build up-to-date?"
)
else:
found_symbols[new_symbol.name] = BssSymbol(
new_symbol.name, new_offset, new_symbol.size, new_symbol.align
)
return list(sorted(found_symbols.values(), key=lambda symbol: symbol.offset))
# Generate a sequence of integers in the range [0, 256) with a 2-adic valuation of exactly `nu`.
# The 2-adic valuation of an integer n is the largest k such that 2^k divides n
# (see https://en.wikipedia.org/wiki/P-adic_valuation), and for convenience we define
# the 2-adic valuation of 0 to be 8. Here's what the sequences look like for nu = 0..8:
# 8: 0
# 7: 128
# 6: 64, 192
# 5: 32, 96, 160, 224
# 4: 16, 48, 80, 112, ...
# 3: 8, 24, 40, 56, ...
# 2: 4, 12, 20, 28, ...
# 1: 2, 6, 10, 14, ...
# 0: 1, 3, 5, 7, ...
def gen_seq(nu: int) -> Iterator[int]:
if nu == 8:
yield 0
else:
for i in range(1 << (7 - nu)):
yield (2 * i + 1) * (1 << nu)
# Yields all n-tuples of integers in the range [0, 256) with minimum 2-adic valuation
# of exactly `min_nu`.
def gen_candidates_impl(n: int, min_nu: int) -> Iterator[tuple[int, ...]]:
if n == 1:
for n in gen_seq(min_nu):
yield (n,)
else:
# (a, *b) has min 2-adic valuation = min_nu if and only if either:
# a has 2-adic valuation > min_nu and b has min 2-adic valuation == min_nu
# a has 2-adic valuation == min_nu and b has min 2-adic valuation >= min_nu
for min_nu_a in reversed(range(min_nu + 1, 9)):
for a in gen_seq(min_nu_a):
for b in gen_candidates_impl(n - 1, min_nu):
yield (a, *b)
for a in gen_seq(min_nu):
for min_nu_b in reversed(range(min_nu, 9)):
for b in gen_candidates_impl(n - 1, min_nu_b):
yield (a, *b)
# Yields all n-tuples of integers in the range [0, 256), ordered by descending minimum
# 2-adic valuation of the elements in the tuple. For example, for n = 2 the sequence is:
# (0, 0), (0, 128), (128, 0), (128, 128), (0, 64), (0, 192), (128, 64), (128, 192), ...
def gen_candidates(n: int) -> Iterator[tuple[int, ...]]:
for nu in reversed(range(9)):
yield from gen_candidates_impl(n, nu)
# Determine a new set of increment_block_number pragmas that will fix the BSS ordering.
def solve_bss_ordering(
pragmas: list[Pragma],
bss_variables: list[BssVariable],
base_bss_symbols: list[BssSymbol],
) -> list[Pragma]:
base_symbols_by_name = {symbol.name: symbol for symbol in base_bss_symbols}
# Our "algorithm" just tries all possible combinations of increment_block_number amounts,
# which can get very slow with more than a few pragmas. But, we order the candidates in a
# binary-search-esque way to try to find a solution faster.
for new_amounts in gen_candidates(len(pragmas)):
# Generate new block numbers
new_bss_variables = []
for var in bss_variables:
new_block_number = var.block_number
for pragma, new_amount in zip(pragmas, new_amounts):
if var.block_number >= pragma.block_number:
new_block_number += new_amount - pragma.amount
new_bss_variables.append(
BssVariable(new_block_number, var.name, var.size, var.align)
)
# Predict new BSS and check if new ordering matches
new_bss_symbols = predict_bss_ordering(new_bss_variables)
bss_ordering_matches = True
for symbol in new_bss_symbols:
base_symbol = base_symbols_by_name.get(symbol.name)
if base_symbol is None:
continue
if symbol.offset != base_symbol.offset:
bss_ordering_matches = False
break
if bss_ordering_matches:
new_pragmas = []
for pragma, new_amount in zip(pragmas, new_amounts):
new_pragmas.append(
Pragma(pragma.line_number, pragma.block_number, new_amount)
)
return new_pragmas
raise FixBssException("Could not find any solutions")
def update_source_file(version_to_update: str, file: Path, new_pragmas: list[Pragma]):
with open(file, "r", encoding="utf-8") as f:
lines = f.readlines()
for pragma in new_pragmas:
line = lines[pragma.line_number - 1]
if not line.startswith("#pragma increment_block_number "):
raise FixBssException(
f"Expected #pragma increment_block_number on line {pragma.line_number}"
)
# Grab pragma argument and remove quotes
arg = line.strip()[len("#pragma increment_block_number ") + 1 : -1]
amounts_by_version = {}
for part in arg.split():
version, amount_str = part.split(":")
amounts_by_version[version] = int(amount_str)
amounts_by_version[version_to_update] = pragma.amount
new_arg = " ".join(
f"{version}:{amount}" for version, amount in amounts_by_version.items()
)
new_line = f'#pragma increment_block_number "{new_arg}"\n'
lines[pragma.line_number - 1] = new_line
with open(file, "w", encoding="utf-8") as f:
f.writelines(lines)
def process_file(
file: Path,
pointers: list[Pointer],
make_log: list[str],
dry_run: bool,
version: str,
):
print(f"{colorama.Fore.CYAN}Processing {file} ...{colorama.Fore.RESET}")
command_line = find_compiler_command_line(make_log, file)
if command_line is None:
raise FixBssException(f"Could not determine compiler command line for {file}")
print(f"Compiler command: {shlex.join(command_line)}")
symbol_table, ucode = run_cfe(command_line, keep_files=False)
bss_variables = find_bss_variables(symbol_table, ucode)
print("BSS variables:")
for var in bss_variables:
i = var.block_number
print(
f" {i:>6} [{i%256:>3}]: size=0x{var.size:04X} align=0x{var.align:X} {var.name}"
)
build_bss_symbols = predict_bss_ordering(bss_variables)
print("Current build BSS ordering:")
for symbol in build_bss_symbols:
print(
f" offset=0x{symbol.offset:04X} size=0x{symbol.size:04X} align=0x{symbol.align:X} {symbol.name}"
)
if not pointers:
raise FixBssException(f"No pointers to BSS found in ROM for {file}")
base_bss_symbols = determine_base_bss_ordering(build_bss_symbols, pointers)
print("Baserom BSS ordering:")
for symbol in base_bss_symbols:
print(
f" offset=0x{symbol.offset:04X} size=0x{symbol.size:04X} align=0x{symbol.align:X} {symbol.name}"
)
pragmas = find_pragmas(symbol_table)
max_pragmas = 3
if not pragmas:
raise FixBssException(f"No increment_block_number pragmas found in {file}")
elif len(pragmas) > max_pragmas:
raise FixBssException(
f"Too many increment_block_number pragmas found in {file} (found {len(pragmas)}, max {max_pragmas})"
)
print("Solving BSS ordering ...")
new_pragmas = solve_bss_ordering(pragmas, bss_variables, base_bss_symbols)
print("New increment_block_number amounts:")
for pragma in new_pragmas:
print(f" line {pragma.line_number}: {pragma.amount}")
if not dry_run:
update_source_file(version, file, new_pragmas)
print(f"{colorama.Fore.GREEN}Updated {file}{colorama.Fore.RESET}")
def process_file_worker(*x):
# Collect output in a buffer to avoid interleaving output when processing multiple files
old_stdout = sys.stdout
fake_stdout = io.StringIO()
try:
sys.stdout = fake_stdout
process_file(*x)
except Exception as e:
print(f"{colorama.Fore.RED}Error: {str(e)}{colorama.Fore.RESET}")
raise
finally:
sys.stdout = old_stdout
print()
print(fake_stdout.getvalue(), end="")
def main():
parser = argparse.ArgumentParser(
description="Automatically fix BSS ordering by editing increment_block_number pragmas. "
"Assumes that the build is up-to-date and that only differences between the baserom and "
"the current build are due to BSS ordering."
)
parser.add_argument(
"--oot-version",
"-v",
type=str,
required=True,
help="OOT version",
)
parser.add_argument(
"--dry-run",
action="store_true",
help="Print changes instead of editing source files",
)
parser.add_argument(
"files",
metavar="FILE",
nargs="*",
type=Path,
help="Fix BSS ordering for a particular C file (default: all files with BSS differences)",
)
args = parser.parse_args()
version = args.oot_version
pointers_by_file = compare_pointers(version)
files_with_reordering = []
for file, pointers in pointers_by_file.items():
# Try to detect if the section is shifted by comparing the lowest
# address among any pointer into the section between base and build
base_min_address = min(p.base_value for p in pointers)
build_min_address = min(p.build_value for p in pointers)
if not all(
p.build_value - build_min_address == p.base_value - base_min_address
for p in pointers
):
files_with_reordering.append(file)
if files_with_reordering:
print("Files with BSS reordering:")
for file in files_with_reordering:
print(f" {file}")
else:
print("No BSS reordering found.")
if args.files:
files_to_fix = args.files
else:
files_to_fix = files_with_reordering
if not files_to_fix:
return
print(f"Running make to find compiler command line ...")
make_log = generate_make_log(version)
with multiprocessing.Pool() as p:
file_results = []
for file in files_to_fix:
file_result = p.apply_async(
process_file_worker,
(
file,
pointers_by_file.get(file, []),
make_log,
args.dry_run,
version,
),
)
file_results.append(file_result)
# Wait until all files are done
while not all(file_result.ready() for file_result in file_results):
time.sleep(0.010)
# Collect results and check for errors
num_successes = sum(file_result.successful() for file_result in file_results)
if num_successes == len(file_results):
print()
print(f"Updated {num_successes}/{len(file_results)} files.")
else:
print()
print(
f"{colorama.Fore.RED}Updated {num_successes}/{len(file_results)} files.{colorama.Fore.RESET}"
)
sys.exit(1)
if __name__ == "__main__":
main()
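
As an aside (not part of the commit), the candidate ordering described in the gen_candidates comments above can be checked with a small standalone sketch. The functions below simply mirror the ones in tools/fix_bss.py and print the first few 2-tuples:

# Illustrative only: self-contained copy of the candidate generator from
# tools/fix_bss.py, used to confirm the ordering documented in its comments.
from __future__ import annotations
from itertools import islice
from typing import Iterator

def gen_seq(nu: int) -> Iterator[int]:
    # Integers in [0, 256) with 2-adic valuation exactly `nu` (0 is treated as 8)
    if nu == 8:
        yield 0
    else:
        for i in range(1 << (7 - nu)):
            yield (2 * i + 1) * (1 << nu)

def gen_candidates_impl(n: int, min_nu: int) -> Iterator[tuple[int, ...]]:
    # n-tuples whose minimum 2-adic valuation is exactly `min_nu`
    if n == 1:
        for a in gen_seq(min_nu):
            yield (a,)
    else:
        for min_nu_a in reversed(range(min_nu + 1, 9)):
            for a in gen_seq(min_nu_a):
                for b in gen_candidates_impl(n - 1, min_nu):
                    yield (a, *b)
        for a in gen_seq(min_nu):
            for min_nu_b in reversed(range(min_nu, 9)):
                for b in gen_candidates_impl(n - 1, min_nu_b):
                    yield (a, *b)

def gen_candidates(n: int) -> Iterator[tuple[int, ...]]:
    # All n-tuples in [0, 256)^n, ordered by descending minimum 2-adic valuation
    for nu in reversed(range(9)):
        yield from gen_candidates_impl(n, nu)

print(list(islice(gen_candidates(2), 8)))
# [(0, 0), (0, 128), (128, 0), (128, 128), (0, 64), (0, 192), (128, 64), (128, 192)]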


@ -14,14 +14,20 @@
# https://github.com/decompals/ultralib/blob/main/tools/mdebug.py
# https://www.cs.unibo.it/~solmi/teaching/arch_2002-2003/AssemblyLanguageProgDoc.pdf
# https://github.com/decompals/IDO/blob/main/IDO_7.1/dist/compiler_eoe/usr/include/sym.h
# https://github.com/Synray/ido-ucode-utils
from __future__ import annotations
import argparse
from dataclasses import dataclass
import itertools
from pathlib import Path
import platform
import struct
import subprocess
import shlex
import sys
from typing import Optional, Tuple
class Header:
@ -157,10 +163,15 @@ def read_string(data, start):
return data[start : start + size].decode("ascii")
def print_symbol_table(data):
header = Header(data[0 : Header.SIZE])
@dataclass
class SymbolTableEntry:
symbol: Optional[Symbol]
name: str
extern: bool
print(f"block [mod 256]: linkage type class name")
def parse_symbol_table(data: bytes) -> list[SymbolTableEntry]:
header = Header(data[0 : Header.SIZE])
# File descriptors
fds = []
@ -170,15 +181,14 @@ def print_symbol_table(data):
)
# Symbol identifiers ("dense numbers")
entries = []
for i in range(header.idnMax):
ifd, isym = struct.unpack(">II", read_entry(data, header.cbDnOffset, i, 8))
if isym == 0xFFFFF:
# TODO: is this always a string?
extern = False
sym = None
sym_name = ""
st = "string"
sc = ""
extern = False
else:
extern = ifd == 0x7FFFFFFF
if extern:
@ -195,15 +205,251 @@ def print_symbol_table(data):
)
)
sym_name = read_string(data, header.cbSsOffset + fd.issBase + sym.iss)
st = sym.symbol_type()
sc = sym.symbol_storage_class()
entries.append(SymbolTableEntry(sym, sym_name, extern))
return entries
def print_symbol_table(symbol_table: list[SymbolTableEntry]):
print(f"block [mod 256]: linkage type class name")
for i, entry in enumerate(symbol_table):
if not entry.symbol:
# TODO: is this always a string?
st = "string"
sc = ""
else:
st = entry.symbol.symbol_type()
sc = entry.symbol.symbol_storage_class()
print(
f'{i:>9} [{i%256:>3}]: {"extern" if extern else "":<7} {st:<10} {sc:<9} {sym_name:<40}'
f'{i:>9} [{i%256:>3}]: {"extern" if entry.extern else "":<7} {st:<10} {sc:<9} {entry.name:<40}'
)
def find_compiler_command_line(filename, oot_version):
@dataclass
class UcodeOp:
opcode: int
opcode_name: str
mtype: int
dtype: int
lexlev: int
i1: int
args: list[int]
string: Optional[bytes]
@dataclass
class UcodeOpInfo:
opcode: int
name: str
length: int
has_const: bool
UCODE_OP_INFO = [
UcodeOpInfo(0x00, "abs", 2, False),
UcodeOpInfo(0x01, "add", 2, False),
UcodeOpInfo(0x02, "adj", 4, False),
UcodeOpInfo(0x03, "aent", 4, False),
UcodeOpInfo(0x04, "and", 2, False),
UcodeOpInfo(0x05, "aos", 2, False),
UcodeOpInfo(0x06, "asym", 4, False),
UcodeOpInfo(0x07, "bgn", 4, False),
UcodeOpInfo(0x08, "bgnb", 2, False),
UcodeOpInfo(0x09, "bsub", 2, False),
UcodeOpInfo(0x0A, "cg1", 2, False),
UcodeOpInfo(0x0B, "cg2", 2, False),
UcodeOpInfo(0x0C, "chkh", 2, False),
UcodeOpInfo(0x0D, "chkl", 2, False),
UcodeOpInfo(0x0E, "chkn", 2, False),
UcodeOpInfo(0x0F, "chkt", 2, False),
UcodeOpInfo(0x10, "cia", 4, True),
UcodeOpInfo(0x11, "clab", 4, False),
UcodeOpInfo(0x12, "clbd", 2, False),
UcodeOpInfo(0x13, "comm", 4, True),
UcodeOpInfo(0x14, "csym", 4, False),
UcodeOpInfo(0x15, "ctrl", 4, False),
UcodeOpInfo(0x16, "cubd", 2, False),
UcodeOpInfo(0x17, "cup", 4, False),
UcodeOpInfo(0x18, "cvt", 4, False),
UcodeOpInfo(0x19, "cvtl", 2, False),
UcodeOpInfo(0x1A, "dec", 2, False),
UcodeOpInfo(0x1B, "def", 4, False),
UcodeOpInfo(0x1C, "dif", 4, False),
UcodeOpInfo(0x1D, "div", 2, False),
UcodeOpInfo(0x1E, "dup", 2, False),
UcodeOpInfo(0x1F, "end", 2, False),
UcodeOpInfo(0x20, "endb", 2, False),
UcodeOpInfo(0x21, "ent", 4, False),
UcodeOpInfo(0x22, "ueof", 2, False),
UcodeOpInfo(0x23, "equ", 2, False),
UcodeOpInfo(0x24, "esym", 4, False),
UcodeOpInfo(0x25, "fill", 4, False),
UcodeOpInfo(0x26, "fjp", 2, False),
UcodeOpInfo(0x27, "fsym", 4, False),
UcodeOpInfo(0x28, "geq", 2, False),
UcodeOpInfo(0x29, "grt", 2, False),
UcodeOpInfo(0x2A, "gsym", 4, False),
UcodeOpInfo(0x2B, "hsym", 4, False),
UcodeOpInfo(0x2C, "icuf", 4, False),
UcodeOpInfo(0x2D, "idx", 2, False),
UcodeOpInfo(0x2E, "iequ", 4, False),
UcodeOpInfo(0x2F, "igeq", 4, False),
UcodeOpInfo(0x30, "igrt", 4, False),
UcodeOpInfo(0x31, "ijp", 2, False),
UcodeOpInfo(0x32, "ilda", 6, False),
UcodeOpInfo(0x33, "ildv", 4, False),
UcodeOpInfo(0x34, "ileq", 4, False),
UcodeOpInfo(0x35, "iles", 4, False),
UcodeOpInfo(0x36, "ilod", 4, False),
UcodeOpInfo(0x37, "inc", 2, False),
UcodeOpInfo(0x38, "ineq", 4, False),
UcodeOpInfo(0x39, "init", 6, True),
UcodeOpInfo(0x3A, "inn", 4, False),
UcodeOpInfo(0x3B, "int", 4, False),
UcodeOpInfo(0x3C, "ior", 2, False),
UcodeOpInfo(0x3D, "isld", 4, False),
UcodeOpInfo(0x3E, "isst", 4, False),
UcodeOpInfo(0x3F, "istr", 4, False),
UcodeOpInfo(0x40, "istv", 4, False),
UcodeOpInfo(0x41, "ixa", 2, False),
UcodeOpInfo(0x42, "lab", 4, False),
UcodeOpInfo(0x43, "lbd", 2, False),
UcodeOpInfo(0x44, "lbdy", 2, False),
UcodeOpInfo(0x45, "lbgn", 2, False),
UcodeOpInfo(0x46, "lca", 4, True),
UcodeOpInfo(0x47, "lda", 6, False),
UcodeOpInfo(0x48, "ldap", 2, False),
UcodeOpInfo(0x49, "ldc", 4, True),
UcodeOpInfo(0x4A, "ldef", 4, False),
UcodeOpInfo(0x4B, "ldsp", 2, False),
UcodeOpInfo(0x4C, "lend", 2, False),
UcodeOpInfo(0x4D, "leq", 2, False),
UcodeOpInfo(0x4E, "les", 2, False),
UcodeOpInfo(0x4F, "lex", 2, False),
UcodeOpInfo(0x50, "lnot", 2, False),
UcodeOpInfo(0x51, "loc", 2, False),
UcodeOpInfo(0x52, "lod", 4, False),
UcodeOpInfo(0x53, "lsym", 4, False),
UcodeOpInfo(0x54, "ltrm", 2, False),
UcodeOpInfo(0x55, "max", 2, False),
UcodeOpInfo(0x56, "min", 2, False),
UcodeOpInfo(0x57, "mod", 2, False),
UcodeOpInfo(0x58, "mov", 4, False),
UcodeOpInfo(0x59, "movv", 2, False),
UcodeOpInfo(0x5A, "mpmv", 4, False),
UcodeOpInfo(0x5B, "mpy", 2, False),
UcodeOpInfo(0x5C, "mst", 2, False),
UcodeOpInfo(0x5D, "mus", 4, False),
UcodeOpInfo(0x5E, "neg", 2, False),
UcodeOpInfo(0x5F, "neq", 2, False),
UcodeOpInfo(0x60, "nop", 2, False),
UcodeOpInfo(0x61, "not", 2, False),
UcodeOpInfo(0x62, "odd", 2, False),
UcodeOpInfo(0x63, "optn", 4, False),
UcodeOpInfo(0x64, "par", 4, False),
UcodeOpInfo(0x65, "pdef", 4, False),
UcodeOpInfo(0x66, "pmov", 4, False),
UcodeOpInfo(0x67, "pop", 2, False),
UcodeOpInfo(0x68, "regs", 4, False),
UcodeOpInfo(0x69, "rem", 2, False),
UcodeOpInfo(0x6A, "ret", 2, False),
UcodeOpInfo(0x6B, "rlda", 4, False),
UcodeOpInfo(0x6C, "rldc", 4, True),
UcodeOpInfo(0x6D, "rlod", 4, False),
UcodeOpInfo(0x6E, "rnd", 4, False),
UcodeOpInfo(0x6F, "rpar", 4, False),
UcodeOpInfo(0x70, "rstr", 4, False),
UcodeOpInfo(0x71, "sdef", 4, False),
UcodeOpInfo(0x72, "sgs", 4, False),
UcodeOpInfo(0x73, "shl", 2, False),
UcodeOpInfo(0x74, "shr", 2, False),
UcodeOpInfo(0x75, "sign", 2, False),
UcodeOpInfo(0x76, "sqr", 2, False),
UcodeOpInfo(0x77, "sqrt", 2, False),
UcodeOpInfo(0x78, "ssym", 4, True),
UcodeOpInfo(0x79, "step", 2, False),
UcodeOpInfo(0x7A, "stp", 2, False),
UcodeOpInfo(0x7B, "str", 4, False),
UcodeOpInfo(0x7C, "stsp", 2, False),
UcodeOpInfo(0x7D, "sub", 2, False),
UcodeOpInfo(0x7E, "swp", 4, False),
UcodeOpInfo(0x7F, "tjp", 2, False),
UcodeOpInfo(0x80, "tpeq", 2, False),
UcodeOpInfo(0x81, "tpge", 2, False),
UcodeOpInfo(0x82, "tpgt", 2, False),
UcodeOpInfo(0x83, "tple", 2, False),
UcodeOpInfo(0x84, "tplt", 2, False),
UcodeOpInfo(0x85, "tpne", 2, False),
UcodeOpInfo(0x86, "typ", 4, False),
UcodeOpInfo(0x87, "ubd", 2, False),
UcodeOpInfo(0x88, "ujp", 2, False),
UcodeOpInfo(0x89, "unal", 2, False),
UcodeOpInfo(0x8A, "uni", 4, False),
UcodeOpInfo(0x8B, "vreg", 4, False),
UcodeOpInfo(0x8C, "xjp", 8, False),
UcodeOpInfo(0x8D, "xor", 2, False),
UcodeOpInfo(0x8E, "xpar", 2, False),
UcodeOpInfo(0x8F, "mtag", 2, False),
UcodeOpInfo(0x90, "alia", 2, False),
UcodeOpInfo(0x91, "ildi", 4, False),
UcodeOpInfo(0x92, "isti", 4, False),
UcodeOpInfo(0x93, "irld", 4, False),
UcodeOpInfo(0x94, "irst", 4, False),
UcodeOpInfo(0x95, "ldrc", 4, False),
UcodeOpInfo(0x96, "msym", 4, False),
UcodeOpInfo(0x97, "rcuf", 4, False),
UcodeOpInfo(0x98, "ksym", 4, False),
UcodeOpInfo(0x99, "osym", 4, False),
UcodeOpInfo(0x9A, "irlv", 2, False),
UcodeOpInfo(0x9B, "irsv", 2, False),
]
def parse_ucode(ucode: bytes) -> list[UcodeOp]:
ops = []
pos = 0
while pos < len(ucode):
opcode = ucode[pos]
mtype = ucode[pos + 1] >> 5
dtype = ucode[pos + 1] & 0x1F
lexlev = int.from_bytes(ucode[pos + 2 : pos + 4], "big")
i1 = int.from_bytes(ucode[pos + 4 : pos + 8], "big")
pos += 8
info = UCODE_OP_INFO[opcode]
size = 4 * info.length
args = []
for _ in range(info.length - 2):
args.append(int.from_bytes(ucode[pos : pos + 4], "big"))
pos += 4
string = None
if info.has_const:
string_length = int.from_bytes(ucode[pos : pos + 4], "big")
pos += 8
if dtype in (9, 12, 13, 14, 16) or info.name == "comm":
string = ucode[pos : pos + string_length]
pos += (string_length + 7) & ~7
ops.append(UcodeOp(opcode, info.name, mtype, dtype, lexlev, i1, args, string))
return ops
def print_ucode(ucode: list[UcodeOp]):
for op in ucode:
args = " ".join(f"0x{arg:X}" for arg in op.args)
print(
f"{op.opcode_name:<4} mtype={op.mtype:X} dtype={op.dtype:X} lexlev={op.lexlev} i1={op.i1} args={args}",
end="",
)
if op.string is not None:
print(f" string={op.string!r}", end="")
print()
def generate_make_log(oot_version: str) -> list[str]:
is_macos = platform.system() == "Darwin"
make = "gmake" if is_macos else "make"
make_command_line = [
@ -212,30 +458,28 @@ def find_compiler_command_line(filename, oot_version):
"--dry-run",
f"VERSION={oot_version}",
]
return subprocess.check_output(make_command_line).decode("utf-8").splitlines()
print(f"Running {make} to find compiler command line ...", file=sys.stderr)
make_output = (
subprocess.check_output(make_command_line).decode("utf-8").splitlines()
)
def find_compiler_command_line(
make_log: list[str], filename: Path
) -> Optional[list[str]]:
found = 0
for line in make_output:
for line in make_log:
parts = line.split()
if "-o" in parts and str(filename) in parts:
compiler_command_line = parts
found += 1
if found != 1:
print(
f"Could not determine compiler command line for {filename}", file=sys.stderr
)
sys.exit(1)
return None
print(f'Command line: {" ".join(compiler_command_line)}', file=sys.stderr)
return compiler_command_line
def generate_symbol_table(command_line):
def run_cfe(
command_line: list[str], keep_files: bool
) -> Tuple[list[SymbolTableEntry], list[UcodeOp]]:
# Assume command line is of the form:
# python3 tools/preprocess.py [COMPILER] [COMPILER_ARGS] [INPUT_FILE]
input_file = Path(command_line[-1])
@ -251,11 +495,14 @@ def generate_symbol_table(command_line):
subprocess.run(rest + ["-Hf", input_file], check=True)
# Read symbol table
return symbol_table_file.read_bytes()
symbol_table = parse_symbol_table(symbol_table_file.read_bytes())
ucode = parse_ucode(ucode_file.read_bytes())
return (symbol_table, ucode)
finally:
# Cleanup
symbol_table_file.unlink(missing_ok=True)
ucode_file.unlink(missing_ok=True)
if not keep_files:
symbol_table_file.unlink(missing_ok=True)
ucode_file.unlink(missing_ok=True)
def main():
@ -270,12 +517,33 @@ def main():
default="gc-eu-mq-dbg",
help="OOT version (default: gc-eu-mq-dbg)",
)
parser.add_argument(
"--print-ucode", action="store_true", help="Print cfe ucode output"
)
parser.add_argument(
"--keep-files",
action="store_true",
help="Keep temporary files (symbol table and ucode)",
)
args = parser.parse_args()
command_line = find_compiler_command_line(args.filename, args.oot_version)
data = generate_symbol_table(command_line)
print_symbol_table(data)
print(f"Running make to find compiler command line ...", file=sys.stderr)
make_log = generate_make_log(args.oot_version)
command_line = find_compiler_command_line(make_log, args.filename)
if command_line is None:
print(
f"Error: could not determine compiler command line for {filename}",
file=sys.stderr,
)
sys.exit(1)
print(f"Compiler command: {shlex.join(compiler_command_line)}", file=sys.stderr)
symbol_table, ucode = run_cfe(command_line, args.keep_files)
print_symbol_table(symbol_table)
if args.print_ucode:
print_ucode(ucode)
if __name__ == "__main__":


@ -3,13 +3,13 @@
# SPDX-FileCopyrightText: © 2024 ZeldaRET
# SPDX-License-Identifier: CC0-1.0
# Usage: preprocess.py [compile command minus input file...] [single input file]
# Usage: preprocess.py [flags] -- [compile command minus input file...] [single input file]
# Preprocess a C file to:
# * Re-encode from UTF-8 to EUC-JP (the repo uses UTF-8 for text encoding, but
# the strings in the ROM are encoded in EUC-JP)
# * Replace `#pragma increment_block_number N` with `N` fake structs for
# controlling BSS ordering
# * Replace `#pragma increment_block_number` with fake structs for controlling BSS ordering
import argparse
from pathlib import Path
import os
import tempfile
@ -22,39 +22,59 @@ def fail(message):
sys.exit(1)
def process_file(filename, input, output):
def process_file(version, filename, input, output):
output.write(f'#line 1 "{filename}"\n')
for i, line in enumerate(input, start=1):
if line.startswith("#pragma increment_block_number"):
parts = line.split()
if len(parts) != 3:
fail(
f"{filename}:{i}: increment_block_number must be followed by an integer"
)
try:
amount = int(parts[2])
except ValueError:
fail(
f"{filename}:{i}: increment_block_number must be followed by an integer"
)
if line.startswith("#pragma increment_block_number "):
# Grab pragma argument and remove quotes
arg = line.strip()[len("#pragma increment_block_number ") + 1 : -1]
amount = 0
for part in arg.split():
kv = part.split(":")
if len(kv) != 2:
fail(
f"{filename}:{i}: increment_block_number must be followed by a list of version:amount pairs"
)
if kv[0] != version:
continue
try:
amount = int(kv[1])
except ValueError:
fail(
f"{filename}:{i}: increment_block_number amount must be an integer"
)
# Always generate at least one struct so that fix_bss.py can know where the increment_block_number pragmas are
if amount == 0:
amount = 256
# Write fake structs for BSS ordering
for j in range(amount):
output.write(f"struct DummyStruct_{i:05}_{j:03};\n")
output.write(f"struct increment_block_number_{i:05}_{j:03};\n")
output.write(f'#line {i + 1} "{filename}"\n')
else:
output.write(line)
def main():
filename = Path(sys.argv[-1])
parser = argparse.ArgumentParser()
parser.add_argument("-v", "--oot-version", help="Which version should be processed")
parser.add_argument(
"args",
nargs="+",
)
args = parser.parse_args()
filename = Path(args.args[-1])
with tempfile.TemporaryDirectory(prefix="oot_") as tmpdir:
tmpfile = Path(tmpdir) / filename.name
with open(filename, mode="r", encoding="utf-8") as input:
with open(tmpfile, mode="w", encoding="euc-jp") as output:
process_file(filename, input, output)
process_file(args.oot_version, filename, input, output)
compile_command = sys.argv[1:-1] + ["-I", filename.parent, tmpfile]
compile_command = args.args[:-1] + ["-I", filename.parent, tmpfile]
process = subprocess.run(compile_command)
return process.returncode
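
For illustration only (a hypothetical helper, not part of the commit), the quoted per-version pragma argument handled above can be parsed like this, mirroring the logic in preprocess.py and fix_bss.py:

# Hypothetical helper mirroring the pragma parsing shown in the diff above:
# extract the increment_block_number amount for one version from a pragma line.
def pragma_amount_for_version(line: str, version: str) -> int:
    prefix = "#pragma increment_block_number "
    arg = line.strip()[len(prefix) + 1 : -1]  # strip prefix and surrounding quotes
    amounts = {}
    for part in arg.split():
        ver, amount_str = part.split(":")
        amounts[ver] = int(amount_str)
    return amounts.get(version, 0)  # versions not listed default to 0

line = '#pragma increment_block_number "gc-eu:64 gc-eu-mq:64 gc-eu-mq-dbg:0"\n'
print(pragma_amount_for_version(line, "gc-eu"))      # 64
print(pragma_amount_for_version(line, "ntsc-1.2"))   # 0 (version id used here only as an example)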