Automate fixing BSS ordering (#2009)

* Automate fixing BSS ordering

* Typo

* Some cleanups

* Move pragma check after printing BSS info

* Some proofreading

* multiprocessing, require version, some colors

* Tweak output

* Black + mypy

* Move logging and sys.exit out of helper functions

* Use stdout instead of stderr in fix_bss.py

* Add suggestion to conflicting offsets error

Co-authored-by: Dragorn421 <Dragorn421@users.noreply.github.com>

* Remove var = list[T]()

* Improve error handling

Co-authored-by: Dragorn421 <Dragorn421@users.noreply.github.com>

* Add error if no pointers to BSS

* Add comment about process_file_worker

* Only print updates if stdout is a tty

* Use new binary-search-esque candidate generation algorithm

Co-authored-by: Dragorn421 <Dragorn421@users.noreply.github.com>

* Add Wikipedia link

* More comment tweaks

---------

Co-authored-by: Dragorn421 <Dragorn421@users.noreply.github.com>
cadmic 2024-08-02 17:31:15 -07:00 committed by GitHub
parent 078e21f6c6
commit 0da402b9de
21 changed files with 1071 additions and 352 deletions


@ -378,7 +378,7 @@ $(BUILD_DIR)/src/code/jpegdecoder.o: CC := $(CC_OLD)
ifeq ($(PERMUTER),) # permuter + preprocess.py misbehaves, permuter doesn't care about rodata diffs or bss ordering so just don't use it in that case
# Handle encoding (UTF-8 -> EUC-JP) and custom pragmas
$(BUILD_DIR)/src/%.o: CC := $(PYTHON) tools/preprocess.py $(CC)
$(BUILD_DIR)/src/%.o: CC := $(PYTHON) tools/preprocess.py -v $(VERSION) -- $(CC)
endif
else


@ -1,10 +1,7 @@
#include "global.h"
#include "terminal.h"
// For retail BSS ordering, the block number of sMainThread must be 0 or
// just above (the exact upper bound depends on the block numbers assigned to
// extern variables declared in headers).
#pragma increment_block_number 60
#pragma increment_block_number "gc-eu:64 gc-eu-mq:64"
OSThread sMainThread;
STACK(sMainStack, 0x900);


@ -44,14 +44,7 @@
#include "terminal.h"
#include "alloca.h"
// For retail BSS ordering, the block number of sFaultInstance must be 0 or
// just above (the exact upper bound depends on the block numbers assigned to
// extern variables declared in headers).
#if OOT_DEBUG
#pragma increment_block_number 0
#else
#pragma increment_block_number 20
#endif
#pragma increment_block_number "gc-eu:64 gc-eu-mq:64 gc-eu-mq-dbg:0"
void FaultDrawer_Init(void);
void FaultDrawer_SetOsSyncPrintfEnabled(u32 enabled);


@ -7,9 +7,7 @@ s32 gScreenWidth = SCREEN_WIDTH;
s32 gScreenHeight = SCREEN_HEIGHT;
u32 gSystemHeapSize = 0;
// For retail BSS ordering, the block number of gIrqMgr must be greater than the
// the block numbers assigned to extern variables above (declared in variables.h).
#pragma increment_block_number 220
#pragma increment_block_number "gc-eu:0 gc-eu-mq:0"
PreNmiBuff* gAppNmiBufferPtr;
Scheduler gScheduler;


@ -5,9 +5,7 @@
#include "macros.h"
#include "sys_math3d.h"
// For retail BSS ordering, the block number of cbf in Math3D_CylVsCylOverlapCenterDist
// must be 0.
#pragma increment_block_number 108
#pragma increment_block_number "gc-eu:108 gc-eu-mq:108"
s32 Math3D_LineVsLineClosestTwoPoints(Vec3f* lineAPointA, Vec3f* lineAPointB, Vec3f* lineBPointA, Vec3f* lineBPointB,
Vec3f* lineAClosestToB, Vec3f* lineBClosestToA);


@ -8,8 +8,7 @@
#include "assets/objects/gameplay_dangeon_keep/gameplay_dangeon_keep.h"
#include "assets/objects/object_bdoor/object_bdoor.h"
// For retail BSS ordering, the block number of sCurCeilingPoly
// must be between 2 and 243 inclusive.
#pragma increment_block_number "gc-eu:0 gc-eu-mq:0"
static CollisionPoly* sCurCeilingPoly;
static s32 sCurCeilingBgId;
@ -1906,7 +1905,7 @@ s32 func_8002F9EC(PlayState* play, Actor* actor, CollisionPoly* poly, s32 bgId,
return false;
}
#pragma increment_block_number 22
#pragma increment_block_number "gc-eu:22 gc-eu-mq:22"
// Local data used for Farore's Wind light (stored in BSS)
LightInfo D_8015BC00;


@ -4,9 +4,7 @@
#include "terminal.h"
#include "overlays/actors/ovl_En_Horse/z_en_horse.h"
// For retail BSS ordering, the block number of D_8015BD7C
// must be between 88 and 123 inclusive.
#pragma increment_block_number 30
#pragma increment_block_number "gc-eu:0 gc-eu-mq:0"
s16 Camera_RequestSettingImpl(Camera* camera, s16 requestedSetting, s16 flags);
s32 Camera_RequestModeImpl(Camera* camera, s16 requestedMode, u8 forceModeChange);
@ -3632,7 +3630,7 @@ s32 Camera_KeepOn3(Camera* camera) {
return 1;
}
#pragma increment_block_number 100
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
s32 Camera_KeepOn4(Camera* camera) {
static Vec3f D_8015BD50;


@ -12,9 +12,7 @@ typedef s32 (*ColChkLineFunc)(PlayState*, CollisionCheckContext*, Collider*, Vec
#define SAC_ENABLE (1 << 0)
// For retail BSS ordering, the block number of sparkInit in CollisionCheck_BlueBlood
// must be between 183 and 255 inclusive.
#pragma increment_block_number 50
#pragma increment_block_number "gc-eu:64 gc-eu-mq:64"
#if OOT_DEBUG
/**
@ -2695,7 +2693,7 @@ typedef enum {
/* 2 */ MASSTYPE_NORMAL
} ColChkMassType;
#pragma increment_block_number 253
#pragma increment_block_number "gc-eu:252 gc-eu-mq:252"
/**
* Get mass type. Immovable colliders cannot be pushed, while heavy colliders can only be pushed by heavy and immovable


@ -1,9 +1,6 @@
#include "global.h"
// For retail BSS ordering, the block number of D_8015FA88 must be 0 or
// just above (the exact upper bound depends on the block numbers assigned to
// extern variables declared in headers).
#pragma increment_block_number 60
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
ALIGNED(16) SaveContext gSaveContext;
u32 D_8015FA88;


@ -120,9 +120,7 @@ u16 gCamAtSplinePointsAppliedFrame;
u16 gCamEyePointAppliedFrame;
u16 gCamAtPointAppliedFrame;
// For retail BSS ordering, the block number of sReturnToCamId must be greater
// than that of gCamAtPointAppliedFrame (declared in variables.h).
#pragma increment_block_number 180
#pragma increment_block_number "gc-eu:0 gc-eu-mq:0"
// Cam ID to return to when a scripted cutscene is finished
s16 sReturnToCamId;


@ -1,10 +1,7 @@
#include "global.h"
#include "terminal.h"
// For retail BSS ordering, the block number of sKaleidoScopeUpdateFunc must be 0 or
// just above (the exact upper bound depends on the block numbers assigned to
// extern variables declared in headers).
#pragma increment_block_number 60
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
void (*sKaleidoScopeUpdateFunc)(PlayState* play);
void (*sKaleidoScopeDrawFunc)(PlayState* play);


@ -7,10 +7,7 @@
#include "assets/objects/gameplay_keep/gameplay_keep.h"
#include "assets/objects/gameplay_field_keep/gameplay_field_keep.h"
// For retail BSS ordering, the block number of sLensFlareUnused must be lower
// than the extern variables declared in the header (e.g. gLightningStrike)
// while the block number of sNGameOverLightNode must be higher.
#pragma increment_block_number 80
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
typedef enum {
/* 0x00 */ LIGHTNING_BOLT_START,
@ -215,10 +212,7 @@ s16 sLightningFlashAlpha;
s16 sSunDepthTestX;
s16 sSunDepthTestY;
// These variables could be moved farther down in the file to reduce the amount
// of block number padding here, but currently this causes BSS ordering issues
// for debug.
#pragma increment_block_number 217
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
LightNode* sNGameOverLightNode;
LightInfo sNGameOverLightInfo;


@ -100,21 +100,19 @@ static ColliderCylinderInit sLightBallCylinderInit = {
static u8 D_808E4C58[] = { 0, 12, 10, 12, 14, 16, 12, 14, 16, 12, 14, 16, 12, 14, 16, 10, 16, 14 };
static Vec3f sZeroVec = { 0.0f, 0.0f, 0.0f };
// For retail BSS ordering, the block number of sGanondorf must be 0 or just above.
// TODO: There's probably a way to do this with less padding by spreading the variables out and moving
// data around. It would be easier if we had more options for controlling BSS ordering in debug.
#pragma increment_block_number 50
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
static EnGanonMant* sCape;
#pragma increment_block_number 200
// TODO: There's probably a way to match BSS ordering with less padding by spreading the variables out and moving
// data around. It would be easier if we had more options for controlling BSS ordering in debug.
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
static s32 sSeed1;
static s32 sSeed2;
static s32 sSeed3;
#pragma increment_block_number 200
#pragma increment_block_number "gc-eu:192 gc-eu-mq:192"
static BossGanon* sGanondorf;


@ -53,7 +53,7 @@ ActorProfile En_Wonder_Item_Profile = {
/**/ NULL,
};
#pragma increment_block_number 1
#pragma increment_block_number "gc-eu:0 gc-eu-mq:0"
static Vec3f sTagPointsFree[9];
static Vec3f sTagPointsOrdered[9];


@ -14,8 +14,7 @@
#include "assets/scenes/dungeons/ice_doukutu/ice_doukutu_scene.h"
#include "terminal.h"
// For retail BSS ordering, the block number of sSfxPos
// must be between 0 and 213 inclusive.
#pragma increment_block_number "gc-eu:0 gc-eu-mq:0"
#define FLAGS ACTOR_FLAG_4
@ -1396,7 +1395,7 @@ void func_80B3F3D8(void) {
Sfx_PlaySfxCentered2(NA_SE_PL_SKIP);
}
#pragma increment_block_number 20
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
void EnXc_PlayDiveSFX(Vec3f* src, PlayState* play) {
static Vec3f D_80B42DA0;


@ -11,8 +11,7 @@
#include "ichain.h"
#include "terminal.h"
// For retail BSS ordering, the block number of sStreamSfxProjectedPos must be 0.
#pragma increment_block_number 206
#pragma increment_block_number "gc-eu:206 gc-eu-mq:206"
#define FLAGS ACTOR_FLAG_4


@ -354,22 +354,19 @@ void Player_Action_CsAction(Player* this, PlayState* play);
// .bss part 1
// For retail BSS ordering, the block number of sDogSpawnPos in Player_Update
// must be between 0 and 53 inclusive.
// TODO: There's probably a way to do this with less padding by spreading the variables out and moving
// data around. It would be easier if we had more options for controlling BSS ordering in debug.
#pragma increment_block_number 30
#pragma increment_block_number "gc-eu:0 gc-eu-mq:0"
static s32 D_80858AA0;
#pragma increment_block_number 250
// TODO: There's probably a way to match BSS ordering with less padding by spreading the variables out and moving
// data around. It would be easier if we had more options for controlling BSS ordering in debug.
#pragma increment_block_number "gc-eu:128 gc-eu-mq:128"
static s32 D_80858AA4;
static Vec3f sInteractWallCheckResult;
static Input* sControlInput;
#pragma increment_block_number 50
#pragma increment_block_number "gc-eu:192 gc-eu-mq:192"
// .data


@ -1,234 +0,0 @@
#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2024 zeldaret
# SPDX-License-Identifier: CC0-1.0
from __future__ import annotations
import argparse
import dataclasses
import enum
from pathlib import Path
import sys
from typing import BinaryIO
import elftools.elf.elffile
import mapfile_parser.mapfile
@dataclasses.dataclass
class Reloc:
name: str
offset_32: int | None
offset_hi16: int | None
offset_lo16: int | None
addend: int
@dataclasses.dataclass
class Pointer:
name: str
addend: int
base_value: int
build_value: int
def read_relocs(object_path: Path, section_name: str) -> list[Reloc]:
with open(object_path, "rb") as f:
elffile = elftools.elf.elffile.ELFFile(f)
symtab = elffile.get_section_by_name(".symtab")
data = elffile.get_section_by_name(section_name).data()
reloc_section = elffile.get_section_by_name(f".rel{section_name}")
if reloc_section is None:
return []
relocs = []
offset_hi16 = 0
for reloc in reloc_section.iter_relocations():
reloc_offset = reloc.entry["r_offset"]
reloc_type = reloc.entry["r_info_type"]
reloc_name = symtab.get_symbol(reloc.entry["r_info_sym"]).name
if reloc_type == 2: # R_MIPS_32
offset_32 = reloc_offset
addend = int.from_bytes(
data[reloc_offset : reloc_offset + 4], "big", signed=True
)
relocs.append(Reloc(reloc_name, offset_32, None, None, addend))
elif reloc_type == 4: # R_MIPS_26
pass
elif reloc_type == 5: # R_MIPS_HI16
offset_hi16 = reloc_offset
elif reloc_type == 6: # R_MIPS_LO16
offset_lo16 = reloc_offset
addend_hi16 = int.from_bytes(
data[offset_hi16 + 2 : offset_hi16 + 4], "big", signed=False
)
addend_lo16 = int.from_bytes(
data[offset_lo16 + 2 : offset_lo16 + 4], "big", signed=True
)
addend = (addend_hi16 << 16) + addend_lo16
relocs.append(Reloc(reloc_name, None, offset_hi16, offset_lo16, addend))
else:
raise NotImplementedError(f"Unsupported relocation type: {reloc_type}")
return relocs
def read_u32(f: BinaryIO, offset: int) -> int:
f.seek(offset)
return int.from_bytes(f.read(4), "big")
def read_u16(f: BinaryIO, offset: int) -> int:
f.seek(offset)
return int.from_bytes(f.read(2), "big")
def read_s16(f: BinaryIO, offset: int) -> int:
f.seek(offset)
return int.from_bytes(f.read(2), "big", signed=True)
def main():
parser = argparse.ArgumentParser(
description="Report bss reorderings between the baserom and the current build "
"by parsing relocations from the built object files and comparing their final values "
"between the baserom and the current build. "
"Assumes that the only differences are due to ordering and that the text sections of the "
"ROMS are not shifted."
)
parser.add_argument(
"--oot-version",
"-v",
type=str,
default="gc-eu-mq-dbg",
help="OOT version (default: gc-eu-mq-dbg)",
)
parser.add_argument(
"--segment",
type=str,
help="ROM segment to check, e.g. 'boot', 'code', or 'ovl_player_actor' (default: all)",
)
parser.add_argument(
"--all-sections",
action="store_true",
help="Check ordering for all section types, not just .bss",
)
args = parser.parse_args()
version = args.oot_version
mapfile = mapfile_parser.mapfile.MapFile()
mapfile.readMapFile(f"build/{version}/oot-{version}.map")
# Segments built from source code (filtering out assets)
source_code_segments = []
for mapfile_segment in mapfile:
if (
args.segment
and mapfile_segment.name != f"..{args.segment}"
and mapfile_segment.name != f"..{args.segment}.bss"
):
continue
if not (
mapfile_segment.name.startswith("..boot")
or mapfile_segment.name.startswith("..code")
or mapfile_segment.name.startswith("..buffers")
or mapfile_segment.name.startswith("..ovl_")
):
continue
source_code_segments.append(mapfile_segment)
base = open(f"baseroms/{version}/baserom-decompressed.z64", "rb")
build = open(f"build/{version}/oot-{version}.z64", "rb")
# Find all pointers with different values
pointers = []
for mapfile_segment in source_code_segments:
for file in mapfile_segment:
if not str(file.filepath).endswith(".o"):
continue
if file.sectionType == ".bss":
continue
for reloc in read_relocs(file.filepath, file.sectionType):
if reloc.offset_32 is not None:
base_value = read_u32(base, file.vrom + reloc.offset_32)
build_value = read_u32(build, file.vrom + reloc.offset_32)
elif reloc.offset_hi16 is not None and reloc.offset_lo16 is not None:
if (
read_u16(base, file.vrom + reloc.offset_hi16)
!= read_u16(build, file.vrom + reloc.offset_hi16)
) or (
read_u16(base, file.vrom + reloc.offset_lo16)
!= read_u16(build, file.vrom + reloc.offset_lo16)
):
print(
f"Error: Reference to {reloc.name} in {file.filepath} is in a shifted (or non-matching even ignoring relocs) portion of the ROM.\n"
"Please ensure that the only differences between the baserom and the current build are due to data ordering.",
file=sys.stderr,
)
sys.exit(1)
base_value = (
read_u16(base, file.vrom + reloc.offset_hi16 + 2) << 16
) + read_s16(base, file.vrom + reloc.offset_lo16 + 2)
build_value = (
read_u16(build, file.vrom + reloc.offset_hi16 + 2) << 16
) + read_s16(build, file.vrom + reloc.offset_lo16 + 2)
else:
assert False, "Invalid relocation"
pointers.append(
Pointer(reloc.name, reloc.addend, base_value, build_value)
)
# Remove duplicates and sort by baserom address
pointers = list({p.base_value: p for p in pointers}.values())
pointers.sort(key=lambda p: p.base_value)
# Go through sections and report differences
for mapfile_segment in source_code_segments:
for file in mapfile_segment:
if not args.all_sections and not file.sectionType == ".bss":
continue
pointers_in_section = [
p
for p in pointers
if file.vram <= p.build_value < file.vram + file.size
]
if not pointers_in_section:
continue
# Try to detect if the section is shifted by comparing the lowest
# address among any pointer into the section between base and build
base_min_address = min(p.base_value for p in pointers_in_section)
build_min_address = min(p.build_value for p in pointers_in_section)
section_shift = build_min_address - base_min_address
if all(
p.build_value == p.base_value + section_shift
for p in pointers_in_section
):
continue
print(f"{file.filepath} {file.sectionType} is reordered:")
for i, p in enumerate(pointers_in_section):
if p.addend > 0:
addend_str = f"+0x{p.addend:X}"
elif p.addend < 0:
addend_str = f"-0x{-p.addend:X}"
else:
addend_str = ""
if i > 0 and p.build_value < pointers_in_section[i - 1].build_value:
print(" --------------------") # BSS wraps around
print(
f" {p.base_value:08X} -> {p.build_value:08X} {p.name}{addend_str}"
)
if __name__ == "__main__":
main()

tools/fix_bss.py (new executable file, 705 lines)

@ -0,0 +1,705 @@
#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2024 zeldaret
# SPDX-License-Identifier: CC0-1.0
from __future__ import annotations
import argparse
from collections import Counter
import colorama
from dataclasses import dataclass
import io
import itertools
import multiprocessing
import multiprocessing.pool
from pathlib import Path
import re
import shlex
import sys
import time
from typing import BinaryIO, Iterator
from ido_block_numbers import (
generate_make_log,
find_compiler_command_line,
run_cfe,
SymbolTableEntry,
UcodeOp,
)
import elftools.elf.elffile
import mapfile_parser.mapfile
def read_u32(f: BinaryIO, offset: int) -> int:
f.seek(offset)
return int.from_bytes(f.read(4), "big")
def read_u16(f: BinaryIO, offset: int) -> int:
f.seek(offset)
return int.from_bytes(f.read(2), "big")
def read_s16(f: BinaryIO, offset: int) -> int:
f.seek(offset)
return int.from_bytes(f.read(2), "big", signed=True)
class FixBssException(Exception):
pass
@dataclass
class Reloc:
name: str
offset_32: int | None
offset_hi16: int | None
offset_lo16: int | None
addend: int
@dataclass
class Pointer:
name: str
addend: int
base_value: int
build_value: int
# Read relocations from an ELF file section
def read_relocs(object_path: Path, section_name: str) -> list[Reloc]:
with open(object_path, "rb") as f:
elffile = elftools.elf.elffile.ELFFile(f)
symtab = elffile.get_section_by_name(".symtab")
data = elffile.get_section_by_name(section_name).data()
reloc_section = elffile.get_section_by_name(f".rel{section_name}")
if reloc_section is None:
return []
relocs = []
offset_hi16 = 0
for reloc in reloc_section.iter_relocations():
reloc_offset = reloc.entry["r_offset"]
reloc_type = reloc.entry["r_info_type"]
reloc_name = symtab.get_symbol(reloc.entry["r_info_sym"]).name
if reloc_type == 2: # R_MIPS_32
offset_32 = reloc_offset
addend = int.from_bytes(
data[reloc_offset : reloc_offset + 4], "big", signed=True
)
relocs.append(Reloc(reloc_name, offset_32, None, None, addend))
elif reloc_type == 4: # R_MIPS_26
pass
elif reloc_type == 5: # R_MIPS_HI16
offset_hi16 = reloc_offset
elif reloc_type == 6: # R_MIPS_LO16
offset_lo16 = reloc_offset
addend_hi16 = int.from_bytes(
data[offset_hi16 + 2 : offset_hi16 + 4], "big", signed=False
)
addend_lo16 = int.from_bytes(
data[offset_lo16 + 2 : offset_lo16 + 4], "big", signed=True
)
addend = (addend_hi16 << 16) + addend_lo16
relocs.append(Reloc(reloc_name, None, offset_hi16, offset_lo16, addend))
else:
raise NotImplementedError(f"Unsupported relocation type: {reloc_type}")
return relocs
def get_file_pointers(
file: mapfile_parser.mapfile.File,
base: BinaryIO,
build: BinaryIO,
) -> list[Pointer]:
pointers = []
# TODO: open each ELF file only once instead of once per section?
for reloc in read_relocs(file.filepath, file.sectionType):
if reloc.offset_32 is not None:
base_value = read_u32(base, file.vrom + reloc.offset_32)
build_value = read_u32(build, file.vrom + reloc.offset_32)
elif reloc.offset_hi16 is not None and reloc.offset_lo16 is not None:
if (
read_u16(base, file.vrom + reloc.offset_hi16)
!= read_u16(build, file.vrom + reloc.offset_hi16)
) or (
read_u16(base, file.vrom + reloc.offset_lo16)
!= read_u16(build, file.vrom + reloc.offset_lo16)
):
raise FixBssException(
f"Reference to {reloc.name} in {file.filepath} is in a shifted or non-matching portion of the ROM.\n"
"Please ensure that the only differences between the baserom and the current build are due to BSS ordering."
)
base_value = (
read_u16(base, file.vrom + reloc.offset_hi16 + 2) << 16
) + read_s16(base, file.vrom + reloc.offset_lo16 + 2)
build_value = (
read_u16(build, file.vrom + reloc.offset_hi16 + 2) << 16
) + read_s16(build, file.vrom + reloc.offset_lo16 + 2)
else:
assert False, "Invalid relocation"
pointers.append(Pointer(reloc.name, reloc.addend, base_value, build_value))
return pointers
base = None
build = None
def get_file_pointers_worker_init(version: str):
global base
global build
base = open(f"baseroms/{version}/baserom-decompressed.z64", "rb")
build = open(f"build/{version}/oot-{version}.z64", "rb")
def get_file_pointers_worker(file: mapfile_parser.mapfile.File) -> list[Pointer]:
assert base is not None
assert build is not None
return get_file_pointers(file, base, build)
# Compare pointers between the baserom and the current build, returning a dictionary from
# C files to a list of pointers into their BSS sections
def compare_pointers(version: str) -> dict[Path, list[Pointer]]:
mapfile_path = Path(f"build/{version}/oot-{version}.map")
if not mapfile_path.exists():
raise FixBssException(f"Could not open {mapfile_path}")
mapfile = mapfile_parser.mapfile.MapFile()
mapfile.readMapFile(mapfile_path)
# Segments built from source code (filtering out assets)
source_code_segments = []
for mapfile_segment in mapfile:
if not (
mapfile_segment.name.startswith("..boot")
or mapfile_segment.name.startswith("..code")
or mapfile_segment.name.startswith("..buffers")
or mapfile_segment.name.startswith("..ovl_")
):
continue
source_code_segments.append(mapfile_segment)
# Find all pointers with different values
if not sys.stdout.isatty():
print(f"Comparing pointers between baserom and build ...")
pointers = []
file_results = []
with multiprocessing.Pool(
initializer=get_file_pointers_worker_init,
initargs=(version,),
) as p:
for mapfile_segment in source_code_segments:
for file in mapfile_segment:
if not str(file.filepath).endswith(".o"):
continue
if file.sectionType == ".bss":
continue
file_result = p.apply_async(get_file_pointers_worker, (file,))
file_results.append(file_result)
# Report progress and wait until all files are done
num_files = len(file_results)
while True:
time.sleep(0.010)
num_files_done = sum(file_result.ready() for file_result in file_results)
if sys.stdout.isatty():
print(
f"Comparing pointers between baserom and build ... {num_files_done:>{len(f'{num_files}')}}/{num_files}",
end="\r",
)
if num_files_done == num_files:
break
if sys.stdout.isatty():
print("")
# Collect results and check for errors
for file_result in file_results:
try:
pointers.extend(file_result.get())
except FixBssException as e:
print(f"{colorama.Fore.RED}Error: {str(e)}{colorama.Fore.RESET}")
sys.exit(1)
# Remove duplicates and sort by baserom address
pointers = list({p.base_value: p for p in pointers}.values())
pointers.sort(key=lambda p: p.base_value)
# Go through sections and collect differences
pointers_by_file = {}
for mapfile_segment in source_code_segments:
for file in mapfile_segment:
if not file.sectionType == ".bss":
continue
pointers_in_section = [
p
for p in pointers
if file.vram <= p.build_value < file.vram + file.size
]
if not pointers_in_section:
continue
c_file = file.filepath.relative_to(f"build/{version}").with_suffix(".c")
pointers_by_file[c_file] = pointers_in_section
return pointers_by_file
@dataclass
class Pragma:
line_number: int
block_number: int
amount: int
# A BSS variable in the source code
@dataclass
class BssVariable:
block_number: int
name: str
size: int
align: int
# A BSS variable with its offset in the compiled .bss section
@dataclass
class BssSymbol:
name: str
offset: int
size: int
align: int
INCREMENT_BLOCK_NUMBER_RE = re.compile(r"increment_block_number_(\d+)_(\d+)")
# Find increment_block_number pragmas by parsing the symbol names generated by preprocess.py.
# This is pretty ugly but it seems more reliable than trying to determine the line numbers of
# BSS variables in the C file.
def find_pragmas(symbol_table: list[SymbolTableEntry]) -> list[Pragma]:
# Keep track of first block number and count for each line number
first_block_number = {}
amounts: Counter[int] = Counter()
for block_number, entry in enumerate(symbol_table):
if match := INCREMENT_BLOCK_NUMBER_RE.match(entry.name):
line_number = int(match.group(1))
if line_number not in first_block_number:
first_block_number[line_number] = block_number
amounts[line_number] += 1
pragmas = []
for line_number, block_number in sorted(first_block_number.items()):
pragmas.append(Pragma(line_number, block_number, amounts[line_number]))
return pragmas
# Find all BSS variables from IDO's symbol table and U-Code output.
def find_bss_variables(
symbol_table: list[SymbolTableEntry], ucode: list[UcodeOp]
) -> list[BssVariable]:
bss_variables = []
init_block_numbers = set(op.i1 for op in ucode if op.opcode_name == "init")
last_function_name = None
for op in ucode:
# gsym: file-level global symbol
# lsym: file-level static symbol
# fsym: function-level static symbol
if op.opcode_name in ("gsym", "lsym", "fsym"):
block_number = op.i1
if block_number in init_block_numbers:
continue # not BSS
name = symbol_table[block_number].name
if op.opcode_name == "fsym":
name = f"{last_function_name}::{name}"
size = op.args[0]
align = 1 << op.lexlev
# TODO: IDO seems to automatically align anything with size 8 or more to
# an 8-byte boundary in BSS. Is this correct?
if size >= 8:
align = 8
bss_variables.append(BssVariable(block_number, name, size, align))
elif op.opcode_name == "ent":
last_function_name = symbol_table[op.i1].name
bss_variables.sort(key=lambda var: var.block_number)
return bss_variables
# Predict offsets of BSS variables in the build.
def predict_bss_ordering(variables: list[BssVariable]) -> list[BssSymbol]:
bss_symbols = []
offset = 0
# Sort by block number mod 256 (for ties, the original order is preserved)
for var in sorted(variables, key=lambda var: var.block_number % 256):
size = var.size
align = var.align
offset = (offset + align - 1) & ~(align - 1)
bss_symbols.append(BssSymbol(var.name, offset, size, align))
offset += size
return bss_symbols
# Match up BSS variables between the baserom and the build using the pointers from relocations.
# Note that we may not be able to match all variables if a variable is not referenced by any pointer.
def determine_base_bss_ordering(
build_bss_symbols: list[BssSymbol], pointers: list[Pointer]
) -> list[BssSymbol]:
# Assume that the lowest address is the start of the BSS section
base_section_start = min(p.base_value for p in pointers)
build_section_start = min(p.build_value for p in pointers)
found_symbols: dict[str, BssSymbol] = {}
for p in pointers:
base_offset = p.base_value - base_section_start
build_offset = p.build_value - build_section_start
new_symbol = None
new_offset = 0
for symbol in build_bss_symbols:
if (
symbol.offset <= build_offset
and build_offset < symbol.offset + symbol.size
):
new_symbol = symbol
new_offset = base_offset - (build_offset - symbol.offset)
break
if new_symbol is None:
if p.addend > 0:
addend_str = f"+0x{p.addend:X}"
elif p.addend < 0:
addend_str = f"-0x{-p.addend:X}"
else:
addend_str = ""
raise FixBssException(
f"Could not find BSS symbol for pointer {p.name}{addend_str} "
f"(base address 0x{p.base_value:08X}, build address 0x{p.build_value:08X})"
)
if new_symbol.name in found_symbols:
# Sanity check that offsets agree
existing_offset = found_symbols[new_symbol.name].offset
if new_offset != existing_offset:
raise FixBssException(
f"BSS symbol {new_symbol.name} found at conflicting offsets in this baserom "
f"(0x{existing_offset:04X} and 0x{new_offset:04X}). Is the build up-to-date?"
)
else:
found_symbols[new_symbol.name] = BssSymbol(
new_symbol.name, new_offset, new_symbol.size, new_symbol.align
)
return list(sorted(found_symbols.values(), key=lambda symbol: symbol.offset))
# Generate a sequence of integers in the range [0, 256) with a 2-adic valuation of exactly `nu`.
# The 2-adic valuation of an integer n is the largest k such that 2^k divides n
# (see https://en.wikipedia.org/wiki/P-adic_valuation), and for convenience we define
# the 2-adic valuation of 0 to be 8. Here's what the sequences look like for nu = 0..8:
# 8: 0
# 7: 128
# 6: 64, 192
# 5: 32, 96, 160, 224
# 4: 16, 48, 80, 112, ...
# 3: 8, 24, 40, 56, ...
# 2: 4, 12, 20, 28, ...
# 1: 2, 6, 10, 14, ...
# 0: 1, 3, 5, 7, ...
def gen_seq(nu: int) -> Iterator[int]:
if nu == 8:
yield 0
else:
for i in range(1 << (7 - nu)):
yield (2 * i + 1) * (1 << nu)
# Yields all n-tuples of integers in the range [0, 256) with minimum 2-adic valuation
# of exactly `min_nu`.
def gen_candidates_impl(n: int, min_nu: int) -> Iterator[tuple[int, ...]]:
if n == 1:
for n in gen_seq(min_nu):
yield (n,)
else:
# (a, *b) has min 2-adic valuation = min_nu if and only if either:
# a has 2-adic valuation > min_nu and b has min 2-adic valuation == min_nu
# a has 2-adic valuation == min_nu and b has min 2-adic valuation >= min_nu
for min_nu_a in reversed(range(min_nu + 1, 9)):
for a in gen_seq(min_nu_a):
for b in gen_candidates_impl(n - 1, min_nu):
yield (a, *b)
for a in gen_seq(min_nu):
for min_nu_b in reversed(range(min_nu, 9)):
for b in gen_candidates_impl(n - 1, min_nu_b):
yield (a, *b)
# Yields all n-tuples of integers in the range [0, 256), ordered by descending minimum
# 2-adic valuation of the elements in the tuple. For example, for n = 2 the sequence is:
# (0, 0), (0, 128), (128, 0), (128, 128), (0, 64), (0, 192), (128, 64), (128, 192), ...
def gen_candidates(n: int) -> Iterator[tuple[int, ...]]:
for nu in reversed(range(9)):
yield from gen_candidates_impl(n, nu)
# Determine a new set of increment_block_number pragmas that will fix the BSS ordering.
def solve_bss_ordering(
pragmas: list[Pragma],
bss_variables: list[BssVariable],
base_bss_symbols: list[BssSymbol],
) -> list[Pragma]:
base_symbols_by_name = {symbol.name: symbol for symbol in base_bss_symbols}
# Our "algorithm" just tries all possible combinations of increment_block_number amounts,
# which can get very slow with more than a few pragmas. But, we order the candidates in a
# binary-search-esque way to try to find a solution faster.
for new_amounts in gen_candidates(len(pragmas)):
# Generate new block numbers
new_bss_variables = []
for var in bss_variables:
new_block_number = var.block_number
for pragma, new_amount in zip(pragmas, new_amounts):
if var.block_number >= pragma.block_number:
new_block_number += new_amount - pragma.amount
new_bss_variables.append(
BssVariable(new_block_number, var.name, var.size, var.align)
)
# Predict new BSS and check if new ordering matches
new_bss_symbols = predict_bss_ordering(new_bss_variables)
bss_ordering_matches = True
for symbol in new_bss_symbols:
base_symbol = base_symbols_by_name.get(symbol.name)
if base_symbol is None:
continue
if symbol.offset != base_symbol.offset:
bss_ordering_matches = False
break
if bss_ordering_matches:
new_pragmas = []
for pragma, new_amount in zip(pragmas, new_amounts):
new_pragmas.append(
Pragma(pragma.line_number, pragma.block_number, new_amount)
)
return new_pragmas
raise FixBssException("Could not find any solutions")
def update_source_file(version_to_update: str, file: Path, new_pragmas: list[Pragma]):
with open(file, "r", encoding="utf-8") as f:
lines = f.readlines()
for pragma in new_pragmas:
line = lines[pragma.line_number - 1]
if not line.startswith("#pragma increment_block_number "):
raise FixBssException(
f"Expected #pragma increment_block_number on line {pragma.line_number}"
)
# Grab pragma argument and remove quotes
arg = line.strip()[len("#pragma increment_block_number ") + 1 : -1]
amounts_by_version = {}
for part in arg.split():
version, amount_str = part.split(":")
amounts_by_version[version] = int(amount_str)
amounts_by_version[version_to_update] = pragma.amount
new_arg = " ".join(
f"{version}:{amount}" for version, amount in amounts_by_version.items()
)
new_line = f'#pragma increment_block_number "{new_arg}"\n'
lines[pragma.line_number - 1] = new_line
with open(file, "w", encoding="utf-8") as f:
f.writelines(lines)
def process_file(
file: Path,
pointers: list[Pointer],
make_log: list[str],
dry_run: bool,
version: str,
):
print(f"{colorama.Fore.CYAN}Processing {file} ...{colorama.Fore.RESET}")
command_line = find_compiler_command_line(make_log, file)
if command_line is None:
raise FixBssException(f"Could not determine compiler command line for {file}")
print(f"Compiler command: {shlex.join(command_line)}")
symbol_table, ucode = run_cfe(command_line, keep_files=False)
bss_variables = find_bss_variables(symbol_table, ucode)
print("BSS variables:")
for var in bss_variables:
i = var.block_number
print(
f" {i:>6} [{i%256:>3}]: size=0x{var.size:04X} align=0x{var.align:X} {var.name}"
)
build_bss_symbols = predict_bss_ordering(bss_variables)
print("Current build BSS ordering:")
for symbol in build_bss_symbols:
print(
f" offset=0x{symbol.offset:04X} size=0x{symbol.size:04X} align=0x{symbol.align:X} {symbol.name}"
)
if not pointers:
raise FixBssException(f"No pointers to BSS found in ROM for {file}")
base_bss_symbols = determine_base_bss_ordering(build_bss_symbols, pointers)
print("Baserom BSS ordering:")
for symbol in base_bss_symbols:
print(
f" offset=0x{symbol.offset:04X} size=0x{symbol.size:04X} align=0x{symbol.align:X} {symbol.name}"
)
pragmas = find_pragmas(symbol_table)
max_pragmas = 3
if not pragmas:
raise FixBssException(f"No increment_block_number pragmas found in {file}")
elif len(pragmas) > max_pragmas:
raise FixBssException(
f"Too many increment_block_number pragmas found in {file} (found {len(pragmas)}, max {max_pragmas})"
)
print("Solving BSS ordering ...")
new_pragmas = solve_bss_ordering(pragmas, bss_variables, base_bss_symbols)
print("New increment_block_number amounts:")
for pragma in new_pragmas:
print(f" line {pragma.line_number}: {pragma.amount}")
if not dry_run:
update_source_file(version, file, new_pragmas)
print(f"{colorama.Fore.GREEN}Updated {file}{colorama.Fore.RESET}")
def process_file_worker(*x):
# Collect output in a buffer to avoid interleaving output when processing multiple files
old_stdout = sys.stdout
fake_stdout = io.StringIO()
try:
sys.stdout = fake_stdout
process_file(*x)
except Exception as e:
print(f"{colorama.Fore.RED}Error: {str(e)}{colorama.Fore.RESET}")
raise
finally:
sys.stdout = old_stdout
print()
print(fake_stdout.getvalue(), end="")
def main():
parser = argparse.ArgumentParser(
description="Automatically fix BSS ordering by editing increment_block_number pragmas. "
"Assumes that the build is up-to-date and that only differences between the baserom and "
"the current build are due to BSS ordering."
)
parser.add_argument(
"--oot-version",
"-v",
type=str,
required=True,
help="OOT version",
)
parser.add_argument(
"--dry-run",
action="store_true",
help="Print changes instead of editing source files",
)
parser.add_argument(
"files",
metavar="FILE",
nargs="*",
type=Path,
help="Fix BSS ordering for a particular C file (default: all files with BSS differences)",
)
args = parser.parse_args()
version = args.oot_version
pointers_by_file = compare_pointers(version)
files_with_reordering = []
for file, pointers in pointers_by_file.items():
# Try to detect if the section is shifted by comparing the lowest
# address among any pointer into the section between base and build
base_min_address = min(p.base_value for p in pointers)
build_min_address = min(p.build_value for p in pointers)
if not all(
p.build_value - build_min_address == p.base_value - base_min_address
for p in pointers
):
files_with_reordering.append(file)
if files_with_reordering:
print("Files with BSS reordering:")
for file in files_with_reordering:
print(f" {file}")
else:
print("No BSS reordering found.")
if args.files:
files_to_fix = args.files
else:
files_to_fix = files_with_reordering
if not files_to_fix:
return
print(f"Running make to find compiler command line ...")
make_log = generate_make_log(version)
with multiprocessing.Pool() as p:
file_results = []
for file in files_to_fix:
file_result = p.apply_async(
process_file_worker,
(
file,
pointers_by_file.get(file, []),
make_log,
args.dry_run,
version,
),
)
file_results.append(file_result)
# Wait until all files are done
while not all(file_result.ready() for file_result in file_results):
time.sleep(0.010)
# Collect results and check for errors
num_successes = sum(file_result.successful() for file_result in file_results)
if num_successes == len(file_results):
print()
print(f"Updated {num_successes}/{len(file_results)} files.")
else:
print()
print(
f"{colorama.Fore.RED}Updated {num_successes}/{len(file_results)} files.{colorama.Fore.RESET}"
)
sys.exit(1)
if __name__ == "__main__":
main()
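
As an aside (not part of the commit), the candidate ordering described in the gen_candidates comments above can be checked with a small standalone sketch. The functions below simply mirror the ones in tools/fix_bss.py and print the first few 2-tuples:

# Illustrative only: self-contained copy of the candidate generator from
# tools/fix_bss.py, used to confirm the ordering documented in its comments.
from __future__ import annotations
from itertools import islice
from typing import Iterator

def gen_seq(nu: int) -> Iterator[int]:
    # Integers in [0, 256) with 2-adic valuation exactly `nu` (0 is treated as 8)
    if nu == 8:
        yield 0
    else:
        for i in range(1 << (7 - nu)):
            yield (2 * i + 1) * (1 << nu)

def gen_candidates_impl(n: int, min_nu: int) -> Iterator[tuple[int, ...]]:
    # n-tuples whose minimum 2-adic valuation is exactly `min_nu`
    if n == 1:
        for a in gen_seq(min_nu):
            yield (a,)
    else:
        for min_nu_a in reversed(range(min_nu + 1, 9)):
            for a in gen_seq(min_nu_a):
                for b in gen_candidates_impl(n - 1, min_nu):
                    yield (a, *b)
        for a in gen_seq(min_nu):
            for min_nu_b in reversed(range(min_nu, 9)):
                for b in gen_candidates_impl(n - 1, min_nu_b):
                    yield (a, *b)

def gen_candidates(n: int) -> Iterator[tuple[int, ...]]:
    # All n-tuples in [0, 256)^n, ordered by descending minimum 2-adic valuation
    for nu in reversed(range(9)):
        yield from gen_candidates_impl(n, nu)

print(list(islice(gen_candidates(2), 8)))
# [(0, 0), (0, 128), (128, 0), (128, 128), (0, 64), (0, 192), (128, 64), (128, 192)]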


@ -14,14 +14,20 @@
# https://github.com/decompals/ultralib/blob/main/tools/mdebug.py
# https://www.cs.unibo.it/~solmi/teaching/arch_2002-2003/AssemblyLanguageProgDoc.pdf
# https://github.com/decompals/IDO/blob/main/IDO_7.1/dist/compiler_eoe/usr/include/sym.h
# https://github.com/Synray/ido-ucode-utils
from __future__ import annotations
import argparse
from dataclasses import dataclass
import itertools
from pathlib import Path
import platform
import struct
import subprocess
import shlex
import sys
from typing import Optional, Tuple
class Header:
@ -157,10 +163,15 @@ def read_string(data, start):
return data[start : start + size].decode("ascii")
def print_symbol_table(data):
header = Header(data[0 : Header.SIZE])
@dataclass
class SymbolTableEntry:
symbol: Optional[Symbol]
name: str
extern: bool
print(f"block [mod 256]: linkage type class name")
def parse_symbol_table(data: bytes) -> list[SymbolTableEntry]:
header = Header(data[0 : Header.SIZE])
# File descriptors
fds = []
@ -170,15 +181,14 @@ def print_symbol_table(data):
)
# Symbol identifiers ("dense numbers")
entries = []
for i in range(header.idnMax):
ifd, isym = struct.unpack(">II", read_entry(data, header.cbDnOffset, i, 8))
if isym == 0xFFFFF:
# TODO: is this always a string?
extern = False
sym = None
sym_name = ""
st = "string"
sc = ""
extern = False
else:
extern = ifd == 0x7FFFFFFF
if extern:
@ -195,15 +205,251 @@ def print_symbol_table(data):
)
)
sym_name = read_string(data, header.cbSsOffset + fd.issBase + sym.iss)
st = sym.symbol_type()
sc = sym.symbol_storage_class()
entries.append(SymbolTableEntry(sym, sym_name, extern))
return entries
def print_symbol_table(symbol_table: list[SymbolTableEntry]):
print(f"block [mod 256]: linkage type class name")
for i, entry in enumerate(symbol_table):
if not entry.symbol:
# TODO: is this always a string?
st = "string"
sc = ""
else:
st = entry.symbol.symbol_type()
sc = entry.symbol.symbol_storage_class()
print(
f'{i:>9} [{i%256:>3}]: {"extern" if extern else "":<7} {st:<10} {sc:<9} {sym_name:<40}'
f'{i:>9} [{i%256:>3}]: {"extern" if entry.extern else "":<7} {st:<10} {sc:<9} {entry.name:<40}'
)
def find_compiler_command_line(filename, oot_version):
@dataclass
class UcodeOp:
opcode: int
opcode_name: str
mtype: int
dtype: int
lexlev: int
i1: int
args: list[int]
string: Optional[bytes]
@dataclass
class UcodeOpInfo:
opcode: int
name: str
length: int
has_const: bool
UCODE_OP_INFO = [
UcodeOpInfo(0x00, "abs", 2, False),
UcodeOpInfo(0x01, "add", 2, False),
UcodeOpInfo(0x02, "adj", 4, False),
UcodeOpInfo(0x03, "aent", 4, False),
UcodeOpInfo(0x04, "and", 2, False),
UcodeOpInfo(0x05, "aos", 2, False),
UcodeOpInfo(0x06, "asym", 4, False),
UcodeOpInfo(0x07, "bgn", 4, False),
UcodeOpInfo(0x08, "bgnb", 2, False),
UcodeOpInfo(0x09, "bsub", 2, False),
UcodeOpInfo(0x0A, "cg1", 2, False),
UcodeOpInfo(0x0B, "cg2", 2, False),
UcodeOpInfo(0x0C, "chkh", 2, False),
UcodeOpInfo(0x0D, "chkl", 2, False),
UcodeOpInfo(0x0E, "chkn", 2, False),
UcodeOpInfo(0x0F, "chkt", 2, False),
UcodeOpInfo(0x10, "cia", 4, True),
UcodeOpInfo(0x11, "clab", 4, False),
UcodeOpInfo(0x12, "clbd", 2, False),
UcodeOpInfo(0x13, "comm", 4, True),
UcodeOpInfo(0x14, "csym", 4, False),
UcodeOpInfo(0x15, "ctrl", 4, False),
UcodeOpInfo(0x16, "cubd", 2, False),
UcodeOpInfo(0x17, "cup", 4, False),
UcodeOpInfo(0x18, "cvt", 4, False),
UcodeOpInfo(0x19, "cvtl", 2, False),
UcodeOpInfo(0x1A, "dec", 2, False),
UcodeOpInfo(0x1B, "def", 4, False),
UcodeOpInfo(0x1C, "dif", 4, False),
UcodeOpInfo(0x1D, "div", 2, False),
UcodeOpInfo(0x1E, "dup", 2, False),
UcodeOpInfo(0x1F, "end", 2, False),
UcodeOpInfo(0x20, "endb", 2, False),
UcodeOpInfo(0x21, "ent", 4, False),
UcodeOpInfo(0x22, "ueof", 2, False),
UcodeOpInfo(0x23, "equ", 2, False),
UcodeOpInfo(0x24, "esym", 4, False),
UcodeOpInfo(0x25, "fill", 4, False),
UcodeOpInfo(0x26, "fjp", 2, False),
UcodeOpInfo(0x27, "fsym", 4, False),
UcodeOpInfo(0x28, "geq", 2, False),
UcodeOpInfo(0x29, "grt", 2, False),
UcodeOpInfo(0x2A, "gsym", 4, False),
UcodeOpInfo(0x2B, "hsym", 4, False),
UcodeOpInfo(0x2C, "icuf", 4, False),
UcodeOpInfo(0x2D, "idx", 2, False),
UcodeOpInfo(0x2E, "iequ", 4, False),
UcodeOpInfo(0x2F, "igeq", 4, False),
UcodeOpInfo(0x30, "igrt", 4, False),
UcodeOpInfo(0x31, "ijp", 2, False),
UcodeOpInfo(0x32, "ilda", 6, False),
UcodeOpInfo(0x33, "ildv", 4, False),
UcodeOpInfo(0x34, "ileq", 4, False),
UcodeOpInfo(0x35, "iles", 4, False),
UcodeOpInfo(0x36, "ilod", 4, False),
UcodeOpInfo(0x37, "inc", 2, False),
UcodeOpInfo(0x38, "ineq", 4, False),
UcodeOpInfo(0x39, "init", 6, True),
UcodeOpInfo(0x3A, "inn", 4, False),
UcodeOpInfo(0x3B, "int", 4, False),
UcodeOpInfo(0x3C, "ior", 2, False),
UcodeOpInfo(0x3D, "isld", 4, False),
UcodeOpInfo(0x3E, "isst", 4, False),
UcodeOpInfo(0x3F, "istr", 4, False),
UcodeOpInfo(0x40, "istv", 4, False),
UcodeOpInfo(0x41, "ixa", 2, False),
UcodeOpInfo(0x42, "lab", 4, False),
UcodeOpInfo(0x43, "lbd", 2, False),
UcodeOpInfo(0x44, "lbdy", 2, False),
UcodeOpInfo(0x45, "lbgn", 2, False),
UcodeOpInfo(0x46, "lca", 4, True),
UcodeOpInfo(0x47, "lda", 6, False),
UcodeOpInfo(0x48, "ldap", 2, False),
UcodeOpInfo(0x49, "ldc", 4, True),
UcodeOpInfo(0x4A, "ldef", 4, False),
UcodeOpInfo(0x4B, "ldsp", 2, False),
UcodeOpInfo(0x4C, "lend", 2, False),
UcodeOpInfo(0x4D, "leq", 2, False),
UcodeOpInfo(0x4E, "les", 2, False),
UcodeOpInfo(0x4F, "lex", 2, False),
UcodeOpInfo(0x50, "lnot", 2, False),
UcodeOpInfo(0x51, "loc", 2, False),
UcodeOpInfo(0x52, "lod", 4, False),
UcodeOpInfo(0x53, "lsym", 4, False),
UcodeOpInfo(0x54, "ltrm", 2, False),
UcodeOpInfo(0x55, "max", 2, False),
UcodeOpInfo(0x56, "min", 2, False),
UcodeOpInfo(0x57, "mod", 2, False),
UcodeOpInfo(0x58, "mov", 4, False),
UcodeOpInfo(0x59, "movv", 2, False),
UcodeOpInfo(0x5A, "mpmv", 4, False),
UcodeOpInfo(0x5B, "mpy", 2, False),
UcodeOpInfo(0x5C, "mst", 2, False),
UcodeOpInfo(0x5D, "mus", 4, False),
UcodeOpInfo(0x5E, "neg", 2, False),
UcodeOpInfo(0x5F, "neq", 2, False),
UcodeOpInfo(0x60, "nop", 2, False),
UcodeOpInfo(0x61, "not", 2, False),
UcodeOpInfo(0x62, "odd", 2, False),
UcodeOpInfo(0x63, "optn", 4, False),
UcodeOpInfo(0x64, "par", 4, False),
UcodeOpInfo(0x65, "pdef", 4, False),
UcodeOpInfo(0x66, "pmov", 4, False),
UcodeOpInfo(0x67, "pop", 2, False),
UcodeOpInfo(0x68, "regs", 4, False),
UcodeOpInfo(0x69, "rem", 2, False),
UcodeOpInfo(0x6A, "ret", 2, False),
UcodeOpInfo(0x6B, "rlda", 4, False),
UcodeOpInfo(0x6C, "rldc", 4, True),
UcodeOpInfo(0x6D, "rlod", 4, False),
UcodeOpInfo(0x6E, "rnd", 4, False),
UcodeOpInfo(0x6F, "rpar", 4, False),
UcodeOpInfo(0x70, "rstr", 4, False),
UcodeOpInfo(0x71, "sdef", 4, False),
UcodeOpInfo(0x72, "sgs", 4, False),
UcodeOpInfo(0x73, "shl", 2, False),
UcodeOpInfo(0x74, "shr", 2, False),
UcodeOpInfo(0x75, "sign", 2, False),
UcodeOpInfo(0x76, "sqr", 2, False),
UcodeOpInfo(0x77, "sqrt", 2, False),
UcodeOpInfo(0x78, "ssym", 4, True),
UcodeOpInfo(0x79, "step", 2, False),
UcodeOpInfo(0x7A, "stp", 2, False),
UcodeOpInfo(0x7B, "str", 4, False),
UcodeOpInfo(0x7C, "stsp", 2, False),
UcodeOpInfo(0x7D, "sub", 2, False),
UcodeOpInfo(0x7E, "swp", 4, False),
UcodeOpInfo(0x7F, "tjp", 2, False),
UcodeOpInfo(0x80, "tpeq", 2, False),
UcodeOpInfo(0x81, "tpge", 2, False),
UcodeOpInfo(0x82, "tpgt", 2, False),
UcodeOpInfo(0x83, "tple", 2, False),
UcodeOpInfo(0x84, "tplt", 2, False),
UcodeOpInfo(0x85, "tpne", 2, False),
UcodeOpInfo(0x86, "typ", 4, False),
UcodeOpInfo(0x87, "ubd", 2, False),
UcodeOpInfo(0x88, "ujp", 2, False),
UcodeOpInfo(0x89, "unal", 2, False),
UcodeOpInfo(0x8A, "uni", 4, False),
UcodeOpInfo(0x8B, "vreg", 4, False),
UcodeOpInfo(0x8C, "xjp", 8, False),
UcodeOpInfo(0x8D, "xor", 2, False),
UcodeOpInfo(0x8E, "xpar", 2, False),
UcodeOpInfo(0x8F, "mtag", 2, False),
UcodeOpInfo(0x90, "alia", 2, False),
UcodeOpInfo(0x91, "ildi", 4, False),
UcodeOpInfo(0x92, "isti", 4, False),
UcodeOpInfo(0x93, "irld", 4, False),
UcodeOpInfo(0x94, "irst", 4, False),
UcodeOpInfo(0x95, "ldrc", 4, False),
UcodeOpInfo(0x96, "msym", 4, False),
UcodeOpInfo(0x97, "rcuf", 4, False),
UcodeOpInfo(0x98, "ksym", 4, False),
UcodeOpInfo(0x99, "osym", 4, False),
UcodeOpInfo(0x9A, "irlv", 2, False),
UcodeOpInfo(0x9B, "irsv", 2, False),
]
def parse_ucode(ucode: bytes) -> list[UcodeOp]:
ops = []
pos = 0
while pos < len(ucode):
opcode = ucode[pos]
mtype = ucode[pos + 1] >> 5
dtype = ucode[pos + 1] & 0x1F
lexlev = int.from_bytes(ucode[pos + 2 : pos + 4], "big")
i1 = int.from_bytes(ucode[pos + 4 : pos + 8], "big")
pos += 8
info = UCODE_OP_INFO[opcode]
size = 4 * info.length
args = []
for _ in range(info.length - 2):
args.append(int.from_bytes(ucode[pos : pos + 4], "big"))
pos += 4
string = None
if info.has_const:
string_length = int.from_bytes(ucode[pos : pos + 4], "big")
pos += 8
if dtype in (9, 12, 13, 14, 16) or info.name == "comm":
string = ucode[pos : pos + string_length]
pos += (string_length + 7) & ~7
ops.append(UcodeOp(opcode, info.name, mtype, dtype, lexlev, i1, args, string))
return ops
def print_ucode(ucode: list[UcodeOp]):
for op in ucode:
args = " ".join(f"0x{arg:X}" for arg in op.args)
print(
f"{op.opcode_name:<4} mtype={op.mtype:X} dtype={op.dtype:X} lexlev={op.lexlev} i1={op.i1} args={args}",
end="",
)
if op.string is not None:
print(f" string={op.string!r}", end="")
print()
def generate_make_log(oot_version: str) -> list[str]:
is_macos = platform.system() == "Darwin"
make = "gmake" if is_macos else "make"
make_command_line = [
@ -212,30 +458,28 @@ def find_compiler_command_line(filename, oot_version):
"--dry-run",
f"VERSION={oot_version}",
]
return subprocess.check_output(make_command_line).decode("utf-8").splitlines()
print(f"Running {make} to find compiler command line ...", file=sys.stderr)
make_output = (
subprocess.check_output(make_command_line).decode("utf-8").splitlines()
)
def find_compiler_command_line(
make_log: list[str], filename: Path
) -> Optional[list[str]]:
found = 0
for line in make_output:
for line in make_log:
parts = line.split()
if "-o" in parts and str(filename) in parts:
compiler_command_line = parts
found += 1
if found != 1:
print(
f"Could not determine compiler command line for {filename}", file=sys.stderr
)
sys.exit(1)
return None
print(f'Command line: {" ".join(compiler_command_line)}', file=sys.stderr)
return compiler_command_line
def generate_symbol_table(command_line):
def run_cfe(
command_line: list[str], keep_files: bool
) -> Tuple[list[SymbolTableEntry], list[UcodeOp]]:
# Assume command line is of the form:
# python3 tools/preprocess.py [COMPILER] [COMPILER_ARGS] [INPUT_FILE]
input_file = Path(command_line[-1])
@ -251,11 +495,14 @@ def generate_symbol_table(command_line):
subprocess.run(rest + ["-Hf", input_file], check=True)
# Read symbol table
return symbol_table_file.read_bytes()
symbol_table = parse_symbol_table(symbol_table_file.read_bytes())
ucode = parse_ucode(ucode_file.read_bytes())
return (symbol_table, ucode)
finally:
# Cleanup
symbol_table_file.unlink(missing_ok=True)
ucode_file.unlink(missing_ok=True)
if not keep_files:
symbol_table_file.unlink(missing_ok=True)
ucode_file.unlink(missing_ok=True)
def main():
@ -270,12 +517,33 @@ def main():
default="gc-eu-mq-dbg",
help="OOT version (default: gc-eu-mq-dbg)",
)
parser.add_argument(
"--print-ucode", action="store_true", help="Print cfe ucode output"
)
parser.add_argument(
"--keep-files",
action="store_true",
help="Keep temporary files (symbol table and ucode)",
)
args = parser.parse_args()
command_line = find_compiler_command_line(args.filename, args.oot_version)
data = generate_symbol_table(command_line)
print_symbol_table(data)
print(f"Running make to find compiler command line ...", file=sys.stderr)
make_log = generate_make_log(args.oot_version)
command_line = find_compiler_command_line(make_log, args.filename)
if command_line is None:
print(
f"Error: could not determine compiler command line for {filename}",
file=sys.stderr,
)
sys.exit(1)
print(f"Compiler command: {shlex.join(compiler_command_line)}", file=sys.stderr)
symbol_table, ucode = run_cfe(command_line, args.keep_files)
print_symbol_table(symbol_table)
if args.print_ucode:
print_ucode(ucode)
if __name__ == "__main__":


@ -3,13 +3,13 @@
# SPDX-FileCopyrightText: © 2024 ZeldaRET
# SPDX-License-Identifier: CC0-1.0
# Usage: preprocess.py [compile command minus input file...] [single input file]
# Usage: preprocess.py [flags] -- [compile command minus input file...] [single input file]
# Preprocess a C file to:
# * Re-encode from UTF-8 to EUC-JP (the repo uses UTF-8 for text encoding, but
# the strings in the ROM are encoded in EUC-JP)
# * Replace `#pragma increment_block_number N` with `N` fake structs for
# controlling BSS ordering
# * Replace `#pragma increment_block_number` with fake structs for controlling BSS ordering
import argparse
from pathlib import Path
import os
import tempfile
@ -22,39 +22,59 @@ def fail(message):
sys.exit(1)
def process_file(filename, input, output):
def process_file(version, filename, input, output):
output.write(f'#line 1 "{filename}"\n')
for i, line in enumerate(input, start=1):
if line.startswith("#pragma increment_block_number"):
parts = line.split()
if len(parts) != 3:
fail(
f"{filename}:{i}: increment_block_number must be followed by an integer"
)
try:
amount = int(parts[2])
except ValueError:
fail(
f"{filename}:{i}: increment_block_number must be followed by an integer"
)
if line.startswith("#pragma increment_block_number "):
# Grab pragma argument and remove quotes
arg = line.strip()[len("#pragma increment_block_number ") + 1 : -1]
amount = 0
for part in arg.split():
kv = part.split(":")
if len(kv) != 2:
fail(
f"{filename}:{i}: increment_block_number must be followed by a list of version:amount pairs"
)
if kv[0] != version:
continue
try:
amount = int(kv[1])
except ValueError:
fail(
f"{filename}:{i}: increment_block_number amount must be an integer"
)
# Always generate at least one struct so that fix_bss.py can know where the increment_block_number pragmas are
if amount == 0:
amount = 256
# Write fake structs for BSS ordering
for j in range(amount):
output.write(f"struct DummyStruct_{i:05}_{j:03};\n")
output.write(f"struct increment_block_number_{i:05}_{j:03};\n")
output.write(f'#line {i + 1} "{filename}"\n')
else:
output.write(line)
def main():
filename = Path(sys.argv[-1])
parser = argparse.ArgumentParser()
parser.add_argument("-v", "--oot-version", help="Which version should be processed")
parser.add_argument(
"args",
nargs="+",
)
args = parser.parse_args()
filename = Path(args.args[-1])
with tempfile.TemporaryDirectory(prefix="oot_") as tmpdir:
tmpfile = Path(tmpdir) / filename.name
with open(filename, mode="r", encoding="utf-8") as input:
with open(tmpfile, mode="w", encoding="euc-jp") as output:
process_file(filename, input, output)
process_file(args.oot_version, filename, input, output)
compile_command = sys.argv[1:-1] + ["-I", filename.parent, tmpfile]
compile_command = args.args[:-1] + ["-I", filename.parent, tmpfile]
process = subprocess.run(compile_command)
return process.returncode
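
For illustration only (a hypothetical helper, not part of the commit), the quoted per-version pragma argument handled above can be parsed like this, mirroring the logic in preprocess.py and fix_bss.py:

# Hypothetical helper mirroring the pragma parsing shown in the diff above:
# extract the increment_block_number amount for one version from a pragma line.
def pragma_amount_for_version(line: str, version: str) -> int:
    prefix = "#pragma increment_block_number "
    arg = line.strip()[len(prefix) + 1 : -1]  # strip prefix and surrounding quotes
    amounts = {}
    for part in arg.split():
        ver, amount_str = part.split(":")
        amounts[ver] = int(amount_str)
    return amounts.get(version, 0)  # versions not listed default to 0

line = '#pragma increment_block_number "gc-eu:64 gc-eu-mq:64 gc-eu-mq-dbg:0"\n'
print(pragma_amount_for_version(line, "gc-eu"))      # 64
print(pragma_amount_for_version(line, "ntsc-1.2"))   # 0 (version id used here only as an example)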