Incremental DLL decomp (#86)

* [WIP] Initial support for compiling DLLs from extracted ASM files

* Incremental DLL decomp

* Fix dll_split.py only working for DLL 121...

* Fix DLL section size calculations

* More DLL tool fixes and tweaks

* Optimize DLL parsing

* Add verbose and quiet flags to dll_split.py

* Fix executable bit for dll_split.py and dlltab.py

* Misc tool fixes

* Fix remaining issues with DLL splitting/recomp

* Add dllimports.py

* Let DLLs have optional custom linker scripts

* Make final ROM ELF link dependencies implicit

* Match DLL 82

* Clean and fixup dllimports.py

* Use existing syms.txt names when splitting DLLs

* Convert $gp initializer when parsing DLLs

* Update files under 'expected/' if the source file changed

* When auto-generating DLL syms.txt, emit initial function symbols

* Match DLL 83

* Fix .text relocations being out of order due to asm_processor

* Change auto generated symbol names for DLL code

* Nearly match DLL 223 (CannonClaw)
This commit is contained in:
Ethan Lafrenais 2022-04-24 00:29:21 -04:00 committed by GitHub
parent fab8756a61
commit 37ddaccf9c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
42 changed files with 1326 additions and 305 deletions

2
.gitmodules vendored
View File

@ -4,7 +4,7 @@
branch = master
[submodule "tools/asm_processor"]
path = tools/asm_processor
url = https://github.com/simonlindholm/asm-processor.git
url = https://github.com/Francessco121/asm-processor.git
branch = main
[submodule "tools/asm_differ"]
path = tools/asm_differ

124
dino.py
View File

@ -13,6 +13,7 @@ TARGET = "dino"
SCRIPT_DIR = Path(os.path.dirname(os.path.realpath(__file__)))
ASM_PATH = SCRIPT_DIR.joinpath("asm/")
BIN_PATH = SCRIPT_DIR.joinpath("bin/")
SRC_PATH = SCRIPT_DIR.joinpath("src/")
BUILD_PATH = SCRIPT_DIR.joinpath("build/")
EXPECTED_PATH = SCRIPT_DIR.joinpath("expected/")
TOOLS_PATH = SCRIPT_DIR.joinpath("tools/")
@ -40,6 +41,7 @@ BUILD_ARTIFACTS = [
SPLIT_PY = TOOLS_PATH.joinpath("splat/split.py")
DINO_DLL_PY = TOOLS_PATH.joinpath("dino_dll.py")
DLL_SPLIT_PY = TOOLS_PATH.joinpath("dll_split.py")
CONFIGURE_PY = TOOLS_PATH.joinpath("configure.py")
DIFF_PY = TOOLS_PATH.joinpath("asm_differ/diff.py")
M2CTX_PY = TOOLS_PATH.joinpath("m2ctx.py")
@ -69,6 +71,7 @@ class DinoCommandRunner:
def extract(self, use_cache: bool):
print("Extracting...")
# If not using cache, clear existing extracted content
if not use_cache:
if ASM_PATH.exists():
if self.verbose:
@ -79,6 +82,7 @@ class DinoCommandRunner:
print(f"rm {BIN_PATH}")
shutil.rmtree(BIN_PATH)
# Run splat
args = [
"python3", str(SPLIT_PY),
"--target", "baserom.z64",
@ -93,6 +97,7 @@ class DinoCommandRunner:
args.append("splat.yaml")
self.__run_cmd(args)
# Unpack DLLs
print()
print("Unpacking DLLs...")
self.__run_cmd([
@ -103,28 +108,29 @@ class DinoCommandRunner:
str(BIN_PATH.joinpath("assets/DLLS_tab.bin"))
])
# Extract DLLs
print()
self.configure(skip_dlls=False)
print("Extracting DLLs...")
self.__extract_dlls()
def configure(self, skip_dlls: bool):
print()
self.configure()
def configure(self):
print("Configuring build script...")
self.__assert_project_built()
args = [
self.__run_cmd([
"python3", str(CONFIGURE_PY),
"--base-dir", str(SCRIPT_DIR),
"--target", TARGET
]
if skip_dlls:
args.append("--skip-dlls")
])
self.__run_cmd(args)
def build(self, configure: bool, force: bool, skip_expected: bool):
def build(self, configure: bool, force: bool, skip_expected: bool, target: "str | None"):
# Configure build script if it's missing
if configure or not BUILD_SCRIPT_PATH.exists():
self.configure(skip_dlls=False)
self.configure()
print()
# If force is given, delete build artifacts first
@ -142,7 +148,10 @@ class DinoCommandRunner:
path.unlink()
# Build
print("Building ROM...")
if target is None:
print("Building ROM...")
else:
print(f"Building {target}...")
args = ["ninja"]
if SCRIPT_DIR != Path.cwd():
@ -150,8 +159,15 @@ class DinoCommandRunner:
if self.verbose:
args.append("-v")
if target is not None:
args.append(target)
self.__run_cmd(args)
# Stop here if the full ROM isn't being built
if target is not None and target != str(BUILD_PATH.joinpath(f"{TARGET}.z64").relative_to(SCRIPT_DIR)):
return
# Verify
print()
self.verify()
@ -171,7 +187,7 @@ class DinoCommandRunner:
except subprocess.CalledProcessError:
print()
print("The 'expected' output directory can only be created from a matching build!")
return
sys.exit(1)
# If force is given, remove any existing files
if force:
@ -186,7 +202,7 @@ class DinoCommandRunner:
to_create: "list[tuple[Path, Path]]" = []
for in_path in obj_paths:
out_path = EXPECTED_PATH.joinpath(in_path)
if not os.path.exists(out_path):
if not os.path.exists(out_path) or os.path.getmtime(in_path) > os.path.getmtime(out_path):
to_create.append((in_path, out_path))
if len(to_create) == 0:
@ -247,6 +263,36 @@ class DinoCommandRunner:
invoked_as = Path(invoked_as).name
print(f"Done! Run '{invoked_as} build' to build the ROM.")
def setup_dll(self, number: int):
    """Create a new decomp environment for the DLL with the given number.

    Creates ``dlls/<number>`` under the source directory, writes the DLL's
    initial YAML config there, extracts that DLL, and re-configures the
    build script. Exits the process with status 1 if the directory
    already exists.
    """
    env_dir = SRC_PATH / f"dlls/{number}"

    # Refuse to touch an environment that was already set up
    if env_dir.exists():
        print(f"An environment already exists at {env_dir.relative_to(SCRIPT_DIR)}!")
        sys.exit(1)

    print(f"Creating environment for DLL {number}...")

    # Directory first, then the initial DLL config inside it
    os.makedirs(env_dir)
    initial_config = (
        "compile: yes\n",
        "link_original_rodata: yes\n",
        "link_original_data: yes\n",
        "link_original_bss: yes\n",
    )
    with open(env_dir.joinpath(f"{number}.yaml"), "w", encoding="utf-8") as config_file:
        config_file.writelines(initial_config)

    # Extract just this DLL
    print("Extracting DLL...")
    self.__extract_dlls([number], quiet=True)

    # The build script must pick up the new DLL directory
    self.configure()

    print()
    print(f"Done! Environment created at {env_dir.relative_to(SCRIPT_DIR)}.")
def diff(self, args: "list[str]"):
self.__assert_project_built()
@ -271,29 +317,54 @@ class DinoCommandRunner:
print(">", " ".join(args))
subprocess.check_call(args)
def __extract_dlls(self, dlls: "list[str | int] | None"=None, quiet: bool=False):
    """Run the DLL split script.

    Args:
        dlls: DLL identifiers appended to the command line after being
            converted to strings. When omitted (or empty), no DLL
            arguments are passed to the script.
        quiet: Pass ``--quiet`` to the script. Ignored when this runner is
            verbose, in which case ``--verbose`` is passed instead.
    """
    args = [
        "python3", str(DLL_SPLIT_PY),
        "--base-dir", str(SCRIPT_DIR),
    ]

    # --verbose takes precedence over --quiet
    if self.verbose:
        args.append("--verbose")
    elif quiet:
        args.append("--quiet")

    # A None default avoids sharing one mutable list object between calls
    if dlls:
        args.extend(str(dll) for dll in dlls)

    self.__run_cmd(args)
def main():
parser = argparse.ArgumentParser(description="Quick commands for working on the Dinosaur Planet decompilation.")
parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging.", default=False)
subparsers = parser.add_subparsers(dest="command", required=True)
subparsers.add_parser("setup", help="Initialize/update Git submodules, verify the base ROM, and extract the ROM.")
setup_dll_cmd = subparsers.add_parser("setup-dll", help="Set up a new environment for decomping a DLL.")
setup_dll_cmd.add_argument("number", type=int, help="The number of the DLL.")
extract_cmd = subparsers.add_parser("extract", help="Split ROM and unpack DLLs.")
extract_cmd.add_argument("--use-cache", action="store_true", dest="use_cache", help="Only split changed segments in splat config.", default=False)
subparsers.add_parser("configure", help="Re-configure the build script.")
build_cmd = subparsers.add_parser("build", help="Build ROM and verify that it matches.")
build_cmd.add_argument("-c", "--configure", action="store_true", help="Re-configure the build script before building.", default=False)
build_cmd.add_argument("-f", "--force", action="store_true", help="Force a full rebuild.", default=False)
build_cmd.add_argument("--no-expected", dest="skip_expected", action="store_true", help="Don't update the 'expected' directory after a matching build.", default=False)
configure_cmd = subparsers.add_parser("configure", help="Re-configure the build script.")
configure_cmd.add_argument("--skip-dlls", dest="skip_dlls", action="store_true", help="Don't recopile DLLs (use original)", default=False)
build_cmd.add_argument("target", nargs="?", help="The target to build. Don\'t specify to build the full ROM.")
build_exp_cmd = subparsers.add_parser("build-expected", help="Update the 'expected' directory for diff. Requires a verified build.")
build_exp_cmd.add_argument("-f", "--force", action="store_true", help="Fully recreate the directory instead of updating it.", default=False)
subparsers.add_parser("verify", help="Verify that the re-built ROM matches the base ROM.")
subparsers.add_parser("baseverify", help="Verify that the base ROM is correct.")
subparsers.add_parser("clean", help="Remove extracted files, build artifacts, and build scripts.")
subparsers.add_parser("submodules", help="Initialize and update Git submodules.")
subparsers.add_parser("diff", help="Diff the re-rebuilt ROM with the original (redirects to asm-differ).", add_help=False)
ctx_cmd = subparsers.add_parser("context", help="Create a context file that can be used for mips2c/decomp.me.")
ctx_cmd.add_argument("file", help="The C file to create context for.")
build_exp_cmd = subparsers.add_parser("build-expected", help="Update the 'expected' directory for diff. Requires a verified build.")
build_exp_cmd.add_argument("-f", "--force", action="store_true", help="Fully recreate the directory instead of updating it.", default=False)
args, _ = parser.parse_known_args()
cmd = args.command
@ -302,12 +373,21 @@ def main():
runner = DinoCommandRunner(args.verbose)
if cmd == "setup":
runner.setup()
elif cmd == "setup-dll":
runner.setup_dll(number=args.number)
elif cmd == "extract":
runner.extract(use_cache=args.use_cache)
elif cmd == "build":
runner.build(configure=args.configure, force=args.force, skip_expected=args.skip_expected)
runner.build(
configure=args.configure,
force=args.force,
skip_expected=args.skip_expected,
target=args.target
)
elif cmd == "build-expected":
runner.create_expected_dir(force=args.force)
elif cmd == "configure":
runner.configure(skip_dlls=args.skip_dlls)
runner.configure()
elif cmd == "verify":
runner.verify()
elif cmd == "baseverify":
@ -322,10 +402,8 @@ def main():
runner.diff(args=full_args)
elif cmd =="context":
runner.make_context(args.file)
elif cmd == "build-expected":
runner.create_expected_dir(force=args.force)
except subprocess.CalledProcessError:
pass
sys.exit(1)
if __name__ == "__main__":
main()

View File

@ -4,10 +4,7 @@
#include "ultra64.h"
#include "libc/string.h"
typedef u8 UNK_TYPE_8;
typedef u16 UNK_TYPE_16;
typedef u32 UNK_TYPE_32;
typedef void UNK_PTR;
#include "unktypes.h"
#include "constants.h"
#include "sys/fs.h"

View File

@ -1,6 +1,8 @@
#ifndef _FUNCTIONS_H
#define _FUNCTIONS_H
#include "game/actor/actor.h"
void bootproc(void);
void idle(void * arg);
void mainproc(void * arg);

View File

@ -1,3 +1,14 @@
#ifndef _SYS_GAME_ACTOR_ACTOR_H
#define _SYS_GAME_ACTOR_ACTOR_H
#include <PR/gbi.h>
#include "sys/gfx/model.h"
#include "sys/dll.h"
#include "sys/math.h"
#include "objdata.h"
#include "hitbox.h"
#include "unktypes.h"
/** Actor/Game Object system
*/
@ -7,6 +18,11 @@ struct Vec3_Int{
u32 i; //seems to be a 32-bit bool. (for player pos buffer it's a frame count)
};
typedef struct {
/*0000*/ u8 unk_0x0[0x1a - 0x0];
/*001A*/ s16 unk0x1a;
} ActorUnk0x1a;
typedef struct {
/*0000*/ u8 unk_0x0[0x10 - 0x0];
/*0010*/ u8 *unk_0x10;
@ -45,7 +61,7 @@ typedef struct TActor {
/*0044*/ UNK_TYPE_16 objId; //guessed from SFA
/*0046*/ s16 unk0x46;
/*0048*/ void* ptr0x48;
/*004C*/ UNK_TYPE_32 unk0x4c;
/*004C*/ ActorUnk0x1a *unk0x4c;
/*0050*/ ObjData* data;
/*0054*/ ActorObjhitInfo* objhitInfo;
/*0058*/ UNK_TYPE_32 unk0x58;
@ -76,7 +92,9 @@ typedef struct TActor {
UNK_TYPE_32 unk0xc4;
/*00C8*/ struct TActor *linkedActor2;
/*00CC*/ void* ptr0xcc;
/*00D0*/ u8 unk_0xd0[0xe4 - 0xd0];
/*00D0*/ u8 unk_0xd0[0xdc - 0xd0];
/*00DC*/ s32 unk0xdc;
/*00E0*/ u8 unk_0xe0[4];
} TActor; // size is 0xe4; other actor-related data is placed in the following memory
typedef struct ObjListItem {
@ -96,3 +114,5 @@ extern float inverseDelayMirror; // why the mirrors, if they aren't used?
extern struct TActor * object_pointer_array[]; //first is always player character.
extern u16 objectCount;
extern struct Vec3_Int Vec3_Int_array[];
#endif //_SYS_GAME_ACTOR_ACTOR_H

View File

@ -1,3 +1,6 @@
#ifndef _SYS_GAME_ACTOR_HITBOX_H
#define _SYS_GAME_ACTOR_HITBOX_H
/** Actor hit detection system
*/
@ -7,3 +10,5 @@ typedef struct {
/*005B*/ u8 unk_0x5b[0x9f - 0x5b];
/*009F*/ s8 unk_0x9f;
} ActorObjhitInfo;
#endif //_SYS_GAME_ACTOR_HITBOX_H

View File

@ -1,3 +1,9 @@
#ifndef _SYS_GAME_ACTOR_OBJDATA_H
#define _SYS_GAME_ACTOR_OBJDATA_H
#include "sys/math.h"
#include "unktypes.h"
/** Object (actor) data in OBJECTS.BIN.
*/
@ -155,3 +161,5 @@ typedef struct ObjData { //copied from SFA; may be incorrect
/*90*/ u8 hitbox_flagsB6; // < 0xE = invincible (HitboxFlags62)
/*91*/ char name[11];
} ObjData;
#endif //_SYS_GAME_ACTOR_OBJDATA_H

11
include/unktypes.h Normal file
View File

@ -0,0 +1,11 @@
#ifndef _UNKTYPES_H
#define _UNKTYPES_H
#include "ultra64.h"
typedef u8 UNK_TYPE_8;
typedef u16 UNK_TYPE_16;
typedef u32 UNK_TYPE_32;
typedef void UNK_PTR;
#endif

View File

@ -76,15 +76,15 @@ u32* dll_load(u16 id, u16 exportCount, s32 arg2)
if (id >= 0x8000) {
id -= 0x8000;
// bank2
id += gFile_DLLS_TAB->entries[1].bssSize;
id += gFile_DLLS_TAB->entries[1].bssSize; // bank 2 end
} else if (id >= 0x2000) {
id -= 0x2000;
// bank1
id += gFile_DLLS_TAB->entries[0].bssSize + 1;
id += gFile_DLLS_TAB->entries[0].bssSize + 1; // bank 1 end + 1
} else if (id >= 0x1000) {
id -= 0x1000;
// bank0
id += gFile_DLLS_TAB->entries[0].offset + 1;
id += gFile_DLLS_TAB->entries[0].offset + 1; // bank 0 end + 1
}
// Check if DLL is already loaded, and if so, increment the reference count
@ -282,9 +282,11 @@ void dll_relocate(DLLFile* dll)
target = (u32*)((u8*)dll + dll->code);
// Relocate constructor and destructor to absolute addresses
*(u32*)&dll->ctor += (u32)target;
*(u32*)&dll->dtor += (u32)target;
// Relocate exports to absolute addresses
exports = (u32*)((u8*)dll + sizeof(DLLFile));
for (exportCount = dll->exportCount; exportCount != 0; exportCount--)
{
@ -293,6 +295,7 @@ void dll_relocate(DLLFile* dll)
if (dll->rodata != -1)
{
// Relocate global offset table (GOT)
relocations = (s32*)((u8*)dll + dll->rodata);
currRelocation = relocations;
@ -311,6 +314,7 @@ void dll_relocate(DLLFile* dll)
currRelocation++;
// Relocate $gp initializer
while (*currRelocation != -3)
{
u32* fn = ((u32)*currRelocation / 4) + (tmp_target = target);
@ -322,6 +326,7 @@ void dll_relocate(DLLFile* dll)
currRelocation++;
// Relocate .data
exports = &((u8 *) dll)[dll->data];
target = (u32 *) exports;

4
src/dlls/16/16.yaml Normal file
View File

@ -0,0 +1,4 @@
compile: yes
link_original_rodata: no
link_original_data: no
link_original_bss: no

0
src/dlls/16/syms.txt Normal file
View File

107
src/dlls/223/223.c Normal file
View File

@ -0,0 +1,107 @@
#include <PR/ultratypes.h>
#include "game/actor/actor.h"
/**
 * Unknown structure used by dll_cannon_claw_func_18, which writes unk0x0 and
 * unk0x10 on its first argument and reads unk0xC from its second.
 * NOTE(review): the annotated offsets jump from 0x0 (s16) to 0x4, which
 * implies 2 bytes of alignment padding after unk0x0 - confirm.
 */
typedef struct {
/* 0x0 */ s16 unk0x0;
/* 0x4 */ s32 unk0x4;
/* 0x8 */ s32 unk0x8;
/* 0xC */ f32 unk0xC;
/* 0x10 */ f32 unk0x10;
} UnkDll223Struct1;
typedef union {
void (*func_58)(TActor*,s32,s32);
} UnkDllFuncs1;
typedef struct {
UnkDllFuncs1 *func;
} UnkDllInst;
typedef void (*self_func_1B4)(TActor *a0);
extern f32 delayFloat;
extern void func_800267A4();
extern void draw_actor(TActor*,s32,s32,s32,s32,float);
extern TActor *func_8002394C();
extern s32 func_80023D30(TActor*,s32,f32,s32);
extern s32 func_80024108(TActor*,f32,f32,s32);
extern s32 func_80025F40(TActor*,s32,s32,s32);
s32 mainGetBit(s32);
void mainSetBits(s32, s32);
void dll_cannon_claw_func_1B4(TActor *a0);
/**
 * First entry in this DLL's export list (offset 0x0 in syms.txt).
 * The body is empty; arg is unused.
 */
void dll_cannon_claw_ctor(void *arg) {
}
/**
 * Second entry in this DLL's export list (offset 0xC in syms.txt).
 * The body is empty; arg is unused.
 */
void dll_cannon_claw_dtor(void *arg) {
}
/**
 * Writes two fields of a0: unk0x0 is set to -0x8000 and unk0x10 is set to
 * a1->unk0xC plus 2.0f. a2 is unused.
 */
void dll_cannon_claw_func_18(UnkDll223Struct1 *a0, UnkDll223Struct1 *a1, void *a2) {
a0->unk0x0 = -0x8000;
a0->unk0x10 = a1->unk0xC + 2.0f;
}
// With `#if 1` the function body comes from the extracted assembly file via
// GLOBAL_ASM; the C version in the #else branch is not compiled.
#if 1
#pragma GLOBAL_ASM("asm/nonmatchings/dlls/223/dll_cannon_claw_func_4C.s")
#else
// Realloc
// NOTE(review): "Realloc" presumably refers to a register allocation
// difference that keeps this C version from matching - confirm.
void dll_cannon_claw_func_4C(TActor *s0) {
UnkDllInst *dll_inst;
TActor *func_8002394C_ret = func_8002394C();
// Nothing to do once unk0xdc has been set (dll_cannon_claw_func_1B4 sets it to 1)
if (s0->unk0xdc != 0) {
return;
}
// Only call func_80023D30 with 0x208 when curAnimId is not already 0x208
if (s0->curAnimId != 0x208) {
func_80023D30(s0, 0x208, 0, 0);
}
func_80024108(s0, 0.0049999998f, delayFloat, 0);
if (func_80025F40(s0, 0, 0, 0) != 0) {
if (s0->linkedActor != 0) {
mainSetBits(s0->linkedActor->unk0x4c->unk0x1a, 1);
// Call through entry 0x16 of the function table of the DLL attached to
// the actor returned by func_8002394C
dll_inst = ((UnkDllInst*)func_8002394C_ret->dll);
dll_inst->func[0x16].func_58(func_8002394C_ret, 0, 0);
}
// TODO: this matches but is awful
// Calls the code at ctor + 0x1b4; per syms.txt (ctor = 0x0, func_1B4 = 0x1B4)
// that address is dll_cannon_claw_func_1B4.
((self_func_1B4)(((int)&dll_cannon_claw_ctor) + 0x1b4))(s0);
} else if (s0->linkedActor != 0 && mainGetBit(s0->linkedActor->unk0x4c->unk0x1a) != 0) {
// Same indirect call to dll_cannon_claw_func_1B4 as above
((self_func_1B4)(((int)&dll_cannon_claw_ctor) + 0x1b4))(s0);
}
}
#endif
/**
 * Sets a0->unk0xdc to 1, ORs the value 8 into a0->unk0xaf, then calls
 * func_800267A4. Not listed in exports.s; the #else C version of
 * dll_cannon_claw_func_4C reaches it via an address computed from the ctor.
 */
void dll_cannon_claw_func_1B4(TActor *a0) {
a0->unk0xdc = 1;
a0->unk0xaf = a0->unk0xaf | 8;
func_800267A4();
}
/** Exported function with an empty body; a0 is unused. */
void dll_cannon_claw_func_200(void *a0) {
}
/**
 * Forwards a0..a4 to draw_actor with 1.0f as the final float argument, but
 * only when a5 is non-zero and a0->unk0xdc is zero. Otherwise does nothing.
 */
void dll_cannon_claw_func_20C(TActor *a0, s32 a1, s32 a2, s32 a3, s32 a4, s8 a5) {
if (a5 != 0 && a0->unk0xdc == 0) {
draw_actor(a0, a1, a2, a3, a4, 1.0f);
}
}
/** Exported function with an empty body; a0 and a1 are unused. */
void dll_cannon_claw_func_26C(void *a0, void *a1) {
}
/** Exported function that always returns 0; a0 is unused. */
s32 dll_cannon_claw_func_27C(void *a0) {
return 0;
}
/** Exported function that always returns 0; a0 and a1 are unused. */
s32 dll_cannon_claw_func_28C(void *a0, void *a1) {
return 0;
}

4
src/dlls/223/223.yaml Normal file
View File

@ -0,0 +1,4 @@
compile: yes
link_original_rodata: no
link_original_data: no
link_original_bss: no

15
src/dlls/223/exports.s Normal file
View File

@ -0,0 +1,15 @@
.option pic2
.section ".exports"
.global _exports
_exports:
.dword dll_cannon_claw_ctor
.dword dll_cannon_claw_dtor
.dword dll_cannon_claw_func_18
.dword dll_cannon_claw_func_4C
.dword dll_cannon_claw_func_200
.dword dll_cannon_claw_func_20C
.dword dll_cannon_claw_func_26C
.dword dll_cannon_claw_func_27C
.dword dll_cannon_claw_func_28C

20
src/dlls/223/syms.txt Normal file
View File

@ -0,0 +1,20 @@
dll_cannon_claw_ctor = 0x0;
dll_cannon_claw_dtor = 0xC;
dll_cannon_claw_func_18 = 0x18;
dll_cannon_claw_func_4C = 0x4C;
dll_cannon_claw_func_1B4 = 0x1B4;
dll_cannon_claw_func_200 = 0x200;
dll_cannon_claw_func_20C = 0x20C;
dll_cannon_claw_func_26C = 0x26C;
dll_cannon_claw_func_27C = 0x27C;
dll_cannon_claw_func_28C = 0x28C;
func_8002394C = 0x80000004;
func_80023D30 = 0x80000050;
func_80024108 = 0x8000004B;
delayFloat = 0x80000028;
func_80025F40 = 0x8000014D;
mainSetBits = 0x8000005C;
mainGetBit = 0x80000025;
func_800267A4 = 0x80000109;
draw_actor = 0x80000046;

4
src/dlls/365/365.yaml Normal file
View File

@ -0,0 +1,4 @@
compile: yes
link_original_rodata: no
link_original_data: no
link_original_bss: no

1
src/dlls/365/syms.txt Normal file
View File

@ -0,0 +1 @@
draw_actor = 0x80000046;

13
src/dlls/82/82.c Normal file
View File

@ -0,0 +1,13 @@
#include <PR/ultratypes.h>
/** First entry in this DLL's export list (exports.s). Empty body; arg is unused. */
void ctor(void *arg) {
}
/** Second entry in this DLL's export list (exports.s). Empty body; arg is unused. */
void dtor(void *arg) {
}
/** Exported function with an empty body; none of the four arguments are used. */
void func_18(void *a0, void *a1, void *a2, void *a3) {
}

7
src/dlls/82/82.ld Normal file
View File

@ -0,0 +1,7 @@
INCLUDE "src/dlls/dll.ld"
SEARCH_DIR ("build/src/dlls/82")
INPUT (
./82.o
./strings.o
./exports.o
)

4
src/dlls/82/82.yaml Normal file
View File

@ -0,0 +1,4 @@
compile: yes
link_original_rodata: no
link_original_data: no
link_original_bss: no

9
src/dlls/82/exports.s Normal file
View File

@ -0,0 +1,9 @@
.option pic2
.section ".exports"
.global _exports
_exports:
.dword ctor
.dword dtor
.dword func_18

7
src/dlls/82/strings.s Normal file
View File

@ -0,0 +1,7 @@
.text
# Unused strings at the end of the .text section
.asciz "in cave\n"
.align 2, 0
.asciz "out of cave\n"
.align 2, 0

0
src/dlls/82/syms.txt Normal file
View File

27
src/dlls/83/83.c Normal file
View File

@ -0,0 +1,27 @@
#include <PR/ultratypes.h>
void mainSetBits(s32, s32);
/** Placed at offset 0x0 by syms.txt and listed first in exports.s. Empty body; arg is unused. */
void ctor(void *arg) {
}
/** Placed at offset 0xC by syms.txt and listed second in exports.s. Empty body; arg is unused. */
void dtor(void *arg) {
}
/**
 * Calls mainSetBits(0x3eb, 1) when a2 is 1 and mainSetBits(0x3eb, 0) when
 * a2 is -1. Any other a2 value does nothing. a0, a1 and a3 are unused.
 */
void func_18(void *a0, void *a1, s8 a2, void *a3) {
if (a2 == 1) {
mainSetBits(0x3eb, 1);
} else if (a2 == -1) {
mainSetBits(0x3eb, 0);
}
}
/**
 * Same shape as func_18 but for 0x3ea: calls mainSetBits(0x3ea, 1) when a2
 * is 1 and mainSetBits(0x3ea, 0) when a2 is -1. Any other a2 value does
 * nothing. a0, a1 and a3 are unused.
 */
void func_A4(void *a0, void *a1, s8 a2, void *a3) {
if (a2 == 1) {
mainSetBits(0x3ea, 1);
} else if (a2 == -1) {
mainSetBits(0x3ea, 0);
}
}

4
src/dlls/83/83.yaml Normal file
View File

@ -0,0 +1,4 @@
compile: yes
link_original_rodata: no
link_original_data: no
link_original_bss: no

10
src/dlls/83/exports.s Normal file
View File

@ -0,0 +1,10 @@
.option pic2
.section ".exports"
.global _exports
_exports:
.dword ctor
.dword dtor
.dword func_18
.dword func_A4

5
src/dlls/83/syms.txt Normal file
View File

@ -0,0 +1,5 @@
ctor = 0x0;
dtor = 0xC;
func_18 = 0x18;
func_A4 = 0xA4;
mainSetBits = 0x8000005C;

View File

@ -1,38 +1,46 @@
PHDRS
{
text PT_LOAD FLAGS(5);
rodata PT_LOAD FLAGS(4);
data PT_LOAD FLAGS(6);
}
/**
* Base linker script for DLLs.
*
* The specifics of this script aren't important since the resulting
* ELF is converted to a Dinosaur Planet .dll with elf2dll. What matters
* is that the following sections are present (if not empty, in any order):
* - .text
* - .rodata
* - .data
* - .bss
* - .exports
* - .rel.text
* - .rel.rodata
* - .rel.data
* - .rel.exports
*/
SECTIONS
{
draw_actor = 0x80000046;
. = 0x0;
.text : {
*(.text)
*(.text.*)
} : text
*(.text)
*(.text.*)
}
.rodata : {
*(.rodata)
*(.rodata.*)
} : rodata
*(.rodata)
*(.rodata.*)
}
.data : {
*(.data)
*(.data.*)
} : data
*(.data)
*(.data.*)
}
.bss : {
*(.bss)
*(.bss.*)
} : data
*(.bss)
*(.bss.*)
}
/DISCARD/ : {
*(.reginfo)
*(.MIPS.abiflags)
*(.reginfo)
*(.MIPS.abiflags)
}
/*========================================= */

2
src/dlls/names.txt Normal file
View File

@ -0,0 +1,2 @@
# Note: This file doesn't do anything right now, it's just for record-keeping
223 = cannon_claw

@ -1 +1 @@
Subproject commit fd28ec1d7ec94ae75f77671ec44df30de62819c2
Subproject commit e9e3dd053c207268fe18536b40e0c696046af872

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python3
# Builds the build.ninja script for the Dinosaur Planet decomp
import argparse
from enum import Enum
@ -9,6 +10,7 @@ from pathlib import Path
from shutil import which
import sys
from ninja import ninja_syntax
import yaml
class BuildFileType(Enum):
C = 1
@ -133,6 +135,15 @@ class BuildNinjaWriter:
"-mabi=32",
]))
self.writer.variable("AS_FLAGS_DLL", " ".join([
"$INCLUDES",
"-EB",
"-mtune=vr4300",
"-march=vr4300",
"-modd-spreg",
"-KPIC",
]))
self.writer.variable("GCC_FLAGS", " ".join([
"$CC_DEFINES",
"$INCLUDES",
@ -178,7 +189,6 @@ class BuildNinjaWriter:
]))
self.writer.variable("LD_FLAGS_DLL", " ".join([
"-T $LINK_SCRIPT_DLL",
"-nostartfiles",
"-nodefaultlibs",
"-r",
@ -197,6 +207,7 @@ class BuildNinjaWriter:
self.writer.variable("CC", "tools/ido_recomp/linux/5.3/cc")
self.writer.variable("ASM_PROCESSOR", "python3 tools/asm_processor/build.py")
self.writer.variable("CC_PREPROCESSED", "$ASM_PROCESSOR $CC -- $AS $AS_FLAGS --")
self.writer.variable("CC_PREPROCESSED_DLL", "$ASM_PROCESSOR --sort-text-relocs $CC -- $AS $AS_FLAGS_DLL --")
self.writer.variable("GCC", "gcc")
self.writer.variable("ELF2DLL", "tools/elf2dll")
self.writer.variable("DINODLL", "python3 tools/dino_dll.py")
@ -210,13 +221,14 @@ class BuildNinjaWriter:
"Compiling $in...",
depfile="$out.d")
self.writer.rule("cc_dll",
"$GCC -MM -MF $out.d -MT $out $GCC_FLAGS_DLL $in && $CC_PREPROCESSED -c $CC_FLAGS_DLL $OPT_FLAGS -o $out $in",
"$GCC -MM -MF $out.d -MT $out $GCC_FLAGS_DLL $in && $CC_PREPROCESSED_DLL -c $CC_FLAGS_DLL $OPT_FLAGS -o $out $in",
"Compiling $in...",
depfile="$out.d")
self.writer.rule("as", "$AS $AS_FLAGS -o $out $in", "Assembling $in...")
self.writer.rule("as_dll", "$AS $AS_FLAGS_DLL -o $out $in", "Assembling $in...")
self.writer.rule("preprocess_linker_script", "cpp -P -DBUILD_DIR=$BUILD_DIR -o $out $in", "Pre-processing linker script...")
self.writer.rule("ld", "$LD $LD_FLAGS -o $out", "Linking...")
self.writer.rule("ld_dll", "$LD $LD_FLAGS_DLL $in -o $out", "Linking...")
self.writer.rule("ld_dll", "$LD $LD_FLAGS_DLL -T $SYMS_TXT -T $LINK_SCRIPT_DLL $in -o $out", "Linking...")
self.writer.rule("ld_bin", "$LD -r -b binary -o $out $in", "Linking binary $in...")
self.writer.rule("to_bin", "$OBJCOPY $in $out -O binary", "Converting $in to $out...")
self.writer.rule("file_copy", "cp $in $out", "Copying $in to $out...")
@ -271,7 +283,7 @@ class BuildNinjaWriter:
if file.type == BuildFileType.C:
command = "cc_dll"
elif file.type == BuildFileType.ASM:
command = "as"
command = "as_dll"
elif file.type == BuildFileType.BIN:
command = "ld_bin"
else:
@ -283,10 +295,25 @@ class BuildNinjaWriter:
# Link
elf_path = f"{obj_dir}/{dll.number}.elf"
self.writer.build(elf_path, "ld_dll", dll_link_deps, implicit="$LINK_SCRIPT_DLL")
syms_txt_path = f"{dll.dir}/syms.txt"
custom_link_script = Path(f"{dll.dir}/{dll.number}.ld")
# Convert .elf to .bin
#self.writer.build(f"{obj_dir}/{dll.number}.bin", "to_bin", elf_path)
if custom_link_script.exists():
# Use DLL's custom link script
# Note: Assume custom script lists all inputs
implicit_deps = [str(custom_link_script), syms_txt_path]
implicit_deps.extend(dll_link_deps)
self.writer.build(elf_path, "ld_dll", [],
implicit=implicit_deps,
variables={
"SYMS_TXT": syms_txt_path,
"LINK_SCRIPT_DLL": str(custom_link_script)
})
else:
# Use default DLL link script
self.writer.build(elf_path, "ld_dll", dll_link_deps,
implicit=["$LINK_SCRIPT_DLL", syms_txt_path],
variables={"SYMS_TXT": syms_txt_path})
# Convert ELF to Dinosaur Planet DLL
dll_asset_path = f"$BUILD_DIR/bin/assets/dlls/{dll.number}.dll"
@ -342,7 +369,7 @@ class BuildNinjaWriter:
# Link
self.link_deps.append("$BUILD_DIR/$TARGET.ld")
self.writer.build("$BUILD_DIR/$TARGET.elf", "ld", self.link_deps)
self.writer.build("$BUILD_DIR/$TARGET.elf", "ld", [], implicit=self.link_deps)
# Convert .elf to .bin
self.writer.build("$BUILD_DIR/$TARGET.bin", "to_bin", "$BUILD_DIR/$TARGET.elf")
@ -408,8 +435,14 @@ class InputScanner:
for dir in dll_dirs:
dir_parts = dir.split("/")
number = dir_parts[-1]
# Skip if this DLL is configured to use the original DLL instead of recompiling
if not self.__should_compile_dll(Path(dir), number):
continue
c_paths = [Path(path) for path in glob.glob(f"{dir}/**/*.c", recursive=True)]
asm_paths = [Path(path) for path in glob.glob(f"{dir}/**/*.s", recursive=True)]
asm_paths.extend([Path(path) for path in glob.glob(f"asm/nonmatchings/dlls/{number}/data/*.s")])
files: "list[BuildFile]" = []
for src_path in c_paths:
@ -447,6 +480,19 @@ class InputScanner:
return self.config.default_opt_flags
def __should_compile_dll(self, dll_dir: Path, number: str) -> bool:
    """Decide whether the DLL in ``dll_dir`` should be recompiled.

    Args:
        dll_dir: Directory holding the DLL's sources and ``<number>.yaml``.
        number: The DLL number, used to build the config file name.

    Returns:
        True when the config file is missing (a warning is printed), or
        when its ``compile`` key is truthy. False when the key is absent or
        falsy, or when the file does not contain a mapping.
    """
    yaml_path = dll_dir.joinpath(f"{number}.yaml")
    if not yaml_path.exists():
        print(f"WARN: Missing {yaml_path}!")
        return True

    dll_config = self.__parse_dll_yaml(yaml_path)

    # An empty YAML document loads as None (and a scalar/list document is not
    # a mapping either); treat those like a config without a "compile" key
    # instead of raising TypeError on the membership test.
    if not isinstance(dll_config, dict):
        return False

    return bool(dll_config.get("compile", False))
def __parse_dll_yaml(self, path: Path):
    """Load and return the parsed contents of the YAML file at ``path``.

    The file is read as UTF-8 so the result does not depend on the
    platform's locale encoding (the DLL configs are written as UTF-8).
    Returns whatever ``yaml.safe_load`` produces for the document.
    """
    with open(path, "r", encoding="utf-8") as file:
        return yaml.safe_load(file)
def main():
parser = argparse.ArgumentParser(description="Creates the Ninja build script for the Dinosaur Planet decompilation project.")
parser.add_argument("--base-dir", type=str, dest="base_dir", help="The root of the project (default=..).", default="..")

View File

@ -3,42 +3,105 @@ import struct
from capstone import CS_ARCH_MIPS, CS_MODE_BIG_ENDIAN, CS_MODE_MIPS64, Cs, CsInsn
class DLL:
functions: "list[DLLFunction] | None"
"""A Dinosaur Planet DLL"""
def __init__(self,
number: str,
size_aligned: int,
header: "DLLHeader",
reloc_table: "DLLRelocationTable",
functions: "list[DLLFunction] | None") -> None:
reloc_table: "DLLRelocationTable") -> None:
self.number = number
self.size_aligned = size_aligned
"""The total size of this DLL (in bytes), 16-byte aligned"""
self.header = header
self.reloc_table = reloc_table
self.functions = functions
def has_data(self) -> bool:
    """Tell whether a .data section is present in this DLL"""
    # The header stores 0xFFFFFFFF as the offset of a section that is absent
    absent_marker = 0xFFFF_FFFF
    return not (self.header.data_offset == absent_marker)
def has_rodata(self) -> bool:
    """Tell whether a .rodata section is present in this DLL"""
    # The header stores 0xFFFFFFFF as the offset of a section that is absent
    absent_marker = 0xFFFF_FFFF
    return not (self.header.rodata_offset == absent_marker)
def get_text_size(self) -> int:
    """Size (in bytes) of the .text section of this DLL"""
    # .text begins right after the header
    text_start = self.header.size

    # It runs up to the next section that exists: .rodata first, else .data
    if self.has_rodata():
        return self.header.rodata_offset - text_start
    if self.has_data():
        return self.header.data_offset - text_start

    # No later section: use the rest of the file, rounded down to 16 bytes
    remaining = self.size_aligned - text_start
    return remaining - remaining % 16
def get_rodata_size(self) -> int:
    """Size (in bytes) of the .rodata section of this DLL, not counting
    the GOT and relocation tables"""
    if not self.has_rodata():
        return 0

    # Skip past the relocation table that sits at the section's offset
    payload_start = self.header.rodata_offset + self.reloc_table.get_size()

    if not self.has_data():
        # No .data after it: use the rest of the file, rounded down to 16 bytes
        remaining = self.size_aligned - payload_start
        return remaining - remaining % 16

    return self.header.data_offset - payload_start
def get_data_size(self) -> int:
    """Size (in bytes) of the .data section of this DLL"""
    if self.has_data():
        # .data runs to the end of the file; round the span down to 16 bytes
        span = self.size_aligned - self.header.data_offset
        return span - span % 16

    return 0
def get_rom_size(self) -> int:
    """Total ROM size (in bytes, unaligned) of this DLL"""
    # Header, then each section, with the relocation table counted separately
    # because get_rodata_size() leaves it out
    pieces = (
        self.header.size,
        self.get_text_size(),
        self.get_rodata_size(),
        self.reloc_table.get_size(),
        self.get_data_size(),
    )
    return sum(pieces)
def get_ram_size(self) -> int:
    """Total size (in bytes) of this DLL once loaded into RAM (**BSS is not included!**)"""
    # Only the three section sizes are summed here
    section_sizes = (
        self.get_text_size(),
        self.get_rodata_size(),
        self.get_data_size(),
    )
    return sum(section_sizes)
@staticmethod
def parse(data: bytearray,
number: str,
include_funcs=True,
vram: int=0x8000_0000):
known_symbols: "dict[int, str]"={}):
header = DLLHeader.parse(data)
reloc_table = DLLRelocationTable.parse(data, header)
dll = DLL(number, len(data), header, reloc_table)
if include_funcs:
functions = parse_functions(data, header, reloc_table, vram)
else:
functions = None
dll.functions = parse_functions(data, dll, reloc_table, known_symbols)
return DLL(number, header, reloc_table, functions)
return dll
class DLLHeader:
"""The header section (including exports)"""
def __init__(self,
header_size: int,
size: int,
data_offset: int,
rodata_offset: int,
export_count: int,
ctor_offset: int,
dtor_offset: int,
export_offsets: "list[int]") -> None:
self.header_size = header_size
self.size = size
"""Header size in bytes"""
self.data_offset = data_offset
"""DATA offset (relative to start of header or 0xFFFFFFFF if section is not present)"""
@ -52,7 +115,7 @@ class DLLHeader:
"""Destructor offset (relative to end of header)"""
self.export_offsets = export_offsets
"""List of exports (the offsets they specify)"""
@staticmethod
def parse(data: bytearray) -> "DLLHeader":
"""Given a DLL file, parses and returns the header"""
@ -63,7 +126,7 @@ class DLLHeader:
export_offsets = struct.unpack_from(">" + ("I" * export_count), data, offset=0x1C)
return DLLHeader(
header_size=header_size,
size=header_size,
data_offset=data_offsets[0],
rodata_offset=data_offsets[1],
export_count=export_count,
@ -75,15 +138,20 @@ class DLLHeader:
class DLLRelocationTable:
"""The relocation table (including global offset table)"""
def __init__(self,
exists: bool,
global_offset_table: "list[int]",
gp_relocations: "list[int]",
data_relocations: "list[int]") -> None:
self.exists = exists
self.global_offset_table = global_offset_table
self.gp_relocations = gp_relocations
self.data_relocations = data_relocations
def size(self) -> int:
def get_size(self) -> int:
"""Calculates the size of the relocation table in bytes"""
if not self.exists:
return 0
# +4 to include table section end markers
return len(self.global_offset_table) * 4 + 4 \
+ len(self.gp_relocations) * 4 + 4 \
@ -94,7 +162,7 @@ class DLLRelocationTable:
"""Given a DLL file, parses and returns the relocation section"""
if header.rodata_offset == 0xFFFF_FFFF:
# No relocation table
return DLLRelocationTable([], [], [])
return DLLRelocationTable(False, [], [], [])
offset = header.rodata_offset
global_offset_table: "list[int]" = []
@ -114,7 +182,7 @@ class DLLRelocationTable:
data_relocations.append(value)
offset += 0x4
return DLLRelocationTable(global_offset_table, gp_relocations, data_relocations)
return DLLRelocationTable(True, global_offset_table, gp_relocations, data_relocations)
class DLLInst:
def __init__(self,
@ -123,31 +191,47 @@ class DLLInst:
mnemonic: str,
op_str: str,
is_branch_delay_slot: bool,
label: "str | None") -> None:
has_relocation: bool,
label: "str | None",
ref: "str | None") -> None:
self.original = original
self.address = address
self.mnemonic = mnemonic
self.op_str = op_str
self.is_branch_delay_slot = is_branch_delay_slot
self.has_relocation = has_relocation
self.label = label
self.ref = ref
def is_op_modified(self):
"""Whether the operand string was modified during parsing.
Use original.op_str to get the real value."""
return self.op_str != self.original.op_str
class DLLRelocation:
    """A relocation entry recorded for a single instruction.

    Stores where the relocation applies (``offset``), its relocation type
    name (``type``), the symbol expression it refers to (``expression``) and
    the index of the associated global offset table entry (``got_index``).
    """

    def __init__(self,
                 offset: int,
                 type: str,
                 expression: str,
                 got_index: int) -> None:
        # Plain value holder: every argument is stored unchanged
        self.offset = offset
        self.type = type
        self.expression = expression
        self.got_index = got_index
class DLLFunction:
def __init__(self,
insts: "list[DLLInst]",
address: int,
symbol: str,
is_static: bool,
auto_symbols: "OrderedDict[str, int]") -> None:
auto_symbols: "OrderedDict[str, int]",
relocations: "list[DLLRelocation]") -> None:
self.insts = insts
self.address = address
self.symbol = symbol
self.is_static = is_static
self.auto_symbols = auto_symbols
"""A map of symbols (to their address) automatically generated
while parsing the function."""
self.relocations = relocations
"""All instruction relocations in the function, sorted by their position in the original DLL's GOT."""
def __mnemonic_has_delay_slot(mnemonic: str) -> bool:
return (mnemonic.startswith("b") or mnemonic.startswith("j")) and mnemonic != "break"
@ -156,18 +240,14 @@ def __mnemonic_is_branch(mnemonic: str) -> bool:
return (mnemonic.startswith("b") or mnemonic == "j") and mnemonic != "break"
def parse_functions(data: bytearray,
header: DLLHeader,
dll: DLL,
reloc_table: DLLRelocationTable,
vram: int=0x8000_0000) -> "list[DLLFunction]":
known_symbols: "dict[int, str]"={}) -> "list[DLLFunction]":
"""Parses and returns all functions in the given Dinosaur Planet DLL."""
# Convert exported function addresses to VRAM
ctor_vram = header.ctor_offset + vram
dtor_vram = header.dtor_offset + vram
exports_vram = [ctor_vram, dtor_vram]
exports_vram.extend([o + vram for o in header.export_offsets])
header = dll.header
# Determine where in the file the .text section ends
text_end = min(header.rodata_offset, header.data_offset)
text_end = header.size + dll.get_text_size()
# Vars
new_func = True
@ -175,21 +255,26 @@ def parse_functions(data: bytearray,
# Disassemble
md = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN)
insts = [i for i in md.disasm(data[header.header_size:text_end], vram)]
insts = [i for i in md.disasm(data[header.size:text_end], 0x0)]
# Extract all branches
branches: "list[tuple[int, int]]" = []
branch_dests: "set[int]" = set()
for i in insts:
if __mnemonic_is_branch(i.mnemonic):
branch_target = int(i.op_str.split(" ")[-1], 0)
branches.append((i.address, branch_target))
branch_dests.add(branch_target)
# Extract functions
funcs: "list[DLLFunction]" = []
cur_func_insts: "list[DLLInst]" = []
cur_func_name = ""
cur_func_addr = 0
cur_func_is_static = False
cur_func_has_gp_init = False
cur_func_auto_syms: "OrderedDict[str, int]" = OrderedDict()
cur_func_relocs: "list[DLLRelocation]" = []
cur_func_inst_index = 0
for i in insts:
# Check if this instruction is a branch delay slot of the previous instruction
@ -198,74 +283,127 @@ def parse_functions(data: bytearray,
if new_func and i.mnemonic != "nop" and not is_delay_slot:
# Add previous function
if cur_func_name != "":
cur_func_relocs.sort(key=lambda r: r.got_index)
funcs.append(DLLFunction(
insts=cur_func_insts,
address=cur_func_addr,
symbol=cur_func_name,
is_static=cur_func_is_static,
auto_symbols=cur_func_auto_syms
auto_symbols=cur_func_auto_syms,
relocations=cur_func_relocs
))
# New function, determine name and type
if i.address == ctor_vram:
cur_func_name = "ctor"
elif i.address == dtor_vram:
cur_func_name = "dtor"
if i.address == header.ctor_offset:
cur_func_name = known_symbols.get(i.address, "dll_{}_ctor".format(dll.number))
elif i.address == header.dtor_offset:
cur_func_name = known_symbols.get(i.address, "dll_{}_dtor".format(dll.number))
else:
cur_func_name = ("func_%x" %(i.address))
cur_func_is_static = not i.address in exports_vram
cur_func_name = known_symbols.get(i.address, "dll_{}_func_{:X}".format(dll.number, i.address))
cur_func_is_static = not i.address in header.export_offsets
cur_func_addr = i.address
cur_func_insts = []
cur_func_has_gp_init = False
cur_func_auto_syms = OrderedDict()
cur_func_relocs = []
new_func = False
cur_func_inst_index = 0
# Pre-process operand string
# Pre-process instruction
mnemonic = i.mnemonic
op_str: str = i.op_str
operands = [op.strip() for op in op_str.split(",")]
num_operands = len(operands)
ref: "str | None" = None
has_relocation = False
if __mnemonic_is_branch(i.mnemonic):
if __mnemonic_is_branch(mnemonic):
# Replace branch address with label
branch_target = int(operands[-1], 0)
op_label = (".L%x" %(branch_target))
op_label = ".L{:X}".format(branch_target)
op_str = ", ".join(operands[:-1] + [op_label])
elif cur_func_inst_index < 2 and num_operands > 0 and operands[0] == "$gp":
# Add _gp_disp to $gp initializer stub
# Note: The $gp initializer stub gets modified when compiled,
# we need to convert it back to the original code
cur_func_has_gp_init = True
if cur_func_inst_index == 0:
assert num_operands == 2
op_str = ", ".join(operands[:-1] + [r"%hi(_gp_disp)"])
elif cur_func_inst_index == 1:
assert num_operands == 3
op_str = ", ".join(operands[:-1] + [r"%lo(_gp_disp)"])
# Additionally, change the %lo mnemonic from ori to addiu
assert mnemonic == "ori"
mnemonic = "addiu"
elif cur_func_inst_index == 2 and cur_func_has_gp_init:
# Change $gp initializer nop to 'addu gp,gp,t9'
assert mnemonic == "nop"
mnemonic = "addu"
op_str = "$gp, $gp, $t9"
elif num_operands > 0 and operands[-1].endswith("($gp)"):
# Replace $gp references with %got (if not referencing a section)
# Replace offset($gp) with %got(symbol)($gp)
gp_mem_op = operands[-1]
offset = 0 if gp_mem_op == "($gp)" else int(gp_mem_op[:-5], 0)
# TODO: can we include section symbols?
# Exclude the first four GOT entries (which are just sections)
if offset >= 16:
symbol_addr = reloc_table.global_offset_table[offset // 4]
symbol = ("D_%X" %(symbol_addr))
op_str = ", ".join(operands[:-1] + [rf"%got({symbol})"])
# Make symbol
got_index = offset // 4
symbol_addr = reloc_table.global_offset_table[got_index]
symbol = known_symbols.get(symbol_addr, "GOT_{:X}".format(symbol_addr))
cur_func_auto_syms[symbol] = symbol_addr
ref = symbol
# Modify operand
op_str = ", ".join(operands[:-1] + [rf"%got({symbol})($gp)"])
# Add relocation entry
has_relocation = True
cur_func_relocs.append(DLLRelocation(
offset=i.address,
type="R_MIPS_GOT16",
expression=symbol,
got_index=got_index
))
elif mnemonic == "move":
# Replace with the actual instruction
# TODO: make constants for some of these
opcode = i.bytes[3] & 0b00111111
op_str += ", $zero"
if opcode == 37:
mnemonic = "or"
elif opcode == 45:
mnemonic = "daddu"
elif opcode == 33:
mnemonic = "addu"
else:
raise NotImplementedError(f"INVALID INSTRUCTION {i} {opcode}")
elif mnemonic in ["mtc0", "mfc0", "mtc2", "mfc2"]:
# TODO: what is this doing?
rd = (i.bytes[2] & 0xF8) >> 3
op_str = op_str.split(" ")[0] + " $" + str(rd)
# Determine whether this instruction address is branched to
label: "str | None" = None
for branch in branches:
if branch[1] == i.address:
label = (".L%x" %(i.address))
break
if i.address in branch_dests:
label = ".L{:X}".format(i.address)
# Add instruction
cur_func_insts.append(DLLInst(
original=i,
address=i.address,
mnemonic=i.mnemonic,
mnemonic=mnemonic,
op_str=op_str,
is_branch_delay_slot=is_delay_slot,
label=label
label=label,
ref=ref,
has_relocation=has_relocation
))
# Check for function end
if i.mnemonic == "jr" and i.op_str == "$ra":
# TODO: this is very slow for large functions
if mnemonic == "jr" and i.op_str == "$ra":
new_func = True
for branch in branches:
if (branch[0] > i.address and branch[1] <= i.address) or (branch[0] <= i.address and branch[1] > i.address):
@ -274,16 +412,19 @@ def parse_functions(data: bytearray,
break
# Track last instruction
last_mnemonic = i.mnemonic
last_mnemonic = mnemonic
cur_func_inst_index += 1
# Add final function
if cur_func_name != "":
cur_func_relocs.sort(key=lambda r: r.got_index)
funcs.append(DLLFunction(
insts=cur_func_insts,
address=cur_func_addr,
symbol=cur_func_name,
is_static=cur_func_is_static,
auto_symbols=cur_func_auto_syms
auto_symbols=cur_func_auto_syms,
relocations=cur_func_relocs
))
return funcs

View File

@ -0,0 +1,18 @@
import struct
class DLLImportsTab:
    """In-memory form of the full DLLIMPORTSTAB file."""

    def __init__(self, imports: "list[int]") -> None:
        # A mapping of import index to base executable pointer. Each pointer
        # refers to non-DLL VRAM (e.g. >=0x80000000)
        self.imports = imports

    @staticmethod
    def parse(data: bytearray) -> "DLLImportsTab":
        """Decode *data* as consecutive big-endian 32-bit pointers.

        Raises struct.error when the buffer ends in the middle of a word.
        """
        pointers: "list[int]" = [
            struct.unpack_from(">I", data, offset=position)[0]
            for position in range(0, len(data), 4)
        ]
        return DLLImportsTab(pointers)

61
tools/dino/dll_tab.py Normal file
View File

@ -0,0 +1,61 @@
import struct
class DLLTab:
    """The full DLLS.tab describing the location of each DLL."""

    def __init__(self,
                 bank0: "tuple[int, int]",
                 bank1: "tuple[int, int]",
                 bank2: "tuple[int, int]",
                 bank3: "tuple[int, int]",
                 entries: "list[DLLTabEntry]") -> None:
        # Each bank is a (first, last) pair of entry indices
        self.bank0 = bank0
        self.bank1 = bank1
        self.bank2 = bank2
        self.bank3 = bank3
        # Same banks as a list, in the same order as bank_names
        self.banks = [bank0, bank1, bank2, bank3]
        self.bank_names = ["engine", "modgfx", "projgfx", "objects"]
        self.entries = entries

    @staticmethod
    def parse(data: bytearray):
        """Parse the raw bytes of DLLS.tab into a DLLTab.

        The file starts with four big-endian 32-bit header words, followed
        by 8-byte entries of (start offset, BSS size).
        """
        # Header words; the third one is not used when building the banks
        word0, word1, _, word3 = struct.unpack_from(">IIII", data, offset=0)

        entries: "list[DLLTabEntry]" = []
        total_size = len(data)
        cursor = 4 * 4  # entries begin right after the header
        while cursor < total_size:
            # Note: The end offset is actually the start offset of the next entry.
            # The final entry is not actually a DLL, but its start offset is used
            # as the end for the second to last entry.
            start_offset, bss_size, end_offset = struct.unpack_from(">III", data, cursor)

            # A "next start" of 0xFFFFFFFF means the terminator was reached
            if end_offset == 0xFFFF_FFFF:
                break

            entries.append(DLLTabEntry(start_offset, end_offset, bss_size))
            cursor += 8

        last_entry = len(entries) - 1
        return DLLTab(
            bank0=(0, word0),
            bank1=(word0 + 1, word1),
            bank2=(word1 + 1, word3),
            bank3=(word3 + 1, last_entry),
            entries=entries
        )
class DLLTabEntry:
    """An individual DLLS.tab entry."""

    def __init__(self, start_offset: int, end_offset: int, bss_size: int) -> None:
        # Byte range covered by the entry
        self.start_offset = start_offset
        self.end_offset = end_offset
        # Derived length of that range
        self.size = end_offset - start_offset
        # BSS size stored alongside the entry's start offset in the table
        self.bss_size = bss_size

View File

@ -21,11 +21,11 @@ class dino_dll():
with open(name, "rb") as file:
data = bytearray(file.read())
bss = struct.unpack_from(">I", data, 0x18)[0]
bss = struct.unpack_from(">I", data, 0x18)[0] # HACK: uses unused field to get BSS size from elf2dll
struct.pack_into(">I", data, 0x18, 0)
fbin.write(data)
offset = (index * 8) + (4 * 4)
offset = (index * 8) + (4 * 4) # (index * entry_size) + tab_header_size
if bss == 0: bss = struct.unpack_from(">2I", ftab, offset)[1]
if bss == 0xFFFFFFFF: bss = 0
@ -34,8 +34,9 @@ class dino_dll():
index += 1
# Write "fake" final entry
offset = (index * 8) + (4 * 4)
struct.pack_into(">2I", ftab, offset, pos, bss)
struct.pack_into(">2I", ftab, offset, pos, 0x0)
open(tab_out, "wb").write(ftab)
fbin.close()
@ -58,6 +59,7 @@ class dino_dll():
next = data[2]
size = next - offset
# Note: final entry before 0xFFFFFFFF is not an actual DLL
if next == 0xFFFFFFFF: break
name = "%d.dll" % i

View File

@ -1,140 +0,0 @@
#!/usr/bin/env python3
import argparse
from io import BufferedReader
import os
from pathlib import Path
from dino.dll import DLL, DLLHeader, DLLRelocationTable, DLLFunction
def create_exports_s(path: Path, functions: "list[DLLFunction]"):
with open(path, "w", encoding="utf-8") as exports_s:
exports_s.write(".option pic2\n")
exports_s.write(".section \".exports\"\n")
exports_s.write(".global _exports\n")
exports_s.write("_exports:\n")
exports_s.write("\n")
for func in functions:
exports_s.write(f".dword {func.symbol}\n")
def create_c_stub(c_path: Path, asm_path: Path, functions: "list[DLLFunction]"):
with open(c_path, "w", encoding="utf-8") as c_file:
c_file.write("#include <PR/ultratypes.h>\n")
for func in functions:
c_file.write("\n")
c_file.write(f'#pragma GLOBAL_ASM("{asm_path}/{func.symbol}.s")\n')
def create_syms_txt(syms_path: Path, dll: DLL):
assert dll.functions is not None
with open(syms_path, "w", encoding="utf-8") as syms_file:
syms_added = 0
for func in dll.functions:
for name, value in func.auto_symbols.items():
syms_file.write("%s = 0x%X;\n" %(name, value))
syms_added += 1
assert syms_added == len(dll.reloc_table.global_offset_table) - 4
def extract_asm(dir: Path, dll: DLL):
assert dll.functions is not None
functions = dll.functions
for func in functions:
s_path = dir.joinpath(f"{func.symbol}.s")
with open(s_path, "w", encoding="utf-8") as s_file:
s_file.write("glabel %s\n" %(func.symbol))
for i in func.insts:
if i.label is not None:
s_file.write("%s:\n" %(i.label))
s_file.write(
"/* %04X %X %s */ %s%s%s\n"
%(
(i.address + dll.header.header_size) - 0x8000_0000,
i.address,
i.original.bytes.hex().upper(),
' ' if i.is_branch_delay_slot else '',
i.mnemonic.ljust(10 if i.is_branch_delay_slot else 11),
i.op_str))
def extract_data(dir: Path, dll: DLL, data: bytearray):
# Calculate section sizes
# TODO: This all assumes that the first four entires of the GOT are .text, .rodata, .data, and .bss!
# Can we verify this?
rodata_size = dll.reloc_table.global_offset_table[2] - dll.reloc_table.global_offset_table[1]
data_size = dll.reloc_table.global_offset_table[3] - dll.reloc_table.global_offset_table[2]
# Calculate section starts/ends
rodata_start = dll.header.rodata_offset + dll.reloc_table.size() # exclude relocation table
rodata_end = rodata_start + rodata_size
data_start = dll.header.data_offset
data_end = data_start + data_size
bss_start = data_end
bss_end = len(data)
# Write .rodata
rodata_path = dir.joinpath(f"{dll.number}.rodata.bin")
with open(rodata_path, "wb") as rodata_file:
rodata_file.write(data[rodata_start:rodata_end])
# Write .data
data_path = dir.joinpath(f"{dll.number}.data.bin")
with open(data_path, "wb") as data_file:
data_file.write(data[data_start:data_end])
# Write .bss
bss_path = dir.joinpath(f"{dll.number}.bss.bin")
with open(bss_path, "wb") as bss_file:
bss_file.write(data[bss_start:bss_end])
def extract(dll: DLL, data: bytearray, src_path: Path, asm_path: Path):
assert dll.functions is not None
# Create directories if they don't exist
os.makedirs(src_path, exist_ok=True)
os.makedirs(asm_path, exist_ok=True)
# Extract assembly
extract_asm(asm_path, dll)
# Extract .data and .rodata
# TODO: where should these files go?
extract_data(src_path, dll, data)
# Create exports.s if it doesn't exist
exports_s_path = src_path.joinpath("exports.s")
if not exports_s_path.exists():
create_exports_s(exports_s_path, dll.functions)
# Create syms.txt if it doens't exist
syms_txt_path = src_path.joinpath("syms.txt")
if not syms_txt_path.exists():
create_syms_txt(syms_txt_path, dll)
# Create <dll>.c stub if it doesn't exist
c_file_path = src_path.joinpath(f"{dll.number}.c")
if not c_file_path.exists():
create_c_stub(c_file_path, asm_path, dll.functions)
def main():
parser = argparse.ArgumentParser(description="Extract assembly from a Dinosaur Planet DLL and set up a directory for it that is ready for recompilation.")
parser.add_argument("dll", type=argparse.FileType("rb"), help="The Dinosaur Planet .dll file to read.")
parser.add_argument("--src", type=str, help="A directory to create source file stubs in (e.g. ./src/dlls/12).", required=True)
parser.add_argument("--asm", type=str, help="A directory extract assembly code into (e.g. ./asm/nonmatchings/dlls/12).", required=True)
args = parser.parse_args()
with args.dll as dll_file:
dll_file: BufferedReader
# Read DLL
number = Path(dll_file.name).name.split(".")[0]
data = bytearray(dll_file.read())
dll = DLL.parse(data, number)
# Extract
extract(dll, data, Path(args.src), Path(args.asm))
if __name__ == "__main__":
main()

357
tools/dll_split.py Executable file
View File

@ -0,0 +1,357 @@
#!/usr/bin/env python3
# A mini version of splat specifically for Dinosaur Planet DLLs
# dll_split.py is responsible for:
# - Extracting assembly and data from Dinosaur Planet DLLs
# - Extracting in such a way that is ready for the decomp project's build system
# - Creating .c, exports.s, and syms.txt stubs
import argparse
from genericpath import isdir
import glob
import os
from pathlib import Path
import re
import struct
from timeit import default_timer as timer
import yaml
from dino.dll import DLL, DLLHeader, DLLRelocation, DLLRelocationTable, DLLFunction
from dino.dll_tab import DLLTab
ASM_PATH = Path("asm")
BIN_PATH = Path("bin")
SRC_PATH = Path("src")
global_asm_pattern = re.compile(r"#pragma GLOBAL_ASM\(\"asm\/nonmatchings\/dlls\/[0-9]+\/(.+)\.s\"\)")
symbol_pattern = re.compile(r"(\S+)\s*=\s*(\S+);")
class DLLSplitter:
    """Splits Dinosaur Planet DLLs into assembly, data and source stubs.

    For every DLL that has a directory under ``src/dlls`` (and a
    ``<number>.yaml`` config inside it), the DLL binary is parsed and its
    sections are written out as assembly. Missing ``exports.s``, ``syms.txt``
    and ``<number>.c`` stubs are created; existing ones are left alone.
    """

    def __init__(self, verbose: bool) -> None:
        self.verbose = verbose

    def extract_dlls(self, only_dlls: "list[str]"):
        """Extract all DLLs with a src directory, optionally filtered.

        only_dlls: DLL numbers (directory names) to extract. When empty,
        every DLL directory found is processed.
        """
        # Load DLLS.tab
        if self.verbose:
            print("Loading DLLS.tab...")

        tab_path = BIN_PATH.joinpath("assets/DLLS_tab.bin")
        with open(tab_path, "rb") as tab_file:
            tab = DLLTab.parse(bytearray(tab_file.read()))

        # Extract each DLL that has a src directory
        dll_dirs = [Path(entry) for entry in glob.glob(f"{SRC_PATH}/dlls/*") if isdir(entry)]

        for dll_dir in dll_dirs:
            number = dll_dir.name

            # Skip DLL if not in list
            if only_dlls and number not in only_dlls:
                continue

            # Load DLL config
            dll_config_path = dll_dir.joinpath(f"{number}.yaml")
            if not dll_config_path.exists():
                print(f"WARN: Missing {dll_config_path}!")
                continue

            with open(dll_config_path, "r") as file:
                # An empty YAML document loads as None; treat it as "no options"
                dll_config = yaml.safe_load(file) or {}

            link_original_rodata = dll_config.get("link_original_rodata") or False
            link_original_data = dll_config.get("link_original_data") or False
            link_original_bss = dll_config.get("link_original_bss") or False

            # Load known symbols for DLL
            syms_txt_path = SRC_PATH.joinpath(f"dlls/{number}/syms.txt")
            known_symbols = self.__get_existing_symbols(syms_txt_path)

            # Load DLL
            dll_path = BIN_PATH.joinpath(f"assets/dlls/{number}.dll")
            if not dll_path.exists():
                print(f"WARN: No such DLL {dll_path}!")
                continue

            with open(dll_path, "rb") as dll_file:
                if self.verbose:
                    print("[{}] Parsing...".format(number))

                start = timer()
                data = bytearray(dll_file.read())
                dll = DLL.parse(data, number, known_symbols=known_symbols)
                # DLL numbers are 1-based, tab entries are 0-based
                bss_size = tab.entries[int(number) - 1].bss_size
                end = timer()

                if self.verbose:
                    print("[{}] Parsing complete (took {:.3} seconds).".format(number, end - start))

            # Extract DLL
            if self.verbose:
                print("[{}] Extracting...".format(number))

            start = timer()
            self.extract_dll(dll, data,
                bss_size=bss_size,
                skip_rodata=not link_original_rodata,
                skip_data=not link_original_data,
                skip_bss=not link_original_bss,
            )
            end = timer()

            if self.verbose:
                print("[{}] Extracting complete (took {:.3} seconds).".format(number, end - start))

    def extract_dll(self, dll: "DLL",
                    data: bytearray,
                    bss_size: int,
                    skip_data: bool,
                    skip_rodata: bool,
                    skip_bss: bool):
        """Write the assembly/data files and missing stubs for one parsed DLL.

        dll: the parsed DLL (its functions must have been parsed).
        data: the raw bytes the DLL was parsed from.
        bss_size: size of the DLL's .bss section.
        skip_data / skip_rodata / skip_bss: when true, the respective section
        is not emitted as assembly.
        """
        assert dll.functions is not None

        # Determine paths
        src_path = SRC_PATH.joinpath(f"dlls/{dll.number}")
        asm_path = ASM_PATH.joinpath(f"nonmatchings/dlls/{dll.number}")
        asm_data_path = ASM_PATH.joinpath(f"nonmatchings/dlls/{dll.number}/data")

        # Determine what needs to be extracted
        c_file_path = src_path.joinpath(f"{dll.number}.c")
        emit_funcs = self.__get_functions_to_extract(c_file_path, dll.number)

        rodata_size = dll.get_rodata_size()
        data_size = dll.get_data_size()

        emit_rodata = not skip_rodata and dll.has_rodata() and rodata_size > 0
        emit_data = not skip_data and dll.has_data() and data_size > 0
        emit_bss = not skip_bss and bss_size > 0

        # None means "emit every function"; an empty list means "emit none"
        emit_text = emit_funcs is None or len(emit_funcs) > 0

        # Create directories if necessary
        if emit_text:
            os.makedirs(asm_path, exist_ok=True)
        if emit_rodata or emit_data or emit_bss:
            os.makedirs(asm_data_path, exist_ok=True)

        # Extract .text
        if emit_text:
            self.__extract_text_asm(asm_path, dll, emit_funcs)

        # Extract .rodata
        if emit_rodata:
            rodata_start = dll.header.rodata_offset + dll.reloc_table.get_size()  # exclude relocation tables
            rodata_end = rodata_start + rodata_size
            self.__extract_rodata_asm(asm_data_path, dll, data[rodata_start:rodata_end])

        # Extract .data
        if emit_data:
            data_start = dll.header.data_offset
            data_end = data_start + data_size
            self.__extract_data_asm(asm_data_path, dll, data[data_start:data_end])

        # Extract .bss
        if emit_bss:
            self.__extract_bss_asm(asm_data_path, dll, bss_size)

        # Create exports.s if it doesn't exist
        exports_s_path = src_path.joinpath("exports.s")
        if not exports_s_path.exists():
            self.__create_exports_s(exports_s_path, dll)

        # Create syms.txt if it doesn't exist
        syms_txt_path = src_path.joinpath("syms.txt")
        if not syms_txt_path.exists():
            self.__create_syms_txt(syms_txt_path, dll)

        # Create <dll>.c stub if it doesn't exist
        if not c_file_path.exists():
            self.__create_c_stub(c_file_path, asm_path, dll.functions)

    def __create_exports_s(self, path: Path, dll: "DLL"):
        """Write an exports.s listing the constructor, destructor and exports."""
        assert dll.functions is not None

        funcs_by_address: "dict[int, str]" = {}
        for func in dll.functions:
            funcs_by_address[func.address] = func.symbol

        # Use the symbols the functions were actually parsed with so the
        # export table references names that exist in the emitted assembly.
        # Fall back to the plain names if no function starts at that offset.
        ctor_symbol = funcs_by_address.get(dll.header.ctor_offset, "ctor")
        dtor_symbol = funcs_by_address.get(dll.header.dtor_offset, "dtor")

        with open(path, "w", encoding="utf-8") as exports_s:
            exports_s.write(".option pic2\n")
            exports_s.write(".section \".exports\"\n")
            exports_s.write(".global _exports\n")
            exports_s.write("_exports:\n")
            exports_s.write("\n")

            exports_s.write(f".dword {ctor_symbol}\n")
            exports_s.write(f".dword {dtor_symbol}\n")
            exports_s.write("\n")

            for offset in dll.header.export_offsets:
                func_symbol = funcs_by_address[offset]
                exports_s.write(f".dword {func_symbol}\n")

    def __create_c_stub(self, c_path: Path, asm_path: Path, functions: "list[DLLFunction]"):
        """Write a .c stub with one GLOBAL_ASM pragma per function."""
        with open(c_path, "w", encoding="utf-8") as c_file:
            c_file.write("#include <PR/ultratypes.h>\n")

            for func in functions:
                c_file.write("\n")
                c_file.write(f'#pragma GLOBAL_ASM("{asm_path}/{func.symbol}.s")\n')

    def __create_syms_txt(self, syms_path: Path, dll: "DLL"):
        """Write syms.txt with all function symbols and auto-generated symbols."""
        assert dll.functions is not None

        with open(syms_path, "w", encoding="utf-8") as syms_file:
            addrs_found: "set[int]" = set()
            var_syms_added = 0

            # Write function symbols
            for func in dll.functions:
                syms_file.write("{} = 0x{:X};\n".format(func.symbol, func.address))

            # Write global variable symbols (each address only once, since
            # several functions may reference the same symbol)
            for func in dll.functions:
                for name, value in func.auto_symbols.items():
                    if value not in addrs_found:
                        addrs_found.add(value)
                        syms_file.write("{} = 0x{:X};\n".format(name, value))
                        var_syms_added += 1

            # The first four GOT entries are skipped when symbols are generated
            assert var_syms_added == max(0, len(dll.reloc_table.global_offset_table) - 4)

    def __extract_text_asm(self, dir: Path, dll: "DLL", funcs: "list[str] | None"):
        """Write one .s file per function into *dir*.

        funcs: symbols to emit, or None to emit every function.
        """
        assert dll.functions is not None

        # Build the lookup set once instead of scanning the list per function
        wanted = None if funcs is None else set(funcs)

        for func in dll.functions:
            if wanted is not None and func.symbol not in wanted:
                continue

            s_path = dir.joinpath(f"{func.symbol}.s")
            with open(s_path, "w", encoding="utf-8") as s_file:
                # Write relocations
                for reloc in func.relocations:
                    s_file.write(".reloc {}+0x{:X}, {}, {}-0x{:X}\n"
                        .format(func.symbol, reloc.offset - func.address, reloc.type, reloc.expression, reloc.got_index * 4))

                if len(func.relocations) > 0:
                    s_file.write("\n")

                # Write instructions
                s_file.write(f"glabel {func.symbol}\n")
                for i in func.insts:
                    if i.label is not None:
                        s_file.write(f"{i.label}:\n")

                    rom_addr = i.address + dll.header.size
                    ram_addr = i.address
                    inst_bytes = i.original.bytes.hex().upper()
                    # Delay slot instructions are indented by one space
                    mnemonic = (' ' + i.mnemonic) if i.is_branch_delay_slot else i.mnemonic
                    # Note: Use original operand string if the instruction has a relocation since we're
                    # specifying relocations with separate directives (need to emit the original $gp addend
                    # rather than something like %got to avoid duplicate relocation entries)
                    op_str = i.original.op_str if i.has_relocation else i.op_str
                    ref = (f' /* ref: {i.ref} */') if i.ref is not None else ''

                    s_file.write("/* {:0>4X} {:0>6X} {} */ {:<11}{}{}\n"
                        .format(rom_addr, ram_addr, inst_bytes, mnemonic, op_str, ref))

    def __extract_rodata_asm(self, dir: Path, dll: "DLL", data: bytearray):
        """Write the .rodata bytes in *data* as 4-byte words to <number>.rodata.s."""
        rodata_path = dir.joinpath(f"{dll.number}.rodata.s")

        with open(rodata_path, "w", encoding="utf-8") as rodata_file:
            # Set .rodata section
            rodata_file.write(".section .rodata, \"a\"\n")

            # Write data
            rodata_rom_offset = dll.header.rodata_offset + dll.reloc_table.get_size()
            rodata_ram_offset = dll.header.rodata_offset - dll.header.size
            for i in range(0, len(data), 4):
                word = struct.unpack_from(">I", data, offset=i)[0]

                rom_addr = rodata_rom_offset + i
                ram_addr = rodata_ram_offset + i

                rodata_file.write("/* {:0>4X} {:0>6X} */ .4byte 0x{:X}\n"
                    .format(rom_addr, ram_addr, word))

    def __extract_data_asm(self, dir: Path, dll: "DLL", data: bytearray):
        """Write the .data bytes in *data* (plus relocations) to <number>.data.s."""
        data_path = dir.joinpath(f"{dll.number}.data.s")

        with open(data_path, "w", encoding="utf-8") as data_file:
            # Set .data section
            data_file.write(".data\n")

            # Write relocations
            for offset in dll.reloc_table.data_relocations:
                data_file.write(".reloc 0x{:X}, \"R_MIPS_32\", .data\n".format(offset))

            # Write data
            data_rom_offset = dll.header.data_offset
            data_ram_offset = dll.header.data_offset - dll.header.size - dll.reloc_table.get_size()
            for i in range(0, len(data), 4):
                word = struct.unpack_from(">I", data, offset=i)[0]

                rom_addr = data_rom_offset + i
                ram_addr = data_ram_offset + i

                data_file.write("/* {:0>4X} {:0>6X} */ .4byte 0x{:X}\n"
                    .format(rom_addr, ram_addr, word))

    def __extract_bss_asm(self, dir: Path, dll: "DLL", bss_size: int):
        """Write a <number>.bss.s that reserves space for the .bss section."""
        assert bss_size > 0

        bss_path = dir.joinpath(f"{dll.number}.bss.s")

        with open(bss_path, "w", encoding="utf-8") as bss_file:
            # Set .bss section
            bss_file.write(".bss\n")

            # Write .bss size
            # NOTE(review): confirm the element size the assembler uses for
            # '.ds.s' so that this reserves exactly bss_size bytes.
            bss_file.write(".ds.s 0x{:X}, 0\n".format(bss_size))

    def __get_functions_to_extract(self, path: Path, dll_number: str) -> "list[str] | None":
        """Returns None if all functions should be extracted (i.e. there is no .c file to derive the list from)"""
        if not path.exists():
            return None

        emit_funcs: "list[str]" = []

        with open(path, "r", encoding="utf-8") as c_file:
            for line in c_file:
                # Every GLOBAL_ASM pragma names a function that is still assembly
                emit_funcs.extend(global_asm_pattern.findall(line.strip()))

        return emit_funcs

    def __get_existing_symbols(self, path: Path) -> "dict[int, str]":
        """Read `name = value;` pairs from syms.txt into an address -> name map.

        Returns an empty map if the file doesn't exist.
        """
        if not path.exists():
            return {}

        symbols: "dict[int, str]" = {}

        with open(path, "r", encoding="utf-8") as syms_file:
            for line in syms_file:
                for name, addr_str in symbol_pattern.findall(line.strip()):
                    if addr_str.lower().startswith("0x"):
                        addr = int(addr_str, base=16)
                    else:
                        addr = int(addr_str)
                    symbols[addr] = name

        return symbols
def main():
    """Command-line entry point: parse options, then extract the requested DLLs."""
    cli = argparse.ArgumentParser(description="Extract assembly and data from Dinosaur Planet DLLs and stub out an environment for recompiling each.")
    cli.add_argument("--base-dir", type=str, dest="base_dir", help="The root of the project (default=..).", default="..")
    cli.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging.", default=False)
    cli.add_argument("-q", "--quiet", action="store_true", help="Don't display informational messages.", default=False)
    cli.add_argument("dlls", nargs="*", action="extend", help="The numbers of each DLL to extract. Don't specify any to extract all that have src directories.")
    options = cli.parse_args()

    # All paths used by the splitter are relative, so work from the base directory
    base_dir = Path(options.base_dir).resolve()
    os.chdir(base_dir)

    # Run the extraction and time it
    began = timer()
    DLLSplitter(verbose=options.verbose).extract_dlls(options.dlls)
    elapsed = timer() - began

    if options.quiet:
        return
    print("DLL extraction complete (took {:.3} seconds).".format(elapsed))
if __name__ == "__main__":
main()

View File

@ -11,52 +11,73 @@ def dump_header(dll: DLL):
print("===================")
header = dll.header
print(f"Header size: {hex(header.header_size)} ({header.header_size} bytes)")
text_size = dll.get_text_size()
rodata_size = dll.get_rodata_size()
rodata_rom_size = rodata_size + dll.reloc_table.get_size()
data_size = dll.get_data_size()
dll_size = dll.get_rom_size()
print(f"DLL ROM size: {hex(dll_size)} ({dll_size} bytes, unaligned)")
print(f"Header size: {hex(header.size)} ({header.size} bytes)")
print(f"TEXT size: {hex(text_size)} ({text_size} bytes)")
print(f"DATA size: {hex(data_size)} ({data_size} bytes)")
print(f"RODATA size: {hex(rodata_size)} ({rodata_size} bytes)")
print(f"RODATA ROM size: {hex(rodata_rom_size)} ({rodata_rom_size} bytes)")
print()
print(f"TEXT offset: {hex(header.size)}")
print(f"DATA offset: {hex(header.data_offset)}{' (not present)' if header.data_offset == 0xFFFF_FFFF else ''}")
print(f"RODATA offset: {hex(header.rodata_offset)}{' (not present)' if header.rodata_offset == 0xFFFF_FFFF else ''}")
print(f"Export count: {hex(header.export_count)} ({header.export_count})")
print(f"Constructor offset: {hex(header.ctor_offset)}")
print(f"Destructor offset: {hex(header.dtor_offset)}")
print()
print("Export offsets:")
for offset in header.export_offsets:
print(f" {hex(offset)}")
def dump_relocation_table(dll: DLL):
print("RELOCATION TABLE")
print("RELOCATIONS")
print("===================")
print("Global offset table:")
table = dll.reloc_table
got_i = 0
for offset in table.global_offset_table:
if got_i == 0:
print(f" {hex(offset).ljust(12)}(.text)")
elif got_i == 1:
print(f" {hex(offset).ljust(12)}(.rodata)")
elif got_i == 2:
print(f" {hex(offset).ljust(12)}(.data)")
elif got_i == 3:
print(f" {hex(offset).ljust(12)}(.bss)")
else:
if dll.reloc_table.exists:
reloc_size = dll.reloc_table.get_size()
print(f"Size: {hex(reloc_size)} ({reloc_size} bytes)")
print()
print("Global offset table:")
table = dll.reloc_table
got_i = 0
for offset in table.global_offset_table:
if got_i == 0:
print(f" {hex(offset).ljust(12)}(.text)")
elif got_i == 1:
print(f" {hex(offset).ljust(12)}(.rodata)")
elif got_i == 2:
print(f" {hex(offset).ljust(12)}(.data)")
elif got_i == 3:
print(f" {hex(offset).ljust(12)}(.bss)")
else:
print(f" {hex(offset)}")
got_i += 1
if len(table.global_offset_table) == 0:
print(" (none)")
print("$gp relocations:")
for offset in table.gp_relocations:
print(f" {hex(offset)}")
got_i += 1
if len(table.global_offset_table) == 0:
print(" (none)")
print("$gp relocations:")
for offset in table.gp_relocations:
print(f" {hex(offset)}")
if len(table.gp_relocations) == 0:
print(" (none)")
print("DATA relocations:")
for offset in table.data_relocations:
print(f" {hex(offset)}")
if len(table.gp_relocations) == 0:
print(" (none)")
print("DATA relocations:")
for offset in table.data_relocations:
print(f" {hex(offset)}")
if len(table.data_relocations) == 0:
print(" (none)")
if len(table.data_relocations) == 0:
print(" (none)")
else:
print("No GOT or relocation tables present.")
def dump_text_disassembly(dll: DLL,
only_symbols: "list[str] | None",
@ -77,27 +98,19 @@ def dump_text_disassembly(dll: DLL,
else:
first = False
print("glabel %s%s" %(func.symbol, ' # (static)' if func.is_static else ''))
print("glabel {}{}".format(func.symbol, " (static)" if func.is_static else ""))
for i in func.insts:
if i.label is not None:
print("%s:" %(i.label))
print(f"{i.label}:")
mnemonic = (' ' + i.mnemonic) if i.is_branch_delay_slot else i.mnemonic
if orig_operands and i.is_op_modified():
print(
"0x%x:\t%s%s%s%s" %(
i.address,
' ' if i.is_branch_delay_slot else '',
i.mnemonic.ljust(10 if i.is_branch_delay_slot else 11),
i.op_str.ljust(24),
f' # (original: {i.original.op_str})'))
print("0x{:x}:\t{:<11}{:<24} # (original: {})"
.format(i.address, mnemonic, i.op_str, i.original.op_str))
else:
print(
"0x%x:\t%s%s%s" %(
i.address,
' ' if i.is_branch_delay_slot else '',
i.mnemonic.ljust(10 if i.is_branch_delay_slot else 11),
i.op_str))
print("0x{:x}:\t{:<11}{:<24}".format(i.address, mnemonic, i.op_str))
if only_symbols is not None and first:
print("(no matching symbols found)")
@ -108,7 +121,7 @@ def main():
parser.add_argument("-x", "--header", action="store_true", help="Display the contents of the header.")
parser.add_argument("-r", "--reloc", action="store_true", help="Display the contents of the relocation table.")
parser.add_argument("-d", "--disassemble", action="store_true", help="Display assembler contents of the executable section.")
parser.add_argument("--symbols", action="extend", nargs="+", type=str, help="When disassembling, only show these symbols.")
parser.add_argument("--symbols", action="append", type=str, help="When disassembling, only show these symbols.")
parser.add_argument("--orig", action="store_true", help="Also show unmodified instruction operands.")
args = parser.parse_args()

65
tools/dllimports.py Executable file
View File

@ -0,0 +1,65 @@
#!/usr/bin/env python3
import argparse
from io import BufferedReader
from dino.dll_imports_tab import DLLImportsTab
def dump_all(imports: DLLImportsTab):
    """Print every import pointer together with its 1-based index.

    The numbering starts at 1 so that it agrees with ``lookup_pointer`` and
    ``lookup_index``, which both treat import indexes as 1-based; an index
    shown here can be passed straight back in as a query.
    """
    for index, ptr in enumerate(imports.imports, start=1):
        print("{}: 0x{:X}".format(index, ptr))
def lookup_pointer(imports: DLLImportsTab, index: int):
    """Print the pointer stored at the given 1-based import index.

    Values above ``0x8000_0000`` have bit 31 stripped before the lookup.
    An index outside ``[1, len(imports.imports)]`` is reported on stdout
    rather than raising.
    """
    if index > 0x8000_0000:
        # Keep only the low 31 bits so the table index remains.
        index &= 0x7FFF_FFFF

    count = len(imports.imports)
    # Indexes are 1-based, so the last valid index equals the table length.
    if 1 <= index <= count:
        print("{}: 0x{:X}".format(index, imports.imports[index - 1]))
    else:
        print(f"Index {index} out of range [1, {count}]")
def lookup_index(imports: DLLImportsTab, pointer: int):
    """Print the 1-based index of the first import equal to ``pointer``.

    A "Pointer not found" line is printed when no entry matches.
    """
    try:
        position = imports.imports.index(pointer)
    except ValueError:
        print(f"Pointer not found: 0x{pointer:X}")
    else:
        # list.index() is 0-based; reported indexes are 1-based.
        print(f"{position + 1}: 0x{pointer:X}")
def parse_int(str: str):
    """Convert a command-line number to ``int``.

    Text starting with ``0x``/``0X`` is read as hexadecimal; anything else is
    read as decimal. Invalid text raises ``ValueError`` from ``int``.
    """
    is_hex = str[:2] in ("0x", "0X")
    return int(str[2:], base=16) if is_hex else int(str)
def main():
    """Command-line entry point: parse the arguments and run the requested queries.

    ``--all`` dumps the whole table and takes precedence over the individual
    ``--index`` / ``--pointer`` queries.
    """
    parser = argparse.ArgumentParser(description="Query the Dinosaur Planet DLLSIMPORTTAB file.")
    parser.add_argument("imports", type=argparse.FileType("rb"), help="The DLLSIMPORTTAB file.")
    parser.add_argument("-a", "--all", action="store_true", help="Display all DLL import mappings.", default=False)
    parser.add_argument("-p", "--pointer", action="append", dest="pointers", help="Find the index of the given pointer.")
    parser.add_argument("-i", "--index", action="append", dest="indexes", help="Lookup the pointer at the given index.")
    args = parser.parse_args()

    # Without any query option there is nothing to show; print usage instead.
    if not (args.all or args.pointers or args.indexes):
        print("At least one query option must be provided.")
        parser.print_help()
        return

    with args.imports as imports_file:
        imports_file: BufferedReader
        imports = DLLImportsTab.parse(bytearray(imports_file.read()))

    if args.all:
        dump_all(imports)
        return

    # Index queries are answered before pointer queries.
    for index in args.indexes or []:
        lookup_pointer(imports, parse_int(index))
    for pointer in args.pointers or []:
        lookup_index(imports, parse_int(pointer))


if __name__ == "__main__":
    main()

81
tools/dlltab.py Executable file
View File

@ -0,0 +1,81 @@
#!/usr/bin/env python3
import argparse
from io import BufferedReader
from dino.dll_tab import DLLTab, DLLTabEntry
def dump_header(tab: DLLTab):
    """Print the tab header: one row per bank, then the total DLL count.

    Each row shows the bank index, the bank name, the bank's first and last
    DLL numbers (the stored bounds plus one) and how many DLLs that inclusive
    range covers.
    """
    print("HEADER")
    print("===================")
    # Title widths mirror the row format below (6, 9 and 3 + 3 + 5 = 11) so
    # each title sits directly above the column it describes.
    print("{:<6s}{:<9s}{:<11s}{}".format("BANK", "NAME", "DLLS", "COUNT"))
    for i, (first, last) in enumerate(tab.banks):
        print("{:<6d}{:<9s}{:<3d} - {:<5d}{:d}"
            .format(i, tab.bank_names[i], first + 1, last + 1, (last + 1) - first))
    print()
    print(f"Total DLLs: {len(tab.entries)}")
def dump_bank(tab: DLLTab, index: int):
    """Print a titled table listing every DLL entry in bank ``index``."""
    bank_name = tab.bank_names[index]
    print(f"BANK {index} ({bank_name})")
    print("===================")
    write_entry_table_header()

    first, last = tab.banks[index]
    # Bank bounds are inclusive, and rows are numbered from the bank's first
    # entry plus one.
    for num, entry in enumerate(tab.entries[first:last + 1], start=first + 1):
        dump_entry(entry, num=num)
def write_entry_table_header():
    """Print the column titles for the rows written by ``dump_entry``."""
    # Widths follow dump_entry's row format: 6 for the DLL number, then the
    # "0x" prefix plus the padded hex field for each value (2 + 9, 2 + 9 and
    # 2 + 7), so every title starts in the same column as its values.
    print("{:<6s}{:<11s}{:<11s}{:<9s}{}".format("DLL", "START", "END", "SIZE", "BSS SIZE"))
def dump_entry(entry: DLLTabEntry, num: int):
    """Print one table row for ``entry``, labelled with DLL number ``num``.

    The offsets and sizes are written as left-aligned uppercase hex, each
    preceded by ``0x``.
    """
    row = (
        f"{num:<6d}"
        f"0x{entry.start_offset:<9X}"
        f"0x{entry.end_offset:<9X}"
        f"0x{entry.size:<7X}"
        f"0x{entry.bss_size:X}"
    )
    print(row)
def main():
    """Command-line entry point: parse the arguments and print the requested views.

    The header is shown for ``--all`` or ``--header``. After that exactly one
    of the following runs, in this order of precedence: every bank
    (``--all``), the selected banks (``--bank``), or the selected DLL
    entries (``--dll``).
    """
    parser = argparse.ArgumentParser(description="Display information from the Dinosaur Planet DLLS.tab file.")
    parser.add_argument("tab", type=argparse.FileType("rb"), help="The DLLS.tab file.")
    parser.add_argument("-a", "--all", action="store_true", default=False, help="Display all information in the tab file.")
    parser.add_argument("-x", "--header", action="store_true", default=False, help="Display the tab header.")
    parser.add_argument("-d", "--dll", action="append", help="Display the tab entry for the given DLL(s).")
    parser.add_argument("-b", "--bank", action="append", help="Display all DLLs in the given bank(s).")
    args = parser.parse_args()

    # Without any display option there is nothing to show; print usage instead.
    nothing_requested = not (args.all or args.header) and args.dll is None and args.bank is None
    if nothing_requested:
        print("At least one display option must be provided.")
        parser.print_help()
        return

    with args.tab as tab_file:
        tab_file: BufferedReader
        tab = DLLTab.parse(bytearray(tab_file.read()))

    if args.all or args.header:
        dump_header(tab)
        print()

    if args.all:
        for bank_index in range(len(tab.banks)):
            dump_bank(tab, bank_index)
            print()
    elif args.bank is not None:
        # Convert every argument first so a malformed value fails before any
        # bank is printed.
        for bank in [int(value) for value in args.bank]:
            if 0 <= bank < len(tab.banks):
                dump_bank(tab, bank)
            else:
                print(f"No such bank: {bank}")
            print()
    elif args.dll is not None:
        write_entry_table_header()
        # DLL numbers on the command line are 1-based.
        for dll in [int(value) for value in args.dll]:
            if 1 <= dll <= len(tab.entries):
                dump_entry(tab.entries[dll - 1], dll)
            else:
                print(f"No such DLL: {dll}")
        print()


if __name__ == "__main__":
    main()

Binary file not shown.

View File

@ -36,7 +36,7 @@ def main():
k += 1
if diff:
print("Difference at: 0x%X" %(i))
print("Difference at: 0x{:X}".format(i))
diffs += 1
if max_diffs > 0 and diffs >= max_diffs:
break