From 9bf84176d4dee29f4370f50228dcd1d9ee6d0107 Mon Sep 17 00:00:00 2001 From: Derek Hensley Date: Sat, 18 Jun 2022 19:28:55 -0700 Subject: [PATCH] Load OK (#792) * Progress * cleanup * Fix comment * progress * OK * Review pt1 * Update comments * update comments some more * Renamings * Add headers and some parens cleanup * Remove zelda64 * PR review * bss * Explain each relocation type a bit in the header comment * Relocate_Addr macro * Split off into z64load.h * Adjust comment slightly based on OOT review * OverlayRelocationType -> MIPSRelocationType * Last bit of cleanup from OoT * format * Split off functions --- include/functions.h | 7 +- include/ultra64.h | 1 + include/ultra64/r4300.h | 367 ++++++++++++++++++++++++++++++++ include/variables.h | 4 +- include/z64.h | 9 - include/z64load.h | 31 +++ spec | 1 - src/boot_O2/loadfragment.c | 188 +++++++++++++++- src/boot_O2/loadfragment2.c | 194 +++++++++-------- src/code/z_DLF.c | 1 + src/code/z_actor.c | 1 + src/code/z_effect_soft_sprite.c | 1 + src/code/z_kaleido_manager.c | 1 + src/code/z_overlay.c | 1 + tools/disasm/functions.txt | 2 +- tools/disasm/variables.txt | 4 +- 16 files changed, 699 insertions(+), 114 deletions(-) create mode 100644 include/ultra64/r4300.h create mode 100644 include/z64load.h diff --git a/include/functions.h b/include/functions.h index 737b0ff81..b60aa04fa 100644 --- a/include/functions.h +++ b/include/functions.h @@ -104,12 +104,7 @@ void FaultDrawer_SetInputCallback(FaultDrawerCallback callback); void FaultDrawer_Init(void); void func_80084940(void); void func_80084968(void); -void Load_Relocate(u32 allocatedVRamAddr, OverlayRelocationSection* overlayInfo, u32 vRamStart); -s32 Load_LoadOverlay(u32 vRomStart, u32 vRomEnd, u32 vRamStart, u32 allocatedVRamAddr, u32 allocatedBytes); -void* Load_AllocateAndLoad(u32 vRomStart, u32 vRomEnd, u32 vRamStart); -void Load2_Relocate(u32 allocatedVRamAddr, OverlayRelocationSection* overlayInfo, u32 vRamStart); -s32 Load2_LoadOverlay(u32 vRomStart, 
u32 vRomEnd, u32 vRamStart, u32 vRamEnd, u32 allocatedVRamAddr); -void* Overlay_AllocateAndLoad(u32 vRomStart, u32 vRomEnd, u32 vRamStart, u32 vRamEnd); + void PadUtils_Init(Input* input); void func_80085150(void); void PadUtils_ResetPressRel(Input* input); diff --git a/include/ultra64.h b/include/ultra64.h index 49323cc2b..6ffb26ff6 100644 --- a/include/ultra64.h +++ b/include/ultra64.h @@ -13,6 +13,7 @@ #include "ultra64/rcp.h" #include "ultra64/rdp.h" #include "ultra64/rsp.h" +#include "ultra64/r4300.h" #include "ultra64/vi.h" #endif diff --git a/include/ultra64/r4300.h b/include/ultra64/r4300.h new file mode 100644 index 000000000..30e8a9ea8 --- /dev/null +++ b/include/ultra64/r4300.h @@ -0,0 +1,367 @@ +#ifndef ULTRA64_R4300_H +#define ULTRA64_R4300_H + +#ifdef _LANGUAGE_C +#include "PR/ultratypes.h" +#define U32(x) ((u32)(x)) /* argument is parenthesized so the cast covers a whole expression argument */ +#define C_REG(x) (x) +#else +#define U32(x) (x) +#define C_REG(x) $x +#endif + +// Segment base addresses and sizes +#define KUBASE 0 +#define KUSIZE 0x80000000 +#define K0BASE 0x80000000 +#define K0SIZE 0x20000000 +#define K1BASE 0xA0000000 +#define K1SIZE 0x20000000 +#define K2BASE 0xC0000000 +#define K2SIZE 0x20000000 + +// Exception vectors +#define SIZE_EXCVEC 0x80 // Size of an exc. vec +#define UT_VEC K0BASE // utlbmiss vector +#define R_VEC (K1BASE + 0x1FC00000) // reset vector +#define XUT_VEC (K0BASE + 0x80) // extended address tlbmiss +#define ECC_VEC (K0BASE + 0x100) // Ecc exception vector +#define E_VEC (K0BASE + 0x180) // Gen.
exception vector + +// Address conversion macros +#define K0_TO_K1(x) (U32(x) | 0xA0000000) // kseg0 to kseg1 +#define K1_TO_K0(x) (U32(x) & 0x9FFFFFFF) // kseg1 to kseg0 +#define K0_TO_PHYS(x) (U32(x) & 0x1FFFFFFF) // kseg0 to physical +#define K1_TO_PHYS(x) (U32(x) & 0x1FFFFFFF) // kseg1 to physical +#define KDM_TO_PHYS(x) (U32(x) & 0x1FFFFFFF) // direct mapped to physical +#define PHYS_TO_K0(x) (U32(x) | 0x80000000) // physical to kseg0 +#define PHYS_TO_K1(x) (U32(x) | 0xA0000000) // physical to kseg1 + +// Address predicates +#define IS_KSEG0(x) (U32(x) >= K0BASE && U32(x) < K1BASE) +#define IS_KSEG1(x) (U32(x) >= K1BASE && U32(x) < K2BASE) +#define IS_KSEGDM(x) (U32(x) >= K0BASE && U32(x) < K2BASE) +#define IS_KSEG2(x) (U32(x) >= K2BASE && U32(x) < KPTE_SHDUBASE) +#define IS_KPTESEG(x) (U32(x) >= KPTE_SHDUBASE) +#define IS_KUSEG(x) (U32(x) < K0BASE) + +// TLB size constants +#define NTLBENTRIES 31 /* entry 31 is reserved by rdb */ + +#define TLBHI_VPN2MASK 0xFFFFE000 +#define TLBHI_VPN2SHIFT 13 +#define TLBHI_PIDMASK 0xFF +#define TLBHI_PIDSHIFT 0 +#define TLBHI_NPID 255 // 255 to fit in 8 bits + +#define TLBLO_PFNMASK 0x3FFFFFC0 +#define TLBLO_PFNSHIFT 6 +#define TLBLO_CACHMASK 0x38 // cache coherency algorithm +#define TLBLO_CACHSHIFT 3 +#define TLBLO_UNCACHED 0x10 // not cached +#define TLBLO_NONCOHRNT 0x18 // Cacheable non-coherent +#define TLBLO_EXLWR 0x28 // Exclusive write +#define TLBLO_D 0x4 // writeable +#define TLBLO_V 0x2 // valid bit +#define TLBLO_G 0x1 // global access bit + +#define TLBINX_PROBE 0x80000000 +#define TLBINX_INXMASK 0x3F +#define TLBINX_INXSHIFT 0 + +#define TLBRAND_RANDMASK 0x3F +#define TLBRAND_RANDSHIFT 0 + +#define TLBWIRED_WIREDMASK 0x3F + +#define TLBCTXT_BASEMASK 0xFF800000 +#define TLBCTXT_BASESHIFT 23 +#define TLBCTXT_BASEBITS 9 + +#define TLBCTXT_VPNMASK 0x7FFFF0 +#define TLBCTXT_VPNSHIFT 4 + +#define TLBPGMASK_4K 0x0 +#define TLBPGMASK_16K 0x6000 +#define TLBPGMASK_64K 0x1E000 + +/* + * Status register + */ +#define 
SR_CUMASK 0xF0000000 // coproc usable bits + +#define SR_CU3 0x80000000 // Coprocessor 3 usable +#define SR_CU2 0x40000000 // Coprocessor 2 usable +#define SR_CU1 0x20000000 // Coprocessor 1 usable +#define SR_CU0 0x10000000 // Coprocessor 0 usable +#define SR_RP 0x08000000 // Reduced power (quarter speed) +#define SR_FR 0x04000000 // MIPS III FP register mode +#define SR_RE 0x02000000 // Reverse endian +#define SR_ITS 0x01000000 // Instruction trace support +#define SR_BEV 0x00400000 // Use boot exception vectors +#define SR_TS 0x00200000 // TLB shutdown +#define SR_SR 0x00100000 // Soft reset occurred +#define SR_CH 0x00040000 // Cache hit for last 'cache' op +#define SR_CE 0x00020000 // Create ECC +#define SR_DE 0x00010000 // ECC or parity does not cause error + +// Interrupt enable bits +// (NOTE: bits set to 1 enable the corresponding level interrupt) +#define SR_IMASK 0x0000FF00 // Interrupt mask +#define SR_IMASK8 0x00000000 // mask level 8 +#define SR_IMASK7 0x00008000 // mask level 7 +#define SR_IMASK6 0x0000C000 // mask level 6 +#define SR_IMASK5 0x0000E000 // mask level 5 +#define SR_IMASK4 0x0000F000 // mask level 4 +#define SR_IMASK3 0x0000F800 // mask level 3 +#define SR_IMASK2 0x0000FC00 // mask level 2 +#define SR_IMASK1 0x0000FE00 // mask level 1 +#define SR_IMASK0 0x0000FF00 // mask level 0 + +#define SR_IBIT8 0x00008000 // bit level 8 +#define SR_IBIT7 0x00004000 // bit level 7 +#define SR_IBIT6 0x00002000 // bit level 6 +#define SR_IBIT5 0x00001000 // bit level 5 +#define SR_IBIT4 0x00000800 // bit level 4 +#define SR_IBIT3 0x00000400 // bit level 3 +#define SR_IBIT2 0x00000200 // bit level 2 +#define SR_IBIT1 0x00000100 // bit level 1 + +#define SR_IMASKSHIFT 8 + +#define SR_KX 0x00000080 // extended-addr TLB vec in kernel +#define SR_SX 0x00000040 // extended-addr TLB vec supervisor +#define SR_UX 0x00000020 // extended-addr TLB vec in user mode +#define SR_KSU_MASK 0x00000018 // mode mask +#define SR_KSU_USR 0x00000010 // user mode +#define
SR_KSU_SUP 0x00000008 // supervisor mode +#define SR_KSU_KER 0x00000000 // kernel mode +#define SR_ERL 0x00000004 // Error level, 1=>cache error +#define SR_EXL 0x00000002 // Exception level, 1=>exception +#define SR_IE 0x00000001 // interrupt enable, 1=>enable + +// Cause Register +#define CAUSE_BD 0x80000000 // Branch delay slot +#define CAUSE_CEMASK 0x30000000 // coprocessor error +#define CAUSE_CESHIFT 28 + +// Interrupt pending bits +#define CAUSE_IP8 0x00008000 // External level 8 pending - COMPARE +#define CAUSE_IP7 0x00004000 // External level 7 pending - INT4 +#define CAUSE_IP6 0x00002000 // External level 6 pending - INT3 +#define CAUSE_IP5 0x00001000 // External level 5 pending - INT2 +#define CAUSE_IP4 0x00000800 // External level 4 pending - INT1 +#define CAUSE_IP3 0x00000400 // External level 3 pending - INT0 +#define CAUSE_SW2 0x00000200 // Software level 2 pending +#define CAUSE_SW1 0x00000100 // Software level 1 pending + +#define CAUSE_IPMASK 0x0000FF00 // Pending interrupt mask +#define CAUSE_IPSHIFT 8 + +#define CAUSE_EXCMASK 0x0000007C // Cause code bits +#define CAUSE_EXCSHIFT 2 + +// Cause register exception codes + +#define EXC_CODE(x) ((x) << 2) + +// Hardware exception codes +#define EXC_INT EXC_CODE(0) // interrupt +#define EXC_MOD EXC_CODE(1) // TLB mod +#define EXC_RMISS EXC_CODE(2) // Read TLB Miss +#define EXC_WMISS EXC_CODE(3) // Write TLB Miss +#define EXC_RADE EXC_CODE(4) // Read Address Error +#define EXC_WADE EXC_CODE(5) // Write Address Error +#define EXC_IBE EXC_CODE(6) // Instruction Bus Error +#define EXC_DBE EXC_CODE(7) // Data Bus Error +#define EXC_SYSCALL EXC_CODE(8) // SYSCALL +#define EXC_BREAK EXC_CODE(9) // BREAKpoint +#define EXC_II EXC_CODE(10) // Illegal Instruction +#define EXC_CPU EXC_CODE(11) // CoProcessor Unusable +#define EXC_OV EXC_CODE(12) // OVerflow +#define EXC_TRAP EXC_CODE(13) // Trap exception +#define EXC_VCEI EXC_CODE(14) // Virt. Coherency on Inst. 
fetch +#define EXC_FPE EXC_CODE(15) // Floating Point Exception +#define EXC_WATCH EXC_CODE(23) // Watchpoint reference +#define EXC_VCED EXC_CODE(31) // Virt. Coherency on data read + +// C0_PRID Defines +#define C0_IMPMASK 0xFF00 +#define C0_IMPSHIFT 8 +#define C0_REVMASK 0xFF +#define C0_MAJREVMASK 0xF0 +#define C0_MAJREVSHIFT 4 +#define C0_MINREVMASK 0xF + +// Coprocessor 0 operations +#define C0_READI 0x1 // read ITLB entry addressed by C0_INDEX +#define C0_WRITEI 0x2 // write ITLB entry addressed by C0_INDEX +#define C0_WRITER 0x6 // write ITLB entry addressed by C0_RAND +#define C0_PROBE 0x8 // probe for ITLB entry addressed by TLBHI +#define C0_RFE 0x10 // restore for exception + +// 'cache' instruction definitions + +// Target cache +#define CACH_PI 0x0 // specifies primary inst. cache +#define CACH_PD 0x1 // primary data cache +#define CACH_SI 0x2 // secondary instruction cache +#define CACH_SD 0x3 // secondary data cache + +// Cache operations +#define C_IINV 0x0 // index invalidate (inst, 2nd inst) +#define C_IWBINV 0x0 // index writeback inval (d, sd) +#define C_ILT 0x4 // index load tag (all) +#define C_IST 0x8 // index store tag (all) +#define C_CDX 0xC // create dirty exclusive (d, sd) +#define C_HINV 0x10 // hit invalidate (all) +#define C_HWBINV 0x14 // hit writeback inv. (d, sd) +#define C_FILL 0x14 // fill (i) +#define C_HWB 0x18 // hit writeback (i, d, sd) +#define C_HSV 0x1C // hit set virt. 
(si, sd) + +// Cache size definitions +#define ICACHE_SIZE 0x4000 // 16K +#define ICACHE_LINESIZE 32 // 8 words +#define ICACHE_LINEMASK (ICACHE_LINESIZE - 1) + +#define DCACHE_SIZE 0x2000 // 8K +#define DCACHE_LINESIZE 16 // 4 words +#define DCACHE_LINEMASK (DCACHE_LINESIZE - 1) + +// C0_CONFIG register definitions +#define CONFIG_CM 0x80000000 // 1 == Master-Checker enabled +#define CONFIG_EC 0x70000000 // System Clock ratio +#define CONFIG_EC_1_1 0x6 // System Clock ratio 1 :1 +#define CONFIG_EC_3_2 0x7 // System Clock ratio 1.5 :1 +#define CONFIG_EC_2_1 0x0 // System Clock ratio 2 :1 +#define CONFIG_EC_3_1 0x1 // System Clock ratio 3 :1 +#define CONFIG_EP 0x0F000000 // Transmit Data Pattern +#define CONFIG_SB 0x00C00000 // Secondary cache block size + +#define CONFIG_SS 0x00200000 // Split scache: 0 == I&D combined +#define CONFIG_SW 0x00100000 // scache port: 0==128, 1==64 +#define CONFIG_EW 0x000C0000 // System Port width: 0==64, 1==32 +#define CONFIG_SC 0x00020000 // 0 -> 2nd cache present +#define CONFIG_SM 0x00010000 // 0 -> Dirty Shared Coherency enable +#define CONFIG_BE 0x00008000 // Endian-ness: 1 --> BE +#define CONFIG_EM 0x00004000 // 1 -> ECC mode, 0 -> parity +#define CONFIG_EB 0x00002000 // Block order:1->sequent,0->subblock + +#define CONFIG_IC 0x00000E00 // Primary Icache size +#define CONFIG_DC 0x000001C0 // Primary Dcache size +#define CONFIG_IB 0x00000020 // Icache block size +#define CONFIG_DB 0x00000010 // Dcache block size +#define CONFIG_CU 0x00000008 // Update on Store-conditional +#define CONFIG_K0 0x00000007 // K0SEG Coherency algorithm + +#define CONFIG_UNCACHED 0x00000002 // K0 is uncached +#define CONFIG_NONCOHRNT 0x00000003 +#define CONFIG_COHRNT_EXLWR 0x00000005 +#define CONFIG_SB_SHFT 22 // shift SB to bit position 0 +#define CONFIG_IC_SHFT 9 // shift IC to bit position 0 +#define CONFIG_DC_SHFT 6 // shift DC to bit position 0 +#define CONFIG_BE_SHFT 15 // shift BE to bit position 0 + +// C0_TAGLO definitions for setting/getting 
cache states and physaddr bits +#define SADDRMASK 0xFFFFE000 // 31..13 -> scache paddr bits 35..17 +#define SVINDEXMASK 0x00000380 // 9..7: prim virt index bits 14..12 +#define SSTATEMASK 0x00001C00 // bits 12..10 hold scache line state +#define SINVALID 0x00000000 // invalid --> 000 == state 0 +#define SCLEANEXCL 0x00001000 // clean exclusive --> 100 == state 4 +#define SDIRTYEXCL 0x00001400 // dirty exclusive --> 101 == state 5 +#define SECC_MASK 0x0000007F // low 7 bits are ecc for the tag +#define SADDR_SHIFT 4 // shift STagLo (31..13) to 35..17 + +#define PADDRMASK 0xFFFFFF00 // PTagLo31..8->prim paddr bits35..12 +#define PADDR_SHIFT 4 // roll bits 35..12 down to 31..8 +#define PSTATEMASK 0x00C0 // bits 7..6 hold primary line state +#define PINVALID 0x0000 // invalid --> 000 == state 0 +#define PCLEANEXCL 0x0080 // clean exclusive --> 10 == state 2 +#define PDIRTYEXCL 0x00C0 // dirty exclusive --> 11 == state 3 +#define PPARITY_MASK 0x0001 // low bit is parity bit (even). + +// C0_CACHE_ERR definitions. +#define CACHERR_ER 0x80000000 // 0: inst ref, 1: data ref +#define CACHERR_EC 0x40000000 // 0: primary, 1: secondary +#define CACHERR_ED 0x20000000 // 1: data error +#define CACHERR_ET 0x10000000 // 1: tag error +#define CACHERR_ES 0x08000000 // 1: external ref, e.g. snoop +#define CACHERR_EE 0x04000000 // error on SysAD bus +#define CACHERR_EB 0x02000000 // complicated, see spec. +#define CACHERR_EI 0x01000000 // complicated, see spec. +#define CACHERR_SIDX_MASK 0x003FFFF8 // secondary cache index +#define CACHERR_PIDX_MASK 0x00000007 // primary cache index +#define CACHERR_PIDX_SHIFT 12 // bits 2..0 are paddr14..12 + +/* + * R4000 family supports hardware watchpoints: + * C0_WATCHLO: + * bits 31..3 are bits 31..3 of physaddr to watch + * bit 2: reserved; must be written as 0. + * bit 1: when set causes a watchpoint trap on load accesses to paddr.
+ * bit 0: when set traps on stores to paddr; + * C0_WATCHHI + * bits 31..4 are reserved and must be written as zeros. + * bits 3..0 are bits 35..32 of the physaddr to watch + */ +#define WATCHLO_WTRAP 0x00000001 +#define WATCHLO_RTRAP 0x00000002 +#define WATCHLO_ADDRMASK 0xFFFFFFF8 +#define WATCHLO_VALIDMASK 0xFFFFFFFB +#define WATCHHI_VALIDMASK 0x0000000F + +// Coprocessor 0 registers +#define C0_INX C_REG(0) +#define C0_RAND C_REG(1) +#define C0_ENTRYLO0 C_REG(2) +#define C0_ENTRYLO1 C_REG(3) +#define C0_CONTEXT C_REG(4) +#define C0_PAGEMASK C_REG(5) // page mask +#define C0_WIRED C_REG(6) // # wired entries in tlb +#define C0_BADVADDR C_REG(8) +#define C0_COUNT C_REG(9) // free-running counter +#define C0_ENTRYHI C_REG(10) +#define C0_COMPARE C_REG(11) // counter comparison reg. +#define C0_SR C_REG(12) +#define C0_CAUSE C_REG(13) +#define C0_EPC C_REG(14) +#define C0_PRID C_REG(15) // revision identifier +#define C0_CONFIG C_REG(16) // hardware configuration +#define C0_LLADDR C_REG(17) // load linked address +#define C0_WATCHLO C_REG(18) // watchpoint +#define C0_WATCHHI C_REG(19) // watchpoint +#define C0_ECC C_REG(26) // S-cache ECC and primary parity +#define C0_CACHE_ERR C_REG(27) // cache error status +#define C0_TAGLO C_REG(28) // cache operations +#define C0_TAGHI C_REG(29) // cache operations +#define C0_ERROR_EPC C_REG(30) // ECC error prg. 
counter + +// floating-point status register +#define C1_FPCSR C_REG(31) + +#define FPCSR_FS 0x01000000 // flush denorm to zero +#define FPCSR_C 0x00800000 // condition bit +#define FPCSR_CE 0x00020000 // cause: unimplemented operation +#define FPCSR_CV 0x00010000 // cause: invalid operation +#define FPCSR_CZ 0x00008000 // cause: division by zero +#define FPCSR_CO 0x00004000 // cause: overflow +#define FPCSR_CU 0x00002000 // cause: underflow +#define FPCSR_CI 0x00001000 // cause: inexact operation +#define FPCSR_EV 0x00000800 // enable: invalid operation +#define FPCSR_EZ 0x00000400 // enable: division by zero +#define FPCSR_EO 0x00000200 // enable: overflow +#define FPCSR_EU 0x00000100 // enable: underflow +#define FPCSR_EI 0x00000080 // enable: inexact operation +#define FPCSR_FV 0x00000040 // flag: invalid operation +#define FPCSR_FZ 0x00000020 // flag: division by zero +#define FPCSR_FO 0x00000010 // flag: overflow +#define FPCSR_FU 0x00000008 // flag: underflow +#define FPCSR_FI 0x00000004 // flag: inexact operation +#define FPCSR_RM_MASK 0x00000003 // rounding mode mask +#define FPCSR_RM_RN 0x00000000 // round to nearest +#define FPCSR_RM_RZ 0x00000001 // round to zero +#define FPCSR_RM_RP 0x00000002 // round to positive infinity +#define FPCSR_RM_RM 0x00000003 // round to negative infinity + +#endif diff --git a/include/variables.h b/include/variables.h index c40ba1084..20b42dd71 100644 --- a/include/variables.h +++ b/include/variables.h @@ -35,8 +35,8 @@ extern const char* sCpuExceptions[18]; extern const char* sFpuExceptions[6]; extern FaultDrawer* sFaultDrawContext; extern FaultDrawer sFaultDrawerDefault; -// extern UNK_TYPE4 D_80096C20; -extern UNK_TYPE4 D_80096C30; +extern s32 gLoadLogSeverity; +extern s32 gLoad2LogSeverity; extern StackEntry* sStackInfoListStart; extern StackEntry* sStackInfoListEnd; // extern UNK_TYPE1 sGfxPrintFontTLUT; diff --git a/include/z64.h b/include/z64.h index a54ef333a..a0119f2f8 100644 --- a/include/z64.h +++ 
b/include/z64.h @@ -290,15 +290,6 @@ typedef enum IRQ_TYPE { IRQ_PRENMI_4 = 0x2A1 } IRQ_TYPE; -typedef struct { - /* 0x00 */ u32 textSize; - /* 0x04 */ u32 dataSize; - /* 0x08 */ u32 rodataSize; - /* 0x0C */ u32 bssSize; - /* 0x10 */ u32 nRelocations; - /* 0x14 */ u32 relocations[1]; -} OverlayRelocationSection; // size >= 0x18 - typedef struct { /* 0x00 */ u32 resetting; /* 0x04 */ u32 resetCount; diff --git a/include/z64load.h b/include/z64load.h new file mode 100644 index 000000000..8570faf25 --- /dev/null +++ b/include/z64load.h @@ -0,0 +1,31 @@ +#ifndef Z64LOAD_H +#define Z64LOAD_H + +#include "PR/ultratypes.h" + +#define RELOCATE_ADDR(addr, vRamStart, allocu32) ((addr) - (vRamStart) + (allocu32)) + +#define RELOC_SECTION(reloc) ((reloc) >> 30) +#define RELOC_OFFSET(reloc) ((reloc) & 0xFFFFFF) +#define RELOC_TYPE_MASK(reloc) ((reloc) & 0x3F000000) +#define RELOC_TYPE_SHIFT 24 + +/* MIPS Relocation Types */ +#define R_MIPS_32 2 +#define R_MIPS_26 4 +#define R_MIPS_HI16 5 +#define R_MIPS_LO16 6 + +typedef struct { + /* 0x00 */ u32 textSize; + /* 0x04 */ u32 dataSize; + /* 0x08 */ u32 rodataSize; + /* 0x0C */ u32 bssSize; + /* 0x10 */ u32 nRelocations; + /* 0x14 */ u32 relocations[1]; +} OverlayRelocationSection; // size >= 0x18 + +size_t Load2_LoadOverlay(uintptr_t vRomStart, uintptr_t vRomEnd, uintptr_t vRamStart, uintptr_t vRamEnd, void* allocatedVRamAddr); +void* Load2_AllocateAndLoad(uintptr_t vRomStart, uintptr_t vRomEnd, uintptr_t vRamStart, uintptr_t vRamEnd); + +#endif diff --git a/spec b/spec index 54a782654..cdf130b1d 100644 --- a/spec +++ b/spec @@ -29,7 +29,6 @@ beginseg include "build/src/boot_O2_g3/fault_drawer.o" include "build/src/boot_O2/boot_80084940.o" include "build/src/boot_O2/loadfragment.o" - include "build/data/boot/loadfragment.data.o" include "build/src/boot_O2/loadfragment2.o" include "build/src/boot_O2/padutils.o" include "build/src/boot_O2/stackcheck.o" diff --git a/src/boot_O2/loadfragment.c b/src/boot_O2/loadfragment.c index 
968fb464a..17ed6b8ef 100644 --- a/src/boot_O2/loadfragment.c +++ b/src/boot_O2/loadfragment.c @@ -1,8 +1,190 @@ +/** + * @file loadfragment.c + * + * Functions used to process and relocate overlays + * + * @note: + * These are completely unused in favor of the functions in `loadfragment2.c`. + * + * The main difference between them seems to be the lack of vRamEnd arguments here. + * Instead they are calculated on the fly. + */ #include "global.h" #include "system_malloc.h" +#include "z64load.h" -#pragma GLOBAL_ASM("asm/non_matchings/boot/loadfragment/Load_Relocate.s") +s32 gLoadLogSeverity = 2; -#pragma GLOBAL_ASM("asm/non_matchings/boot/loadfragment/Load_LoadOverlay.s") +void Load_Relocate(void* allocatedVRamAddr, OverlayRelocationSection* ovl, uintptr_t vRamStart) { /* rewrites every address listed in ovl->relocations from its vRamStart based value to the matching address inside allocatedVRamAddr */ + u32 sections[4]; + u32* relocDataP; + u32 reloc; + uintptr_t relocatedAddress; + u32 i; + u32* luiInstRef; + uintptr_t allocu32 = (uintptr_t)allocatedVRamAddr; + u32* regValP; + u32* luiRefs[32]; + u32 luiVals[32]; + u32 isLoNeg; -#pragma GLOBAL_ASM("asm/non_matchings/boot/loadfragment/Load_AllocateAndLoad.s") + if (gLoadLogSeverity >= 3) {} + + sections[0] = 0; /* section bases, indexed by RELOC_SECTION(reloc) */ + sections[1] = allocu32; /* start of the loaded image (text) */ + sections[2] = allocu32 + ovl->textSize; /* data follows text */ + sections[3] = sections[2] + ovl->dataSize; /* presumably rodata, which follows data; confirm */ + + for (i = 0; i < ovl->nRelocations; i++) { + reloc = ovl->relocations[i]; + relocDataP = (u32*)(sections[RELOC_SECTION(reloc)] + RELOC_OFFSET(reloc)); + + switch (RELOC_TYPE_MASK(reloc)) { + case R_MIPS_32 << RELOC_TYPE_SHIFT: + // Handles 32-bit address relocation, used for things such as jump tables and pointers in data. + // Just relocate the full address + + // Check address is valid for relocation + if ((*relocDataP & 0x0F000000) == 0) { + *relocDataP = RELOCATE_ADDR(*relocDataP, vRamStart, allocu32); + } else if (gLoadLogSeverity >= 3) { + } + break; + + case R_MIPS_26 << RELOC_TYPE_SHIFT: + // Handles 26-bit address relocation, used for jumps and jals.
+ // Extract the address from the target field of the J-type MIPS instruction. + // Relocate the address and update the instruction. + + *relocDataP = + (*relocDataP & 0xFC000000) | + ((RELOCATE_ADDR(PHYS_TO_K0((*relocDataP & 0x03FFFFFF) << 2), vRamStart, allocu32) & 0x0FFFFFFF) >> + 2); + break; + + case R_MIPS_HI16 << RELOC_TYPE_SHIFT: + // Handles relocation for a hi/lo pair, part 1. + // Store the reference to the LUI instruction (hi) using the `rt` register of the instruction. + // This will be updated later in the `R_MIPS_LO16` section. + + luiRefs[(*relocDataP >> 0x10) & 0x1F] = relocDataP; + luiVals[(*relocDataP >> 0x10) & 0x1F] = *relocDataP; + break; + + case R_MIPS_LO16 << RELOC_TYPE_SHIFT: + // Handles relocation for a hi/lo pair, part 2. + // Grab the stored LUI (hi) from the `R_MIPS_HI16` section using the `rs` register of the instruction. + // The full address is calculated, relocated, and then used to update both the LUI and lo instructions. + // If the lo part is negative, add 1 to the LUI value. + // Note: The lo instruction is assumed to have a signed immediate. + + luiInstRef = luiRefs[(*relocDataP >> 0x15) & 0x1F]; + regValP = &luiVals[(*relocDataP >> 0x15) & 0x1F]; + + // Check address is valid for relocation + if ((((*luiInstRef << 0x10) + (s16)*relocDataP) & 0x0F000000) == 0) { + relocatedAddress = RELOCATE_ADDR((*regValP << 0x10) + (s16)*relocDataP, vRamStart, allocu32); + isLoNeg = (relocatedAddress & 0x8000) ? 
1 : 0; + *luiInstRef = (*luiInstRef & 0xFFFF0000) | (((relocatedAddress >> 0x10) & 0xFFFF) + isLoNeg); + *relocDataP = (*relocDataP & 0xFFFF0000) | (relocatedAddress & 0xFFFF); + } else if (gLoadLogSeverity >= 3) { + } + break; + } + } +} + +size_t Load_LoadOverlay(uintptr_t vRomStart, uintptr_t vRomEnd, uintptr_t vRamStart, void* allocatedVRamAddr, + size_t allocatedBytes) { /* DMAs the ROM range into allocatedVRamAddr, relocates it and zeroes ovl->bssSize bytes after the image; returns the bytes used (image plus bss), or 0 when allocatedBytes is too small */ + size_t size = vRomEnd - vRomStart; + void* end; + s32 pad; + OverlayRelocationSection* ovl; + + if (gLoadLogSeverity >= 3) {} + if (gLoadLogSeverity >= 3) {} + + end = (uintptr_t)allocatedVRamAddr + size; + DmaMgr_SendRequest0(allocatedVRamAddr, vRomStart, size); + + ovl = (OverlayRelocationSection*)((uintptr_t)end - ((s32*)end)[-1]); /* the last word of the image holds the distance from the image end back to the relocation section */ + + if (gLoadLogSeverity >= 3) {} + + if (allocatedBytes < ovl->bssSize + size) { /* NOTE(review): this check runs after the DMA has already written size bytes */ + if (gLoadLogSeverity >= 3) {} + return 0; + } + + allocatedBytes = ovl->bssSize + size; + + if (gLoadLogSeverity >= 3) {} + + Load_Relocate(allocatedVRamAddr, ovl, vRamStart); + + if (ovl->bssSize != 0) { + if (gLoadLogSeverity >= 3) {} + bzero(end, ovl->bssSize); + } + + osWritebackDCache(allocatedVRamAddr, allocatedBytes); + osInvalICache(allocatedVRamAddr, allocatedBytes); + + if (gLoadLogSeverity >= 3) {} + + return allocatedBytes; +} + +void* Load_AllocateAndLoad(uintptr_t vRomStart, uintptr_t vRomEnd, uintptr_t vRamStart) { /* allocates a buffer, DMAs the ROM range into it, grows it by ovl->bssSize and relocates; returns the buffer, or NULL when the Realloc fails */ + size_t size = vRomEnd - vRomStart; + void* end; + void* allocatedVRamAddr; + uintptr_t ovlOffset; + OverlayRelocationSection* ovl; + size_t allocatedBytes; + + if (gLoadLogSeverity >= 3) {} + + allocatedVRamAddr = SystemArena_MallocR(size); /* NOTE(review): the result is handed to the DMA below without a NULL check */ + end = (uintptr_t)allocatedVRamAddr + size; + + if (gLoadLogSeverity >= 3) {} + + DmaMgr_SendRequest0(allocatedVRamAddr, vRomStart, size); + + if (gLoadLogSeverity >= 3) {} + + ovlOffset = (uintptr_t)end - 4; /* address of the last word of the image, taken before the Realloc below */ + ovl = (OverlayRelocationSection*)((uintptr_t)end - ((s32*)end)[-1]); + + if (1) {} + + allocatedBytes = ovl->bssSize + size; + + allocatedVRamAddr = SystemArena_Realloc(allocatedVRamAddr, allocatedBytes); +
+ if (gLoadLogSeverity >= 3) {} + + if (allocatedVRamAddr == NULL) { + if (gLoadLogSeverity >= 3) {} + return allocatedVRamAddr; + } + + end = (uintptr_t)allocatedVRamAddr + size; + ovl = (OverlayRelocationSection*)((uintptr_t)end - *(uintptr_t*)ovlOffset); /* NOTE(review): ovlOffset was computed from the buffer as it was before SystemArena_Realloc; presumably this relies on the block not having moved, confirm */ + + if (gLoadLogSeverity >= 3) {} + + Load_Relocate(allocatedVRamAddr, ovl, vRamStart); + + if (ovl->bssSize != 0) { + if (gLoadLogSeverity >= 3) {} + bzero(end, ovl->bssSize); + } + + osInvalICache(allocatedVRamAddr, allocatedBytes); + + if (gLoadLogSeverity >= 3) {} + + return allocatedVRamAddr; +} diff --git a/src/boot_O2/loadfragment2.c b/src/boot_O2/loadfragment2.c index c324b80d5..51b651c2b 100644 --- a/src/boot_O2/loadfragment2.c +++ b/src/boot_O2/loadfragment2.c @@ -1,120 +1,134 @@ +/** + * @file loadfragment2.c + * + * Functions used to process and relocate overlays + * + */ #include "global.h" #include "system_malloc.h" +#include "z64load.h" -UNK_TYPE4 D_80096C30 = 2; +s32 gLoad2LogSeverity = 2; -#ifdef NON_MATCHING -// This needs lots of work.
Mostly regalloc and getting the address of D_80096C30 placed in s5 at the beginning of the -// function -void Load2_Relocate(u32 allocatedVRamAddr, OverlayRelocationSection* overlayInfo, u32 vRamStart) { - s32 sectionLocations[4]; - u32* regReferences[32]; - u32 regValues[32]; +void Load2_Relocate(void* allocatedVRamAddr, OverlayRelocationSection* ovl, uintptr_t vRamStart) { + u32 sections[4]; + u32* relocDataP; + u32 reloc; + uintptr_t relocatedAddress; u32 i; - u32 relocatedAddress; - s32 signedOffset; - u32* lastInst; - u32* inst; - u32 relocation; - u32 relocationIndex; + u32* luiInstRef; + uintptr_t allocu32 = (uintptr_t)allocatedVRamAddr; + u32* regValP; + u32* luiRefs[32]; + u32 luiVals[32]; + u32 isLoNeg; - sectionLocations[0] = 0; - sectionLocations[1] = allocatedVRamAddr; - sectionLocations[2] = overlayInfo->textSize + allocatedVRamAddr; - sectionLocations[3] = sectionLocations[2] + overlayInfo->dataSize; - for (i = 0, relocationIndex = 0; i < overlayInfo->nRelocations; relocationIndex++) { - relocation = overlayInfo->relocations[relocationIndex]; - i++; - inst = (u32*)(sectionLocations[relocation >> 0x1e] + (relocation & 0xffffff)); + if (gLoad2LogSeverity >= 3) {} - switch (relocation & 0x3f000000) { - case 0x2000000: - if ((*inst & 0xf000000) == 0) { - *inst = (*inst - vRamStart) + allocatedVRamAddr; + sections[0] = 0; + sections[1] = allocu32; + sections[2] = allocu32 + ovl->textSize; + sections[3] = sections[2] + ovl->dataSize; + + for (i = 0; i < ovl->nRelocations; i++) { + reloc = ovl->relocations[i]; + relocDataP = (u32*)(sections[RELOC_SECTION(reloc)] + RELOC_OFFSET(reloc)); + + switch (RELOC_TYPE_MASK(reloc)) { + case R_MIPS_32 << RELOC_TYPE_SHIFT: + // Handles 32-bit address relocation, used for things such as jump tables and pointers in data. 
+ // Just relocate the full address + + // Check address is valid for relocation + if ((*relocDataP & 0x0F000000) == 0) { + *relocDataP = RELOCATE_ADDR(*relocDataP, vRamStart, allocu32); + } else if (gLoad2LogSeverity >= 3) { } - /* - else { - if (D_80096C30 > 2) { - ; - } - } - */ break; - case 0x4000000: - *inst = - (*inst & 0xfc000000) | - ((((((*inst & 0x3ffffff) << 2 | 0x80000000) - vRamStart) + allocatedVRamAddr) & 0xfffffff) >> 2); + + case R_MIPS_26 << RELOC_TYPE_SHIFT: + // Handles 26-bit address relocation, used for jumps and jals. + // Extract the address from the target field of the J-type MIPS instruction. + // Relocate the address and update the instruction. + + *relocDataP = + (*relocDataP & 0xFC000000) | + ((RELOCATE_ADDR(PHYS_TO_K0((*relocDataP & 0x03FFFFFF) << 2), vRamStart, allocu32) & 0x0FFFFFFF) >> + 2); break; - case 0x5000000: - regReferences[*inst >> 0x10 & 0x1f] = inst; - regValues[*inst >> 0x10 & 0x1f] = *inst; + + case R_MIPS_HI16 << RELOC_TYPE_SHIFT: + // Handles relocation for a hi/lo pair, part 1. + // Store the reference to the LUI instruction (hi) using the `rt` register of the instruction. + // This will be updated later in the `R_MIPS_LO16` section. + + luiRefs[(*relocDataP >> 0x10) & 0x1F] = relocDataP; + luiVals[(*relocDataP >> 0x10) & 0x1F] = *relocDataP; break; - case 0x6000000: - lastInst = regReferences[*inst >> 0x15 & 0x1f]; - signedOffset = (s16)*inst; - if (((signedOffset + *lastInst * 0x10000) & 0xf000000) == 0) { - relocatedAddress = - ((signedOffset + regValues[*inst >> 0x15 & 0x1f] * 0x10000) - vRamStart) + allocatedVRamAddr; - *lastInst = (((relocatedAddress >> 0x10) & 0xFFFF) + ((relocatedAddress & 0x8000) ? 1 : 0)) | - (*lastInst & 0xffff0000); - *inst = (*inst & 0xffff0000) | (relocatedAddress & 0xffff); + + case R_MIPS_LO16 << RELOC_TYPE_SHIFT: + // Handles relocation for a hi/lo pair, part 2. + // Grab the stored LUI (hi) from the `R_MIPS_HI16` section using the `rs` register of the instruction. 
+ // The full address is calculated, relocated, and then used to update both the LUI and lo instructions. + // If the lo part is negative, add 1 to the LUI value. + // Note: The lo instruction is assumed to have a signed immediate. + + luiInstRef = luiRefs[(*relocDataP >> 0x15) & 0x1F]; + regValP = &luiVals[(*relocDataP >> 0x15) & 0x1F]; + + // Check address is valid for relocation + if ((((*luiInstRef << 0x10) + (s16)*relocDataP) & 0x0F000000) == 0) { + relocatedAddress = RELOCATE_ADDR((*regValP << 0x10) + (s16)*relocDataP, vRamStart, allocu32); + isLoNeg = (relocatedAddress & 0x8000) ? 1 : 0; + *luiInstRef = (*luiInstRef & 0xFFFF0000) | (((relocatedAddress >> 0x10) & 0xFFFF) + isLoNeg); + *relocDataP = (*relocDataP & 0xFFFF0000) | (relocatedAddress & 0xFFFF); + } else if (gLoad2LogSeverity >= 3) { } break; } } } -#else -#pragma GLOBAL_ASM("asm/non_matchings/boot/loadfragment2/Load2_Relocate.s") -#endif -#ifdef NON_MATCHING -// Very minor stack stuff with a saved value -s32 Load2_LoadOverlay(u32 vRomStart, u32 vRomEnd, u32 vRamStart, u32 vRamEnd, u32 allocatedVRamAddr) { - int nbytes; - u32 pad; - size_t size; +size_t Load2_LoadOverlay(uintptr_t vRomStart, uintptr_t vRomEnd, uintptr_t vRamStart, uintptr_t vRamEnd, + void* allocatedVRamAddr) { + s32 pad[2]; + s32 size = vRomEnd - vRomStart; void* end; - OverlayRelocationSection* overlayInfo; + OverlayRelocationSection* ovl; - size = vRomEnd - vRomStart; - - if (1) { - ; - } + if (gLoad2LogSeverity >= 3) {} + if (gLoad2LogSeverity >= 3) {} + end = (uintptr_t)allocatedVRamAddr + size; DmaMgr_SendRequest0(allocatedVRamAddr, vRomStart, size); - end = (void*)(allocatedVRamAddr + size); - overlayInfo = (OverlayRelocationSection*)((int)end - *(int*)((int)end + -4)); + ovl = (OverlayRelocationSection*)((uintptr_t)end - ((s32*)end)[-1]); - if (1) { - ; + if (gLoad2LogSeverity >= 3) {} + if (gLoad2LogSeverity >= 3) {} + + Load2_Relocate(allocatedVRamAddr, ovl, vRamStart); + + if (ovl->bssSize != 0) { + if (gLoad2LogSeverity 
>= 3) {} + bzero(end, ovl->bssSize); } - Load2_Relocate(allocatedVRamAddr, overlayInfo, vRamStart); - - if (overlayInfo->bssSize != 0) { - bzero(end, overlayInfo->bssSize); - } - - nbytes = vRamEnd - vRamStart; - - osWritebackDCache((void*)allocatedVRamAddr, nbytes); - osInvalICache((void*)allocatedVRamAddr, nbytes); - return nbytes; -} -#else -#pragma GLOBAL_ASM("asm/non_matchings/boot/loadfragment2/Load2_LoadOverlay.s") -#endif - -void* Overlay_AllocateAndLoad(u32 vRomStart, u32 vRomEnd, u32 vRamStart, u32 vRamEnd) { - void* allocatedVRamAddr; - size_t size; - size = vRamEnd - vRamStart; - allocatedVRamAddr = SystemArena_MallocR(size); + + osWritebackDCache(allocatedVRamAddr, size); + osInvalICache(allocatedVRamAddr, size); + + if (gLoad2LogSeverity >= 3) {} + + return size; +} + +void* Load2_AllocateAndLoad(uintptr_t vRomStart, uintptr_t vRomEnd, uintptr_t vRamStart, uintptr_t vRamEnd) { + void* allocatedVRamAddr = SystemArena_MallocR(vRamEnd - vRamStart); + if (allocatedVRamAddr != NULL) { - Load2_LoadOverlay(vRomStart, vRomEnd, vRamStart, vRamEnd, (u32)allocatedVRamAddr); + Load2_LoadOverlay(vRomStart, vRomEnd, vRamStart, vRamEnd, allocatedVRamAddr); } return allocatedVRamAddr; diff --git a/src/code/z_DLF.c b/src/code/z_DLF.c index 4c57aca01..862463b76 100644 --- a/src/code/z_DLF.c +++ b/src/code/z_DLF.c @@ -1,5 +1,6 @@ #include "global.h" #include "system_malloc.h" +#include "z64load.h" #pragma GLOBAL_ASM("asm/non_matchings/code/z_DLF/Overlay_LoadGameState.s") diff --git a/src/code/z_actor.c b/src/code/z_actor.c index 9325d315c..bbabeafd5 100644 --- a/src/code/z_actor.c +++ b/src/code/z_actor.c @@ -4,6 +4,7 @@ */ #include "global.h" +#include "z64load.h" #include "overlays/actors/ovl_En_Horse/z_en_horse.h" #include "overlays/actors/ovl_En_Part/z_en_part.h" #include "overlays/actors/ovl_En_Box/z_en_box.h" diff --git a/src/code/z_effect_soft_sprite.c b/src/code/z_effect_soft_sprite.c index fd1d0212c..56eab9e8a 100644 --- a/src/code/z_effect_soft_sprite.c +++ 
b/src/code/z_effect_soft_sprite.c @@ -1,4 +1,5 @@ #include "global.h" +#include "z64load.h" EffectSsInfo sEffectSsInfo = { NULL, 0, 0 }; diff --git a/src/code/z_kaleido_manager.c b/src/code/z_kaleido_manager.c index 5e89f49e9..73656d65d 100644 --- a/src/code/z_kaleido_manager.c +++ b/src/code/z_kaleido_manager.c @@ -1,4 +1,5 @@ #include "global.h" +#include "z64load.h" #define KALEIDO_OVERLAY(name) \ { \ diff --git a/src/code/z_overlay.c b/src/code/z_overlay.c index f635331c9..a358cefe6 100644 --- a/src/code/z_overlay.c +++ b/src/code/z_overlay.c @@ -1,4 +1,5 @@ #include "global.h" +#include "z64load.h" #pragma GLOBAL_ASM("asm/non_matchings/code/z_overlay/func_801651B0.s") diff --git a/tools/disasm/functions.txt b/tools/disasm/functions.txt index dbe8f14a6..f037221be 100644 --- a/tools/disasm/functions.txt +++ b/tools/disasm/functions.txt @@ -114,7 +114,7 @@ 0x80084CD0:("Load_AllocateAndLoad",), 0x80084DB0:("Load2_Relocate",), 0x8008501C:("Load2_LoadOverlay",), - 0x800850C8:("Overlay_AllocateAndLoad",), + 0x800850C8:("Load2_AllocateAndLoad",), 0x80085130:("PadUtils_Init",), 0x80085150:("func_80085150",), 0x80085158:("PadUtils_ResetPressRel",), diff --git a/tools/disasm/variables.txt b/tools/disasm/variables.txt index 1223f31fa..e0bf5a65a 100644 --- a/tools/disasm/variables.txt +++ b/tools/disasm/variables.txt @@ -27,8 +27,8 @@ 0x80096BC8:("sFpuExceptions","char*","[6]",0x18), 0x80096BE0:("sFaultDrawContext","FaultDrawer*","",0x4), 0x80096BE4:("sFaultDrawerDefault","FaultDrawer","",0x3c), - 0x80096C20:("D_80096C20","UNK_TYPE4","",0x4), - 0x80096C30:("D_80096C30","UNK_TYPE4","",0x4), + 0x80096C20:("gLoadLogSeverity","UNK_TYPE4","",0x4), + 0x80096C30:("gLoad2LogSeverity","UNK_TYPE4","",0x4), 0x80096C40:("sStackInfoListStart","StackEntry*","",0x4), 0x80096C44:("sStackInfoListEnd","StackEntry*","",0x4), 0x80096C50:("sGfxPrintFontTLUT","u16","[64]",0x80),