diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4e041896..b9cca02c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,22 +6,22 @@ on: jobs: build: + name: Build runs-on: ubuntu-latest - container: devkitpro/devkitppc:latest - env: - WINEPREFIX: ${{github.workspace}}/.wine + container: ghcr.io/projectpiki/build:main + strategy: + fail-fast: false + matrix: + version: [usa.1] steps: - - name: Install devkitPro - run: | - sudo dpkg --add-architecture i386 - sudo apt-get update - sudo apt-get -y install build-essential wine32 - sudo chown $(whoami) "$GITHUB_WORKSPACE" - - uses: actions/checkout@v3 - - name: Download compilers - run: | - curl -L https://cdn.discordapp.com/attachments/727918646525165659/917185027656286218/GC_WII_COMPILERS.zip \ - | bsdtar -xvf- -C tools --exclude Wii - mv tools/GC tools/mwcc_compiler - - name: make - run: make -j + - name: Checkout + uses: actions/checkout@v3 + - name: Git config + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + - name: Build + run: make -j$(nproc) VERSION=${{matrix.version}} COMPILERS=/compilers/GC + - name: Upload map + uses: actions/upload-artifact@v3 + with: + name: pikmin-${{matrix.version}}.map + path: build/*/build.map diff --git a/.gitignore b/.gitignore index 057252fe..a6484a24 100644 --- a/.gitignore +++ b/.gitignore @@ -20,8 +20,8 @@ *.7z *.bat build -epilogue -tools/mwcc_compiler/ +tools/mwcc_compiler/* +!tools/mwcc_compiler/.gitkeep *.sln *.vcxproj *.user diff --git a/Makefile b/Makefile index a10bbc04..57be9aba 100644 --- a/Makefile +++ b/Makefile @@ -19,13 +19,7 @@ NAME := pikmin VERSION := usa.1 #VERSION := usa.0 -# Overkill epilogue fixup strategy. Set to 1 if necessary. -EPILOGUE_PROCESS := 1 - BUILD_DIR := build/$(NAME).$(VERSION) -ifeq ($(EPILOGUE_PROCESS),1) -EPILOGUE_DIR := epilogue/$(NAME).$(VERSION) -endif # Inputs S_FILES := $(wildcard asm/*.s) @@ -40,50 +34,49 @@ ELF := $(DOL:.dol=.elf) MAP := $(BUILD_DIR)/build.map include obj_files.mk -ifeq ($(EPILOGUE_PROCESS),1) -include e_files.mk -endif O_FILES := $(GROUP_0_FILES) $(SYSBOOTUP) $(JAUDIO) $(HVQM4DEC) $(SYSCOMMON) $(SYSDOLPHIN)\ $(COLIN) $(KANDO) $(NAKATA) $(NISHIMURA) $(OGAWA) $(YAMASHITA)\ $(BASE) $(OS) $(DB) $(MTX) $(DVD) $(VI) $(PAD) $(AI) $(AR) $(DSP)\ $(CARD) $(HIO) $(GX) $(RUNTIME) $(MSL_C) $(TRK_MINNOW_DOLPHIN)\ $(AMCEXI2) $(AMCNOTSTUB) $(ODEMUEXI2) $(ODENOTSTUB) -ifeq ($(EPILOGUE_PROCESS),1) -E_FILES := $(EPILOGUE_UNSCHEDULED) -endif #------------------------------------------------------------------------------- # Tools #------------------------------------------------------------------------------- MWCC_VERSION := 1.2.5 -ifeq ($(EPILOGUE_PROCESS),1) -MWCC_EPI_VERSION := 1.2.5e -MWCC_EPI_EXE := mwcceppc.exe -endif MWLD_VERSION := 1.1 +# Compiler versions and flags +$(COLIN): MWCC_VERSION := 1.2.5n +$(KANDO): MWCC_VERSION := 1.2.5n +$(NAKATA): MWCC_VERSION := 1.2.5n +$(NISHIMURA): MWCC_VERSION := 1.2.5n +$(OGAWA): MWCC_VERSION := 1.2.5n +$(YAMASHITA): MWCC_VERSION := 1.2.5n + # Programs ifeq ($(WINDOWS),1) WINE := AS := $(DEVKITPPC)/bin/powerpc-eabi-as.exe PYTHON := python else - WINE ?= wine + WIBO := $(shell command -v wibo 2> /dev/null) + ifdef WIBO + WINE ?= wibo + else + WINE ?= wine + endif AS := $(DEVKITPPC)/bin/powerpc-eabi-as PYTHON := python3 endif -CC = $(WINE) tools/mwcc_compiler/$(MWCC_VERSION)/mwcceppc.exe -ifeq ($(EPILOGUE_PROCESS),1) -CC_EPI = $(WINE) tools/mwcc_compiler/$(MWCC_EPI_VERSION)/$(MWCC_EPI_EXE) -endif -LD := $(WINE) tools/mwcc_compiler/$(MWLD_VERSION)/mwldeppc.exe +COMPILERS ?= tools/mwcc_compiler +CC = $(WINE) $(COMPILERS)/$(MWCC_VERSION)/mwcceppc.exe +LD := $(WINE) $(COMPILERS)/$(MWLD_VERSION)/mwldeppc.exe DTK := tools/dtk ELF2DOL := $(DTK) elf2dol SHASUM := $(DTK) shasum -FRANK := tools/frank.py - # Options INCLUDES := -i include/ ASM_INCLUDES := -I include/ @@ -106,18 +99,10 @@ default: all all: $(DOL) ALL_DIRS := $(sort $(dir $(O_FILES))) -ifeq ($(EPILOGUE_PROCESS),1) -EPI_DIRS := $(sort $(dir $(E_FILES))) -endif # Make sure build directory exists before compiling anything DUMMY != mkdir -p $(ALL_DIRS) -ifeq ($(EPILOGUE_PROCESS),1) -# Make sure profile directory exists before compiling anything -DUMMY != mkdir -p $(EPI_DIRS) -endif - .PHONY: tools # DOL creation makefile instructions @@ -129,24 +114,16 @@ $(DOL): $(ELF) | $(DTK) clean: rm -f -d -r build - rm -f -d -r epilogue $(DTK): tools/dtk_version @echo "Downloading $@" $(QUIET) $(PYTHON) tools/download_dtk.py $< $@ # ELF creation makefile instructions -ifeq ($(EPILOGUE_PROCESS),1) - @echo Linking ELF $@ -$(ELF): $(O_FILES) $(E_FILES) $(LDSCRIPT) - $(QUIET) @echo $(O_FILES) > build/o_files - $(QUIET) $(LD) $(LDFLAGS) -o $@ -lcf $(LDSCRIPT) @build/o_files -else $(ELF): $(O_FILES) $(LDSCRIPT) @echo Linking ELF $@ $(QUIET) @echo $(O_FILES) > build/o_files $(QUIET) $(LD) $(LDFLAGS) -o $@ -lcf $(LDSCRIPT) @build/o_files -endif $(BUILD_DIR)/%.o: %.s @echo Assembling $< @@ -164,26 +141,6 @@ $(BUILD_DIR)/%.o: %.cpp @echo "Compiling " $< $(QUIET) $(CC) $(CFLAGS) -c -o $@ $< -ifeq ($(EPILOGUE_PROCESS),1) -$(EPILOGUE_DIR)/%.o: %.c $(BUILD_DIR)/%.o - @echo Frank is fixing $< - $(QUIET) $(CC_EPI) $(CFLAGS) -c -o $@ $< - $(QUIET) $(PYTHON) $(FRANK) $(word 2,$^) $@ $(word 2,$^) - $(QUIET) touch $@ - -$(EPILOGUE_DIR)/%.o: %.cp $(BUILD_DIR)/%.o - @echo Frank is fixing $< - $(QUIET) $(CC_EPI) $(CFLAGS) -c -o $@ $< - $(QUIET) $(PYTHON) $(FRANK) $(word 2,$^) $@ $(word 2,$^) - $(QUIET) touch $@ - -$(EPILOGUE_DIR)/%.o: %.cpp $(BUILD_DIR)/%.o - @echo Frank is fixing $< - $(QUIET) $(CC_EPI) $(CFLAGS) -c -o $@ $< - $(QUIET) $(PYTHON) $(FRANK) $(word 2,$^) $@ $(word 2,$^) - $(QUIET) touch $@ -endif - ### Debug Print ### print-% : ; $(info $* is a $(flavor $*) variable set to [$($*)]) @true diff --git a/README.MD b/README.MD index c5e52bf7..8fc35013 100644 --- a/README.MD +++ b/README.MD @@ -16,11 +16,6 @@ It builds the following DOL: pikmin.usa.1.dol: `sha1: 02204260B7EFE8742D34572E58BA3DFECD92E4E9` ## Building -This edited compiler modifies the epilogue in such a way as to approximate older scheduling models. - -In this case, the epilogue should remain unscheduled. - -tools/frank.py cleans up the output. ### Required Tools * [devkitPro](https://devkitpro.org/wiki/Getting_Started) diff --git a/e_files.mk b/e_files.mk deleted file mode 100644 index a1da5edb..00000000 --- a/e_files.mk +++ /dev/null @@ -1,8 +0,0 @@ -# Files with unscheduled epilogues. - -EPILOGUE_UNSCHEDULED:=\ - $(EPILOGUE_DIR)/src/plugPikiNakata/tekievent.o\ - $(EPILOGUE_DIR)/src/plugPikiYamashita/TAIanimation.o\ - $(EPILOGUE_DIR)/src/plugPikiYamashita/ptclGenPack.o\ - $(EPILOGUE_DIR)/src/plugPikiKando/objectTypes.o\ - $(EPILOGUE_DIR)/src/plugPikiKando/globalShapes.o\ diff --git a/tools/frank.py b/tools/frank.py deleted file mode 100644 index 07c53394..00000000 --- a/tools/frank.py +++ /dev/null @@ -1,221 +0,0 @@ -#! /usr/bin/env python3 - -# Written by Ethan Roseman (ethteck) -# MIT License -# Copyright 2021 - -# Modified by EpochFlame - -import argparse -import sys - -# Byte sequence that marks code size -CODESIZE_MAGIC = b"\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x34" -BLR_BYTE_SEQ = b"\x4E\x80\x00\x20" -MTLR_BYTE_SEQ = b"\x7C\x08\x03\xA6" -PROFILE_EXTRA_BYTES = b"\x48\x00\x00\x01\x60\x00\x00\x00" - -LWZ_BYTE = b"\x80" - -# Byte sequence array for branches to link register -BLR_BYTE_SEQ_ARRAY = [BLR_BYTE_SEQ, -b"\x4D\x80\x00\x20", b"\x4D\x80\x00\x21", b"\x4C\x81\x00\x20", b"\x4C\x81\x00\x21", -b"\x4D\x82\x00\x20", b"\x4D\x82\x00\x21", b"\x4C\x80\x00\x20", b"\x4C\x80\x00\x21", -b"\x4D\x81\x00\x20", b"\x4D\x81\x00\x21", b"\x4C\x80\x00\x20", b"\x4C\x80\x00\x21", -b"\x4C\x82\x00\x20", b"\x4C\x82\x00\x21", b"\x4C\x81\x00\x20", b"\x4C\x81\x00\x21", -b"\x4D\x83\x00\x20", b"\x4D\x83\x00\x21", b"\x4C\x83\x00\x20", b"\x4C\x83\x00\x21", -b"\x4D\x83\x00\x20", b"\x4D\x83\x00\x21", b"\x4C\x83\x00\x20", b"\x4C\x83\x00\x21"] - -# Example invocation: ./frank.py vanilla.o profile.o output.o -parser = argparse.ArgumentParser() -parser.add_argument("vanilla", help="Path to the vanilla object", type=argparse.FileType('rb')) -parser.add_argument("profile", help="Path to the profile object", type=argparse.FileType('rb')) -parser.add_argument("target", help="Path to the target object (to write)") - -args = parser.parse_args() - -# Read contents into bytearrays and close files -vanilla_bytes = args.vanilla.read() -args.vanilla.close() - -# If the file contains no code, the codesize magic will not be found. -# The vanilla object requires no modification. -code_size_magic_idx = vanilla_bytes.find(CODESIZE_MAGIC) -if code_size_magic_idx == -1: - with open(args.target, "wb") as f: - f.write(vanilla_bytes) - sys.exit(0) - -profile_bytes = args.profile.read() -args.profile.close() - -# Peephole rescheduling -# -# This is the pattern we will detect: -# (A) lwz <--. .--> (A) li -# (B) li <---\-' bl -# \ nop -# '---> (B) lwz -# -# If the profiled schedule swaps the -# instructions around the bl/nop, we -# instead use the vanilla schedule. -# -idx = 8 -shift = 0 # difference between vanilla and profile code, due to bl/nops -while idx < len(profile_bytes) - 16: - # Find next epilogue - epi_pos = profile_bytes.find(PROFILE_EXTRA_BYTES, idx) - if epi_pos == -1: - break # break while loop when no targets remain - if epi_pos % 4 != 0: # check 4-byte alignment - idx += 4 - continue - - v_pos = epi_pos - shift - shift += 8 - - vanilla_inst_a = vanilla_bytes[v_pos-4:v_pos] - vanilla_inst_b = vanilla_bytes[v_pos:v_pos+4] - vanilla_inst_c = vanilla_bytes[v_pos+4:v_pos+8] - profile_inst_a = profile_bytes[epi_pos-4:epi_pos] - profile_inst_b = profile_bytes[epi_pos+8:epi_pos+12] - profile_inst_c = profile_bytes[epi_pos+12:epi_pos+16] - - # Instruction decoding - as_int = lambda x: int.from_bytes(x, "big") - RT = lambda x: (as_int(x) >> 21) & 0x1F - RA = lambda x: (as_int(x) >> 16) & 0x1F - - opcode_a = vanilla_inst_a[0] >> 2 - opcode_b = vanilla_inst_b[0] >> 2 - opcode_c = vanilla_inst_c[0] >> 2 - - LWZ = 0x80 >> 2 - LFS = 0xC0 >> 2 - ADDI = 0x38 >> 2 - LI = ADDI # an LI instruction is just an ADDI with RA=0 - LMW = 0xB8 >> 2 - FDIVS = 0xEC >> 2 - - if opcode_a == LWZ and \ - opcode_b in [LI, LFS, FDIVS] and \ - vanilla_inst_a == profile_inst_b and \ - vanilla_inst_b == profile_inst_a and \ - vanilla_inst_c == profile_inst_c and \ - not (opcode_a == LWZ and RA(vanilla_inst_a) == 1 and - opcode_b == ADDI and RA(vanilla_inst_b) != 0) and \ - opcode_c != ADDI: # <- don't reorder if at the very end of the epilogue - - # Swap instructions (A) and (B) - profile_bytes = profile_bytes[:epi_pos-4] \ - + vanilla_inst_a \ - + PROFILE_EXTRA_BYTES \ - + vanilla_inst_b \ - + profile_bytes[epi_pos+12:] - - # Similar reordering for lwz/lmw, except both insns follow the bl/nop - elif opcode_b == LWZ and \ - opcode_c == LMW and \ - vanilla_inst_b == profile_inst_c and \ - vanilla_inst_c == profile_inst_b: - - profile_bytes = profile_bytes[:epi_pos+8] \ - + vanilla_inst_b \ - + vanilla_inst_c \ - + profile_bytes[epi_pos+16:] - - idx = epi_pos + 8 - -# Remove byte sequence -stripped_bytes = profile_bytes.replace(PROFILE_EXTRA_BYTES, b"") - -# Find end of code sections in vanilla and stripped bytes -code_size_offset = code_size_magic_idx + len(CODESIZE_MAGIC) -code_size_bytes = vanilla_bytes[code_size_offset:code_size_offset+4] -code_size = int.from_bytes(code_size_bytes, byteorder='big') - -eoc_offset = 0x34 + code_size - -# Break if the eoc is not found -assert(eoc_offset != len(vanilla_bytes)) - -# Replace 0x34 - eoc in vanilla with bytes from stripped -final_bytes = vanilla_bytes[:0x34] + stripped_bytes[0x34:eoc_offset] + vanilla_bytes[eoc_offset:] - -# Fix branches to link register -for seq in BLR_BYTE_SEQ_ARRAY: - idx = 0 - - while idx < len(vanilla_bytes): - found_pos = vanilla_bytes.find(seq, idx) - if found_pos == -1: - break # break while loop when no targets remain - if found_pos % 4 != 0: # check 4-byte alignment - idx += 4 - continue - final_bytes = final_bytes[:found_pos] + vanilla_bytes[found_pos:found_pos+4] + final_bytes[found_pos+4:] - idx = found_pos + len(seq) - -# Reunify mtlr/blr instructions, shifting intermediary instructions up -idx = 0 - -while idx < len(final_bytes): - # Find mtlr position - mtlr_found_pos = final_bytes.find(MTLR_BYTE_SEQ, idx) - if mtlr_found_pos == -1: - break # break while loop when no targets remain - if mtlr_found_pos % 4 != 0: # check 4-byte alignment - idx += 4 - continue - # Find paired blr position - blr_found_pos = final_bytes.find(BLR_BYTE_SEQ, mtlr_found_pos) - if blr_found_pos == -1: - break # break while loop when no targets remain - if blr_found_pos % 4 != 0: # check 4-byte alignment - idx += 4 - continue - if mtlr_found_pos + 4 == blr_found_pos: - idx += 4 - continue # continue if mtlr is followed directly by blr - - final_bytes = final_bytes[:mtlr_found_pos] + final_bytes[mtlr_found_pos+4:blr_found_pos] + final_bytes[mtlr_found_pos:mtlr_found_pos+4] + final_bytes[blr_found_pos:] - idx = mtlr_found_pos + len(MTLR_BYTE_SEQ) - -# Reorder lmw/lwz/lfd instructions, if needed (@Altafen) -# Specifically, if this sequence shows up in the stripped profiler code: "LMW, LWZ, LFD*" -# And this sequence shows up in the vanilla code: "LWZ, LFD*, LMW" -# (LFD* = any number of LFDs, including zero) -# If all bytes match between the two (except for the reordering), then use the vanilla ordering. -# This could be written to anchor around the "BL, NOP" instructions in unstripped profiler code, -# or to check for the presence of "ADDI, MTLR, BLR" soon after. -# This also could be written to decode the operands of each instruction to make sure the reorder is harmless. -# Neither of these safeguards are necessary at the moment. -LWZ = 32 -LMW = 46 -LFD = 50 -idx = 0 -while idx+4 < len(final_bytes): - if final_bytes[idx] >> 2 == LMW and final_bytes[idx+4] >> 2 == LWZ and vanilla_bytes[idx] >> 2 == LWZ: - start_idx = idx - lmw_bytes = final_bytes[idx:idx+4] - lwz_bytes = final_bytes[idx+4:idx+8] - if vanilla_bytes[idx:idx+4] != lwz_bytes: - idx += 4 - continue - lfd_bytes = b"" - idx += 4 - while vanilla_bytes[idx] >> 2 == LFD: - lfd_bytes += vanilla_bytes[idx:idx+4] - idx += 4 - if vanilla_bytes[idx:idx+4] != lmw_bytes: - continue - if final_bytes[start_idx+8:start_idx+8+len(lfd_bytes)] != lfd_bytes: - continue - idx += 4 - final_bytes = final_bytes[:start_idx] + lwz_bytes + lfd_bytes + lmw_bytes + final_bytes[idx:] - continue - idx += 4 - -with open(args.target, "wb") as f: - f.write(final_bytes) diff --git a/tools/mwcc_compiler/.gitkeep b/tools/mwcc_compiler/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tools/postprocess.py b/tools/postprocess.py deleted file mode 100644 index 190d17f9..00000000 --- a/tools/postprocess.py +++ /dev/null @@ -1,348 +0,0 @@ -#!/usr/bin/env python3 - -BANNER = """ -# This script is the culmination of three patches supporting decompilation -# with the CodeWarrior compiler. -# - riidefi, 2020 -# -# postprocess.py [args] file -# -# 1) Certain versions have a bug where the ctor alignment is ignored and set incorrectly. -# This option is enabled with -fctor-realign, and disabled by default with -fno-ctor-realign -# -# 2) Certain C++ symbols cannot be assembled normally. -# To support the buildsystem, a simple substitution system has been devised -# -# ? -> CHAR -# -# IDs (all irregular symbols in mangled names): -# 0: < -# 1: > -# 2: @ -# 3: \\ -# 4: , -# 5: - -# -# This option is enabled with -fsymbol-fixup, and disabled by default with -fno-symbol-fixup -# -# 3) CodeWarrior versions below 2.3 used a different scheduler model. -# The script can currently adjust function epilogues with the old_stack option. -# -fepilogue-fixup=[default=none, none, old_stack] -""" - -import struct - -# Substitutions -substitutions = ( - ('<', '?0'), - ('>', '?1'), - ('@', '?2'), - ('\\', '?3'), - (',', '?4'), - ('-', '?5') -) - -def format(symbol): - for sub in substitutions: - symbol = symbol.replace(sub[0], sub[1]) - - return symbol - -def decodeformat(symbol): - for sub in substitutions: - symbol = symbol.replace(sub[1], sub[0]) - - return symbol - -# Stream utilities - -def read_u8(f): - return struct.unpack("B", f.read(1))[0] - -def read_u32(f): - return struct.unpack(">I", f.read(4))[0] - -def read_u16(f): - return struct.unpack(">H", f.read(2))[0] - -def write_u32(f, val): - f.write(struct.pack(">I", val)) - -class ToReplace: - def __init__(self, position, dest, src_size): - self.position = position # Where in file - self.dest = dest # String to patch - self.src_size = src_size # Pad rest with zeroes - - # print("To replace: %s %s %s" % (self.position, self.dest, self.src_size)) - -def read_string(f): - tmp = "" - c = 0xff - while c != 0x00: - c = read_u8(f) - if c != 0: - tmp += chr(c) - return tmp - -def ctor_realign(f, ofsSecHeader, nSecHeader, idxSegNameSeg): - patch_align_ofs = [] - - for i in range(nSecHeader): - f.seek(ofsSecHeader + i * 0x28) - ofsname = read_u32(f) - if not ofsname: continue - - back = f.tell() - - f.seek(ofsSecHeader + (idxSegNameSeg * 0x28) + 0x10) - ofsShST = read_u32(f) - f.seek(ofsShST + ofsname) - name = read_string(f) - if name == ".ctors" or name == ".dtors": - patch_align_ofs.append(ofsSecHeader + i * 0x28 + 0x20) - - f.seek(back) - - return patch_align_ofs - -SHT_PROGBITS = 1 -SHT_STRTAB = 3 - -def impl_postprocess_elf(f, do_ctor_realign, do_old_stack, do_symbol_fixup): - result = [] - - f.seek(0x20) - ofsSecHeader = read_u32(f) - f.seek(0x30) - nSecHeader = read_u16(f) - idxSegNameSeg = read_u16(f) - secF = False # First instance the section names - - # Header: 0x32: - patch_align_ofs = [] - - if do_ctor_realign: - patch_align_ofs = ctor_realign(f, ofsSecHeader, nSecHeader, idxSegNameSeg) - - for i in range(nSecHeader): - f.seek(ofsSecHeader + i * 0x28) - sh_name = read_u32(f) - sh_type = read_u32(f) - - if sh_type == SHT_STRTAB and do_symbol_fixup: - if not secF: - secF = True - continue - f.seek(ofsSecHeader + i * 0x28 + 0x10) - ofs = read_u32(f) - size = read_u32(f) - - f.seek(ofs) - string = "" - str_spos = ofs - for i in range(ofs, ofs+size): - c = read_u8(f) - if c == 0: - if len(string): - fixed = decodeformat(string) - if fixed != string: - result.append(ToReplace(str_spos, fixed, len(string))) - string = "" - str_spos = i+1 - else: - string += chr(c) - else: - f.seek(ofsSecHeader + (idxSegNameSeg * 0x28) + 0x10) - ofsShST = read_u32(f) - f.seek(ofsShST + sh_name) - name = read_string(f) - - if name == ".text" and do_old_stack: - f.seek(ofsSecHeader + i * 0x28 + 0x10) - ofs = read_u32(f) - size = read_u32(f) - - # We assume - # 1) Only instructions are in the .text section - # 2) These instructions are 4-byte aligned - assert ofs != 0 - assert ofs % 4 == 0 - assert size % 4 == 0 - - f.seek(ofs) - - mtlr_pos = 0 - addi_pos = 0 - lwz_pos = 0 - mr_pos = 0 - - # (mtlr position, blr position) - epilogues = [] - - # (lwz position, mr position) - mr_epilogues = [] - - for _ in range(ofs, ofs+size, 4): - it = f.tell() - instr = read_u32(f) - - # Skip padding - if instr == 0: continue - - # Call analysis is not actually required - # No mtlr will exist without a blr; mtctr/bctr* is used for dynamic dispatch - - # FUN_A: - # li r3, 0 - # blr <---- No mtlr, move onto the next function - # FUN_B: - # ; complex function, stack manip - # mtlr r0 <---- Expect a blr - # addi r1, r1, 24 - # blr <---- Confirm patch above - - # mtlr alias for mtspr - if instr == 0x7C0803A6: - assert mtlr_pos == 0 - mtlr_pos = it - elif instr == 0x38210018: # addi r1, r1, 0x18 - if mtlr_pos == 0: - addi_pos = it - elif instr & 0xFFFFFF00 == 0x80010000 and instr & 0xFF > 0: # lwz r0, N(r1) - assert lwz_pos == 0 - lwz_pos = it - elif (instr == 0x7F83E378 or instr == 0x7FE3FB78 or instr == 0x7FC3F378 or instr == 0x7FA3EB78) and it == lwz_pos + 4: # mr r3, r28 || mr r3, r29 || mr r3, r30 || mr r3, r31 - mr_pos = it - # blr - elif instr == 0x4E800020: - if mtlr_pos: - epilogues.append((mtlr_pos, it)) - if addi_pos and mtlr_pos == 0: - epilogues.append((addi_pos, it)) - if lwz_pos and mr_pos: - mr_epilogues.append((lwz_pos, mr_pos)) - mtlr_pos = 0 - addi_pos = 0 - lwz_pos = 0 - mr_pos = 0 - - # Reunify mtlr/blr instructions, shifting intermediary instructions up - for mtlr_pos, blr_pos in epilogues: - # Check if we need to do anything - if mtlr_pos + 4 == blr_pos: continue - - # As the processor can only hold 6 instructions at once in the pipeline, - # it's unlikely for the mtlr be shifted up more instructions than that--usually, - # only one: - # mtlr r0 - # addi r1, r1, 24 - # blr - assert blr_pos - 4 > mtlr_pos - assert blr_pos - mtlr_pos <= 7 * 4 - - print("Patching old epilogue: %s %s" % (mtlr_pos, blr_pos)) - - f.seek(mtlr_pos) - mtlr = read_u32(f) - - for it in range(mtlr_pos, blr_pos - 4, 4): - f.seek(it + 4) - next_instr = read_u32(f) - f.seek(it) - write_u32(f, next_instr) - - f.seek(blr_pos - 4) - write_u32(f, mtlr) - - # Swap the lwz and mr instruction that are incorrectly scheduled - for lwz_pos, mr_pos in mr_epilogues: - print("Patching old lwz/mr epilogue: %s %s" % (lwz_pos, mr_pos)) - - f.seek(lwz_pos) - lwz = read_u32(f) - f.seek(mr_pos) - mr = read_u32(f) - f.seek(mr_pos) - write_u32(f, lwz) - f.seek(lwz_pos) - write_u32(f, mr) - - return (result, patch_align_ofs) - -def postprocess_elf(f, do_ctor_realign, do_old_stack, do_symbol_fixup): - patches = impl_postprocess_elf(f, do_ctor_realign, do_old_stack, do_symbol_fixup) - - f.seek(0) - source_bytes = list(f.read()) - for patch in patches[0]: - assert len(patch.dest) <= patch.src_size - for j in range(patch.src_size): - if j >= len(patch.dest): - c = 0 - else: - c = ord(patch.dest[j]) - source_bytes[patch.position + j] = c - - # Patch ctor align - nP = 0 - for p in patches[1]: - print("Patching ctors") - source_bytes[p + 0] = 0 - source_bytes[p + 1] = 0 - source_bytes[p + 2] = 0 - source_bytes[p + 3] = 4 - nP += 1 - if nP > 1: - print("Patched ctors + dtors") - - f.seek(0) - f.write(bytes(source_bytes)) - -def frontend(args): - inplace = "" - do_ctor_realign = False - do_old_stack = False - do_symbol_fixup = False - - for arg in args: - if arg.startswith('-f'): - negated = False - if arg.startswith('-fno-'): - negated = True - arg = arg[len('-fno-'):] - else: - arg = arg[len('-f'):] - - if arg == 'ctor_realign': - do_ctor_realign = not negated - elif arg == 'symbol-fixup': - do_symbol_fixup = not negated - elif arg.startswith('epilogue-fixup='): - do_old_stack = arg[len('epilogue-fixup='):] == 'old_stack' - else: - print("Unknown argument: %s" % arg) - elif arg.startswith('-'): - print("Unknown argument: %s. Perhaps you meant -f%s?" % (arg, arg)) - else: - if inplace: - print("Cannot process %s. Only one source file may be specified." % arg) - else: - inplace = arg - - if not inplace: - print("A file must be specified!") - return - - try: - postprocess_elf(open(inplace, 'rb+'), do_ctor_realign, do_old_stack, do_symbol_fixup) - except FileNotFoundError: - print("Cannot open file %s" % inplace) - -if __name__ == "__main__": - import sys - - if len(sys.argv) < 2: - print(BANNER) - else: - frontend(sys.argv[1:])