Use 1.2.5n, delete frank & update CI

This commit is contained in:
Luke Street 2023-07-15 10:43:35 -04:00 committed by Hunter Shelton
parent 2d232f7ce0
commit 792a92c3a3
8 changed files with 36 additions and 661 deletions

View File

@ -6,22 +6,22 @@ on:
jobs:
build:
name: Build
runs-on: ubuntu-latest
container: devkitpro/devkitppc:latest
env:
WINEPREFIX: ${{github.workspace}}/.wine
container: ghcr.io/projectpiki/build:main
strategy:
fail-fast: false
matrix:
version: [usa.1]
steps:
- name: Install devkitPro
run: |
sudo dpkg --add-architecture i386
sudo apt-get update
sudo apt-get -y install build-essential wine32
sudo chown $(whoami) "$GITHUB_WORKSPACE"
- uses: actions/checkout@v3
- name: Download compilers
run: |
curl -L https://cdn.discordapp.com/attachments/727918646525165659/917185027656286218/GC_WII_COMPILERS.zip \
| bsdtar -xvf- -C tools --exclude Wii
mv tools/GC tools/mwcc_compiler
- name: make
run: make -j
- name: Checkout
uses: actions/checkout@v3
- name: Git config
run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
- name: Build
run: make -j$(nproc) VERSION=${{matrix.version}} COMPILERS=/compilers/GC
- name: Upload map
uses: actions/upload-artifact@v3
with:
name: pikmin-${{matrix.version}}.map
path: build/*/build.map

4
.gitignore vendored
View File

@ -20,8 +20,8 @@
*.7z
*.bat
build
epilogue
tools/mwcc_compiler/
tools/mwcc_compiler/*
!tools/mwcc_compiler/.gitkeep
*.sln
*.vcxproj
*.user

View File

@ -19,13 +19,7 @@ NAME := pikmin
VERSION := usa.1
#VERSION := usa.0
# Overkill epilogue fixup strategy. Set to 1 if necessary.
EPILOGUE_PROCESS := 1
BUILD_DIR := build/$(NAME).$(VERSION)
ifeq ($(EPILOGUE_PROCESS),1)
EPILOGUE_DIR := epilogue/$(NAME).$(VERSION)
endif
# Inputs
S_FILES := $(wildcard asm/*.s)
@ -40,50 +34,49 @@ ELF := $(DOL:.dol=.elf)
MAP := $(BUILD_DIR)/build.map
include obj_files.mk
ifeq ($(EPILOGUE_PROCESS),1)
include e_files.mk
endif
O_FILES := $(GROUP_0_FILES) $(SYSBOOTUP) $(JAUDIO) $(HVQM4DEC) $(SYSCOMMON) $(SYSDOLPHIN)\
$(COLIN) $(KANDO) $(NAKATA) $(NISHIMURA) $(OGAWA) $(YAMASHITA)\
$(BASE) $(OS) $(DB) $(MTX) $(DVD) $(VI) $(PAD) $(AI) $(AR) $(DSP)\
$(CARD) $(HIO) $(GX) $(RUNTIME) $(MSL_C) $(TRK_MINNOW_DOLPHIN)\
$(AMCEXI2) $(AMCNOTSTUB) $(ODEMUEXI2) $(ODENOTSTUB)
ifeq ($(EPILOGUE_PROCESS),1)
E_FILES := $(EPILOGUE_UNSCHEDULED)
endif
#-------------------------------------------------------------------------------
# Tools
#-------------------------------------------------------------------------------
MWCC_VERSION := 1.2.5
ifeq ($(EPILOGUE_PROCESS),1)
MWCC_EPI_VERSION := 1.2.5e
MWCC_EPI_EXE := mwcceppc.exe
endif
MWLD_VERSION := 1.1
# Compiler versions and flags
$(COLIN): MWCC_VERSION := 1.2.5n
$(KANDO): MWCC_VERSION := 1.2.5n
$(NAKATA): MWCC_VERSION := 1.2.5n
$(NISHIMURA): MWCC_VERSION := 1.2.5n
$(OGAWA): MWCC_VERSION := 1.2.5n
$(YAMASHITA): MWCC_VERSION := 1.2.5n
# Programs
ifeq ($(WINDOWS),1)
WINE :=
AS := $(DEVKITPPC)/bin/powerpc-eabi-as.exe
PYTHON := python
else
WINE ?= wine
WIBO := $(shell command -v wibo 2> /dev/null)
ifdef WIBO
WINE ?= wibo
else
WINE ?= wine
endif
AS := $(DEVKITPPC)/bin/powerpc-eabi-as
PYTHON := python3
endif
CC = $(WINE) tools/mwcc_compiler/$(MWCC_VERSION)/mwcceppc.exe
ifeq ($(EPILOGUE_PROCESS),1)
CC_EPI = $(WINE) tools/mwcc_compiler/$(MWCC_EPI_VERSION)/$(MWCC_EPI_EXE)
endif
LD := $(WINE) tools/mwcc_compiler/$(MWLD_VERSION)/mwldeppc.exe
COMPILERS ?= tools/mwcc_compiler
CC = $(WINE) $(COMPILERS)/$(MWCC_VERSION)/mwcceppc.exe
LD := $(WINE) $(COMPILERS)/$(MWLD_VERSION)/mwldeppc.exe
DTK := tools/dtk
ELF2DOL := $(DTK) elf2dol
SHASUM := $(DTK) shasum
FRANK := tools/frank.py
# Options
INCLUDES := -i include/
ASM_INCLUDES := -I include/
@ -106,18 +99,10 @@ default: all
all: $(DOL)
ALL_DIRS := $(sort $(dir $(O_FILES)))
ifeq ($(EPILOGUE_PROCESS),1)
EPI_DIRS := $(sort $(dir $(E_FILES)))
endif
# Make sure build directory exists before compiling anything
DUMMY != mkdir -p $(ALL_DIRS)
ifeq ($(EPILOGUE_PROCESS),1)
# Make sure profile directory exists before compiling anything
DUMMY != mkdir -p $(EPI_DIRS)
endif
.PHONY: tools
# DOL creation makefile instructions
@ -129,24 +114,16 @@ $(DOL): $(ELF) | $(DTK)
clean:
rm -f -d -r build
rm -f -d -r epilogue
$(DTK): tools/dtk_version
@echo "Downloading $@"
$(QUIET) $(PYTHON) tools/download_dtk.py $< $@
# ELF creation makefile instructions
ifeq ($(EPILOGUE_PROCESS),1)
@echo Linking ELF $@
$(ELF): $(O_FILES) $(E_FILES) $(LDSCRIPT)
$(QUIET) @echo $(O_FILES) > build/o_files
$(QUIET) $(LD) $(LDFLAGS) -o $@ -lcf $(LDSCRIPT) @build/o_files
else
$(ELF): $(O_FILES) $(LDSCRIPT)
@echo Linking ELF $@
$(QUIET) @echo $(O_FILES) > build/o_files
$(QUIET) $(LD) $(LDFLAGS) -o $@ -lcf $(LDSCRIPT) @build/o_files
endif
$(BUILD_DIR)/%.o: %.s
@echo Assembling $<
@ -164,26 +141,6 @@ $(BUILD_DIR)/%.o: %.cpp
@echo "Compiling " $<
$(QUIET) $(CC) $(CFLAGS) -c -o $@ $<
ifeq ($(EPILOGUE_PROCESS),1)
$(EPILOGUE_DIR)/%.o: %.c $(BUILD_DIR)/%.o
@echo Frank is fixing $<
$(QUIET) $(CC_EPI) $(CFLAGS) -c -o $@ $<
$(QUIET) $(PYTHON) $(FRANK) $(word 2,$^) $@ $(word 2,$^)
$(QUIET) touch $@
$(EPILOGUE_DIR)/%.o: %.cp $(BUILD_DIR)/%.o
@echo Frank is fixing $<
$(QUIET) $(CC_EPI) $(CFLAGS) -c -o $@ $<
$(QUIET) $(PYTHON) $(FRANK) $(word 2,$^) $@ $(word 2,$^)
$(QUIET) touch $@
$(EPILOGUE_DIR)/%.o: %.cpp $(BUILD_DIR)/%.o
@echo Frank is fixing $<
$(QUIET) $(CC_EPI) $(CFLAGS) -c -o $@ $<
$(QUIET) $(PYTHON) $(FRANK) $(word 2,$^) $@ $(word 2,$^)
$(QUIET) touch $@
endif
### Debug Print ###
print-% : ; $(info $* is a $(flavor $*) variable set to [$($*)]) @true

View File

@ -16,11 +16,6 @@ It builds the following DOL:
pikmin.usa.1.dol: `sha1: 02204260B7EFE8742D34572E58BA3DFECD92E4E9`
## Building
This edited compiler modifies the epilogue in such a way as to approximate older scheduling models.
In this case, the epilogue should remain unscheduled.
tools/frank.py cleans up the output.
### Required Tools
* [devkitPro](https://devkitpro.org/wiki/Getting_Started)

View File

@ -1,8 +0,0 @@
# Objects listed as having unscheduled epilogues.
# Every entry lives under $(EPILOGUE_DIR)/src/, so the common prefix is
# applied once with addprefix instead of being repeated on each line.
EPILOGUE_UNSCHEDULED := $(addprefix $(EPILOGUE_DIR)/src/,\
    plugPikiNakata/tekievent.o \
    plugPikiYamashita/TAIanimation.o \
    plugPikiYamashita/ptclGenPack.o \
    plugPikiKando/objectTypes.o \
    plugPikiKando/globalShapes.o)

View File

@ -1,221 +0,0 @@
#! /usr/bin/env python3
# Written by Ethan Roseman (ethteck)
# MIT License
# Copyright 2021
# Modified by EpochFlame
import argparse
import sys
# Byte sequence that marks code size (the 4 bytes following it are read as
# the big-endian code size later in the script).
CODESIZE_MAGIC = bytes.fromhex("000000060000000000000034")

# Fixed 4-byte patterns searched for in the object files.
BLR_BYTE_SEQ = bytes.fromhex("4E800020")
MTLR_BYTE_SEQ = bytes.fromhex("7C0803A6")
# 8-byte marker that the script locates in, and finally strips from, the
# profile object.
PROFILE_EXTRA_BYTES = bytes.fromhex("4800000160000000")
LWZ_BYTE = bytes.fromhex("80")

# Byte sequence array for branches to link register.
# The table intentionally keeps the original order and its repeated entries,
# so the resulting list is element-for-element identical.
BLR_BYTE_SEQ_ARRAY = [BLR_BYTE_SEQ] + [
    bytes.fromhex(word) for word in (
        "4D800020", "4D800021", "4C810020", "4C810021",
        "4D820020", "4D820021", "4C800020", "4C800021",
        "4D810020", "4D810021", "4C800020", "4C800021",
        "4C820020", "4C820021", "4C810020", "4C810021",
        "4D830020", "4D830021", "4C830020", "4C830021",
        "4D830020", "4D830021", "4C830020", "4C830021",
    )
]
# Example invocation: ./frank.py vanilla.o profile.o output.o
#
# "vanilla" and "profile" are opened for binary reading by argparse itself
# (argparse.FileType); "target" is only a path and is opened when written.
parser = argparse.ArgumentParser()
parser.add_argument("vanilla", help="Path to the vanilla object", type=argparse.FileType('rb'))
parser.add_argument("profile", help="Path to the profile object", type=argparse.FileType('rb'))
parser.add_argument("target", help="Path to the target object (to write)")
args = parser.parse_args()

# Read contents into bytearrays and close files
vanilla_bytes = args.vanilla.read()
args.vanilla.close()

# If the file contains no code, the codesize magic will not be found.
# The vanilla object requires no modification.
code_size_magic_idx = vanilla_bytes.find(CODESIZE_MAGIC)
if code_size_magic_idx == -1:
    # The profile object is not needed on this path, but argparse already
    # opened it, so release the handle before exiting.
    args.profile.close()
    with open(args.target, "wb") as f:
        f.write(vanilla_bytes)
    sys.exit(0)

profile_bytes = args.profile.read()
args.profile.close()
# Peephole rescheduling
#
# This is the pattern we will detect:
#   (A) lwz <--.   .--> (A) li
#   (B) li  <---\-'          bl
#                \           nop
#                 '---> (B) lwz
#
# If the profiled schedule swaps the
# instructions around the bl/nop, we
# instead use the vanilla schedule.
#

# Instruction decoding helpers. They do not depend on the loop state, so they
# are defined once here rather than on every iteration.
as_int = lambda x: int.from_bytes(x, "big")
RT = lambda x: (as_int(x) >> 21) & 0x1F
RA = lambda x: (as_int(x) >> 16) & 0x1F

# Opcode values, i.e. the top six bits of an instruction's first byte.
LWZ = 0x80 >> 2
LFS = 0xC0 >> 2
ADDI = 0x38 >> 2
LI = ADDI  # an LI instruction is just an ADDI with RA=0
LMW = 0xB8 >> 2
FDIVS = 0xEC >> 2

idx = 8
shift = 0  # difference between vanilla and profile code, due to bl/nops
while idx < len(profile_bytes) - 16:
    # Find next epilogue
    epi_pos = profile_bytes.find(PROFILE_EXTRA_BYTES, idx)
    if epi_pos == -1:
        break  # break while loop when no targets remain
    if epi_pos % 4 != 0:  # check 4-byte alignment
        idx += 4
        continue

    # Each marker seen so far added 8 bytes to the profile object, so the
    # matching vanilla offset lags behind by the accumulated shift.
    v_pos = epi_pos - shift
    shift += 8

    # Instruction before the marker position (a) and the two after it (b, c);
    # in the profile object b and c sit after the 8-byte marker.
    vanilla_inst_a = vanilla_bytes[v_pos-4:v_pos]
    vanilla_inst_b = vanilla_bytes[v_pos:v_pos+4]
    vanilla_inst_c = vanilla_bytes[v_pos+4:v_pos+8]
    profile_inst_a = profile_bytes[epi_pos-4:epi_pos]
    profile_inst_b = profile_bytes[epi_pos+8:epi_pos+12]
    profile_inst_c = profile_bytes[epi_pos+12:epi_pos+16]

    opcode_a = vanilla_inst_a[0] >> 2
    opcode_b = vanilla_inst_b[0] >> 2
    opcode_c = vanilla_inst_c[0] >> 2

    if opcode_a == LWZ and \
       opcode_b in [LI, LFS, FDIVS] and \
       vanilla_inst_a == profile_inst_b and \
       vanilla_inst_b == profile_inst_a and \
       vanilla_inst_c == profile_inst_c and \
       not (opcode_a == LWZ and RA(vanilla_inst_a) == 1 and
            opcode_b == ADDI and RA(vanilla_inst_b) != 0) and \
       opcode_c != ADDI:  # <- don't reorder if at the very end of the epilogue
        # Swap instructions (A) and (B)
        profile_bytes = profile_bytes[:epi_pos-4] \
                        + vanilla_inst_a \
                        + PROFILE_EXTRA_BYTES \
                        + vanilla_inst_b \
                        + profile_bytes[epi_pos+12:]

    # Similar reordering for lwz/lmw, except both insns follow the bl/nop
    elif opcode_b == LWZ and \
         opcode_c == LMW and \
         vanilla_inst_b == profile_inst_c and \
         vanilla_inst_c == profile_inst_b:
        profile_bytes = profile_bytes[:epi_pos+8] \
                        + vanilla_inst_b \
                        + vanilla_inst_c \
                        + profile_bytes[epi_pos+16:]

    # Resume the search just past this marker.
    idx = epi_pos + 8
# Drop every occurrence of the profiler marker from the profile object.
stripped_bytes = profile_bytes.replace(PROFILE_EXTRA_BYTES, b"")

# The code size is the big-endian 32-bit value stored directly after the
# codesize magic in the vanilla object; the code itself starts at 0x34.
code_size_offset = len(CODESIZE_MAGIC) + code_size_magic_idx
code_size_bytes = vanilla_bytes[code_size_offset:4 + code_size_offset]
code_size = int.from_bytes(code_size_bytes, byteorder='big')
eoc_offset = code_size + 0x34

# Break if the eoc is not found
assert not (eoc_offset == len(vanilla_bytes))

# Keep vanilla's first 0x34 bytes and everything from eoc onward; take the
# bytes in between (0x34 .. eoc) from the stripped profile object.
final_bytes = b"".join((
    vanilla_bytes[:0x34],
    stripped_bytes[0x34:eoc_offset],
    vanilla_bytes[eoc_offset:],
))
# Fix branches to link register
#
# For every 4-byte-aligned occurrence of a BLR_BYTE_SEQ_ARRAY entry in the
# vanilla object, copy vanilla's 4 bytes over the same offset in final_bytes.
for seq in BLR_BYTE_SEQ_ARRAY:
    idx = 0

    while idx < len(vanilla_bytes):
        found_pos = vanilla_bytes.find(seq, idx)
        if found_pos == -1:
            break  # break while loop when no targets remain
        if found_pos % 4 != 0:  # check 4-byte alignment
            # Unaligned hit: not a whole instruction, step forward and retry.
            idx += 4
            continue
        final_bytes = final_bytes[:found_pos] + vanilla_bytes[found_pos:found_pos+4] + final_bytes[found_pos+4:]
        idx = found_pos + len(seq)
# Reunify mtlr/blr instructions, shifting intermediary instructions up
#
# For each aligned MTLR_BYTE_SEQ, find the next aligned BLR_BYTE_SEQ after it
# and, when other bytes sit between the two, move the mtlr so that it
# immediately precedes the blr.
idx = 0

while idx < len(final_bytes):
    # Find mtlr position
    mtlr_found_pos = final_bytes.find(MTLR_BYTE_SEQ, idx)
    if mtlr_found_pos == -1:
        break  # break while loop when no targets remain
    if mtlr_found_pos % 4 != 0:  # check 4-byte alignment
        idx += 4
        continue

    # Find paired blr position
    blr_found_pos = final_bytes.find(BLR_BYTE_SEQ, mtlr_found_pos)
    if blr_found_pos == -1:
        break  # break while loop when no targets remain
    if blr_found_pos % 4 != 0:  # check 4-byte alignment
        idx += 4
        continue

    if mtlr_found_pos + 4 == blr_found_pos:
        idx += 4
        continue  # continue if mtlr is followed directly by blr

    # Cut the mtlr out, slide the intermediary bytes up by one slot and
    # re-insert the mtlr directly in front of the blr.
    final_bytes = final_bytes[:mtlr_found_pos] + final_bytes[mtlr_found_pos+4:blr_found_pos] + final_bytes[mtlr_found_pos:mtlr_found_pos+4] + final_bytes[blr_found_pos:]
    idx = mtlr_found_pos + len(MTLR_BYTE_SEQ)
# Reorder lmw/lwz/lfd instructions, if needed (@Altafen)
# Specifically, if this sequence shows up in the stripped profiler code: "LMW, LWZ, LFD*"
# And this sequence shows up in the vanilla code: "LWZ, LFD*, LMW"
# (LFD* = any number of LFDs, including zero)
# If all bytes match between the two (except for the reordering), then use the vanilla ordering.
# This could be written to anchor around the "BL, NOP" instructions in unstripped profiler code,
# or to check for the presence of "ADDI, MTLR, BLR" soon after.
# This also could be written to decode the operands of each instruction to make sure the reorder is harmless.
# Neither of these safeguards are necessary at the moment.

# Opcode values (first byte of the instruction shifted right by two).
LWZ = 32
LMW = 46
LFD = 50

idx = 0
while idx+4 < len(final_bytes):
    if final_bytes[idx] >> 2 == LMW and final_bytes[idx+4] >> 2 == LWZ and vanilla_bytes[idx] >> 2 == LWZ:
        start_idx = idx
        lmw_bytes = final_bytes[idx:idx+4]
        lwz_bytes = final_bytes[idx+4:idx+8]
        if vanilla_bytes[idx:idx+4] != lwz_bytes:
            idx += 4
            continue

        # Collect the run of LFDs that follows the LWZ in the vanilla code.
        # The bounds check keeps a run that reaches the end of the buffer
        # from raising IndexError; the comparison below then simply fails.
        lfd_bytes = b""
        idx += 4
        while idx < len(vanilla_bytes) and vanilla_bytes[idx] >> 2 == LFD:
            lfd_bytes += vanilla_bytes[idx:idx+4]
            idx += 4

        # Mismatch: leave final_bytes alone and resume scanning from the
        # position already reached (idx was advanced above).
        if vanilla_bytes[idx:idx+4] != lmw_bytes:
            continue
        if final_bytes[start_idx+8:start_idx+8+len(lfd_bytes)] != lfd_bytes:
            continue

        idx += 4
        final_bytes = final_bytes[:start_idx] + lwz_bytes + lfd_bytes + lmw_bytes + final_bytes[idx:]
        continue
    idx += 4
# Write the patched object bytes to the target path.
with open(args.target, "wb") as f:
    f.write(final_bytes)

View File

View File

@ -1,348 +0,0 @@
#!/usr/bin/env python3
BANNER = """
# This script is the culmination of three patches supporting decompilation
# with the CodeWarrior compiler.
# - riidefi, 2020
#
# postprocess.py [args] file
#
# 1) Certain versions have a bug where the ctor alignment is ignored and set incorrectly.
# This option is enabled with -fctor-realign, and disabled by default with -fno-ctor-realign
#
# 2) Certain C++ symbols cannot be assembled normally.
# To support the buildsystem, a simple substitution system has been devised
#
# ?<ID> -> CHAR
#
# IDs (all irregular symbols in mangled names):
# 0: <
# 1: >
# 2: @
# 3: \\
# 4: ,
# 5: -
#
# This option is enabled with -fsymbol-fixup, and disabled by default with -fno-symbol-fixup
#
# 3) CodeWarrior versions below 2.3 used a different scheduler model.
# The script can currently adjust function epilogues with the old_stack option.
# -fepilogue-fixup=[default=none, none, old_stack]
"""
import struct
# Substitutions
# Each pair is (literal character, "?<ID>" escape), matching the ID table
# in BANNER.
substitutions = (
    ('<', '?0'),
    ('>', '?1'),
    ('@', '?2'),
    ('\\', '?3'),
    (',', '?4'),
    ('-', '?5')
)


def format(symbol):
    """Return `symbol` with every irregular character replaced by its ?<ID> escape."""
    for sub in substitutions:
        symbol = symbol.replace(sub[0], sub[1])
    return symbol


def decodeformat(symbol):
    """Return `symbol` with every ?<ID> escape replaced by its original character."""
    for sub in substitutions:
        symbol = symbol.replace(sub[1], sub[0])
    return symbol
# Stream utilities
def read_u8(f):
    """Read one byte from stream `f` and return it as an unsigned int."""
    return struct.unpack("B", f.read(1))[0]
def read_u32(f):
    """Read four bytes from stream `f` and return them as a big-endian unsigned int."""
    return struct.unpack(">I", f.read(4))[0]
def read_u16(f):
    """Read two bytes from stream `f` and return them as a big-endian unsigned int."""
    return struct.unpack(">H", f.read(2))[0]
def write_u32(f, val):
    """Write `val` to stream `f` as a four-byte big-endian unsigned int."""
    f.write(struct.pack(">I", val))
class ToReplace:
    """A pending string patch: write `dest` at `position`, zero-padding up to `src_size` bytes."""

    def __init__(self, position, dest, src_size):
        self.position = position  # Where in file
        self.dest = dest  # String to patch
        self.src_size = src_size  # Pad rest with zeroes
        # print("To replace: %s %s %s" % (self.position, self.dest, self.src_size))
def read_string(f):
    """Read bytes from stream `f` up to and including a 0x00 terminator.

    Returns the bytes before the terminator as a str, one character per byte
    (via chr). The terminator is consumed but not included.
    """
    chars = []
    c = 0xff
    while c != 0x00:
        c = read_u8(f)
        if c != 0:
            # Collect pieces and join once instead of repeated str +=.
            chars.append(chr(c))
    return "".join(chars)
def ctor_realign(f, ofsSecHeader, nSecHeader, idxSegNameSeg):
    """Collect the file offsets that must be patched for .ctors/.dtors sections.

    Walks `nSecHeader` section headers of 0x28 bytes each starting at
    `ofsSecHeader`. For every header whose name, looked up in the string
    section at index `idxSegNameSeg`, is ".ctors" or ".dtors", the offset of
    that header's field at +0x20 is recorded (the caller overwrites that
    32-bit field with 4; in the ELF32 section header layout this is
    sh_addralign).

    Returns the list of recorded offsets. The stream position is moved.
    """
    patch_align_ofs = []

    for i in range(nSecHeader):
        f.seek(ofsSecHeader + i * 0x28)
        ofsname = read_u32(f)
        # A zero name offset means there is no name to look up for this header.
        if not ofsname:
            continue

        back = f.tell()
        # Read the file offset (+0x10) of the section that holds the names.
        f.seek(ofsSecHeader + (idxSegNameSeg * 0x28) + 0x10)
        ofsShST = read_u32(f)
        f.seek(ofsShST + ofsname)
        name = read_string(f)
        if name == ".ctors" or name == ".dtors":
            patch_align_ofs.append(ofsSecHeader + i * 0x28 + 0x20)
        f.seek(back)

    return patch_align_ofs
# Section-type codes; impl_postprocess_elf compares SHT_STRTAB against the
# second 32-bit word of each section header.
# NOTE(review): SHT_PROGBITS is not referenced in the visible code — confirm
# before removing.
SHT_PROGBITS = 1
SHT_STRTAB = 3
def impl_postprocess_elf(f, do_ctor_realign, do_old_stack, do_symbol_fixup):
    """Scan an ELF opened read/write and apply or collect the requested fixups.

    f               -- seekable binary stream opened for reading and writing.
    do_ctor_realign -- collect the .ctors/.dtors patch offsets via ctor_realign.
    do_old_stack    -- rewrite function epilogues inside .text, in place.
    do_symbol_fixup -- collect string-table entries that contain ?<ID> escapes.

    Returns a tuple (result, patch_align_ofs): `result` is a list of ToReplace
    records for the caller to apply, `patch_align_ofs` the offsets returned by
    ctor_realign (empty when not requested). Epilogue patches are written to
    `f` directly; the other two kinds are only collected here.
    """
    result = []

    # Header fields: section header table offset at 0x20, section header
    # count at 0x30, immediately followed by the name-section index.
    f.seek(0x20)
    ofsSecHeader = read_u32(f)
    f.seek(0x30)
    nSecHeader = read_u16(f)
    idxSegNameSeg = read_u16(f)
    secF = False  # First instance the section names

    # Header: 0x32:
    patch_align_ofs = []
    if do_ctor_realign:
        patch_align_ofs = ctor_realign(f, ofsSecHeader, nSecHeader, idxSegNameSeg)

    for i in range(nSecHeader):
        f.seek(ofsSecHeader + i * 0x28)
        sh_name = read_u32(f)
        sh_type = read_u32(f)

        if sh_type == SHT_STRTAB and do_symbol_fixup:
            # The first string table encountered is skipped (see secF above).
            if not secF:
                secF = True
                continue

            f.seek(ofsSecHeader + i * 0x28 + 0x10)
            ofs = read_u32(f)
            size = read_u32(f)
            f.seek(ofs)

            string = ""
            str_spos = ofs
            # `pos` is a separate name so the outer section index `i` is not
            # shadowed while walking the table byte by byte.
            for pos in range(ofs, ofs+size):
                c = read_u8(f)
                if c == 0:
                    if len(string):
                        fixed = decodeformat(string)
                        if fixed != string:
                            result.append(ToReplace(str_spos, fixed, len(string)))
                    string = ""
                    str_spos = pos+1
                else:
                    string += chr(c)
        else:
            f.seek(ofsSecHeader + (idxSegNameSeg * 0x28) + 0x10)
            ofsShST = read_u32(f)
            f.seek(ofsShST + sh_name)
            name = read_string(f)

            if name == ".text" and do_old_stack:
                f.seek(ofsSecHeader + i * 0x28 + 0x10)
                ofs = read_u32(f)
                size = read_u32(f)
                # We assume
                # 1) Only instructions are in the .text section
                # 2) These instructions are 4-byte aligned
                assert ofs != 0
                assert ofs % 4 == 0
                assert size % 4 == 0
                f.seek(ofs)

                mtlr_pos = 0
                addi_pos = 0
                lwz_pos = 0
                mr_pos = 0
                # (mtlr position, blr position)
                epilogues = []
                # (lwz position, mr position)
                mr_epilogues = []

                for _ in range(ofs, ofs+size, 4):
                    it = f.tell()
                    instr = read_u32(f)
                    # Skip padding
                    if instr == 0:
                        continue

                    # Call analysis is not actually required
                    # No mtlr will exist without a blr; mtctr/bctr* is used for dynamic dispatch
                    # FUN_A:
                    #   li r3, 0
                    #   blr              <---- No mtlr, move onto the next function
                    # FUN_B:
                    #   ; complex function, stack manip
                    #   mtlr r0          <---- Expect a blr
                    #   addi r1, r1, 24
                    #   blr              <---- Confirm patch above

                    # mtlr alias for mtspr
                    if instr == 0x7C0803A6:
                        assert mtlr_pos == 0
                        mtlr_pos = it
                    elif instr == 0x38210018:  # addi r1, r1, 0x18
                        if mtlr_pos == 0:
                            addi_pos = it
                    elif instr & 0xFFFFFF00 == 0x80010000 and instr & 0xFF > 0:  # lwz r0, N(r1)
                        assert lwz_pos == 0
                        lwz_pos = it
                    elif (instr == 0x7F83E378 or instr == 0x7FE3FB78 or instr == 0x7FC3F378 or instr == 0x7FA3EB78) and it == lwz_pos + 4:  # mr r3, r28 || mr r3, r29 || mr r3, r30 || mr r3, r31
                        mr_pos = it
                    # blr
                    elif instr == 0x4E800020:
                        if mtlr_pos:
                            epilogues.append((mtlr_pos, it))
                        if addi_pos and mtlr_pos == 0:
                            epilogues.append((addi_pos, it))
                        if lwz_pos and mr_pos:
                            mr_epilogues.append((lwz_pos, mr_pos))
                        # A blr closes the current function: reset the trackers.
                        mtlr_pos = 0
                        addi_pos = 0
                        lwz_pos = 0
                        mr_pos = 0

                # Reunify mtlr/blr instructions, shifting intermediary instructions up
                for mtlr_pos, blr_pos in epilogues:
                    # Check if we need to do anything
                    if mtlr_pos + 4 == blr_pos:
                        continue

                    # As the processor can only hold 6 instructions at once in the pipeline,
                    # it's unlikely for the mtlr be shifted up more instructions than that--usually,
                    # only one:
                    #   mtlr r0
                    #   addi r1, r1, 24
                    #   blr
                    assert blr_pos - 4 > mtlr_pos
                    assert blr_pos - mtlr_pos <= 7 * 4

                    print("Patching old epilogue: %s %s" % (mtlr_pos, blr_pos))

                    f.seek(mtlr_pos)
                    mtlr = read_u32(f)

                    # Slide each following instruction up one slot, then put
                    # the saved first instruction in the slot before the blr.
                    for it in range(mtlr_pos, blr_pos - 4, 4):
                        f.seek(it + 4)
                        next_instr = read_u32(f)
                        f.seek(it)
                        write_u32(f, next_instr)

                    f.seek(blr_pos - 4)
                    write_u32(f, mtlr)

                # Swap the lwz and mr instruction that are incorrectly scheduled
                for lwz_pos, mr_pos in mr_epilogues:
                    print("Patching old lwz/mr epilogue: %s %s" % (lwz_pos, mr_pos))

                    f.seek(lwz_pos)
                    lwz = read_u32(f)
                    f.seek(mr_pos)
                    mr = read_u32(f)

                    f.seek(mr_pos)
                    write_u32(f, lwz)
                    f.seek(lwz_pos)
                    write_u32(f, mr)

    return (result, patch_align_ofs)
def postprocess_elf(f, do_ctor_realign, do_old_stack, do_symbol_fixup):
    """Run the requested fixups on the ELF stream `f` and write the result back.

    `f` must be a seekable binary stream opened for reading and writing. The
    three flags are forwarded unchanged to impl_postprocess_elf. The string
    and alignment patches it returns are applied to an in-memory copy of the
    file, which is then written back from offset 0.
    """
    patches = impl_postprocess_elf(f, do_ctor_realign, do_old_stack, do_symbol_fixup)

    f.seek(0)
    source_bytes = list(f.read())

    # String patches: overwrite src_size bytes with the replacement text,
    # filling whatever the replacement does not cover with zero bytes.
    for patch in patches[0]:
        assert len(patch.dest) <= patch.src_size
        for j in range(patch.src_size):
            if j >= len(patch.dest):
                c = 0
            else:
                c = ord(patch.dest[j])
            source_bytes[patch.position + j] = c

    # Patch ctor align
    # Each offset gets the four bytes 00 00 00 04 (big-endian 4).
    nP = 0
    for p in patches[1]:
        print("Patching ctors")
        source_bytes[p + 0] = 0
        source_bytes[p + 1] = 0
        source_bytes[p + 2] = 0
        source_bytes[p + 3] = 4
        nP += 1

    if nP > 1:
        print("Patched ctors + dtors")

    f.seek(0)
    f.write(bytes(source_bytes))
def frontend(args):
    """Parse command-line style `args` and post-process the single file named in them.

    Recognised options (each "-f<name>" also has a "-fno-<name>" form):
      -fctor-realign                 enable the ctor/dtor alignment patch
      -fsymbol-fixup                 enable the ?<ID> symbol substitution
      -fepilogue-fixup=old_stack     enable the epilogue fixup
    Exactly one non-option argument, the file to patch in place, is expected.
    Problems are reported with print(); nothing is raised for bad arguments
    or a missing file. Returns None.
    """
    inplace = ""
    do_ctor_realign = False
    do_old_stack = False
    do_symbol_fixup = False

    for arg in args:
        if arg.startswith('-f'):
            negated = False
            if arg.startswith('-fno-'):
                negated = True
                arg = arg[len('-fno-'):]
            else:
                arg = arg[len('-f'):]

            # BANNER documents this option as -fctor-realign, but only the
            # underscore spelling used to be matched, so the documented flag
            # was rejected as unknown. Accept both spellings.
            if arg in ('ctor-realign', 'ctor_realign'):
                do_ctor_realign = not negated
            elif arg == 'symbol-fixup':
                do_symbol_fixup = not negated
            elif arg.startswith('epilogue-fixup='):
                do_old_stack = arg[len('epilogue-fixup='):] == 'old_stack'
            else:
                print("Unknown argument: %s" % arg)
        elif arg.startswith('-'):
            print("Unknown argument: %s. Perhaps you meant -f%s?" % (arg, arg))
        else:
            if inplace:
                print("Cannot process %s. Only one source file may be specified." % arg)
            else:
                inplace = arg

    if not inplace:
        print("A file must be specified!")
        return

    try:
        # The context manager closes the handle, and flushes the write-back,
        # even if post-processing raises.
        with open(inplace, 'rb+') as f:
            postprocess_elf(f, do_ctor_realign, do_old_stack, do_symbol_fixup)
    except FileNotFoundError:
        print("Cannot open file %s" % inplace)
# Script entry point: with no arguments print the usage banner, otherwise
# hand everything after the program name to frontend().
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print(BANNER)
    else:
        frontend(sys.argv[1:])