mirror of
https://github.com/zestydevy/dinosaur-planet.git
synced 2024-11-23 13:29:46 +00:00
Update progress.py (#88)
* Speed up DLL function analysis by ~30%
* Fix Dockerfile when building with existing user
* Update progress.py to count DLL functions and emit data for progress site
* Add shield in README for progress
* Fix progress.py executable bit
* Add new shields to README
This commit is contained in:
parent
c3c9dceaf3
commit
2388f299bb
@ -16,10 +16,10 @@ RUN pip3 install -r requirements.txt
|
|||||||
# Symlink dino.py
|
# Symlink dino.py
|
||||||
RUN ln -s /dino/dino.py /usr/local/bin/dino
|
RUN ln -s /dino/dino.py /usr/local/bin/dino
|
||||||
|
|
||||||
# Set up user
|
# Set up user (if they don't exist)
|
||||||
ARG login=sabre
|
ARG login=sabre
|
||||||
ARG uid=1000
|
ARG uid=1000
|
||||||
RUN adduser --system --uid $uid --group $login
|
# Create the user only when the uid is not already taken.
# Use POSIX redirection here: RUN's shell form goes through /bin/sh, and on
# shells without bash's `&>` (e.g. dash) `id ... &>/dev/null || adduser ...`
# backgrounds `id` and the empty redirection succeeds, so adduser never runs.
RUN id -u $uid >/dev/null 2>&1 || adduser --system --uid $uid --group $login
|
||||||
|
|
||||||
# Set entrypoint
|
# Set entrypoint
|
||||||
RUN echo "#!/bin/bash\nexec \"\$@\"" > /entrypoint.sh && chmod +x /entrypoint.sh
|
RUN echo "#!/bin/bash\nexec \"\$@\"" > /entrypoint.sh && chmod +x /entrypoint.sh
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
![Dinosaur Planet Decompilation](docs/banner.png)
|
![Dinosaur Planet Decompilation](docs/banner.png)
|
||||||
|
|
||||||
[![](https://img.shields.io/badge/Discord-Dinosaur%20Planet%20Community-5865F2?logo=discord)](https://discord.gg/H6WGkznZBc)
|
[![](https://img.shields.io/badge/Discord-Dinosaur%20Planet%20Community-5865F2?logo=discord)](https://discord.gg/H6WGkznZBc)
|
||||||
|
![](https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2FFrancessco121%2Fdino-status%2Fgh-pages%2Ftotal.shield.json)
|
||||||
|
![](https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2FFrancessco121%2Fdino-status%2Fgh-pages%2Fcore.shield.json)
|
||||||
|
![](https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2FFrancessco121%2Fdino-status%2Fgh-pages%2Fdlls.shield.json)
|
||||||
|
|
||||||
A WIP decompilation of Dinosaur Planet for the Nintendo 64, as released by Forest of Illusion on Feb. 20, 2021.
|
A WIP decompilation of Dinosaur Planet for the Nintendo 64, as released by Forest of Illusion on Feb. 20, 2021.
|
||||||
|
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
ansiwrap
|
ansiwrap
|
||||||
capstone
|
capstone
|
||||||
colorama>=0.4.4,<0.5
|
colorama>=0.4.4,<0.5
|
||||||
colour
|
|
||||||
cxxfilt
|
cxxfilt
|
||||||
GitPython
|
GitPython
|
||||||
ninja
|
ninja
|
||||||
|
@ -185,6 +185,8 @@ class DLLRelocationTable:
|
|||||||
return DLLRelocationTable(True, global_offset_table, gp_relocations, data_relocations)
|
return DLLRelocationTable(True, global_offset_table, gp_relocations, data_relocations)
|
||||||
|
|
||||||
class DLLInst:
|
class DLLInst:
|
||||||
|
label: "str | None" = None
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
original: CsInsn,
|
original: CsInsn,
|
||||||
address: int,
|
address: int,
|
||||||
@ -192,7 +194,6 @@ class DLLInst:
|
|||||||
op_str: str,
|
op_str: str,
|
||||||
is_branch_delay_slot: bool,
|
is_branch_delay_slot: bool,
|
||||||
has_relocation: bool,
|
has_relocation: bool,
|
||||||
label: "str | None",
|
|
||||||
ref: "str | None") -> None:
|
ref: "str | None") -> None:
|
||||||
self.original = original
|
self.original = original
|
||||||
self.address = address
|
self.address = address
|
||||||
@ -200,7 +201,6 @@ class DLLInst:
|
|||||||
self.op_str = op_str
|
self.op_str = op_str
|
||||||
self.is_branch_delay_slot = is_branch_delay_slot
|
self.is_branch_delay_slot = is_branch_delay_slot
|
||||||
self.has_relocation = has_relocation
|
self.has_relocation = has_relocation
|
||||||
self.label = label
|
|
||||||
self.ref = ref
|
self.ref = ref
|
||||||
|
|
||||||
def is_op_modified(self):
|
def is_op_modified(self):
|
||||||
@ -232,12 +232,6 @@ class DLLFunction:
|
|||||||
self.relocations = relocations
|
self.relocations = relocations
|
||||||
"""All instruction relocations in the function, sorted by their position in the original DLL's GOT."""
|
"""All instruction relocations in the function, sorted by their position in the original DLL's GOT."""
|
||||||
|
|
||||||
def __mnemonic_has_delay_slot(mnemonic: str) -> bool:
|
|
||||||
return (mnemonic.startswith("b") or mnemonic.startswith("j")) and mnemonic != "break"
|
|
||||||
|
|
||||||
def __mnemonic_is_branch(mnemonic: str) -> bool:
|
|
||||||
return (mnemonic.startswith("b") or mnemonic == "j") and mnemonic != "break"
|
|
||||||
|
|
||||||
def parse_functions(data: bytearray,
|
def parse_functions(data: bytearray,
|
||||||
dll: DLL,
|
dll: DLL,
|
||||||
reloc_table: DLLRelocationTable,
|
reloc_table: DLLRelocationTable,
|
||||||
@ -256,15 +250,6 @@ def parse_functions(data: bytearray,
|
|||||||
md = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN)
|
md = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN)
|
||||||
insts = [i for i in md.disasm(data[header.size:text_end], 0x0)]
|
insts = [i for i in md.disasm(data[header.size:text_end], 0x0)]
|
||||||
|
|
||||||
# Extract all branches
|
|
||||||
branches: "list[tuple[int, int]]" = []
|
|
||||||
branch_dests: "set[int]" = set()
|
|
||||||
for i in insts:
|
|
||||||
if __mnemonic_is_branch(i.mnemonic):
|
|
||||||
branch_target = int(i.op_str.split(" ")[-1], 0)
|
|
||||||
branches.append((i.address, branch_target))
|
|
||||||
branch_dests.add(branch_target)
|
|
||||||
|
|
||||||
# Extract functions
|
# Extract functions
|
||||||
funcs: "list[DLLFunction]" = []
|
funcs: "list[DLLFunction]" = []
|
||||||
cur_func_insts: "list[DLLInst]" = []
|
cur_func_insts: "list[DLLInst]" = []
|
||||||
@ -275,14 +260,38 @@ def parse_functions(data: bytearray,
|
|||||||
cur_func_auto_syms: "OrderedDict[str, int]" = OrderedDict()
|
cur_func_auto_syms: "OrderedDict[str, int]" = OrderedDict()
|
||||||
cur_func_relocs: "list[DLLRelocation]" = []
|
cur_func_relocs: "list[DLLRelocation]" = []
|
||||||
cur_func_inst_index = 0
|
cur_func_inst_index = 0
|
||||||
for idx, i in enumerate(insts):
|
cur_func_branch_dests: "list[int]" = []
|
||||||
# Check if this instruction is a branch delay slot of the previous instruction
|
cur_func_forward_branches: "set[int]" = set()
|
||||||
is_delay_slot = last_mnemonic is not None and __mnemonic_has_delay_slot(last_mnemonic)
|
|
||||||
|
|
||||||
if new_func and i.mnemonic != "nop" and not is_delay_slot:
|
def add_function():
|
||||||
# Add previous function
|
if cur_func_name == "":
|
||||||
if cur_func_name != "":
|
return
|
||||||
|
|
||||||
|
# Discard trailing nops
|
||||||
|
for idx in range(len(cur_func_insts) - 1, 0, -1):
|
||||||
|
i = cur_func_insts[idx]
|
||||||
|
if i.mnemonic == "nop" and not i.is_branch_delay_slot:
|
||||||
|
cur_func_insts.pop(idx)
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Ensure function ends with jr $ra
|
||||||
|
# Otherwise, it's not a function
|
||||||
|
if len(cur_func_insts) >= 2:
|
||||||
|
jr = cur_func_insts[-2] # -2 to account for the delay slot after jr
|
||||||
|
if jr.mnemonic != "jr" or jr.op_str != "$ra":
|
||||||
|
return
|
||||||
|
|
||||||
|
# Sort relocations by GOT index
|
||||||
cur_func_relocs.sort(key=lambda r: r.got_index)
|
cur_func_relocs.sort(key=lambda r: r.got_index)
|
||||||
|
|
||||||
|
# Add branch labels
|
||||||
|
for addr in cur_func_branch_dests:
|
||||||
|
idx = (addr - cur_func_addr) // 4
|
||||||
|
if idx >= 0 and idx < len(cur_func_insts):
|
||||||
|
cur_func_insts[idx].label = ".L{:X}".format(addr)
|
||||||
|
|
||||||
|
# Add function
|
||||||
funcs.append(DLLFunction(
|
funcs.append(DLLFunction(
|
||||||
insts=cur_func_insts,
|
insts=cur_func_insts,
|
||||||
address=cur_func_addr,
|
address=cur_func_addr,
|
||||||
@ -292,6 +301,14 @@ def parse_functions(data: bytearray,
|
|||||||
relocations=cur_func_relocs
|
relocations=cur_func_relocs
|
||||||
))
|
))
|
||||||
|
|
||||||
|
for idx, i in enumerate(insts):
|
||||||
|
# Check if this instruction is a branch delay slot of the previous instruction
|
||||||
|
is_delay_slot = last_mnemonic is not None and __mnemonic_has_delay_slot(last_mnemonic)
|
||||||
|
|
||||||
|
if new_func and i.mnemonic != "nop" and not is_delay_slot:
|
||||||
|
# Add previous function
|
||||||
|
add_function()
|
||||||
|
|
||||||
# New function, determine name and type
|
# New function, determine name and type
|
||||||
if i.address == header.ctor_offset:
|
if i.address == header.ctor_offset:
|
||||||
cur_func_name = known_symbols.get(i.address, "dll_{}_ctor".format(dll.number))
|
cur_func_name = known_symbols.get(i.address, "dll_{}_ctor".format(dll.number))
|
||||||
@ -306,8 +323,10 @@ def parse_functions(data: bytearray,
|
|||||||
cur_func_has_gp_init = False
|
cur_func_has_gp_init = False
|
||||||
cur_func_auto_syms = OrderedDict()
|
cur_func_auto_syms = OrderedDict()
|
||||||
cur_func_relocs = []
|
cur_func_relocs = []
|
||||||
new_func = False
|
cur_func_branch_dests = []
|
||||||
|
cur_func_forward_branches = set()
|
||||||
cur_func_inst_index = 0
|
cur_func_inst_index = 0
|
||||||
|
new_func = False
|
||||||
|
|
||||||
# Pre-process instruction
|
# Pre-process instruction
|
||||||
mnemonic = i.mnemonic
|
mnemonic = i.mnemonic
|
||||||
@ -322,6 +341,12 @@ def parse_functions(data: bytearray,
|
|||||||
branch_target = int(operands[-1], 0)
|
branch_target = int(operands[-1], 0)
|
||||||
op_label = ".L{:X}".format(branch_target)
|
op_label = ".L{:X}".format(branch_target)
|
||||||
op_str = ", ".join(operands[:-1] + [op_label])
|
op_str = ", ".join(operands[:-1] + [op_label])
|
||||||
|
# Save target
|
||||||
|
cur_func_branch_dests.append(branch_target)
|
||||||
|
# If the branch target is ahead of this instruction, save it to assist in
|
||||||
|
# detecting the function end
|
||||||
|
if branch_target > i.address:
|
||||||
|
cur_func_forward_branches.add(branch_target)
|
||||||
elif cur_func_inst_index < 2 and num_operands > 0 and operands[0] == "$gp":
|
elif cur_func_inst_index < 2 and num_operands > 0 and operands[0] == "$gp":
|
||||||
# Add _gp_disp to $gp initializer stub
|
# Add _gp_disp to $gp initializer stub
|
||||||
# Note: The $gp initializer stub gets modified when compiled,
|
# Note: The $gp initializer stub gets modified when compiled,
|
||||||
@ -349,7 +374,7 @@ def parse_functions(data: bytearray,
|
|||||||
got_index = offset // 4
|
got_index = offset // 4
|
||||||
symbol_addr = reloc_table.global_offset_table[got_index]
|
symbol_addr = reloc_table.global_offset_table[got_index]
|
||||||
# Determine if this is a CALL16 or GOT16 relocation
|
# Determine if this is a CALL16 or GOT16 relocation
|
||||||
is_call16 = is_reloc_call16(idx, insts)
|
is_call16 = __is_reloc_call16(idx, insts)
|
||||||
# Make symbol
|
# Make symbol
|
||||||
if got_index == 0:
|
if got_index == 0:
|
||||||
symbol = ".text"
|
symbol = ".text"
|
||||||
@ -397,15 +422,6 @@ def parse_functions(data: bytearray,
|
|||||||
mnemonic = "addu"
|
mnemonic = "addu"
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(f"INVALID INSTRUCTION {i} {opcode}")
|
raise NotImplementedError(f"INVALID INSTRUCTION {i} {opcode}")
|
||||||
elif mnemonic in ["mtc0", "mfc0", "mtc2", "mfc2"]:
|
|
||||||
# TODO: what is this doing?
|
|
||||||
rd = (i.bytes[2] & 0xF8) >> 3
|
|
||||||
op_str = op_str.split(" ")[0] + " $" + str(rd)
|
|
||||||
|
|
||||||
# Determine whether this instruction address is branched to
|
|
||||||
label: "str | None" = None
|
|
||||||
if i.address in branch_dests:
|
|
||||||
label = ".L{:X}".format(i.address)
|
|
||||||
|
|
||||||
# Add instruction
|
# Add instruction
|
||||||
cur_func_insts.append(DLLInst(
|
cur_func_insts.append(DLLInst(
|
||||||
@ -414,40 +430,35 @@ def parse_functions(data: bytearray,
|
|||||||
mnemonic=mnemonic,
|
mnemonic=mnemonic,
|
||||||
op_str=op_str,
|
op_str=op_str,
|
||||||
is_branch_delay_slot=is_delay_slot,
|
is_branch_delay_slot=is_delay_slot,
|
||||||
label=label,
|
|
||||||
ref=ref,
|
ref=ref,
|
||||||
has_relocation=has_relocation
|
has_relocation=has_relocation
|
||||||
))
|
))
|
||||||
|
|
||||||
|
# If we reached a branch target, pop it
|
||||||
|
if i.address in cur_func_forward_branches:
|
||||||
|
cur_func_forward_branches.remove(i.address)
|
||||||
|
|
||||||
# Check for function end
|
# Check for function end
|
||||||
# TODO: this is very slow for large functions
|
if mnemonic == "jr" and i.op_str == "$ra" and len(cur_func_forward_branches) == 0:
|
||||||
if mnemonic == "jr" and i.op_str == "$ra":
|
# Reached a jr $ra and we're not inside of a branch, must be the function end
|
||||||
new_func = True
|
new_func = True
|
||||||
for branch in branches:
|
|
||||||
if (branch[0] > i.address and branch[1] <= i.address) or (branch[0] <= i.address and branch[1] > i.address):
|
|
||||||
# jr falls within a known branch, so there's more to this function
|
|
||||||
new_func = False
|
|
||||||
break
|
|
||||||
|
|
||||||
# Track last instruction
|
# Track last instruction
|
||||||
last_mnemonic = mnemonic
|
last_mnemonic = mnemonic
|
||||||
cur_func_inst_index += 1
|
cur_func_inst_index += 1
|
||||||
|
|
||||||
# Add final function
|
# Add final function
|
||||||
if cur_func_name != "":
|
add_function()
|
||||||
cur_func_relocs.sort(key=lambda r: r.got_index)
|
|
||||||
funcs.append(DLLFunction(
|
|
||||||
insts=cur_func_insts,
|
|
||||||
address=cur_func_addr,
|
|
||||||
symbol=cur_func_name,
|
|
||||||
is_static=cur_func_is_static,
|
|
||||||
auto_symbols=cur_func_auto_syms,
|
|
||||||
relocations=cur_func_relocs
|
|
||||||
))
|
|
||||||
|
|
||||||
return funcs
|
return funcs
|
||||||
|
|
||||||
def is_reloc_call16(idx: int, insts: "list[CsInsn]") -> bool:
|
def __mnemonic_has_delay_slot(mnemonic: str) -> bool:
    """Return True for mnemonics starting with "b" or "j", except "break"."""
    # "break" starts with "b" but must not be counted
    if mnemonic == "break":
        return False
    return mnemonic.startswith(("b", "j"))
|
||||||
|
|
||||||
|
def __mnemonic_is_branch(mnemonic: str) -> bool:
    """Return True for "j" and for mnemonics starting with "b", except "break"."""
    # "break" starts with "b" but must not be counted
    if mnemonic == "break":
        return False
    return mnemonic == "j" or mnemonic.startswith("b")
|
||||||
|
|
||||||
|
def __is_reloc_call16(idx: int, insts: "list[CsInsn]") -> bool:
|
||||||
# GOT value must be stored in $t9
|
# GOT value must be stored in $t9
|
||||||
if not insts[idx].op_str.startswith("$t9"):
|
if not insts[idx].op_str.startswith("$t9"):
|
||||||
return False
|
return False
|
||||||
|
344
tools/progress.py
Normal file → Executable file
344
tools/progress.py
Normal file → Executable file
@ -1,119 +1,305 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
# Computes and displays progress for the decompilation project
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import git
|
from git.repo import Repo
|
||||||
|
from glob import glob
|
||||||
|
from io import TextIOWrapper
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
from colour import Color
|
|
||||||
|
|
||||||
def set_version(version):
|
from dino.dll import DLL
|
||||||
global script_dir, root_dir, asm_dir, build_dir, elf_path
|
|
||||||
script_dir = os.path.dirname(os.path.realpath(__file__))
|
|
||||||
root_dir = os.path.join(script_dir, "..")
|
|
||||||
asm_dir = os.path.join(root_dir, "asm", "nonmatchings")
|
|
||||||
build_dir = os.path.join(root_dir, "build")
|
|
||||||
elf_path = os.path.join(build_dir, "dino.elf")
|
|
||||||
|
|
||||||
def get_func_sizes():
|
SCRIPT_DIR = Path(os.path.dirname(os.path.realpath(__file__)))
|
||||||
|
ROOT_DIR = Path(os.path.abspath(os.path.join(SCRIPT_DIR, "..")))
|
||||||
|
|
||||||
|
ASM_PATH = ROOT_DIR.joinpath("asm")
|
||||||
|
BIN_PATH = ROOT_DIR.joinpath("bin")
|
||||||
|
BUILD_PATH = ROOT_DIR.joinpath("build")
|
||||||
|
SRC_PATH = ROOT_DIR.joinpath("src")
|
||||||
|
|
||||||
|
symbol_pattern = re.compile(r"(\S+)\s*=\s*(\S+);")
|
||||||
|
|
||||||
|
class DLLProgress:
    """Progress counters for a single DLL.

    Stores the DLL's number together with its total and matching
    byte/function counts, exactly as passed in by the caller.
    """

    def __init__(self,
                 number: str,
                 total_bytes: int,
                 total_funcs: int,
                 matching_bytes: int,
                 matching_funcs: int) -> None:
        self.number = number
        # Totals and matching counts are stored pairwise
        self.total_bytes, self.total_funcs = total_bytes, total_funcs
        self.matching_bytes, self.matching_funcs = matching_bytes, matching_funcs
|
||||||
|
|
||||||
|
class CoreProgress:
    """Progress counters for the core (non-DLL) code.

    Stores total and matching byte/function counts exactly as passed in
    by the caller.
    """

    def __init__(self,
                 total_bytes: int,
                 total_funcs: int,
                 matching_bytes: int,
                 matching_funcs: int) -> None:
        # Totals and matching counts are stored pairwise
        self.total_bytes, self.total_funcs = total_bytes, total_funcs
        self.matching_bytes, self.matching_funcs = matching_bytes, matching_funcs
|
||||||
|
|
||||||
|
class OverallProgress:
    """Combined progress of the core code and all DLLs.

    Sums the per-DLL counters, adds them to the core counters and derives
    matching ratios (values in the range 0..1) for core, DLLs and overall.

    A ratio whose denominator is zero (for example when no DLLs were given
    or no core functions were found) is reported as 0.0 instead of raising
    ZeroDivisionError.
    """

    def __init__(self,
                 core: "CoreProgress",
                 dlls: "list[DLLProgress]") -> None:
        self.core = core
        self.dlls = dlls

        # Compute total DLL progress
        self.dll_total_bytes = 0
        self.dll_total_funcs = 0
        self.dll_matching_bytes = 0
        self.dll_matching_funcs = 0

        for progress in dlls:
            self.dll_total_bytes += progress.total_bytes
            self.dll_total_funcs += progress.total_funcs
            self.dll_matching_bytes += progress.matching_bytes
            self.dll_matching_funcs += progress.matching_funcs

        # Compute overall progress
        self.total_bytes = core.total_bytes + self.dll_total_bytes
        self.total_funcs = core.total_funcs + self.dll_total_funcs
        self.matching_bytes = core.matching_bytes + self.dll_matching_bytes
        self.matching_funcs = core.matching_funcs + self.dll_matching_funcs

        # Compute ratios (guarded against empty totals)
        ratio = self._ratio
        self.core_matching_funcs_ratio = ratio(core.matching_funcs, core.total_funcs)
        self.core_matching_bytes_ratio = ratio(core.matching_bytes, core.total_bytes)

        self.dll_matching_funcs_ratio = ratio(self.dll_matching_funcs, self.dll_total_funcs)
        self.dll_matching_bytes_ratio = ratio(self.dll_matching_bytes, self.dll_total_bytes)

        self.matching_funcs_ratio = ratio(self.matching_funcs, self.total_funcs)
        self.matching_bytes_ratio = ratio(self.matching_bytes, self.total_bytes)

    @staticmethod
    def _ratio(matching: int, total: int) -> float:
        """Return matching / total, or 0.0 when total is zero."""
        return matching / total if total else 0.0
|
||||||
|
|
||||||
|
def get_core_func_sizes(elf_path: Path) -> "tuple[dict[str, int], int]":
|
||||||
|
# Get functions and their sizes from the given .elf
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(['objdump', '-x', elf_path], stdout=subprocess.PIPE)
|
result = subprocess.run(['mips-linux-gnu-readelf', '--symbols', elf_path], stdout=subprocess.PIPE)
|
||||||
nm_lines = result.stdout.decode().split("\n")
|
lines = result.stdout.decode().split("\n")
|
||||||
except:
|
except:
|
||||||
print(f"Error: Could not run objdump on {elf_path} - make sure that the project is built")
|
print(f"Error: Could not run mips-linux-gnu-readelf on {elf_path} - make sure that the project is built")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
sizes = {}
|
sizes = {}
|
||||||
total = 0
|
total = 0
|
||||||
|
|
||||||
for line in nm_lines:
|
for line in [l for l in lines if "FUNC" in l]:
|
||||||
if " F " in line:
|
|
||||||
components = line.split()
|
components = line.split()
|
||||||
size = int(components[4], 16)
|
size = int(components[2])
|
||||||
name = components[5]
|
name = components[7]
|
||||||
|
# Include asm functions (which have a size of 0),
|
||||||
|
# but exclude branch labels (which also count as funcs and have a size of 0)
|
||||||
|
if size > 0 or not name.startswith("L8"):
|
||||||
total += size
|
total += size
|
||||||
sizes[name] = size
|
sizes[name] = size
|
||||||
|
|
||||||
return sizes, total
|
return sizes, total
|
||||||
|
|
||||||
def get_nonmatching_funcs():
|
def get_core_nonmatching_funcs() -> "set[str]":
|
||||||
|
nonmatching_path = ASM_PATH.joinpath("nonmatchings")
|
||||||
funcs = set()
|
funcs = set()
|
||||||
|
|
||||||
for root, dirs, files in os.walk(asm_dir):
|
for asm_path in nonmatching_path.rglob("*.s"):
|
||||||
for f in files:
|
# Skip DLL nonmatchings
|
||||||
if f.endswith(".s"):
|
if asm_path.relative_to(nonmatching_path).parts[0] == "dlls":
|
||||||
funcs.add(f[:-2])
|
continue
|
||||||
|
|
||||||
|
# Add
|
||||||
|
funcs.add(asm_path.stem)
|
||||||
|
|
||||||
return funcs
|
return funcs
|
||||||
|
|
||||||
def get_funcs_sizes(sizes, matchings, nonmatchings):
|
def get_core_progress() -> CoreProgress:
|
||||||
msize = 0
|
# Get all core functions and their sizes from the final .elf
|
||||||
nmsize = 0
|
dino_elf_path = BUILD_PATH.joinpath("dino.elf")
|
||||||
|
func_sizes, total_bytes = get_core_func_sizes(dino_elf_path)
|
||||||
for func in matchings:
|
|
||||||
msize += sizes[func]
|
|
||||||
|
|
||||||
for func in nonmatchings:
|
|
||||||
if func not in sizes:
|
|
||||||
pass
|
|
||||||
# print(func)
|
|
||||||
else:
|
|
||||||
nmsize += sizes[func]
|
|
||||||
|
|
||||||
return msize, nmsize
|
|
||||||
|
|
||||||
def lerp(a, b, alpha):
|
|
||||||
return a + (b - a) * alpha
|
|
||||||
|
|
||||||
def main(args):
|
|
||||||
set_version(args.version)
|
|
||||||
|
|
||||||
func_sizes, total_size = get_func_sizes()
|
|
||||||
all_funcs = set(func_sizes.keys())
|
all_funcs = set(func_sizes.keys())
|
||||||
|
|
||||||
nonmatching_funcs = get_nonmatching_funcs()
|
# Get nonmatching functions
|
||||||
|
nonmatching_funcs = get_core_nonmatching_funcs()
|
||||||
|
|
||||||
|
# Compute matching amount
|
||||||
matching_funcs = all_funcs - nonmatching_funcs
|
matching_funcs = all_funcs - nonmatching_funcs
|
||||||
|
matching_bytes = 0
|
||||||
|
for func in matching_funcs:
|
||||||
|
matching_bytes += func_sizes[func]
|
||||||
|
|
||||||
matching_size, nonmatching_size = get_funcs_sizes(func_sizes, matching_funcs, nonmatching_funcs)
|
# Done
|
||||||
|
return CoreProgress(
|
||||||
|
total_bytes=total_bytes,
|
||||||
|
total_funcs=len(all_funcs),
|
||||||
|
matching_bytes=matching_bytes,
|
||||||
|
matching_funcs=len(matching_funcs)
|
||||||
|
)
|
||||||
|
|
||||||
if len(all_funcs) == 0:
|
def read_dll_symbols_txt(path: Path) -> "dict[int, str]":
|
||||||
funcs_matching_ratio = 0.0
|
symbols: "dict[int, str]" = {}
|
||||||
matching_ratio = 0.0
|
|
||||||
|
with open(path, "r", encoding="utf-8") as syms_file:
|
||||||
|
for line in syms_file.readlines():
|
||||||
|
pairs = symbol_pattern.findall(line.strip())
|
||||||
|
for pair in pairs:
|
||||||
|
addr_str: str = pair[1]
|
||||||
|
if addr_str.lower().startswith("0x"):
|
||||||
|
addr = int(addr_str, base=16)
|
||||||
else:
|
else:
|
||||||
funcs_matching_ratio = (len(matching_funcs) / len(all_funcs)) * 100
|
addr = int(addr_str)
|
||||||
matching_ratio = (matching_size / total_size) * 100
|
|
||||||
|
|
||||||
if args.csv:
|
symbols[addr] = pair[0]
|
||||||
version = 1
|
|
||||||
git_object = git.Repo().head.object
|
|
||||||
timestamp = str(git_object.committed_date)
|
|
||||||
git_hash = git_object.hexsha
|
|
||||||
csv_list = [str(version), timestamp, git_hash, str(len(all_funcs)), str(len(nonmatching_funcs)),
|
|
||||||
str(len(matching_funcs)), str(total_size), str(nonmatching_size), str(matching_size)]
|
|
||||||
print(",".join(csv_list))
|
|
||||||
elif args.shield_json:
|
|
||||||
import json
|
|
||||||
|
|
||||||
# https://shields.io/endpoint
|
return symbols
|
||||||
color = Color("#50ca22", hue=lerp(0, 105/255, matching_ratio / 100))
|
|
||||||
print(json.dumps({
|
def get_dll_progress(dll_path: Path, number: str) -> DLLProgress:
|
||||||
"schemaVersion": 1,
|
known_symbols: "dict[int, str]" = {}
|
||||||
"label": f"progress ({args.version})",
|
nonmatching_funcs: "set[str]" = set()
|
||||||
"message": f"{matching_ratio:.2f}%",
|
has_src = False
|
||||||
"color": color.hex,
|
|
||||||
}))
|
# To determine progress we need to check if the DLL has a src directory
|
||||||
|
# If it does, we need its syms.txt and we need to check the respective asm/nonmatchings directory
|
||||||
|
syms_path = SRC_PATH.joinpath(f"dlls/{number}/syms.txt")
|
||||||
|
if syms_path.exists():
|
||||||
|
has_src = True
|
||||||
|
# Get a list of known symbols for the DLL (we need the function symbols)
|
||||||
|
known_symbols = read_dll_symbols_txt(syms_path)
|
||||||
|
# Get list of functions that aren't matching
|
||||||
|
nonmatchings_dir = ASM_PATH.joinpath(f"nonmatchings/dlls/{number}")
|
||||||
|
if nonmatchings_dir.exists():
|
||||||
|
for asm_file in nonmatchings_dir.iterdir():
|
||||||
|
if asm_file.name.endswith(".s"):
|
||||||
|
nonmatching_funcs.add(asm_file.name[:-2])
|
||||||
|
|
||||||
|
# Get all DLL functions and their sizes
|
||||||
|
with open(dll_path, "rb") as dll_file:
|
||||||
|
dll = DLL.parse(bytearray(dll_file.read()), number, include_funcs=True, known_symbols=known_symbols)
|
||||||
|
assert dll.functions is not None
|
||||||
|
|
||||||
|
func_sizes: "dict[str, int]" = {}
|
||||||
|
total_bytes = 0
|
||||||
|
for func in dll.functions:
|
||||||
|
size = len(func.insts) * 4
|
||||||
|
func_sizes[func.symbol] = size
|
||||||
|
total_bytes += size
|
||||||
|
|
||||||
|
# Compute matching amounts
|
||||||
|
if has_src:
|
||||||
|
matching_funcs = set(func_sizes.keys()) - nonmatching_funcs
|
||||||
|
matching_bytes = 0
|
||||||
|
for func in matching_funcs:
|
||||||
|
matching_bytes += func_sizes[func]
|
||||||
else:
|
else:
|
||||||
if matching_size + nonmatching_size != total_size:
|
matching_funcs = []
|
||||||
print("Warning: category/total size mismatch!\n")
|
matching_bytes = 0
|
||||||
print(f"{len(matching_funcs)} matched functions / {len(all_funcs)} total ({funcs_matching_ratio:.2f}%)")
|
|
||||||
print(f"{matching_size} matching bytes / {total_size} total ({matching_ratio:.2f}%)")
|
|
||||||
|
|
||||||
|
# Done
|
||||||
|
return DLLProgress(
|
||||||
|
number,
|
||||||
|
total_bytes=total_bytes,
|
||||||
|
total_funcs=len(func_sizes),
|
||||||
|
matching_bytes=matching_bytes,
|
||||||
|
matching_funcs=len(matching_funcs)
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_all_dll_progress() -> "list[DLLProgress]":
    """Return the progress of every ``*.dll`` file found in ``assets/dlls`` under BIN_PATH.

    Each file is passed to get_dll_progress along with its DLL number,
    which is the part of the file name before the first dot.
    """
    dlls_dir = BIN_PATH.joinpath("assets/dlls")

    # Get progress of each .dll asset
    return [
        get_dll_progress(dll_path, dll_path.name.split(".")[0])
        for dll_path in map(Path, glob(f"{dlls_dir}/*.dll"))
    ]
|
||||||
|
|
||||||
|
def get_overall_progress() -> OverallProgress:
    """Compute core progress, then DLL progress, and combine both into an OverallProgress."""
    # Arguments are evaluated left to right: core first, DLLs second
    return OverallProgress(get_core_progress(), get_all_dll_progress())
|
||||||
|
|
||||||
|
def output_json(p: OverallProgress, file: TextIOWrapper):
    """Write the given progress and the current HEAD commit info to file as JSON.

    The output has four sections: "total", "core", "dll" (each with the
    matching byte ratio and the function/byte counts) and "git" (full hash,
    7-character short hash and commit timestamp of HEAD). The repository is
    opened with ``Repo()``, i.e. without an explicit path.
    """
    def section(ratio, matching_funcs, matching_bytes, total_funcs, total_bytes):
        # All three progress sections share the same key layout
        return {
            "matching_ratio": ratio,
            "matching_funcs": matching_funcs,
            "matching_bytes": matching_bytes,
            "total_funcs": total_funcs,
            "total_bytes": total_bytes,
        }

    # Get current commit info
    repo = Repo()
    head = repo.head.object
    full_hash = head.hexsha
    short_hash = repo.git.rev_parse(full_hash, short=7)
    timestamp = head.committed_date

    # Build and emit JSON data
    json.dump(
        {
            "total": section(p.matching_bytes_ratio,
                             p.matching_funcs, p.matching_bytes,
                             p.total_funcs, p.total_bytes),
            "core": section(p.core_matching_bytes_ratio,
                            p.core.matching_funcs, p.core.matching_bytes,
                            p.core.total_funcs, p.core.total_bytes),
            "dll": section(p.dll_matching_bytes_ratio,
                           p.dll_matching_funcs, p.dll_matching_bytes,
                           p.dll_total_funcs, p.dll_total_bytes),
            "git": {
                "commit_hash": full_hash,
                "commit_hash_short": short_hash,
                "commit_timestamp": timestamp,
            },
        },
        file,
        indent=2,
    )
|
||||||
|
|
||||||
|
def print_progress(p: OverallProgress):
    """Print matched function and byte counts for core, DLLs and overall.

    Each group is two lines (functions, then bytes) with the ratio shown as
    a percentage with two decimals; groups are separated by a blank line.
    """
    groups = (
        ("core",
         p.core.matching_funcs, p.core.total_funcs, p.core_matching_funcs_ratio,
         p.core.matching_bytes, p.core.total_bytes, p.core_matching_bytes_ratio),
        ("DLL",
         p.dll_matching_funcs, p.dll_total_funcs, p.dll_matching_funcs_ratio,
         p.dll_matching_bytes, p.dll_total_bytes, p.dll_matching_bytes_ratio),
        ("overall",
         p.matching_funcs, p.total_funcs, p.matching_funcs_ratio,
         p.matching_bytes, p.total_bytes, p.matching_bytes_ratio),
    )

    for position, (label, m_funcs, t_funcs, r_funcs, m_bytes, t_bytes, r_bytes) in enumerate(groups):
        # Blank line between groups, but not before the first one
        if position:
            print()
        print(f"{m_funcs} matched {label} functions / {t_funcs} total ({r_funcs * 100:.2f}%)")
        print(f"{m_bytes} matching {label} bytes / {t_bytes} total ({r_bytes * 100:.2f}%)")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(description="Reports progress for the project")
|
parser = argparse.ArgumentParser(description="Computes and reports progress for the project.")
|
||||||
parser.add_argument("version", default="current", nargs="?")
|
parser.add_argument("-q", "--quiet", action="store_true", help="Don't print messages to stdout.", default=False)
|
||||||
parser.add_argument("--csv", action="store_true")
|
parser.add_argument("--json", type=argparse.FileType("w", encoding="utf-8"), help="File to write the current progress to as JSON.")
|
||||||
parser.add_argument("--shield-json", action="store_true")
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
main(args)
|
# Compute progress
|
||||||
|
if not args.quiet:
|
||||||
|
print("Calculating progress...")
|
||||||
|
|
||||||
|
progress = get_overall_progress()
|
||||||
|
|
||||||
|
# Emit JSON
|
||||||
|
if args.json:
|
||||||
|
with args.json as json_file:
|
||||||
|
output_json(progress, json_file)
|
||||||
|
|
||||||
|
# Print progress
|
||||||
|
if not args.quiet:
|
||||||
|
print()
|
||||||
|
print_progress(progress)
|
||||||
|
Loading…
Reference in New Issue
Block a user