#!/usr/bin/env python3

import argparse
import csv
import os
import re
import sys

parser = argparse.ArgumentParser(description="Computes current progress throughout the whole project.")
parser.add_argument("format", nargs="?", default="text", choices=["text", "csv", "shield-json"])
parser.add_argument("-m", "--matching", dest='matching', action='store_true',
                    help="Output matching progress instead of decompilation progress")
args = parser.parse_args()

# Both patterns capture the path given to `#pragma GLOBAL_ASM("...")`.
# NON_MATCHING_PATTERN only matches pragmas wrapped in an `#ifdef NON_MATCHING ... #endif` region.
NON_MATCHING_PATTERN = r'#ifdef\s+NON_MATCHING.*?#pragma\s+GLOBAL_ASM\s*\(\s*"(.*?)"\s*\).*?#endif'
NOT_ATTEMPTED_PATTERN = r'#pragma\s+GLOBAL_ASM\s*\(\s*"(.*?)"\s*\)'

# This is the format ZAPD uses to autogenerate variable names
# It should not be used for properly documented variables
AUTOGENERATED_ASSET_NAME = re.compile(r".+0[0-9A-Fa-f]{5}")

# Symbol names made of an `L` followed by exactly 8 uppercase hex digits.
# The map parser uses this to skip jump table labels.
ASM_JMP_LABEL = re.compile(r"^L[0-9A-F]{8}$")

# TODO: consider making this a parameter of this script
GAME_VERSION = "n64-us"


def eprint(*args, **kwargs):
    """Print to stderr; accepts the same arguments as `print` (except `file`)."""
    print(*args, file=sys.stderr, **kwargs)


def GetFunctionsByPattern(pattern, files):
    """Return every match of `pattern` found in the contents of `files`.

    Each file is read whole and searched with `re.DOTALL`, so a match may span
    several lines. Matches are returned in file order, duplicates included.
    """
    functions = []
    for file in files:
        with open(file) as f:
            functions += re.findall(pattern, f.read(), re.DOTALL)
    return functions


def ReadAllLines(fileName):
    """Return the lines of `fileName` as a list, line endings preserved."""
    with open(fileName) as f:
        return f.readlines()


def GetFiles(path, ext):
    """Recursively collect the paths of all files under `path` whose name ends with `ext`."""
    return [
        os.path.join(root, fileName)
        for root, _dirs, fileNames in os.walk(path)
        for fileName in fileNames
        if fileName.endswith(ext)
    ]


def GetCsvFilelist(version, filelist):
    """Read `tools/filelists/<version>/<filelist>` and return its rows as lists of strings."""
    path = os.path.join("tools", "filelists", version, filelist)
    with open(path, newline='') as f:
        return list(csv.reader(f, delimiter=','))


def GetRemovableSize(functionSizes, functions_to_count):
    """Sum the sizes of the functions in `functions_to_count`.

    Names missing from the `functionSizes` mapping are ignored.
    """
    return sum(functionSizes[func] for func in functions_to_count if func in functionSizes)


def CalculateMapSizes(mapFileList):
    """Fill in the "size" of every symbol of every file entry, in place.

    A symbol's size is the distance to the next symbol's vram. The last symbol
    of a file gets whatever remains of the file's "size" once the other symbols
    are accounted for. Entries without symbols are left untouched.

    Returns the same `mapFileList` object for convenience.
    """
    for mapFile in mapFileList:
        symbols = mapFile["symbols"]
        if not symbols:
            continue

        # Calculate size of each symbol
        accumulatedSize = 0
        for symbol, nextSymbol in zip(symbols, symbols[1:]):
            size = nextSymbol["vram"] - symbol["vram"]
            accumulatedSize += size
            symbol["size"] = size

        # Calculate size of last symbol of the file
        symbols[-1]["size"] = mapFile["size"] - accumulatedSize
    return mapFileList


def GetFunctionSizes(mapFileList):
    """Return a `{symbol name: size}` dict for every symbol that lives in a `.text` section."""
    functionSizes = {}
    for mapFile in mapFileList:
        if mapFile["section"] != ".text":
            continue
        for symbol in mapFile["symbols"]:
            functionSizes[symbol["name"]] = symbol["size"]
    return functionSizes


def CalculateNonNamedAssets(mapFileList, assetsTracker):
    """Accumulate the size of autogenerated-name symbols into each asset category.

    Only `.data`/`.rodata` entries located under `build/<GAME_VERSION>/assets/`
    are considered. The category is the path component right after `assets/`,
    and categories missing from `assetsTracker` are skipped. Every symbol whose
    name matches `AUTOGENERATED_ASSET_NAME` adds its size to that category's
    "removableSize".

    Returns the same `assetsTracker` object for convenience.
    """
    assetsPrefix = f"build/{GAME_VERSION}/assets/"
    for mapFile in mapFileList:
        if mapFile["section"] not in (".data", ".rodata"):
            continue
        if not mapFile["name"].startswith(assetsPrefix):
            continue

        assetCat = mapFile["name"].split("/")[3]
        if assetCat not in assetsTracker:
            continue

        for symbol in mapFile["symbols"]:
            if AUTOGENERATED_ASSET_NAME.search(symbol["name"]) is not None:
                assetsTracker[assetCat]["removableSize"] += symbol["size"]
    return assetsTracker


map_file = ReadAllLines(f'build/{GAME_VERSION}/mm-{GAME_VERSION}.map')

# Get list of Non-Matchings
all_files = GetFiles("src", ".c")
non_matching_functions = GetFunctionsByPattern(NON_MATCHING_PATTERN, all_files)

# Get list of functions not attempted.
not_attempted_functions = GetFunctionsByPattern(NOT_ATTEMPTED_PATTERN, all_files)
not_attempted_functions = list(set(not_attempted_functions).difference(non_matching_functions))

# If we are looking for a count that includes non-matchings, then we want to set non matching functions list to empty.
# We want to do this after not attempted functions list generation so we can remove all non matchings.
if not args.matching:
    non_matching_functions = []

# The order of this list should not change to prevent breaking the graph of the website
# New stuff shall be appended at the end of the list
assetsCategories = [
    "audio",
    "interface",
    "misc",
    "objects",
    "scenes",
    "text",
    # "deleted",
    # "segments",
]

# Assets that we don't have a proper way of tracking right now
ignoredAssets = {
    "archives",
    "code",
}

# Manual fixer for files that would be counted in wrong categories
# "filename": "correctSection"
fileSectionFixer = {
    "sequence_font_table": "code",  # Currently in assets (would be counted as an audio asset)
}

assetsTracker = dict()
for assetCat in assetsCategories:
    assetsTracker[assetCat] = {
        # Get asset files
        "files": GetCsvFilelist(GAME_VERSION, f"{assetCat}.csv"),
        "currentSize": 0,
        "removableSize": 0,
        "totalSize": 0,
        "percent": 0,
    }

# Initialize all the code values
srcCategories = [
    "boot",
    "libultra",
    "code",
    "overlays",
    "audio",
]

# Directory names that must be counted as part of another category
srcCategoriesFixer = {
    "boot_O2": "boot",
    "boot_O2_g3": "boot",
}

srcTracker = dict()
asmTracker = dict()
for srcCat in srcCategories:
    srcTracker[srcCat] = {"currentSize": 0, "totalSize": 0, "percent": 0}
    asmTracker[srcCat] = {"currentSize": 0, "totalSize": 0, "percent": 0}

# Every object file of interest lives under this directory
buildPrefix = f"build/{GAME_VERSION}/"

# Parse the linker map. Two kinds of lines are recognised:
#   <.section> <vram> <size> <object file>  -> starts a new file entry
#   <0xvram> <symbol name>                  -> symbol of the most recent file entry
mapFileList = []
for line in map_file:
    line_split = list(filter(None, line.split(" ")))

    if len(line_split) == 4 and line_split[0].startswith("."):
        section = line_split[0]
        obj_vram = int(line_split[1], 16)
        file_size = int(line_split[2], 16)
        obj_file = line_split[3].strip()
        objFileSplit = obj_file.split("/")
        objFileName = objFileSplit[-1].split(".o")[0]

        fileData = {"name": obj_file, "vram": obj_vram, "size": file_size, "section": section, "symbols": []}
        mapFileList.append(fileData)

        if section == ".text":
            # The category is the 4th path component (build/<version>/<src|asm>/<category>/...).
            # Shorter paths have no category and therefore can't be counted in any tracker.
            srcCat = objFileSplit[3] if len(objFileSplit) > 3 else None
            srcCat = srcCategoriesFixer.get(srcCat, srcCat)
            srcCat = fileSectionFixer.get(objFileName, srcCat)

            if obj_file.startswith(buildPrefix + "src"):
                if srcCat in srcTracker:
                    srcTracker[srcCat]["totalSize"] += file_size
            elif obj_file.startswith(buildPrefix + "asm"):
                if srcCat in asmTracker:
                    asmTracker[srcCat]["totalSize"] += file_size

        if section in (".data", ".rodata") and obj_file.startswith(buildPrefix + "assets/"):
            # The prefix check guarantees the path has at least 4 components
            assetCat = fileSectionFixer.get(objFileName, objFileSplit[3])
            if assetCat in assetsTracker:
                assetsTracker[assetCat]["currentSize"] += file_size
            elif assetCat not in ignoredAssets:
                eprint(f"Found file '{obj_file}' in unknown asset category '{assetCat}'")
                eprint("I'll ignore this for now, but please fix it!")

    elif len(line_split) == 2 and line_split[0].startswith("0x"):
        varVramStr, varName = line_split
        varVram = int(varVramStr, 16)
        varName = varName.strip()
        if varName == "0x0":
            continue
        if ASM_JMP_LABEL.search(varName) is not None:
            # Filter out jump table's labels
            continue
        if not mapFileList:
            # Symbol found before any object file entry: there is nothing to attach it to
            continue
        symbolData = {"name": varName, "vram": varVram, "size": 0}
        mapFileList[-1]["symbols"].append(symbolData)

mapFileList = CalculateMapSizes(mapFileList)
functionSizes = GetFunctionSizes(mapFileList)
assetsTracker = CalculateNonNamedAssets(mapFileList, assetsTracker)

# Add libultra to boot.
srcTracker["boot"]["totalSize"] += srcTracker["libultra"]["totalSize"]
asmTracker["boot"]["totalSize"] += asmTracker["libultra"]["totalSize"]
del srcTracker["libultra"]
del asmTracker["libultra"]

# Add audio to code.
srcTracker["code"]["totalSize"] += srcTracker["audio"]["totalSize"]
asmTracker["code"]["totalSize"] += asmTracker["audio"]["totalSize"]
del srcTracker["audio"]
del asmTracker["audio"]


def GetFunctionNamesInBucket(asmPaths, bucket):
    """Return the function names of the entries of `asmPaths` that contain `/<bucket>/`.

    The function name is the last path component, cut at its first `.`.
    """
    marker = f"/{bucket}/"
    return [asmPath.split("/")[-1].split(".")[0] for asmPath in asmPaths if marker in asmPath]


def GetPercent(part, whole):
    """Return `part` as a percentage of `whole`, or 0 when `whole` is 0."""
    if whole == 0:
        return 0
    return 100 * part / whole


# Calculate Non-Matching
non_matching_functions_ovl = GetFunctionNamesInBucket(non_matching_functions, "overlays")
non_matching_functions_code = GetFunctionNamesInBucket(non_matching_functions, "code")
non_matching_functions_boot = GetFunctionNamesInBucket(non_matching_functions, "boot")

non_matching_asm_ovl = GetRemovableSize(functionSizes, non_matching_functions_ovl)
non_matching_asm_code = GetRemovableSize(functionSizes, non_matching_functions_code)
non_matching_asm_boot = GetRemovableSize(functionSizes, non_matching_functions_boot)

# Calculate Not Attempted
not_attempted_functions_ovl = GetFunctionNamesInBucket(not_attempted_functions, "overlays")
not_attempted_functions_code = GetFunctionNamesInBucket(not_attempted_functions, "code")
not_attempted_functions_boot = GetFunctionNamesInBucket(not_attempted_functions, "boot")

not_attempted_asm_ovl = GetRemovableSize(functionSizes, not_attempted_functions_ovl)
not_attempted_asm_code = GetRemovableSize(functionSizes, not_attempted_functions_code)
not_attempted_asm_boot = GetRemovableSize(functionSizes, not_attempted_functions_boot)

# All the non matching asm is the sum of non-matching code
non_matching_asm = non_matching_asm_ovl + non_matching_asm_code + non_matching_asm_boot

# All the not attempted asm is the sum of not attempted code
not_attempted_asm = not_attempted_asm_ovl + not_attempted_asm_code + not_attempted_asm_boot

# Calculate total decompiled for each bucket by taking out the non-matching and not attempted in ovl/code/boot buckets.
srcTracker["code"]["currentSize"] = srcTracker["code"]["totalSize"] - (non_matching_asm_code + not_attempted_asm_code)
srcTracker["boot"]["currentSize"] = srcTracker["boot"]["totalSize"] - (non_matching_asm_boot + not_attempted_asm_boot)
srcTracker["overlays"]["currentSize"] = srcTracker["overlays"]["totalSize"] - (non_matching_asm_ovl + not_attempted_asm_ovl)

# Total code bucket sizes
handwritten = sum(data["totalSize"] for data in asmTracker.values())

# Calculate the total amount of decompilable code
total = sum(data["totalSize"] for data in srcTracker.values())

# Calculate size of all assets
# Each row of a filelist is expected to have exactly two columns; the second one is a
# file name relative to the extracted baserom directory.
for assetCat in assetsTracker:
    for _index, baseromFile in assetsTracker[assetCat]["files"]:
        baseromPath = os.path.join("extracted", GAME_VERSION, "baserom", baseromFile)
        assetsTracker[assetCat]["totalSize"] += os.stat(baseromPath).st_size

if args.matching:
    for assetCat in assetsTracker:
        assetsTracker[assetCat]["currentSize"] -= assetsTracker[assetCat]["removableSize"]

# Calculate asm and src totals
src = sum(data["currentSize"] for data in srcTracker.values())
asm = sum(data["totalSize"] for data in asmTracker.values())
asm += non_matching_asm + not_attempted_asm

# Calculate assets totals
assets = sum(x["currentSize"] for x in assetsTracker.values())
assets_total = sum(x["totalSize"] for x in assetsTracker.values())

# Convert values to percentages (an empty bucket counts as 0% instead of dividing by zero)
src_percent = GetPercent(src, total)
asm_percent = GetPercent(asm, total)
for srcCat in ["boot", "code", "overlays"]:
    srcTracker[srcCat]["percent"] = GetPercent(srcTracker[srcCat]["currentSize"], srcTracker[srcCat]["totalSize"])

assets_percent = GetPercent(assets, assets_total)
for assetCat in assetsTracker:
    assetsTracker[assetCat]["percent"] = GetPercent(assetsTracker[assetCat]["currentSize"], assetsTracker[assetCat]["totalSize"])

# convert bytes to masks and rupees
num_masks = 24
max_rupees = 500
bytes_per_mask = total / num_masks
bytes_per_rupee = bytes_per_mask / max_rupees
if total == 0:
    # Nothing to decompile means nothing to convert (and avoids dividing by zero)
    masks = 0
    rupees = 0
else:
    masks = int(src / bytes_per_mask)
    rupees = int((src % bytes_per_mask) / bytes_per_rupee)

if args.format == 'csv':
    # Imported here so the other output formats don't require GitPython
    import git

    version = 2
    git_object = git.Repo().head.object
    timestamp = str(git_object.committed_date)
    git_hash = git_object.hexsha

    csv_list = [
        version,
        timestamp,
        git_hash,
        src,
        total,
    ]
    for srcCat in ["boot", "code", "overlays"]:
        csv_list += [srcTracker[srcCat]["currentSize"], srcTracker[srcCat]["totalSize"]]
    csv_list += [
        asm,
        len(non_matching_functions),
    ]
    csv_list += [
        assets,
        assets_total,
    ]
    for assetCat in assetsCategories:
        csv_list += [assetsTracker[assetCat]["currentSize"], assetsTracker[assetCat]["totalSize"]]

    print(",".join(map(str, csv_list)))

elif args.format == 'shield-json':
    import json

    # https://shields.io/endpoint
    print(json.dumps({
        "schemaVersion": 1,
        "label": "progress",
        "message": f"{src_percent:.3g}%",
        "color": 'yellow',
    }))

elif args.format == 'text':
    adjective = "decompiled" if not args.matching else "matched"
    assetsAdjective = "debinarized" if not args.matching else "identified"

    print("src: {:>9} / {:>8} total bytes {:<13} {:>9.4f}%".format(src, total, adjective, round(src_percent, 4)))
    for srcCat in ["boot", "code", "overlays"]:
        # Use a dedicated name so the global `src` total is not overwritten
        srcData = srcTracker[srcCat]
        print(" {:<10} {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(f"{srcCat}:", srcData["currentSize"], srcData["totalSize"], adjective, round(srcData["percent"], 4)))

    print()

    print("assets: {:>9} / {:>8} total bytes {:<13} {:>9.4f}%".format(assets, assets_total, assetsAdjective, round(assets_percent, 4)))
    for assetCat in assetsTracker:
        data = assetsTracker[assetCat]
        print(" {:<10} {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(f"{assetCat}:", data["currentSize"], data["totalSize"], assetsAdjective, round(data["percent"], 4)))

    print()
    print("------------------------------------\n")

    if rupees > 0:
        print('You have {}/{} masks and {}/{} rupee(s).\n'.format(masks, num_masks, rupees, max_rupees))
    else:
        print('You have {}/{} masks.\n'.format(masks, num_masks))
else:
    print("Unknown format argument: " + args.format)