implement automatic hash checking for check.py

This commit is contained in:
shibbo 2024-08-09 18:53:35 -04:00
parent 13f995e093
commit 86061d72ae
6 changed files with 260 additions and 141 deletions

View File

@ -1,6 +1,8 @@
# build.py
# the main build script for building each library
import glob
import hashlib
import subprocess
import sys
import os
@ -35,6 +37,27 @@ COMPILER_CMD = f"-x c++ -O3 -fno-omit-frame-pointer -mno-implicit-float -fno-cxx
COMPILER_PATH = pathlib.Path("compiler/nx/aarch64/bin/clang++.exe")
OBJDUMP_PATH = pathlib.Path("compiler/nx/aarch64/bin/llvm-objdump.exe")
# Locations of the hash bookkeeping files. They are built with pathlib's "/"
# operator so the separator is right on every platform; a literal
# "data\\hashes.txt" is a single (odd) file name on non-Windows hosts.
HASHES_BASE_PATH = pathlib.Path("data") / "hashes.txt"
CHANGED_PATH = pathlib.Path("data") / "changed.txt"

# if we don't have the changed-objects file, create it (empty)
if not CHANGED_PATH.exists():
    CHANGED_PATH.parent.mkdir(parents=True, exist_ok=True)
    CHANGED_PATH.touch()


def loadHashes(path):
    """Read ``object=md5`` lines from *path* and return them as a dict.

    Args:
        path: location of the hashes file written by a previous build.

    Returns:
        A dict mapping each object path to its recorded hex digest. The dict
        is empty when *path* does not exist.
    """
    hashes = {}
    if not os.path.exists(path):
        return hashes

    with open(path, "r") as f:
        for line in f:
            # split on the LAST "=": a hex digest never contains one, but an
            # object path could
            obj, sep, digest = line.rstrip("\n").rpartition("=")
            if not sep:
                # blank or malformed line, nothing to record
                continue
            hashes[obj] = digest
    return hashes


# our hashes that we are starting out with
start_hashes = loadHashes(HASHES_BASE_PATH)
isNotWindows = os.name != 'nt'
def genNinja(compile_tasks):
@ -106,4 +129,40 @@ def generateMaps():
with open(map_path, "w") as w:
w.writelines(newOutput)
compileLibraries(LIBRARIES)
compileLibraries(LIBRARIES)
# Hash every object file the build produced, then work out which ones differ
# from the hashes recorded by the previous run.
obj_hashes = {}

for lib in LIBRARIES:
    # "Game" is globbed from the top-level build directory; every other
    # library from lib/<name>/build
    if lib == "Game":
        pattern = os.path.join("build", "**", "*.o")
    else:
        pattern = os.path.join("lib", lib, "build", "**", "*.o")

    # generate our hashes
    for obj in glob.glob(pattern, recursive=True):
        # use a context manager so each handle is closed right away instead
        # of leaking one open file per object
        with open(obj, "rb") as f:
            obj_hashes[obj] = hashlib.md5(f.read()).hexdigest()

# now we determine what objects were changed based on comparing the two MD5 hashes
# (an object with no previous hash is not reported as changed)
changed_objs = [
    obj
    for obj, digest in obj_hashes.items()
    if obj in start_hashes and start_hashes[obj] != digest
]

# write the changed objects to our text file, one per line; opening with "w"
# truncates, so an empty list simply clears the file
with open(CHANGED_PATH, "w") as w:
    w.writelines(f"{obj}\n" for obj in changed_objs)

# write our new hashes
with open(HASHES_BASE_PATH, "w") as w:
    w.writelines(f"{obj}={digest}\n" for obj, digest in obj_hashes.items())

312
check.py
View File

@ -104,10 +104,33 @@ def getFunctionData(functionAddr, functionSize):
return nso_file.getFunction(functionAddr, functionSize)
if len(sys.argv) < 2:
print("python check.py [-prog] [-no-diff] <mangled symbol>")
# Collect the names of the functions defined in the object files listed in
# the changed-objects file.
funcs_to_check = []

# a forward slash works on Windows and POSIX alike, matching "data/main.map"
with open("data/changed.txt", "r") as f:
    # one object path per line; blank lines would otherwise become open("")
    objs_to_check = [line.strip("\n") for line in f if line.strip()]

if not objs_to_check:
    print("There are no functions to check.")
    sys.exit(1)

for obj in objs_to_check:
    with open(obj, "rb") as obj_file:
        elf_file = ELFFile(obj_file)
        symtab = elf_file.get_section_by_name('.symtab')

        if symtab is None:
            # no symbol table in this object, so nothing to collect from it
            continue

        for symbol in symtab.iter_symbols():
            section = symbol['st_shndx']

            # only named symbols with an integer section index can be looked
            # up (presumably the non-integer values are reserved indices --
            # confirm against pyelftools)
            if not isinstance(section, int) or symbol.name == '':
                continue

            # names starting with "$" are skipped (presumably ARM mapping
            # symbols -- confirm)
            if symbol.name.startswith("$"):
                continue

            # keep only symbols that live in a .text* section
            if elf_file.get_section(section).name.startswith('.text'):
                funcs_to_check.append(symbol.name)
if "-prog" in sys.argv:
genProgress()
sys.exit(1)
@ -115,174 +138,191 @@ if "-prog" in sys.argv:
start = time.time()
printDiff = True
isChanged = False
if "-no-diff" in sys.argv:
sym = sys.argv[2]
printDiff = False
else:
sym = sys.argv[1]
# first let's see if our symbol even exists somewhere
path = getModule("map", sym)
for sym in funcs_to_check:
print(f"{Fore.BLUE}{sym}{Style.RESET_ALL} =================================================")
# first let's see if our symbol even exists somewhere
path = getModule("map", sym)
if path == "":
for lib in LIBRARIES:
path = getModule(f"lib/{lib}/map", sym)
if path == "":
for lib in LIBRARIES:
path = getModule(f"lib/{lib}/map", sym)
if path != "":
break
if path != "":
break
if path == "":
print("Unable to find symbol.")
sys.exit(1)
functionSize = 0
functionAddr = 0
with open("data/main.map", "r") as f:
lines = f.readlines()
for line in lines:
spl = line.split("=")
name = spl[0]
addr = spl[1]
addr = int(addr[10:], 16)
size = int(spl[2], 16)
if sym == name:
functionSize = size
functionAddr = addr
break
funcData = getFunctionData(functionAddr, functionSize)
capstone_inst = Cs(CS_ARCH_ARM64, CS_MODE_ARM + CS_MODE_LITTLE_ENDIAN)
capstone_inst.detail = True
capstone_inst.imm_unsigned = False
if funcData == b'':
print("Failed to fetch function data.")
sys.exit(1)
error_count = 0
warning_count = 0
original_instrs = list(capstone_inst.disasm(funcData, 0))
with open(path, "rb") as f:
elf = f
elf_file = ELFFile(elf)
symtab = elf_file.get_section_by_name('.symtab')
if symtab.get_symbol_by_name(sym) is None:
print("Could not find symbol in object file. This may be caused by the code not being compiled, the function being in the wrong C++ source file or the function signature being wrong.")
if path == "":
print("Unable to find symbol.")
sys.exit(1)
compiled_symbol = symtab.get_symbol_by_name(sym)[0]
custom_offset = compiled_symbol["st_value"]
custom_size = compiled_symbol['st_size']
text = elf_file.get_section_by_name(f".text.{sym}")
functionSize = 0
functionAddr = 0
constructor_swap = False
with open("data/main.map", "r") as f:
lines = f.readlines()
if text is None:
# it is very possible that we are dealing with a C1 / C2 swap...
# for some reason, llvm-objdump dumps our symbols incorrectly
# so the chances of the symbol being present in the object and being a ctor makes it possible we are seeing one of these swaps
print("Possible constructor swap?")
if "C1E" in sym:
sym = sym.replace("C1E", "C2E")
constructor_swap = True
elif "C2E" in sym:
sym = sym.replace("C2E", "C1E")
constructor_swap = True
# now let's try again
text = elf_file.get_section_by_name(f".text.{sym}")
for line in lines:
spl = line.split("=")
name = spl[0]
addr = spl[1]
addr = int(addr[10:], 16)
size = int(spl[2], 16)
if text is None:
print("Could not find function in text data.")
if sym == name:
functionSize = size
functionAddr = addr
break
funcData = getFunctionData(functionAddr, functionSize)
capstone_inst = Cs(CS_ARCH_ARM64, CS_MODE_ARM + CS_MODE_LITTLE_ENDIAN)
capstone_inst.detail = True
capstone_inst.imm_unsigned = False
if funcData == b'':
print("Failed to fetch function data.")
continue
error_count = 0
warning_count = 0
original_instrs = list(capstone_inst.disasm(funcData, 0))
with open(path, "rb") as f:
elf = f
elf_file = ELFFile(elf)
symtab = elf_file.get_section_by_name('.symtab')
if symtab.get_symbol_by_name(sym) is None:
print("Could not find symbol in object file. This may be caused by the code not being compiled, the function being in the wrong C++ source file or the function signature being wrong.")
sys.exit(1)
custom_data = text.data()[custom_offset:custom_offset + custom_size]
custom_instructions = list(capstone_inst.disasm(custom_data, 0))
compiled_symbol = symtab.get_symbol_by_name(sym)[0]
custom_offset = compiled_symbol["st_value"]
custom_size = compiled_symbol['st_size']
text = elf_file.get_section_by_name(f".text.{sym}")
orig_length = len(list(original_instrs))
cust_length = len(list(custom_instructions))
constructor_swap = False
instr_equal = True
regs_equal = True
if text is None:
# it is very possible that we are dealing with a C1 / C2 swap...
# for some reason, llvm-objdump dumps our symbols incorrectly
# so the chances of the symbol being present in the object and being a ctor makes it possible we are seeing one of these swaps
print("Possible constructor swap?")
if "C1E" in sym:
sym = sym.replace("C1E", "C2E")
constructor_swap = True
elif "C2E" in sym:
sym = sym.replace("C2E", "C1E")
constructor_swap = True
# now let's try again
text = elf_file.get_section_by_name(f".text.{sym}")
# did we have to correct our llvm-objdump? if so, we swap our symbol we are marking here
if constructor_swap:
if "C1E" in sym:
sym = sym.replace("C1E", "C2E")
elif "C2E" in sym:
sym = sym.replace("C2E", "C1E")
if text is None:
print("Could not find function in text data.")
sys.exit(1)
for i in range(orig_length):
curOrigInstr = original_instrs[i]
curCustInstr = custom_instructions[i]
custom_data = text.data()[custom_offset:custom_offset + custom_size]
custom_instructions = list(capstone_inst.disasm(custom_data, 0))
orig_operands = curOrigInstr.operands
cust_operands = curCustInstr.operands
orig_length = len(list(original_instrs))
cust_length = len(list(custom_instructions))
if str(curOrigInstr) == str(curCustInstr):
if printDiff == True:
print(f"{Fore.GREEN}{str(curOrigInstr):<80}{curCustInstr}{Style.RESET_ALL}")
continue
instr_equal = True
regs_equal = True
if curOrigInstr.id != curCustInstr.id:
print(f"{Fore.RED}{str(curOrigInstr):<80}{curCustInstr}{Style.RESET_ALL}")
instr_equal = False
continue
# did we have to correct our llvm-objdump? if so, we swap our symbol we are marking here
if constructor_swap:
if "C1E" in sym:
sym = sym.replace("C1E", "C2E")
elif "C2E" in sym:
sym = sym.replace("C2E", "C1E")
for j in range(len(orig_operands)):
if orig_operands[j].reg != cust_operands[j]:
# ADRP and ADD can give off wrong operands because of us not linking, same with LDR
if curOrigInstr.id == ARM64_INS_ADRP or curOrigInstr.id == ARM64_INS_ADD or curOrigInstr.id == ARM64_INS_LDR:
print(f"{Fore.YELLOW}{str(curOrigInstr):<80}{curCustInstr}{Style.RESET_ALL}")
# B and BL instructions
elif curOrigInstr.id == ARM64_INS_B or curOrigInstr.id == ARM64_INS_BL:
print(f"{Fore.YELLOW}{str(curOrigInstr):<80}{curCustInstr}{Style.RESET_ALL}")
else:
print(f"{Fore.RED}{str(curOrigInstr):<80}{curCustInstr}{Style.RESET_ALL}")
regs_equal = False
break
for i in range(orig_length):
curOrigInstr = original_instrs[i]
curCustInstr = custom_instructions[i]
isAlreadyMarked = False
orig_operands = curOrigInstr.operands
cust_operands = curCustInstr.operands
if str(curOrigInstr) == str(curCustInstr):
if printDiff == True:
print(f"{Fore.GREEN}{str(curOrigInstr):<80}{curCustInstr}{Style.RESET_ALL}")
continue
if curOrigInstr.id != curCustInstr.id:
print(f"{Fore.RED}{str(curOrigInstr):<80}{curCustInstr}{Style.RESET_ALL}")
instr_equal = False
continue
for j in range(len(orig_operands)):
if orig_operands[j].reg != cust_operands[j]:
# ADRP and ADD can give off wrong operands because of us not linking, same with LDR
if curOrigInstr.id == ARM64_INS_ADRP or curOrigInstr.id == ARM64_INS_ADD or curOrigInstr.id == ARM64_INS_LDR:
print(f"{Fore.YELLOW}{str(curOrigInstr):<80}{curCustInstr}{Style.RESET_ALL}")
# B and BL instructions
elif curOrigInstr.id == ARM64_INS_B or curOrigInstr.id == ARM64_INS_BL:
print(f"{Fore.YELLOW}{str(curOrigInstr):<80}{curCustInstr}{Style.RESET_ALL}")
else:
print(f"{Fore.RED}{str(curOrigInstr):<80}{curCustInstr}{Style.RESET_ALL}")
regs_equal = False
break
isAlreadyMarked = False
if instr_equal == True and regs_equal == True:
with open("data/main.map", "r") as f:
csvData = f.readlines()
outCsv = []
for c in csvData:
spl = c.split("=")
if spl[0] == sym and spl[3] == "false\n":
outCsv.append(f"{spl[0]}={spl[1]}={spl[2]}=true\n")
elif spl[0] == sym and spl[3] == "true\n":
isAlreadyMarked = True
outCsv.append(c)
# we have a matching function
if instr_equal == True and regs_equal == True:
for c in csvData:
spl = c.split("=")
if spl[0] == sym and spl[3] == "false\n":
outCsv.append(f"{spl[0]}={spl[1]}={spl[2]}=true\n")
elif spl[0] == sym and spl[3] == "true\n":
isAlreadyMarked = True
outCsv.append(c)
else:
outCsv.append(c)
if isAlreadyMarked == True:
print("Function is already marked as decompiled.")
else:
outCsv.append(c)
print("Function is matching! Marking as decompiled...")
isChanged = True
# we have a nonmatching function
else:
for c in csvData:
spl = c.split("=")
if spl[0] == sym and spl[3] == "true\n":
outCsv.append(f"{spl[0]}={spl[1]}={spl[2]}=false\n")
elif spl[0] == sym and spl[3] == "false\n":
outCsv.append(c)
else:
outCsv.append(c)
if isAlreadyMarked == True:
print("Function was marked as decompiled, but does not match. Marking as undecompiled...")
if instr_equal == True and regs_equal == False:
print("Function has matching instructions, but operands are not equal.")
elif instr_equal == False and regs_equal == True:
print("Function has matching operands, but instructions are not equal.")
elif instr_equal == False and regs_equal == False:
print("Function does not match in either instructions or operands.")
with open("data/main.map", "w") as w:
w.writelines(outCsv)
if isAlreadyMarked == True:
print("Function is already marked as decompiled.")
else:
print("Function is matching! Marking as decompiled...")
genProgress()
elif instr_equal == True and regs_equal == False:
print("Function has matching instructions, but operands are not equal.")
elif instr_equal == False and regs_equal == True:
print("Function has matching operands, but instructions are not equal.")
elif instr_equal == False and regs_equal == False:
print("Function does not match in either instructions or operands.")
if isChanged:
genProgress()
end = time.time()
length = end - start

0
data/changed.txt Normal file
View File

View File

@ -1,6 +1,6 @@
{
"schemaVersion": 1,
"label": "decompiled",
"message": "0.1468%",
"message": "0.149%",
"color": "blue"
}

20
data/hashes.txt Normal file
View File

@ -0,0 +1,20 @@
lib\ActionLibrary\build\Library\Actor\ActorSensorController.o=279b3ed9791554d0c81b6457dd38f43f
lib\ActionLibrary\build\Library\Actor\ActorSensorUtil.o=974779dd78598c2acb5521b6f0d19408
lib\ActionLibrary\build\Library\Collision\Collider.o=c25595eb11dfc382b0e944261c8293a5
lib\ActionLibrary\build\Library\Collision\HitInfo.o=b5f0cee66dd02a40ddd29a92d46b6b64
lib\ActionLibrary\build\Library\HitSensor\HitSensor.o=779957dcecf6759d589dd8049dd5e1b6
lib\ActionLibrary\build\Library\HitSensor\HitSensorKeeper.o=1c00b0d7dbda052adeeb44aef677bac5
lib\ActionLibrary\build\Library\HitSensor\SensorHitGroup.o=81f22962ed5b63f782f05e8dc0a3c195
lib\ActionLibrary\build\Library\Item\ActorItemFunction.o=5206fb53708087211f95ea01286dbba5
lib\ActionLibrary\build\Library\Item\ActorItemInfo.o=f9c94c24ebeaede804baaefeffe0b1ed
lib\ActionLibrary\build\Library\Item\ActorItemKeeper.o=fa38e0001496e8e2b51a5c83e8ad8c49
lib\ActionLibrary\build\Library\Item\ActorScoreInfo.o=abd2bb10df1ba1212a1256ef7d50639b
lib\ActionLibrary\build\Library\Item\ActorScoreKeeper.o=8d923c76eda0825d663da363a61b4d71
lib\ActionLibrary\build\Library\LiveActor\LiveActor.o=4add73d475a9e9e66a12bf9b88601c64
lib\ActionLibrary\build\Library\Nerve\NerveExecutor.o=d740f10f087c3bd155429578d7954536
lib\ActionLibrary\build\Library\Nerve\NerveKeeper.o=d91b53bcf1a4bee36cdb6b6c85f70c24
lib\ActionLibrary\build\Library\Nerve\NerveStateBase.o=3e3da64b3f5229c0e5afe27433fb191b
lib\ActionLibrary\build\Library\Nerve\NerveStateCtrl.o=ea817ea8c505dc6cca0257f3001160c8
lib\ActionLibrary\build\Library\Yaml\ByamlArrayIter.o=f02b3f737394c04b4a99e657ec4bb00e
lib\ActionLibrary\build\Library\Yaml\ByamlStringTableIter.o=5c4df7764719debcbe1fa84b1c6f81cc
lib\sead\build\heap\IDisposer.o=6214e5f376fabad9763a7af22b3e7c43

View File

@ -50466,13 +50466,13 @@ _ZN2al15setDisasterModeEPNS_9LiveActorEb=0000007100828FB0=0000004C=false
_ZN2al14isDisasterModeEPNS_9LiveActorE=0000007100829000=00000050=false
_ZN2al16stopAllPadRumbleEPNS_9LiveActorE=0000007100829050=00000024=false
_ZN2al12isSingleModeEPKNS_9LiveActorE=0000007100829080=00000018=false
_ZN2al21ActorSensorControllerC1EPNS_9LiveActorEPKc=00000071008290A0=00000054=true
_ZN2al21ActorSensorControllerC1EPNS_9LiveActorEPKc=00000071008290A0=00000054=false
_ZN2al21ActorSensorController14setSensorScaleEf=0000007100829100=00000034=true
_ZN2al21ActorSensorController15setSensorRadiusEf=0000007100829140=0000000C=true
_ZN2al21ActorSensorController24setSensorFollowPosOffsetERKN4sead7Vector3IfEE=0000007100829150=00000018=true
_ZN2al21ActorSensorController26resetActorSensorControllerEv=0000007100829170=00000024=minor
_ZN2al25ActorSensorControllerListC1Ei=00000071008291A0=00000070=true
_ZN2al25ActorSensorControllerList9addSensorEPNS_9LiveActorEPKc=0000007100829210=00000080=true
_ZN2al25ActorSensorControllerListC1Ei=00000071008291A0=00000070=false
_ZN2al25ActorSensorControllerList9addSensorEPNS_9LiveActorEPKc=0000007100829210=00000080=false
_ZN2al25ActorSensorControllerList17setAllSensorScaleEf=0000007100829290=0000005C=minor
_ZN2al25ActorSensorControllerList29resetAllActorSensorControllerEv=00000071008292F0=0000004C=minor
_ZN2al18addHitSensorPlayerEPNS_9LiveActorERKNS_13ActorInitInfoEPKcftRKN4sead7Vector3IfEE=0000007100829340=0000008C=false