mirror of
https://github.com/open-goal/jak-project.git
synced 2025-03-06 04:47:25 +00:00
add decompiler
This commit is contained in:
parent
ba3c3af43e
commit
c3aff47886
README.mddecomp.sh
decompiler
CMakeLists.txt
Disasm
Instruction.cppInstruction.hInstructionDecode.cppInstructionDecode.hInstructionMatching.cppInstructionMatching.hOpcodeInfo.cppOpcodeInfo.hRegister.cppRegister.h
Function
ObjectFile
LinkedObjectFile.cppLinkedObjectFile.hLinkedObjectFileCreation.cppLinkedObjectFileCreation.hLinkedWord.hObjectFileDB.cppObjectFileDB.h
README.mdTypeSystem
GoalFunction.cppGoalFunction.hGoalSymbol.cppGoalSymbol.hGoalType.cppGoalType.hTypeInfo.cppTypeInfo.hTypeSpec.cppTypeSpec.h
config.cppconfig.hconfig
main.cppscripts
util
decompiler_out
11
README.md
11
README.md
@ -47,7 +47,7 @@ Design:
|
||||
- Workflow for development:
|
||||
- `./gc.sh` : run the compiler in interactive mode
|
||||
- `./gs.sh` : run a goos interpreter in interactive mode
|
||||
- `./decomp.sh ./iso_data` : run the decompiler
|
||||
- `./decomp.sh` : run the decompiler
|
||||
|
||||
Current state:
|
||||
- GOAL compiler just implements the GOOS Scheme Macro Language. Running `./gc.sh` just loads the GOOS library (`goalc/gs/goos-lib.gs`) and then goes into an interactive mode. Use `(exit)` to exit.
|
||||
@ -79,7 +79,8 @@ TODOS:
|
||||
- performance stats for `SystemThread` (probably just get rid of these performance stats completely)
|
||||
- `mmap`ing executable memory
|
||||
- line input library (appears windows compatible?)
|
||||
- Clean up possible duplicate code in compiler/decompiler `util` folder
|
||||
- Clean up possible duplicate code in compiler/decompiler `util` folder, consider a common utility library
|
||||
- Clean up header guard names (or just use `#pragma once`?)
|
||||
- Investigate a better config format
|
||||
- The current JSON library seems to have issues with comments, which I really like
|
||||
- Clean up use of namespaces
|
||||
@ -90,9 +91,9 @@ TODOS:
|
||||
- Listener protocol document
|
||||
- GOAL Compiler IR
|
||||
- GOAL Compiler Skeleton
|
||||
|
||||
In Progress:
|
||||
- GOAL emitter / emitter testing setup
|
||||
- Gtest setup for checking decompiler results against hand-decompiled stuff
|
||||
- Clean up decompiler print spam, finish up the CFG stuff
|
||||
- Decompiler document
|
||||
|
||||
|
||||
Project Description
|
||||
|
6
decomp.sh
Executable file
6
decomp.sh
Executable file
@ -0,0 +1,6 @@
|
||||
#!/bin/bash

# Directory of this script, so the decompiler can be launched from any working directory.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# Arguments: decompiler config, input directory, output directory.
# Every expansion is quoted so a checkout path containing spaces still works.
"$DIR/build/decompiler/decompiler" "$DIR/decompiler/config/jak1_ntsc_black_label.jsonc" "$DIR/iso_data" "$DIR/decompiler_out"
|
@ -0,0 +1,25 @@
|
||||
# The decompiler executable. Each source file is listed exactly once
# (util/LispPrint.cpp previously appeared twice).
add_executable(decompiler
        util/LispPrint.cpp
        main.cpp
        ObjectFile/ObjectFileDB.cpp
        Disasm/Instruction.cpp
        Disasm/InstructionDecode.cpp
        Disasm/OpcodeInfo.cpp
        Disasm/Register.cpp
        ObjectFile/LinkedObjectFileCreation.cpp
        ObjectFile/LinkedObjectFile.cpp
        Function/Function.cpp
        util/FileIO.cpp
        config.cpp
        util/Timer.cpp
        Function/BasicBlocks.cpp
        Disasm/InstructionMatching.cpp
        TypeSystem/GoalType.cpp
        TypeSystem/GoalFunction.cpp
        TypeSystem/GoalSymbol.cpp
        TypeSystem/TypeInfo.cpp
        TypeSystem/TypeSpec.cpp
        Function/CfgVtx.cpp
        Function/CfgVtx.h)

# minilzo is the only library the decompiler links against here.
target_link_libraries(decompiler
        minilzo)
|
304
decompiler/Disasm/Instruction.cpp
Normal file
304
decompiler/Disasm/Instruction.cpp
Normal file
@ -0,0 +1,304 @@
|
||||
/*!
|
||||
* @file Instruction.cpp
|
||||
* An EE instruction, represented as an operation, plus a list of source/destination atoms.
|
||||
* Can print itself (within the context of a LinkedObjectFile).
|
||||
*/
|
||||
|
||||
#include "Instruction.h"
|
||||
#include "decompiler/ObjectFile/LinkedObjectFile.h"
|
||||
#include <cassert>
|
||||
|
||||
/*!
|
||||
* Convert atom to a string for disassembly.
|
||||
*/
|
||||
std::string InstructionAtom::to_string(const LinkedObjectFile& file) const {
|
||||
switch (kind) {
|
||||
case REGISTER:
|
||||
return reg.to_string();
|
||||
case IMM:
|
||||
return std::to_string(imm);
|
||||
case LABEL:
|
||||
return file.get_label_name(label_id);
|
||||
case VU_ACC:
|
||||
return "acc";
|
||||
case VU_Q:
|
||||
return "Q";
|
||||
case IMM_SYM:
|
||||
return sym;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Make this atom a register.
|
||||
*/
|
||||
void InstructionAtom::set_reg(Register r) {
|
||||
kind = REGISTER;
|
||||
reg = r;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Make this atom an immediate.
|
||||
*/
|
||||
void InstructionAtom::set_imm(int32_t i) {
|
||||
kind = IMM;
|
||||
imm = i;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Make this atom a label.
|
||||
*/
|
||||
void InstructionAtom::set_label(int id) {
|
||||
kind = LABEL;
|
||||
label_id = id;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Make this atom the VU ACC register.
|
||||
*/
|
||||
void InstructionAtom::set_vu_acc() {
|
||||
kind = VU_ACC;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Make this atom the VU0 Q register.
|
||||
*/
|
||||
void InstructionAtom::set_vu_q() {
|
||||
kind = VU_Q;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Make this atom a symbol.
|
||||
*/
|
||||
void InstructionAtom::set_sym(std::string _sym) {
|
||||
kind = IMM_SYM;
|
||||
sym = std::move(_sym);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get as register, or error if not a register.
|
||||
*/
|
||||
Register InstructionAtom::get_reg() const {
|
||||
assert(kind == REGISTER);
|
||||
return reg;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get as integer immediate, or error if not an integer immediate.
|
||||
*/
|
||||
int32_t InstructionAtom::get_imm() const {
|
||||
assert(kind == IMM);
|
||||
return imm;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get as label index, or error if not a label.
|
||||
*/
|
||||
int InstructionAtom::get_label() const {
|
||||
assert(kind == LABEL);
|
||||
return label_id;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get as symbol, or error if not a symbol.
|
||||
*/
|
||||
std::string InstructionAtom::get_sym() const {
|
||||
assert(kind == IMM_SYM);
|
||||
return sym;
|
||||
}
|
||||
|
||||
/*!
|
||||
* True if this atom is some sort of constant that doesn't involve linking.
|
||||
*/
|
||||
bool InstructionAtom::is_link_or_label() const {
|
||||
return kind == IMM_SYM || kind == LABEL;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Convert entire instruction to a string.
|
||||
*/
|
||||
std::string Instruction::to_string(const LinkedObjectFile& file) const {
|
||||
auto& info = gOpcodeInfo[(int)kind];
|
||||
|
||||
// the name
|
||||
std::string result = info.name;
|
||||
|
||||
// optional "interlock" specification.
|
||||
if (il != 0xff) {
|
||||
result.append(il ? ".i" : ".ni");
|
||||
}
|
||||
|
||||
// optional "broadcast" specification for COP2 opcodes.
|
||||
if (cop2_bc != 0xff) {
|
||||
switch (cop2_bc) {
|
||||
case 0:
|
||||
result.push_back('x');
|
||||
break;
|
||||
case 1:
|
||||
result.push_back('y');
|
||||
break;
|
||||
case 2:
|
||||
result.push_back('z');
|
||||
break;
|
||||
case 3:
|
||||
result.push_back('w');
|
||||
break;
|
||||
default:
|
||||
result.push_back('?');
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// optional "destination" specification for COP2 opcodes.
|
||||
if (cop2_dest != 0xff) {
|
||||
result += ".";
|
||||
if (cop2_dest & 8)
|
||||
result.push_back('x');
|
||||
if (cop2_dest & 4)
|
||||
result.push_back('y');
|
||||
if (cop2_dest & 2)
|
||||
result.push_back('z');
|
||||
if (cop2_dest & 1)
|
||||
result.push_back('w');
|
||||
}
|
||||
|
||||
// relative store and load instructions have a special syntax in MIPS
|
||||
if (info.is_store) {
|
||||
assert(n_dst == 0);
|
||||
assert(n_src == 3);
|
||||
result += " ";
|
||||
result += src[0].to_string(file);
|
||||
result += ", ";
|
||||
result += src[1].to_string(file);
|
||||
result += "(";
|
||||
result += src[2].to_string(file);
|
||||
result += ")";
|
||||
} else if (info.is_load) {
|
||||
assert(n_dst == 1);
|
||||
assert(n_src == 2);
|
||||
result += " ";
|
||||
result += dst[0].to_string(file);
|
||||
result += ", ";
|
||||
result += src[0].to_string(file);
|
||||
result += "(";
|
||||
result += src[1].to_string(file);
|
||||
result += ")";
|
||||
} else {
|
||||
// for instructions that aren't a store or load, the dest/sources are comma separated.
|
||||
bool end_comma = false;
|
||||
|
||||
for (uint8_t i = 0; i < n_dst; i++) {
|
||||
result += " " + dst[i].to_string(file) + ",";
|
||||
end_comma = true;
|
||||
}
|
||||
|
||||
for (uint8_t i = 0; i < n_src; i++) {
|
||||
result += " " + src[i].to_string(file) + ",";
|
||||
end_comma = true;
|
||||
}
|
||||
|
||||
if (end_comma) {
|
||||
result.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Was this instruction successfully decoded?
|
||||
*/
|
||||
bool Instruction::is_valid() const {
|
||||
return kind != InstructionKind::UNKNOWN;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Add a destination atom to this Instruction
|
||||
*/
|
||||
void Instruction::add_dst(InstructionAtom& a) {
|
||||
assert(n_dst < MAX_INTRUCTION_DEST);
|
||||
dst[n_dst++] = a;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Add a source atom to this Instruction
|
||||
*/
|
||||
void Instruction::add_src(InstructionAtom& a) {
|
||||
assert(n_src < MAX_INSTRUCTION_SOURCE);
|
||||
src[n_src++] = a;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get a source atom that's an immediate, or error if it doesn't exist.
|
||||
*/
|
||||
InstructionAtom& Instruction::get_imm_src() {
|
||||
for (int i = 0; i < n_src; i++) {
|
||||
if (src[i].kind == InstructionAtom::IMM) {
|
||||
return src[i];
|
||||
}
|
||||
}
|
||||
assert(false);
|
||||
return src[0];
|
||||
}
|
||||
|
||||
/*!
|
||||
* Try to find a src which is an integer immediate, and return it as an integer.
|
||||
*/
|
||||
int32_t Instruction::get_imm_src_int() {
|
||||
return get_imm_src().get_imm();
|
||||
}
|
||||
|
||||
/*!
|
||||
* Safe get dst atom
|
||||
*/
|
||||
InstructionAtom& Instruction::get_dst(size_t idx) {
|
||||
assert(idx < n_dst);
|
||||
return dst[idx];
|
||||
}
|
||||
|
||||
/*!
|
||||
* Safe get src atom
|
||||
*/
|
||||
InstructionAtom& Instruction::get_src(size_t idx) {
|
||||
assert(idx < n_src);
|
||||
return src[idx];
|
||||
}
|
||||
|
||||
/*!
|
||||
* Safe get dst atom
|
||||
*/
|
||||
const InstructionAtom& Instruction::get_dst(size_t idx) const {
|
||||
assert(idx < n_dst);
|
||||
return dst[idx];
|
||||
}
|
||||
|
||||
/*!
|
||||
* Safe get src atom
|
||||
*/
|
||||
const InstructionAtom& Instruction::get_src(size_t idx) const {
|
||||
assert(idx < n_src);
|
||||
return src[idx];
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get OpcodeInfo for the opcode used in this instruction.
|
||||
*/
|
||||
const OpcodeInfo& Instruction::get_info() const {
|
||||
return gOpcodeInfo[int(kind)];
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the target label for this instruction. If the instruction doesn't have a target label,
|
||||
* return -1.
|
||||
*/
|
||||
int Instruction::get_label_target() const {
|
||||
int result = -1;
|
||||
for (int i = 0; i < n_src; i++) {
|
||||
if (src[i].kind == InstructionAtom::AtomKind::LABEL) {
|
||||
assert(result == -1);
|
||||
result = src[i].get_label();
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
89
decompiler/Disasm/Instruction.h
Normal file
89
decompiler/Disasm/Instruction.h
Normal file
@ -0,0 +1,89 @@
|
||||
/*!
|
||||
* @file Instruction.h
|
||||
* An EE instruction, represented as an operation, plus a list of source/destination atoms.
|
||||
* Can print itself (within the context of a LinkedObjectFile).
|
||||
*/
|
||||
|
||||
#ifndef NEXT_INSTRUCTION_H
|
||||
#define NEXT_INSTRUCTION_H
|
||||
|
||||
#include "OpcodeInfo.h"
|
||||
#include "Register.h"
|
||||
|
||||
class LinkedObjectFile;
|
||||
|
||||
// Capacity of the Instruction::src array: the most source atoms one Instruction can hold.
constexpr int MAX_INSTRUCTION_SOURCE = 3;
|
||||
// Capacity of the Instruction::dst array: the most destination atoms one Instruction can hold.
// NOTE: the name is misspelled ("INTRUCTION"); Instruction.cpp refers to it by this exact name.
constexpr int MAX_INTRUCTION_DEST = 1;
|
||||
|
||||
// An "atom", representing a single register, immediate, etc... for use in an Instruction.
|
||||
struct InstructionAtom {
|
||||
enum AtomKind {
|
||||
REGISTER, // An EE Register
|
||||
IMM, // An immediate value (stored as int32)
|
||||
IMM_SYM, // An immediate value (a symbolic link)
|
||||
LABEL, // A label in a LinkedObjectFile
|
||||
VU_ACC, // The VU0 Accumulator
|
||||
VU_Q, // The VU0 Q Register
|
||||
INVALID
|
||||
} kind = INVALID;
|
||||
|
||||
void set_reg(Register r);
|
||||
void set_imm(int32_t i);
|
||||
void set_label(int id);
|
||||
void set_vu_q();
|
||||
void set_vu_acc();
|
||||
void set_sym(std::string _sym);
|
||||
|
||||
Register get_reg() const;
|
||||
int32_t get_imm() const;
|
||||
int get_label() const;
|
||||
std::string get_sym() const;
|
||||
|
||||
std::string to_string(const LinkedObjectFile& file) const;
|
||||
|
||||
bool is_link_or_label() const;
|
||||
|
||||
private:
|
||||
int32_t imm;
|
||||
int label_id;
|
||||
Register reg;
|
||||
|
||||
std::string sym;
|
||||
};
|
||||
|
||||
// An "Instruction", consisting of a "kind" (the opcode), and the source/destination atoms it
|
||||
// operates on.
|
||||
class Instruction {
|
||||
public:
|
||||
InstructionKind kind = InstructionKind::UNKNOWN;
|
||||
|
||||
std::string to_string(const LinkedObjectFile& file) const;
|
||||
bool is_valid() const;
|
||||
|
||||
void add_src(InstructionAtom& a);
|
||||
void add_dst(InstructionAtom& a);
|
||||
|
||||
InstructionAtom& get_src(size_t idx);
|
||||
InstructionAtom& get_dst(size_t idx);
|
||||
const InstructionAtom& get_src(size_t idx) const;
|
||||
const InstructionAtom& get_dst(size_t idx) const;
|
||||
|
||||
// source and destination atoms
|
||||
uint8_t n_src = 0, n_dst = 0;
|
||||
InstructionAtom src[MAX_INSTRUCTION_SOURCE];
|
||||
InstructionAtom dst[MAX_INTRUCTION_DEST];
|
||||
|
||||
InstructionAtom& get_imm_src();
|
||||
int32_t get_imm_src_int();
|
||||
|
||||
const OpcodeInfo& get_info() const;
|
||||
|
||||
int get_label_target() const;
|
||||
|
||||
// extra fields for some COP2 instructions.
|
||||
uint8_t cop2_dest = 0xff; // 0xff indicates "don't print dest"
|
||||
uint8_t cop2_bc = 0xff; // 0xff indicates "don't print bc"
|
||||
uint8_t il = 0xff; // 0xff indicates "don't print il"
|
||||
};
|
||||
|
||||
#endif // NEXT_INSTRUCTION_H
|
1173
decompiler/Disasm/InstructionDecode.cpp
Normal file
1173
decompiler/Disasm/InstructionDecode.cpp
Normal file
File diff suppressed because it is too large
Load Diff
17
decompiler/Disasm/InstructionDecode.h
Normal file
17
decompiler/Disasm/InstructionDecode.h
Normal file
@ -0,0 +1,17 @@
|
||||
/*!
|
||||
* @file InstructionDecode.h
|
||||
 * The Instruction Decoder - converts a LinkedWord into an Instruction.
|
||||
* This is the part of the disassembler that decodes MIPS instructions.
|
||||
*/
|
||||
|
||||
#ifndef NEXT_INSTRUCTIONDECODE_H
|
||||
#define NEXT_INSTRUCTIONDECODE_H
|
||||
|
||||
#include "Instruction.h"
|
||||
|
||||
class LinkedWord;
|
||||
class LinkedObjectFile;
|
||||
|
||||
// Decode one LinkedWord into an Instruction.
// NOTE(review): seg_id / word_id presumably give the position of `word` inside `file`, and both
// references are non-const so the decoder may modify them - confirm in InstructionDecode.cpp.
Instruction decode_instruction(LinkedWord& word, LinkedObjectFile& file, int seg_id, int word_id);
|
||||
|
||||
#endif // NEXT_INSTRUCTIONDECODE_H
|
350
decompiler/Disasm/InstructionMatching.cpp
Normal file
350
decompiler/Disasm/InstructionMatching.cpp
Normal file
@ -0,0 +1,350 @@
|
||||
/*!
|
||||
* @file InstructionMatching.cpp
|
||||
* Utilities for checking if an instruction matches some criteria.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include "InstructionMatching.h"
|
||||
|
||||
/*!
|
||||
* Check if the given instruction stores a GPR with the specified parameters.
|
||||
*/
|
||||
bool is_no_link_gpr_store(const Instruction& instr,
|
||||
MatchParam<int> size,
|
||||
MatchParam<Register> src,
|
||||
MatchParam<int> offset,
|
||||
MatchParam<Register> dest) {
|
||||
// match the opcode
|
||||
if (!size.is_wildcard) {
|
||||
switch (size.value) {
|
||||
case 1:
|
||||
if (instr.kind != InstructionKind::SB) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (instr.kind != InstructionKind::SH) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
if (instr.kind != InstructionKind::SW) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
if (instr.kind != InstructionKind::SD) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 16:
|
||||
if (instr.kind != InstructionKind::SQ) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
} else {
|
||||
// just make sure it's a gpr store
|
||||
if (!is_gpr_store(instr)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
assert(instr.n_src == 3);
|
||||
|
||||
// match other arguments
|
||||
return src == instr.src[0].get_reg() && offset == instr.src[1].get_imm() &&
|
||||
dest == instr.src[2].get_reg();
|
||||
}
|
||||
|
||||
/*!
|
||||
* Check if the given instruction loads a GPR with the specified parameters.
|
||||
* LD and LQ count as signed, unsigned, and "wildcard signed" loads.
|
||||
* LWL/LWR/LDL/LDR will never match.
|
||||
*
|
||||
* "no ll" means no link or label
|
||||
*/
|
||||
bool is_no_ll_gpr_load(const Instruction& instr,
|
||||
MatchParam<int> size,
|
||||
MatchParam<bool> is_signed,
|
||||
MatchParam<Register> dst_reg,
|
||||
MatchParam<int> offset,
|
||||
MatchParam<Register> mem_reg) {
|
||||
// match the opcode
|
||||
if (!size.is_wildcard) {
|
||||
if (is_signed.is_wildcard) {
|
||||
switch (size.value) {
|
||||
case 1:
|
||||
if (instr.kind != InstructionKind::LB && instr.kind != InstructionKind::LBU) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (instr.kind != InstructionKind::LH && instr.kind != InstructionKind::LHU) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
if (instr.kind != InstructionKind::LW && instr.kind != InstructionKind::LWU) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
if (instr.kind != InstructionKind::LD) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 16:
|
||||
if (instr.kind != InstructionKind::LQ) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
} else {
|
||||
if (is_signed.value) {
|
||||
switch (size.value) {
|
||||
case 1:
|
||||
if (instr.kind != InstructionKind::LB) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (instr.kind != InstructionKind::LH) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
if (instr.kind != InstructionKind::LW) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
if (instr.kind != InstructionKind::LD) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 16:
|
||||
if (instr.kind != InstructionKind::LQ) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
} else {
|
||||
switch (size.value) {
|
||||
case 1:
|
||||
if (instr.kind != InstructionKind::LBU) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (instr.kind != InstructionKind::LHU) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
if (instr.kind != InstructionKind::LWU) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
if (instr.kind != InstructionKind::LD) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 16:
|
||||
if (instr.kind != InstructionKind::LQ) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// just make sure it's a gpr store
|
||||
if (!is_gpr_load(instr, is_signed)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// match other arguments
|
||||
return dst_reg == instr.get_dst(0).get_reg() && offset == instr.get_src(0).get_imm() &&
|
||||
mem_reg == instr.get_src(1).get_reg();
|
||||
}
|
||||
|
||||
/*!
|
||||
* Check if the instruction stores an FPR (SWC1)
|
||||
* "no ll" means that there is no label or linking involved.
|
||||
*/
|
||||
bool is_no_ll_fpr_store(const Instruction& instr,
|
||||
MatchParam<Register> src,
|
||||
MatchParam<int> offset,
|
||||
MatchParam<Register> dest) {
|
||||
return instr.kind == InstructionKind::SWC1 && src == instr.src[0].get_reg() &&
|
||||
offset == instr.src[1].get_imm() && dest == instr.src[2].get_reg();
|
||||
}
|
||||
/*!
|
||||
* Check if the instruction loads an FPR (LWC1)
|
||||
* "no ll" means that there is no label or linking involved.
|
||||
*/
|
||||
bool is_no_ll_fpr_load(const Instruction& instr,
|
||||
MatchParam<Register> dst_reg,
|
||||
MatchParam<int> offset,
|
||||
MatchParam<Register> mem_reg) {
|
||||
return instr.kind == InstructionKind::LWC1 && dst_reg == instr.get_dst(0).get_reg() &&
|
||||
offset == instr.get_src(0).get_imm() && mem_reg == instr.get_src(1).get_reg();
|
||||
}
|
||||
|
||||
namespace {
|
||||
auto gpr_stores = {InstructionKind::SB, InstructionKind::SH, InstructionKind::SW,
|
||||
InstructionKind::SD, InstructionKind::SQ};
|
||||
auto gpr_signed_loads = {InstructionKind::LB, InstructionKind::LH, InstructionKind::LW,
|
||||
InstructionKind::LD, InstructionKind::LQ};
|
||||
auto gpr_unsigned_loads = {InstructionKind::LBU, InstructionKind::LHU, InstructionKind::LWU,
|
||||
InstructionKind::LD, InstructionKind::LQ};
|
||||
auto gpr_all_loads = {InstructionKind::LBU, InstructionKind::LB, InstructionKind::LH,
|
||||
InstructionKind::LHU, InstructionKind::LW, InstructionKind::LWU,
|
||||
InstructionKind::SD, InstructionKind::SQ};
|
||||
} // namespace
|
||||
|
||||
/*!
|
||||
* Is this a GPR store instruction? sb,sh,sw,sd,sq
|
||||
*/
|
||||
bool is_gpr_store(const Instruction& instr) {
|
||||
for (auto x : gpr_stores) {
|
||||
if (instr.kind == x) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Is this a GPR load instruction?
|
||||
* Only LB/LBU,LH/LHU,LW/LWU,LD,LQ are treated as loads
|
||||
* The LD, LQ opcodes are both signed, unsigned, and "wildcard signed"
|
||||
*/
|
||||
bool is_gpr_load(const Instruction& instr, MatchParam<bool> is_signed) {
|
||||
if (is_signed.is_wildcard) {
|
||||
for (auto x : gpr_all_loads) {
|
||||
if (instr.kind == x) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
} else if (is_signed.value) {
|
||||
for (auto x : gpr_signed_loads) {
|
||||
if (instr.kind == x) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
} else {
|
||||
for (auto x : gpr_unsigned_loads) {
|
||||
if (instr.kind == x) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Given a store, get the offset as an integer.
|
||||
*/
|
||||
int32_t get_gpr_store_offset_as_int(const Instruction& instr) {
|
||||
assert(is_gpr_store(instr));
|
||||
assert(instr.n_src == 3);
|
||||
return instr.src[1].get_imm();
|
||||
}
|
||||
|
||||
/*!
|
||||
* Match an instruction in the form OP, dst, src0, src1 where all args are registers.
|
||||
*/
|
||||
bool is_gpr_3(const Instruction& instr,
|
||||
MatchParam<InstructionKind> kind,
|
||||
MatchParam<Register> dst,
|
||||
MatchParam<Register> src0,
|
||||
MatchParam<Register> src1) {
|
||||
return kind == instr.kind && dst == instr.get_dst(0).get_reg() &&
|
||||
src0 == instr.get_src(0).get_reg() && src1 == instr.get_src(1).get_reg();
|
||||
}
|
||||
|
||||
/*!
|
||||
* Match an instruction in the form OP, dst, src0, src1 where all args are registers, except for
|
||||
* src1, which is an integer.
|
||||
*/
|
||||
bool is_gpr_2_imm_int(const Instruction& instr,
|
||||
MatchParam<InstructionKind> kind,
|
||||
MatchParam<Register> dst,
|
||||
MatchParam<Register> src,
|
||||
MatchParam<int32_t> imm) {
|
||||
return kind == instr.kind && dst == instr.get_dst(0).get_reg() &&
|
||||
src == instr.get_src(0).get_reg() && imm == instr.get_src(1).get_imm();
|
||||
}
|
||||
|
||||
/*!
|
||||
* Create a Register for a GPR.
|
||||
*/
|
||||
Register make_gpr(Reg::Gpr gpr) {
|
||||
return Register(Reg::GPR, gpr);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Create a Register for an FPR.
|
||||
*/
|
||||
Register make_fpr(int fpr) {
|
||||
return Register(Reg::FPR, fpr);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Is this a "nop"? More specifically, it checks for sll r0, r0, 0, the recommended MIPS nop.
|
||||
*/
|
||||
bool is_nop(const Instruction& instr) {
|
||||
return is_gpr_2_imm_int(instr, InstructionKind::SLL, make_gpr(Reg::R0), make_gpr(Reg::R0), 0);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Is this jr ra?
|
||||
*/
|
||||
bool is_jr_ra(const Instruction& instr) {
|
||||
return instr.kind == InstructionKind::JR && instr.get_src(0).get_reg() == make_gpr(Reg::RA);
|
||||
}
|
||||
|
||||
bool is_branch(const Instruction& instr, MatchParam<bool> likely) {
|
||||
const auto& info = instr.get_info();
|
||||
if (likely.is_wildcard) {
|
||||
return info.is_branch || info.is_branch_likely;
|
||||
} else if (likely.value) {
|
||||
return info.is_branch_likely;
|
||||
} else {
|
||||
return info.is_branch && !info.is_branch_likely;
|
||||
}
|
||||
}
|
||||
|
||||
bool is_always_branch(const Instruction& instr) {
|
||||
if (!is_branch(instr, {})) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto r0 = make_gpr(Reg::R0);
|
||||
if (instr.kind == InstructionKind::BEQ && instr.get_src(0).get_reg() == r0 &&
|
||||
instr.get_src(1).get_reg() == r0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (instr.kind == InstructionKind::BEQL && instr.get_src(0).get_reg() == r0 &&
|
||||
instr.get_src(1).get_reg() == r0) {
|
||||
assert(false);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
69
decompiler/Disasm/InstructionMatching.h
Normal file
69
decompiler/Disasm/InstructionMatching.h
Normal file
@ -0,0 +1,69 @@
|
||||
#ifndef JAK_DISASSEMBLER_INSTRUCTIONMATCHING_H
|
||||
#define JAK_DISASSEMBLER_INSTRUCTIONMATCHING_H
|
||||
|
||||
#include "Instruction.h"
|
||||
|
||||
/*!
 * A parameter for the instruction matching functions: either a concrete value that must compare
 * equal, or a wildcard that matches anything.
 * A default-constructed MatchParam (for example `{}` at a call site) is the wildcard.
 */
template <typename T>
struct MatchParam {
  MatchParam() = default;

  // intentionally not explicit so you don't have to put MatchParam<whatever>(blah) everywhere
  MatchParam(T x) : value(x), is_wildcard(false) {}

  T value{};  // only meaningful when is_wildcard is false; value-initialized otherwise
  bool is_wildcard = true;

  // const so that a const MatchParam (or one held by const reference) can be compared too
  bool operator==(const T& other) const { return is_wildcard || (value == other); }
  bool operator!=(const T& other) const { return !(*this == other); }
};
|
||||
|
||||
bool is_no_link_gpr_store(const Instruction& instr,
|
||||
MatchParam<int> size,
|
||||
MatchParam<Register> src,
|
||||
MatchParam<int> offset,
|
||||
MatchParam<Register> dest);
|
||||
bool is_no_ll_gpr_load(const Instruction& instr,
|
||||
MatchParam<int> size,
|
||||
MatchParam<bool> is_signed,
|
||||
MatchParam<Register> dst_reg,
|
||||
MatchParam<int> offset,
|
||||
MatchParam<Register> mem_reg);
|
||||
|
||||
bool is_no_ll_fpr_store(const Instruction& instr,
|
||||
MatchParam<Register> src,
|
||||
MatchParam<int> offset,
|
||||
MatchParam<Register> dest);
|
||||
bool is_no_ll_fpr_load(const Instruction& instr,
|
||||
MatchParam<Register> dst_reg,
|
||||
MatchParam<int> offset,
|
||||
MatchParam<Register> mem_reg);
|
||||
|
||||
bool is_gpr_store(const Instruction& instr);
|
||||
bool is_gpr_load(const Instruction& instr, MatchParam<bool> is_signed);
|
||||
int32_t get_gpr_store_offset_as_int(const Instruction& instr);
|
||||
|
||||
bool is_gpr_3(const Instruction& instr,
|
||||
MatchParam<InstructionKind> kind,
|
||||
MatchParam<Register> dst,
|
||||
MatchParam<Register> src0,
|
||||
MatchParam<Register> src1);
|
||||
|
||||
bool is_gpr_2_imm_int(const Instruction& instr,
|
||||
MatchParam<InstructionKind> kind,
|
||||
MatchParam<Register> dst,
|
||||
MatchParam<Register> src,
|
||||
MatchParam<int32_t> imm);
|
||||
|
||||
bool is_nop(const Instruction& instr);
|
||||
bool is_jr_ra(const Instruction& instr);
|
||||
|
||||
Register make_gpr(Reg::Gpr gpr);
|
||||
Register make_fpr(int fpr);
|
||||
|
||||
bool is_branch(const Instruction& instr, MatchParam<bool> likely);
|
||||
bool is_always_branch(const Instruction& instr);
|
||||
|
||||
#endif // JAK_DISASSEMBLER_INSTRUCTIONMATCHING_H
|
499
decompiler/Disasm/OpcodeInfo.cpp
Normal file
499
decompiler/Disasm/OpcodeInfo.cpp
Normal file
@ -0,0 +1,499 @@
|
||||
#include "OpcodeInfo.h"
|
||||
#include <cassert>
|
||||
|
||||
OpcodeInfo gOpcodeInfo[(uint32_t)InstructionKind::EE_OP_MAX];
|
||||
|
||||
typedef InstructionKind IK;
|
||||
typedef FieldType FT;
|
||||
typedef DecodeType DT;
|
||||
|
||||
// Mark the table entry for opcode k as defined, give it its name, and return it so that operand
// descriptions can be chained onto it.
static OpcodeInfo& def(IK k, const char* name) {
  OpcodeInfo& entry = gOpcodeInfo[(uint32_t)k];
  entry.defined = true;
  entry.name = name;
  return entry;
}
|
||||
|
||||
// Define an opcode that is a branch with a delay slot.
static OpcodeInfo& def_branch(IK k, const char* name) {
  OpcodeInfo& entry = def(k, name);
  entry.has_delay_slot = true;
  entry.is_branch = true;
  return entry;
}
|
||||
|
||||
// Define an opcode that is a "likely" branch with a delay slot. Both is_branch and
// is_branch_likely are set.
static OpcodeInfo& def_branch_likely(IK k, const char* name) {
  OpcodeInfo& entry = def(k, name);
  entry.has_delay_slot = true;
  entry.is_branch_likely = true;
  entry.is_branch = true;
  return entry;
}
|
||||
|
||||
// Define an opcode and flag it as a store (is_store is what Instruction::to_string checks to
// print the "offset(base)" form).
static OpcodeInfo& def_store(IK k, const char* name) {
  OpcodeInfo& entry = def(k, name);
  entry.is_store = true;
  return entry;
}
|
||||
|
||||
// Define an opcode and flag it as a load (is_load is what Instruction::to_string checks to
// print the "offset(base)" form).
static OpcodeInfo& def_load(IK k, const char* name) {
  OpcodeInfo& entry = def(k, name);
  entry.is_load = true;
  return entry;
}
|
||||
|
||||
// Operands, in order: dst GPR from RT, src GPR from RS, src immediate from SIMM16.
static OpcodeInfo& drt_srs_ssimm16(OpcodeInfo& info) {
  return info.dst_gpr(FT::RT)
      .src_gpr(FT::RS)
      .src(FT::SIMM16, DT::IMM);
}
|
||||
|
||||
// Operands, in order: src GPR from RT, src immediate from SIMM16, src GPR from RS.
// This is the layout given to the stores.
static OpcodeInfo& srt_ssimm16_srs(OpcodeInfo& info) {
  return info.src_gpr(FT::RT)
      .src(FT::SIMM16, DT::IMM)
      .src_gpr(FT::RS);
}
|
||||
|
||||
// Operands, in order: dst GPR from RT, src immediate from SIMM16, src GPR from RS.
// This is the layout given to the loads.
static OpcodeInfo& drt_ssimm16_srs(OpcodeInfo& info) {
  return info.dst_gpr(FT::RT)
      .src(FT::SIMM16, DT::IMM)
      .src_gpr(FT::RS);
}
|
||||
|
||||
// Operands, in order: dst GPR from RD, src GPR from RS, src GPR from RT.
static OpcodeInfo& drd_srs_srt(OpcodeInfo& info) {
  return info.dst_gpr(FT::RD)
      .src_gpr(FT::RS)
      .src_gpr(FT::RT);
}
|
||||
|
||||
// Operands, in order: dst GPR from RD, src GPR from RT, src GPR from RS.
static OpcodeInfo& drd_srt_srs(OpcodeInfo& info) {
  return info.dst_gpr(FT::RD)
      .src_gpr(FT::RT)
      .src_gpr(FT::RS);
}
|
||||
|
||||
// Operands, in order: dst GPR from RD, src GPR from RT, src immediate from SA.
static OpcodeInfo& drd_srt_ssa(OpcodeInfo& info) {
  return info.dst_gpr(FT::RD)
      .src_gpr(FT::RT)
      .src(FT::SA, DT::IMM);
}
|
||||
|
||||
// Operands, in order: src GPR from RS, src GPR from RT, branch target from SIMM16.
static OpcodeInfo& srs_srt_bt(OpcodeInfo& info) {
  return info.src_gpr(FT::RS)
      .src_gpr(FT::RT)
      .src(FT::SIMM16, DT::BRANCH_TARGET);
}
|
||||
|
||||
// Operands, in order: src GPR from RS, branch target from SIMM16.
static OpcodeInfo& srs_bt(OpcodeInfo& info) {
  return info.src_gpr(FT::RS)
      .src(FT::SIMM16, DT::BRANCH_TARGET);
}
|
||||
|
||||
// Single operand: branch target from SIMM16.
static OpcodeInfo& bt(OpcodeInfo& info) {
  auto&& described = info.src(FT::SIMM16, DT::BRANCH_TARGET);
  return described;
}
|
||||
|
||||
// Operands, in order: dst FPR from FD, src FPR from FS, src FPR from FT.
static OpcodeInfo& dfd_sfs_sft(OpcodeInfo& info) {
  return info.dst_fpr(FT::FD)
      .src_fpr(FT::FS)
      .src_fpr(FT::FT);
}
|
||||
|
||||
// Operands, in order: src FPR from FS, src FPR from FT (no destination).
static OpcodeInfo& sfs_sft(OpcodeInfo& info) {
  return info.src_fpr(FT::FS)
      .src_fpr(FT::FT);
}
|
||||
|
||||
// Operands, in order: dst FPR from FD, src FPR from FS.
static OpcodeInfo& dfd_sfs(OpcodeInfo& info) {
  return info.dst_fpr(FT::FD)
      .src_fpr(FT::FS);
}
|
||||
|
||||
// Single operand: dst GPR from RD.
static OpcodeInfo& drd(OpcodeInfo& info) {
  auto&& described = info.dst_gpr(FT::RD);
  return described;
}
|
||||
|
||||
// COP2 layout: DEST field first, then dst VF from FT, src VF from FS.
static OpcodeInfo& cd_dvft_svfs(OpcodeInfo& info) {
  return info.src(FT::DEST, DT::DEST)
      .dst_vf(FT::FT)
      .src_vf(FT::FS);
}
|
||||
|
||||
// COP2 layout: DEST field first, then dst VF from FD, src VF from FS, src VF from FT.
static OpcodeInfo& cd_dvfd_svfs_svft(OpcodeInfo& info) {
  return info.src(FT::DEST, DT::DEST)
      .dst_vf(FT::FD)
      .src_vf(FT::FS)
      .src_vf(FT::FT);
}
|
||||
|
||||
// COP2 layout: BC and DEST fields first, then dst VF from FD, src VF from FS, src VF from FT.
static OpcodeInfo& cb_cd_dvfd_svfs_svft(OpcodeInfo& info) {
  auto&& with_modifiers = info.src(FT::BC, DT::BC).src(FT::DEST, DT::DEST);
  return with_modifiers.dst_vf(FT::FD).src_vf(FT::FS).src_vf(FT::FT);
}
|
||||
|
||||
// COP2 layout: BC and DEST fields first, then the VU accumulator as destination, src VF from FS,
// src VF from FT.
static OpcodeInfo& cb_cd_dacc_svfs_svft(OpcodeInfo& info) {
  auto&& with_modifiers = info.src(FT::BC, DT::BC).src(FT::DEST, DT::DEST);
  return with_modifiers.dst(FT::ZERO, DT::VU_ACC).src_vf(FT::FS).src_vf(FT::FT);
}
|
||||
|
||||
/*!
 * Operand pattern: the DEST field, destination VF register from field FD, source VF register
 * from field FS, then a VU_Q source (recorded with the ZERO field).
 */
static OpcodeInfo& cd_dvfd_svfs_sq(OpcodeInfo& info) {
  info.src(FT::DEST, DT::DEST);
  info.dst_vf(FT::FD);
  info.src_vf(FT::FS);
  info.src(FT::ZERO, DT::VU_Q);
  return info;
}
|
||||
|
||||
/*!
 * Operand pattern: the DEST field, a VU_ACC destination (recorded with the ZERO field), then
 * source VF registers from fields FS and FT.
 */
static OpcodeInfo& cd_dacc_svfs_svft(OpcodeInfo& info) {
  info.src(FT::DEST, DT::DEST);
  info.dst(FT::ZERO, DT::VU_ACC);
  info.src_vf(FT::FS);
  info.src_vf(FT::FT);
  return info;
}
|
||||
|
||||
/*!
 * Fill in the global gOpcodeInfo table: each InstructionKind gets its printable name and the
 * ordered list of operand decode steps (source/destination, instruction field, decode type).
 * Entry 0 only receives a placeholder name. After all definitions, asserts that every entry
 * except one (the UNKNOWN op) has its "defined" flag set.
 * NOTE(review): def / def_load / def_store / def_branch / def_branch_likely are defined outside
 * this view; presumably they pick the table entry for the kind, set its name and the matching
 * flag - confirm there.
 */
void init_opcode_info() {
  gOpcodeInfo[0].name = ";; ??????";

  // RT, RS, SIMM
  drt_srs_ssimm16(def(IK::DADDIU, "daddiu"));  // Doubleword Add Immediate Unsigned
  drt_srs_ssimm16(def(IK::ADDIU, "addiu"));  // Add Immediate Unsigned Word
  drt_srs_ssimm16(def(IK::SLTI, "slti"));  // Set on Less Than Immediate
  drt_srs_ssimm16(def(IK::SLTIU, "sltiu"));  // Set on Less Than Immediate Unsigned

  // stores in srt_ssimm16_srs
  srt_ssimm16_srs(def_store(IK::SB, "sb"));  // Store Byte
  srt_ssimm16_srs(def_store(IK::SH, "sh"));  // Store Halfword
  srt_ssimm16_srs(def_store(IK::SW, "sw"));  // Store Word
  srt_ssimm16_srs(def_store(IK::SD, "sd"));  // Store Doubleword
  srt_ssimm16_srs(def_store(IK::SQ, "sq"));  // Store Quadword

  // loads in drt_ssimm16_srs
  drt_ssimm16_srs(def_load(IK::LB, "lb"));  // Load Byte
  drt_ssimm16_srs(def_load(IK::LBU, "lbu"));  // Load Byte Unsigned
  drt_ssimm16_srs(def_load(IK::LH, "lh"));  // Load Halfword
  drt_ssimm16_srs(def_load(IK::LHU, "lhu"));  // Load Halfword Unsigned
  drt_ssimm16_srs(def_load(IK::LW, "lw"));  // Load Word
  drt_ssimm16_srs(def_load(IK::LWU, "lwu"));  // Load Word Unsigned
  drt_ssimm16_srs(def_load(IK::LD, "ld"));  // Load Doubleword
  drt_ssimm16_srs(def_load(IK::LQ, "lq"));  // Load Quadword
  drt_ssimm16_srs(def_load(IK::LDR, "ldr"));  // Load Doubleword Right
  drt_ssimm16_srs(def_load(IK::LDL, "ldl"));  // Load Doubleword Left
  drt_ssimm16_srs(def_load(IK::LWL, "lwl"));  // Load Word Left
  drt_ssimm16_srs(def_load(IK::LWR, "lwr"));  // Load Word Right

  // drd_srs_srt
  drd_srs_srt(def(IK::DADDU, "daddu"));  // Doubleword Add Unsigned
  drd_srs_srt(def(IK::SUBU, "subu"));  // Subtract Unsigned Word
  drd_srs_srt(def(IK::ADDU, "addu"));  // Add Unsigned Word
  drd_srs_srt(def(IK::DSUBU, "dsubu"));  // Doubleword Subtract Unsigned
  drd_srs_srt(def(IK::MULT3, "mult3"));  // Multiply Word
  drd_srs_srt(def(IK::MULTU3, "multu3"));  // Multiply Unsigned Word
  drd_srs_srt(def(IK::AND, "and"));  // And
  drd_srs_srt(def(IK::OR, "or"));  // Or
  drd_srs_srt(def(IK::NOR, "nor"));  // Not Or
  drd_srs_srt(def(IK::XOR, "xor"));  // Exclusive Or
  drd_srs_srt(def(IK::MOVN, "movn"));  // Move Conditional on Not Zero
  drd_srs_srt(def(IK::MOVZ, "movz"));  // Move Conditional on Zero
  drd_srs_srt(def(IK::SLT, "slt"));  // Set on Less Than
  drd_srs_srt(def(IK::SLTU, "sltu"));  // Set on Less Than Unsigned

  // fixed shifts
  drd_srt_ssa(def(IK::SLL, "sll"));  // Shift Left Logical
  drd_srt_ssa(def(IK::SRA, "sra"));  // Shift Right Arithmetic
  drd_srt_ssa(def(IK::SRL, "srl"));  // Shift Right Logical
  drd_srt_ssa(def(IK::DSLL, "dsll"));  // Doubleword Shift Left Logical
  drd_srt_ssa(def(IK::DSLL32, "dsll32"));  // Doubleword Shift Left Logical Plus 32
  drd_srt_ssa(def(IK::DSRA, "dsra"));  // Doubleword Shift Right Arithmetic
  drd_srt_ssa(def(IK::DSRA32, "dsra32"));  // Doubleword Shift Right Arithmetic Plus 32
  drd_srt_ssa(def(IK::DSRL, "dsrl"));  // Doubleword Shift Right Logical
  drd_srt_ssa(def(IK::DSRL32, "dsrl32"));  // Doubleword Shift Right Logical Plus 32

  // variable shifts
  drd_srt_srs(def(IK::DSRAV, "dsrav"));  // Doubleword Shift Right Arithmetic Variable
  drd_srt_srs(def(IK::SLLV, "sllv"));  // Shift Word Left Logical Variable
  drd_srt_srs(def(IK::DSLLV, "dsllv"));  // Doubleword Shift Left Logical Variable
  drd_srt_srs(def(IK::DSRLV, "dsrlv"));  // Doubleword Shift Right Logical Variable

  // branch (two registers)
  srs_srt_bt(def_branch(IK::BEQ, "beq"));  // Branch on Equal
  srs_srt_bt(def_branch(IK::BNE, "bne"));  // Branch on Not Equal
  srs_srt_bt(def_branch_likely(IK::BEQL, "beql"));  // Branch on Equal Likely
  srs_srt_bt(def_branch_likely(IK::BNEL, "bnel"));  // Branch on Not Equal Likely

  // branch (one register)
  srs_bt(def_branch(IK::BLTZ, "bltz"));  // Branch on Less Than Zero
  srs_bt(def_branch(IK::BGEZ, "bgez"));  // Branch on Greater Than or Equal to Zero
  srs_bt(def_branch(IK::BLEZ, "blez"));  // Branch on Less Than or Equal to Zero
  srs_bt(def_branch(IK::BGTZ, "bgtz"));  // Branch on Greater Than Zero
  srs_bt(def_branch(IK::BGEZAL, "bgezal"));  // Branch on Greater Than or Equal to Zero and Link
  srs_bt(def_branch_likely(IK::BLTZL, "bltzl"));  // Branch on Less Than Zero Likely
  srs_bt(def_branch_likely(IK::BGTZL, "bgtzl"));  // Branch on Greater Than Zero Likely
  srs_bt(def_branch_likely(IK::BGEZL, "bgezl"));  // Branch on Greater Than or Equal to Zero Likely

  // weird ones
  def(IK::DIV, "div").src_gpr(FT::RS).src_gpr(FT::RT);  // Divide Word
  def(IK::DIVU, "divu").src_gpr(FT::RS).src_gpr(FT::RT);  // Divide Unsigned Word

  def(IK::ORI, "ori").dst_gpr(FT::RT).src_gpr(FT::RS).src(FT::ZIMM16, DT::IMM);  // Or Immediate
  def(IK::XORI, "xori")
      .dst_gpr(FT::RT)
      .src_gpr(FT::RS)
      .src(FT::ZIMM16, DT::IMM);  // Exclusive Or Immediate
  def(IK::ANDI, "andi").dst_gpr(FT::RT).src_gpr(FT::RS).src(FT::ZIMM16, DT::IMM);  // And Immediate

  def(IK::LUI, "lui").dst_gpr(FT::RT).src(FT::SIMM16, DT::IMM);  // Load Upper Immediate
  def(IK::JALR, "jalr").dst_gpr(FT::RD).src_gpr(FT::RS).has_delay_slot =
      true;  // Jump and Link Register
  def(IK::JR, "jr").src_gpr(FT::RS).has_delay_slot = true;  // Jump Register

  def_load(IK::LWC1, "lwc1")
      .dst_fpr(FT::FT)
      .src(FT::SIMM16, DT::IMM)
      .src_gpr(FT::RS);  // Load Word to Floating Point
  def_store(IK::SWC1, "swc1")
      .src_fpr(FT::FT)
      .src(FT::SIMM16, DT::IMM)
      .src_gpr(FT::RS);  // Store Word from Floating Point

  // weird moves
  def(IK::MFC1, "mfc1").dst_gpr(FT::RT).src_fpr(FT::FS);  // Move Word from Floating Point
  def(IK::MTC1, "mtc1").src_gpr(FT::RT).dst_fpr(FT::FS);  // Move Word to Floating Point
  def(IK::MTC0, "mtc0")
      .src_gpr(FT::RT)
      .dst(FT::RD, DT::COP0);  // Move to System Control Coprocessor
  def(IK::MFC0, "mfc0")
      .dst_gpr(FT::RT)
      .src(FT::RD, DT::COP0);  // Move from System Control Coprocessor
  def(IK::MTDAB, "mtdab").src_gpr(FT::RT);  // Move to Data Address Breakpoint Register
  def(IK::MTDABM, "mtdabm").src_gpr(FT::RT);  // Move to Data Address Breakpoint Mask Register
  drd(def(IK::MFHI, "mfhi"));  // Move from HI Register
  drd(def(IK::MFLO, "mflo"));  // Move from LO Register
  def(IK::MTLO1, "mtlo1").src_gpr(FT::RS);  // Move to LO1 Register
  drd(def(IK::MFLO1, "mflo1"));  // Move from LO1 Register
  drd(def(IK::PMFHL_UW, "pmfhl.uw"));  // Parallel Move From HI/LO Register
  drd(def(IK::PMFHL_LW, "pmfhl.lw"));
  drd(def(IK::PMFHL_LH, "pmfhl.lh"));
  def(IK::MFPC, "mfpc").dst_gpr(FT::RT).src(FT::PCR, DT::PCR);  // Move from Performance Counter
  def(IK::MTPC, "mtpc").src_gpr(FT::RT).dst(FT::PCR, DT::PCR);  // Move to Performance Counter

  // other weirds
  def(IK::SYSCALL, "syscall").src(FT::SYSCALL, DT::IMM);  // System Call
  def(IK::CACHE_DXWBIN, "cache dxwbin")
      .src_gpr(FT::RS)
      .src(FT::SIMM16, DT::IMM);  // Cache Operation (Index Writeback Invalidate)
  def(IK::PREF, "pref").src_gpr(FT::RT).src(FT::SIMM16, DT::IMM).src_gpr(FT::RS);  // Prefetch

  // plains
  def(IK::SYNCP, "sync.p");  // Synchronize Shared Memory (Pipeline)
  def(IK::SYNCL, "sync.l");  // Synchronize Shared Memory (Load)
  def(IK::ERET, "eret");  // Exception Return
  def(IK::EI, "ei");  // Enable Interrupt

  drd_srs_srt(def(IK::PPACB, "ppacb"));  // Parallel Pack to Byte
  drd_srs_srt(def(IK::PPACH, "ppach"));  // Parallel Pack to Halfword
  drd_srs_srt(def(IK::PPACW, "ppacw"));  // Parallel Pack to Word
  drd_srs_srt(def(IK::PADDH, "paddh"));  // Parallel Add Halfword
  drd_srs_srt(def(IK::PADDW, "paddw"));  // Parallel Add Word
  drd_srs_srt(def(IK::PSUBW, "psubw"));  // Parallel Subtract Word
  drd_srs_srt(def(IK::PMINH, "pminh"));  // Parallel Minimize Halfword
  drd_srs_srt(def(IK::PMINW, "pminw"));  // Parallel Minimize Word
  drd_srs_srt(def(IK::PMAXH, "pmaxh"));  // Parallel Maximize Halfword
  drd_srs_srt(def(IK::PMAXW, "pmaxw"));  // Parallel Maximize Word
  drd_srs_srt(def(IK::PEXTLB, "pextlb"));  // Parallel Extend Lower from Byte
  drd_srs_srt(def(IK::PEXTLH, "pextlh"));  // Parallel Extend Lower from Halfword
  drd_srs_srt(def(IK::PEXTLW, "pextlw"));  // Parallel Extend Lower from Word
  drd_srs_srt(def(IK::PCGTW, "pcgtw"));  // Parallel Compare for Greater Than Word
  drd_srs_srt(def(IK::PCEQB, "pceqb"));  // Parallel Compare for Equal Byte
  drd_srs_srt(def(IK::PCEQW, "pceqw"));  // Parallel Compare for Equal Word
  drd_srs_srt(def(IK::PEXTUB, "pextub"));  // Parallel Extend Upper from Byte
  drd_srs_srt(def(IK::PEXTUH, "pextuh"));  // Parallel Extend Upper from Halfword
  drd_srs_srt(def(IK::PEXTUW, "pextuw"));  // Parallel Extend Upper from Word
  drd_srs_srt(def(IK::PCPYUD, "pcpyud"));  // Parallel Copy Upper Doubleword
  drd_srs_srt(def(IK::PCPYLD, "pcpyld"));  // Parallel Copy Lower Doubleword
  drd_srs_srt(def(IK::PMADDH, "pmaddh"));  // Parallel Multiply-Add Halfword
  drd_srs_srt(def(IK::PMULTH, "pmulth"));  // Parallel Multiply Halfword
  drd_srs_srt(def(IK::PEXEW, "pexew"));  // Parallel Exchange Even Word
  drd_srs_srt(def(IK::PINTEH, "pinteh"));  // Parallel Interleave Even Halfword
  drd_srs_srt(def(IK::PAND, "pand"));  // Parallel And
  drd_srs_srt(def(IK::POR, "por"));  // Parallel Or
  drd_srs_srt(def(IK::PNOR, "pnor"));  // Parallel Not Or

  drd_srt_ssa(def(IK::PSLLW, "psllw"));  // Parallel Shift Left Logical Word
  drd_srt_ssa(def(IK::PSLLH, "psllh"));  // Parallel Shift Left Logical Halfword
  drd_srt_ssa(def(IK::PSRAW, "psraw"));  // Parallel Shift Right Arithmetic Word
  drd_srt_ssa(def(IK::PSRAH, "psrah"));  // Parallel Shift Right Arithmetic Halfword
  drd_srt_ssa(def(IK::PSRLH, "psrlh"));  // Parallel Shift Right Logical Halfword

  def(IK::PLZCW, "plzcw").dst_gpr(FT::RD).src_gpr(FT::RS);  // Parallel Leading Zero Count Word
  def(IK::PABSW, "pabsw").dst_gpr(FT::RD).src_gpr(FT::RT);  // Parallel Absolute Word
  def(IK::PROT3W, "prot3w").dst_gpr(FT::RD).src_gpr(FT::RT);  // Parallel Rotate 3 Word
  def(IK::PCPYH, "pcpyh").dst_gpr(FT::RD).src_gpr(FT::RT);  // Parallel Copy Halfword

  // COP1

  // branch (no registers)
  bt(def_branch(IK::BC1F, "bc1f"));  // Branch on FP False
  bt(def_branch(IK::BC1T, "bc1t"));  // Branch on FP True
  bt(def_branch_likely(IK::BC1FL, "bc1fl"));  // Branch on FP False Likely
  bt(def_branch_likely(IK::BC1TL, "bc1tl"));  // Branch on FP True Likely

  dfd_sfs_sft(def(IK::ADDS, "add.s"));  // Floating Point Add
  dfd_sfs_sft(def(IK::SUBS, "sub.s"));  // Floating Point Subtract
  dfd_sfs_sft(def(IK::MULS, "mul.s"));  // Floating Point Multiply
  dfd_sfs_sft(def(IK::DIVS, "div.s"));  // Floating Point Divide
  dfd_sfs_sft(def(IK::MINS, "min.s"));  // Floating Point Minimum
  dfd_sfs_sft(def(IK::MAXS, "max.s"));  // Floating Point Maximum
  dfd_sfs_sft(def(IK::MADDS, "madd.s"));  // Floating Point Multiply-Add
  dfd_sfs_sft(def(IK::MSUBS, "msub.s"));  // Floating Point Multiply and Subtract
  dfd_sfs_sft(def(IK::RSQRTS, "rsqrt.s"));  // Floating Point Reciprocal Square Root

  dfd_sfs(def(IK::ABSS, "abs.s"));  // Floating Point Absolute Value
  dfd_sfs(def(IK::NEGS, "neg.s"));  // Floating Point Negate
  dfd_sfs(def(IK::CVTSW, "cvt.s.w"));  // Fixed-point Convert to Single Floating Point
  dfd_sfs(def(IK::CVTWS, "cvt.w.s"));  // Floating Point Convert to Word Fixed-point
  dfd_sfs(def(IK::MOVS, "mov.s"));  // Floating Point Move
  dfd_sfs(def(IK::SQRTS, "sqrt.s"));  // Floating Point Square Root

  sfs_sft(def(IK::CLTS, "c.lt.s"));  // Floating Point Compare
  sfs_sft(def(IK::CLES, "c.le.s"));  // Floating Point Compare
  sfs_sft(def(IK::CEQS, "c.eq.s"));  // Floating Point Compare
  sfs_sft(def(IK::MULAS, "mula.s"));  // Floating Point Multiply to Accumulator
  sfs_sft(def(IK::MADDAS, "madda.s"));  // Floating Point Multiply-Add to Accumulator
  sfs_sft(def(IK::ADDAS, "adda.s"));  // Floating Point Add to Accumulator
  sfs_sft(def(IK::MSUBAS, "msuba.s"));  // Floating Point Multiply and Subtract from Accumulator

  // COP2 weirds
  def_store(IK::SQC2, "sqc2")
      .src(FT::FT, DT::VF)
      .src(FT::SIMM16, DT::IMM)
      .src_gpr(FT::RS);  // Store Quadword from COP2
  def_load(IK::LQC2, "lqc2")
      .dst(FT::FT, DT::VF)
      .src(FT::SIMM16, DT::IMM)
      .src_gpr(FT::RS);  // Load Quadword to COP2

  // COP2
  cd_dvft_svfs(def(IK::VMOVE, "vmove"));  // Transfer between Floating-Point Registers
  cd_dvft_svfs(def(IK::VFTOI0, "vftoi0"));  // Conversion to Fixed Point
  cd_dvft_svfs(def(IK::VFTOI4, "vftoi4"));  // Conversion to Fixed Point
  cd_dvft_svfs(def(IK::VFTOI12, "vftoi12"));  // Conversion to Fixed Point
  cd_dvft_svfs(def(IK::VITOF0, "vitof0"));  // Conversion to Floating Point Number
  cd_dvft_svfs(def(IK::VITOF12, "vitof12"));  // Conversion to Floating Point Number
  cd_dvft_svfs(def(IK::VITOF15, "vitof15"));  // Conversion to Floating Point Number
  cd_dvft_svfs(def(IK::VABS, "vabs"));  // Absolute Value

  cd_dvfd_svfs_svft(def(IK::VADD, "vadd"));
  cd_dvfd_svfs_svft(def(IK::VSUB, "vsub"));
  cd_dvfd_svfs_svft(def(IK::VMUL, "vmul"));
  cd_dvfd_svfs_svft(def(IK::VMINI, "vmini"));
  cd_dvfd_svfs_svft(def(IK::VMAX, "vmax"));
  cd_dvfd_svfs_svft(def(IK::VOPMSUB, "vopmsub"));
  cd_dvfd_svfs_svft(def(IK::VMADD, "vmadd"));
  cd_dvfd_svfs_svft(def(IK::VMSUB, "vmsub"));

  // broadcast (BC) variants share the printed name of the plain op; the BC operand is a
  // separate decode step
  cb_cd_dvfd_svfs_svft(def(IK::VSUB_BC, "vsub"));
  cb_cd_dvfd_svfs_svft(def(IK::VADD_BC, "vadd"));
  cb_cd_dvfd_svfs_svft(def(IK::VMADD_BC, "vmadd"));
  cb_cd_dvfd_svfs_svft(def(IK::VMSUB_BC, "vmsub"));
  cb_cd_dvfd_svfs_svft(def(IK::VMUL_BC, "vmul"));
  cb_cd_dvfd_svfs_svft(def(IK::VMINI_BC, "vmini"));
  cb_cd_dvfd_svfs_svft(def(IK::VMAX_BC, "vmax"));

  cb_cd_dacc_svfs_svft(def(IK::VADDA_BC, "vadda"));
  cb_cd_dacc_svfs_svft(def(IK::VMADDA_BC, "vmadda"));
  cb_cd_dacc_svfs_svft(def(IK::VMULA_BC, "vmula"));
  cb_cd_dacc_svfs_svft(def(IK::VMSUBA_BC, "vmsuba"));

  cd_dvfd_svfs_sq(def(IK::VADDQ, "vaddq"));
  cd_dvfd_svfs_sq(def(IK::VSUBQ, "vsubq"));
  cd_dvfd_svfs_sq(def(IK::VMULQ, "vmulq"));
  cd_dvfd_svfs_sq(def(IK::VMSUBQ, "vmsubq"));

  cd_dacc_svfs_svft(def(IK::VMULA, "vmula"));
  cd_dacc_svfs_svft(def(IK::VADDA, "vadda"));
  cd_dacc_svfs_svft(def(IK::VMADDA, "vmadda"));

  cd_dacc_svfs_svft(def(IK::VOPMULA, "vopmula"));

  // weird
  def(IK::VDIV, "vdiv")
      .dst(FT::ZERO, DT::VU_Q)
      .src_vf(FT::FS)
      .src_vf(FT::FT)
      .src(FT::BC, DT::BC);  // todo
  def(IK::VRSQRT, "vrsqrt")
      .dst(FT::ZERO, DT::VU_Q)
      .src_vf(FT::FS)
      .src_vf(FT::FT)
      .src(FT::BC, DT::BC);  // todo
  def(IK::VCLIP, "vclip").src(FT::DEST, DT::DEST).src_vf(FT::FS).src_vf(FT::FT);
  def(IK::VMULAQ, "vmulaq")
      .src(FT::DEST, DT::DEST)
      .dst(FT::ZERO, DT::VU_ACC)
      .src_vf(FT::FS)
      .src(FT::ZERO, DT::VU_Q);

  def(IK::VRGET, "vrget").src(FT::DEST, DT::DEST).dst_vf(FT::FT);

  // integer
  def(IK::VMTIR, "vmtir").dst(FT::RT, DT::VI).src_vf(FT::FS).src(FT::BC, DT::BC);
  def(IK::VIAND, "viand").dst_vi(FT::FD).src_vi(FT::FS).src_vi(FT::FT);
  def(IK::VLQI, "vlqi").src(FT::DEST, DT::DEST).dst_vf(FT::FT).src_vi(FT::FS);  // todo inc
  def(IK::VSQI, "vsqi").src(FT::DEST, DT::DEST).src_vf(FT::FS).src_vi(FT::FT);  // todo inc
  def(IK::VIADDI, "viaddi").dst_vi(FT::FT).src_vi(FT::FS).src(FT::IMM5, DT::IMM);

  def(IK::QMFC2, "qmfc2").src(FT::IL, DT::IL).dst_gpr(FT::RT).src_vf(FT::FS);
  def(IK::QMTC2, "qmtc2").src(FT::IL, DT::IL).src_gpr(FT::RT).dst_vf(FT::FS);
  def(IK::VSQRT, "vsqrt").src(FT::BC, DT::BC).dst(FT::ZERO, DT::VU_Q).src_vf(FT::FT);
  def(IK::VRXOR, "vrxor").src(FT::BC, DT::BC).src_vf(FT::FS);
  def(IK::VRNEXT, "vrnext").src(FT::DEST, DT::DEST).dst_vf(FT::FT);
  def(IK::CTC2, "ctc2").src(FT::IL, DT::IL).src_gpr(FT::RT).dst(FT::RD, DT::VI);
  def(IK::CFC2, "cfc2").src(FT::IL, DT::IL).dst_gpr(FT::RT).src(FT::RD, DT::VI);

  def(IK::VCALLMS, "vcallms").src(FT::IMM15, DT::VCALLMS_TARGET);

  def(IK::VNOP, "vnop");
  def(IK::VWAITQ, "vwaitq");

  // sanity check: count how many table entries ended up defined
  uint32_t valid_count = 0, total_count = 0;
  for (auto& info : gOpcodeInfo) {
    if (info.defined) {
      valid_count++;
    }
    total_count++;
  }

  // for the UNKNOWN op which shouldn't be valid.
  total_count--;
  assert(total_count == valid_count);
}
|
||||
|
||||
/*!
 * Append one decode step to this opcode and mark the opcode as defined.
 * Asserts that there is still room in the fixed-size steps array.
 */
void OpcodeInfo::step(DecodeStep& s) {
  assert(step_count < MAX_DECODE_STEPS);
  defined = true;
  const uint8_t slot = step_count++;  // index of the first unused entry
  steps[slot] = s;
}
|
||||
|
||||
/*!
 * Append a source operand: the given instruction field, interpreted with the given decode type.
 * Returns *this so operand definitions can be chained.
 */
OpcodeInfo& OpcodeInfo::src(FieldType field, DecodeType decode) {
  DecodeStep source_step;
  source_step.decode = decode;
  source_step.field = field;
  source_step.is_src = true;
  step(source_step);
  return *this;
}
|
||||
|
||||
/*!
 * Append a source operand decoded as a GPR. Returns *this for chaining.
 */
OpcodeInfo& OpcodeInfo::src_gpr(FieldType field) {
  src(field, DT::GPR);
  return *this;
}
|
||||
|
||||
/*!
 * Append a source operand decoded as an FPR. Returns *this for chaining.
 */
OpcodeInfo& OpcodeInfo::src_fpr(FieldType field) {
  src(field, DT::FPR);
  return *this;
}
|
||||
|
||||
/*!
 * Append a source operand decoded as a VF register. Returns *this for chaining.
 */
OpcodeInfo& OpcodeInfo::src_vf(FieldType field) {
  src(field, DT::VF);
  return *this;
}
|
||||
|
||||
/*!
 * Append a source operand decoded as a VI register. Returns *this for chaining.
 */
OpcodeInfo& OpcodeInfo::src_vi(FieldType field) {
  src(field, DT::VI);
  return *this;
}
|
||||
|
||||
/*!
 * Append a destination operand: the given instruction field, interpreted with the given decode
 * type. Returns *this so operand definitions can be chained.
 */
OpcodeInfo& OpcodeInfo::dst(FieldType field, DecodeType decode) {
  DecodeStep dest_step;
  dest_step.decode = decode;
  dest_step.field = field;
  dest_step.is_src = false;
  step(dest_step);
  return *this;
}
|
||||
|
||||
/*!
 * Append a destination operand decoded as a GPR. Returns *this for chaining.
 */
OpcodeInfo& OpcodeInfo::dst_gpr(FieldType field) {
  dst(field, DT::GPR);
  return *this;
}
|
||||
|
||||
/*!
 * Append a destination operand decoded as an FPR. Returns *this for chaining.
 */
OpcodeInfo& OpcodeInfo::dst_fpr(FieldType field) {
  dst(field, DT::FPR);
  return *this;
}
|
||||
|
||||
/*!
 * Append a destination operand decoded as a VF register. Returns *this for chaining.
 */
OpcodeInfo& OpcodeInfo::dst_vf(FieldType field) {
  dst(field, DT::VF);
  return *this;
}
|
||||
|
||||
/*!
 * Append a destination operand decoded as a VI register. Returns *this for chaining.
 */
OpcodeInfo& OpcodeInfo::dst_vi(FieldType field) {
  dst(field, DT::VI);
  return *this;
}
|
351
decompiler/Disasm/OpcodeInfo.h
Normal file
351
decompiler/Disasm/OpcodeInfo.h
Normal file
@ -0,0 +1,351 @@
|
||||
/*!
|
||||
* @file OpcodeInfo.h
|
||||
* Decoding info for each opcode.
|
||||
*/
|
||||
|
||||
#ifndef NEXT_OPCODEINFO_H
|
||||
#define NEXT_OPCODEINFO_H
|
||||
|
||||
#include <cstdint>
#include <string>
|
||||
|
||||
/*!
 * One enumerator per instruction the disassembler knows about.
 * The numeric value is used as an index into gOpcodeInfo, which is sized by EE_OP_MAX, so
 * EE_OP_MAX must stay last. UNKNOWN is the first entry and is the one entry that
 * init_opcode_info expects to remain undefined.
 */
enum class InstructionKind {
  UNKNOWN,

  // Integer Math
  ADDU,    // Add Unsigned Word
  ADDIU,   // Add Immediate Unsigned Word
  DADDU,
  DADDIU,  // Doubleword Add Immediate Unsigned
  SUBU,
  DSUBU,
  MULT3,  // special EE three-operand multiply
  MULTU3,
  DIV,
  DIVU,

  // Stores
  SB,
  SH,
  SW,
  SWC1,
  SD,
  SQ,
  SQC2,

  // Loads
  LB,
  LBU,
  LH,
  LHU,
  LW,
  LWU,
  LWL,
  LWR,
  LWC1,
  LD,
  LDL,
  LDR,
  LQ,
  LQC2,
  LUI,

  // Logical
  AND,
  ANDI,
  OR,
  ORI,
  XOR,
  XORI,
  NOR,

  // Moves
  MOVN,
  MOVZ,
  MFHI,
  MFLO,
  MFLO1,
  MTLO1,
  MFPC,
  MTPC,
  MTC0,
  MFC0,
  MTDAB,
  MTDABM,
  MFC1,
  MTC1,
  QMFC2,
  QMTC2,
  CTC2,
  CFC2,

  // Jumps
  JALR,
  JR,

  // Branch
  BEQ,
  BEQL,
  BNE,
  BNEL,
  BLTZ,
  BLTZL,
  BGTZ,
  BGTZL,
  BGEZ,
  BGEZL,
  BLEZ,
  BGEZAL,

  // Shift
  SLL,
  SLLV,
  SRL,
  SRA,
  DSLL,
  DSLL32,
  DSLLV,
  DSRL,
  DSRL32,
  DSRLV,
  DSRA,
  DSRA32,
  DSRAV,

  // Compare
  SLT,
  SLTI,
  SLTU,
  SLTIU,

  // Weird
  SYSCALL,
  SYNCP,
  SYNCL,
  ERET,
  EI,
  CACHE_DXWBIN,
  PREF,

  // MMI unsorted
  PSLLW,
  PSRAW,
  PSRAH,
  PLZCW,
  PMFHL_UW,
  PMFHL_LW,
  PMFHL_LH,
  PSLLH,
  PSRLH,

  // MMI 0
  PEXTLW,
  PPACH,
  PSUBW,
  PCGTW,
  PEXTLH,
  PEXTLB,
  PMAXH,
  PPACB,
  PADDW,
  PADDH,
  PMAXW,
  PPACW,

  // MMI 1
  PCEQW,
  PEXTUW,
  PMINH,
  PEXTUH,
  PEXTUB,
  PCEQB,
  PMINW,
  PABSW,

  // MMI 2
  PCPYLD,
  PROT3W,
  PAND,
  PMADDH,
  PMULTH,
  PEXEW,

  // MMI 3
  POR,
  PCPYUD,
  PNOR,
  PCPYH,
  PINTEH,

  // COP1 / FPU
  ADDS,
  SUBS,
  MULS,
  DIVS,
  MINS,
  MAXS,
  ABSS,
  NEGS,
  CVTSW,
  CVTWS,
  CLTS,
  CLES,
  CEQS,
  BC1F,
  BC1T,
  BC1FL,
  BC1TL,
  MULAS,
  MADDAS,
  ADDAS,
  MSUBAS,
  MADDS,
  MSUBS,
  MOVS,
  SQRTS,
  RSQRTS,

  // COP2
  VMOVE,
  VFTOI0,
  VFTOI4,
  VFTOI12,
  VITOF0,
  VITOF12,
  VITOF15,
  VABS,

  VADD,
  VSUB,
  VMUL,
  VMINI,
  VMAX,
  VOPMSUB,
  VMADD,
  VMSUB,

  // COP2 ops that additionally take a BC operand
  VADD_BC,
  VSUB_BC,
  VMUL_BC,
  VMULA_BC,
  VMADD_BC,
  VADDA_BC,
  VMADDA_BC,
  VMSUBA_BC,
  VMSUB_BC,
  VMINI_BC,
  VMAX_BC,

  // COP2 ops that read the VU Q register
  VADDQ,
  VSUBQ,
  VMULQ,
  VMSUBQ,

  // COP2 ops that write the VU accumulator
  VMULA,
  VADDA,
  VMADDA,

  VOPMULA,
  VDIV,
  VCLIP,
  VMULAQ,

  VMTIR,
  VIAND,
  VLQI,
  VIADDI,
  VSQI,

  VRGET,

  VSQRT,
  VRSQRT,

  VRXOR,
  VRNEXT,
  VNOP,
  VWAITQ,
  VCALLMS,

  EE_OP_MAX  // number of kinds; not an instruction. Must remain the last enumerator.
};
|
||||
|
||||
/*!
 * Names the part of an instruction word that a decode step reads.
 * NOTE(review): the bit positions of each field are defined by the decoder, which is outside
 * this view; the per-field notes below only describe how init_opcode_info uses them.
 */
enum class FieldType {
  RS,       // used with GPR operands
  RT,       // used with GPR operands (also VI for vmtir)
  RD,       // used with GPR operands (also COP0 / VI control registers)
  SA,       // decoded as an immediate for the fixed-amount shifts
  FT,       // used with FPR / VF / VI operands
  FS,       // used with FPR / VF / VI operands
  FD,       // used with FPR / VF / VI operands
  SYSCALL,  // immediate operand of syscall
  SIMM16,   // presumably signed 16-bit immediate; used for offsets and branch targets
  ZIMM16,   // presumably zero-extended 16-bit immediate; used by ori / xori / andi
  PCR,      // performance counter register selector (mfpc / mtpc)
  DEST,     // COP2 "dest" field
  BC,       // COP2 "bc" field
  IMM5,     // immediate operand of viaddi
  IMM15,    // immediate operand of vcallms
  IL,       // COP2 "il" field (qmfc2 / qmtc2 / ctc2 / cfc2)
  ZERO      // placeholder used together with VU_Q / VU_ACC operands
};
|
||||
|
||||
/*!
 * How the value read from an instruction field should be interpreted by a decode step.
 */
enum class DecodeType {
  GPR,             // general purpose register
  IMM,             // immediate value
  FPR,             // floating point register
  COP0,            // COP0 (system control coprocessor) register
  COP2,            // NOTE(review): not referenced by init_opcode_info in this view
  PCR,             // performance counter register
  VF,              // VU floating point vector register
  VI,              // VU integer / control register
  BRANCH_TARGET,   // branch destination
  VCALLMS_TARGET,  // target operand of vcallms
  DEST,            // COP2 "dest" operand
  BC,              // COP2 "bc" operand
  VU_Q,            // the VU Q register (paired with FieldType::ZERO)
  VU_ACC,          // the VU accumulator (paired with FieldType::ZERO)
  IL               // COP2 "il" operand
};
|
||||
|
||||
/*!
 * One operand of an opcode: which instruction field to read, how to interpret it, and whether
 * the operand is a source or a destination.
 */
struct DecodeStep {
  bool is_src = false;  // true for a source operand, false for a destination
  FieldType field;      // instruction field to read (set by OpcodeInfo::src / dst)
  DecodeType decode;    // interpretation of the field (set by OpcodeInfo::src / dst)
};
|
||||
|
||||
// Capacity of OpcodeInfo::steps. OpcodeInfo::step() asserts that this is never exceeded.
constexpr int MAX_DECODE_STEPS = 5;
|
||||
|
||||
struct OpcodeInfo {
|
||||
std::string name;
|
||||
|
||||
bool is_branch = false;
|
||||
bool is_branch_likely = false;
|
||||
bool can_lo16_link = false;
|
||||
bool defined = false;
|
||||
bool is_store = false;
|
||||
bool is_load = false;
|
||||
bool has_delay_slot = false;
|
||||
|
||||
void step(DecodeStep& s);
|
||||
|
||||
OpcodeInfo& src(FieldType field, DecodeType decode);
|
||||
OpcodeInfo& src_gpr(FieldType field);
|
||||
OpcodeInfo& src_fpr(FieldType field);
|
||||
OpcodeInfo& src_vf(FieldType field);
|
||||
OpcodeInfo& src_vi(FieldType field);
|
||||
|
||||
OpcodeInfo& dst(FieldType field, DecodeType decode);
|
||||
OpcodeInfo& dst_gpr(FieldType field);
|
||||
OpcodeInfo& dst_fpr(FieldType field);
|
||||
OpcodeInfo& dst_vf(FieldType field);
|
||||
OpcodeInfo& dst_vi(FieldType field);
|
||||
|
||||
uint8_t step_count;
|
||||
DecodeStep steps[MAX_DECODE_STEPS];
|
||||
};
|
||||
|
||||
extern OpcodeInfo gOpcodeInfo[(uint32_t)InstructionKind::EE_OP_MAX];
|
||||
|
||||
void init_opcode_info();
|
||||
|
||||
#endif // NEXT_OPCODEINFO_H
|
215
decompiler/Disasm/Register.cpp
Normal file
215
decompiler/Disasm/Register.cpp
Normal file
@ -0,0 +1,215 @@
|
||||
/*!
|
||||
* @file Register.cpp
|
||||
* Representation of an EE register.
|
||||
*/
|
||||
|
||||
#include "Register.h"
|
||||
#include <cassert>
|
||||
|
||||
////////////////////////////
|
||||
// Register Name Constants
|
||||
////////////////////////////
|
||||
|
||||
// Printable nicknames of the 32 general purpose registers, indexed by register number.
const static char* gpr_names[32] = {
    "r0", "at", "v0", "v1", "a0", "a1", "a2", "a3", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
    "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra"};
|
||||
|
||||
// Printable names of the 32 floating point registers, indexed by register number.
const static char* fpr_names[32] = {"f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
                                    "f8",  "f9",  "f10", "f11", "f12", "f13", "f14", "f15",
                                    "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
                                    "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"};
|
||||
|
||||
// Printable names of the 32 COP0 control registers, indexed by register number.
// Slots without a name are spelled "INVALID<n>".
// NOTE(review): entry 30 is spelled "ErrorEPR"; possibly meant "ErrorEPC" - confirm against the
// EE documentation before changing, since this string is emitted in the output.
const static char* cop0_names[32] = {
    "Index",     "Random",    "EntryLo0",  "EntryLo1",  "Context",   "PageMask",  "Wired",
    "INVALID7",  "BadVAddr",  "Count",     "EntryHi",   "Compare",   "Status",    "Cause",
    "EPC",       "PRId",      "Config",    "INVALID17", "INVALID18", "INVALID19", "INVALID20",
    "INVALID21", "INVALID22", "BadPAddr",  "Debug",     "Perf",      "INVALID26", "INVALID27",
    "TagLo",     "TagHi",     "ErrorEPR",  "INVALID31"};
|
||||
|
||||
// Printable names of the 32 VU floating point vector registers, indexed by register number.
const static char* vf_names[32] = {"vf0",  "vf1",  "vf2",  "vf3",  "vf4",  "vf5",  "vf6",  "vf7",
                                   "vf8",  "vf9",  "vf10", "vf11", "vf12", "vf13", "vf14", "vf15",
                                   "vf16", "vf17", "vf18", "vf19", "vf20", "vf21", "vf22", "vf23",
                                   "vf24", "vf25", "vf26", "vf27", "vf28", "vf29", "vf30", "vf31"};
|
||||
|
||||
// Printable names for the 32 ids of the VI register kind, indexed by register number.
// Ids 0-15 are the integer registers vi0 - vi15; ids 16-31 are named control registers, with
// unnamed slots spelled "INVALID<n>" where n counts from the start of the control block.
const static char* vi_names[32] = {
    "vi0",      "vi1",      "vi2",      "vi3",      "vi4",   "vi5",      "vi6",       "vi7",
    "vi8",      "vi9",      "vi10",     "vi11",     "vi12",  "vi13",     "vi14",      "vi15",
    "Status",   "MAC",      "Clipping", "INVALID3", "R",     "I",        "Q",         "INVALID7",
    "INVALID8", "INVALID9", "TPC",      "CMSAR0",   "FBRST", "VPU-STAT", "INVALID14", "CMSAR1"};
|
||||
|
||||
// Printable names of the two performance counter registers, indexed by register number.
const static char* pcr_names[2] = {"pcr0", "pcr1"};
|
||||
|
||||
/////////////////////////////
|
||||
// Register Names Conversion
|
||||
/////////////////////////////
|
||||
|
||||
namespace {
|
||||
const char* gpr_to_charp(Reg::Gpr gpr) {
|
||||
assert(gpr < 32);
|
||||
return gpr_names[gpr];
|
||||
}
|
||||
|
||||
const char* fpr_to_charp(uint32_t fpr) {
|
||||
assert(fpr < 32);
|
||||
return fpr_names[fpr];
|
||||
}
|
||||
|
||||
const char* cop0_to_charp(Reg::Cop0 cpr) {
|
||||
assert(cpr < 32);
|
||||
return cop0_names[cpr];
|
||||
}
|
||||
|
||||
const char* vf_to_charp(uint32_t vf) {
|
||||
assert(vf < 32);
|
||||
return vf_names[vf];
|
||||
}
|
||||
|
||||
const char* vi_to_charp(uint32_t vi) {
|
||||
assert(vi < 32);
|
||||
return vi_names[vi];
|
||||
}
|
||||
|
||||
const char* pcr_to_charp(uint32_t pcr) {
|
||||
assert(pcr < 2);
|
||||
return pcr_names[pcr];
|
||||
}
|
||||
} // namespace
|
||||
|
||||
/////////////////////////////
|
||||
// Register Class
|
||||
/////////////////////////////
|
||||
// A register is stored as a 16-bit integer, with the top 8 bits indicating the "kind" and the lower
|
||||
// 8 bits representing the register id within that kind. If the integer is -1, it is a special
|
||||
// "invalid" register used to represent an uninitialized Register.
|
||||
|
||||
// Note: VI / COP2 are separate "kinds" of registers, each with 16 registers.
|
||||
// It might make sense to make this a single "kind" instead?
|
||||
|
||||
/*!
|
||||
* Create a register. The kind and num must both be valid.
|
||||
*/
|
||||
Register::Register(Reg::RegisterKind kind, uint32_t num) {
|
||||
id = (kind << 8) | num;
|
||||
|
||||
// check range:
|
||||
switch (kind) {
|
||||
case Reg::GPR:
|
||||
case Reg::FPR:
|
||||
case Reg::VF:
|
||||
case Reg::COP0:
|
||||
case Reg::VI:
|
||||
assert(num < 32);
|
||||
break;
|
||||
case Reg::PCR:
|
||||
assert(num < 2);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Convert to string. The register must be valid.
|
||||
*/
|
||||
const char* Register::to_charp() const {
|
||||
switch (get_kind()) {
|
||||
case Reg::GPR:
|
||||
return gpr_to_charp(get_gpr());
|
||||
case Reg::FPR:
|
||||
return fpr_to_charp(get_fpr());
|
||||
case Reg::VF:
|
||||
return vf_to_charp(get_vf());
|
||||
case Reg::VI:
|
||||
return vi_to_charp(get_vi());
|
||||
case Reg::COP0:
|
||||
return cop0_to_charp(get_cop0());
|
||||
case Reg::PCR:
|
||||
return pcr_to_charp(get_pcr());
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Convert to string. The register must be valid.
|
||||
*/
|
||||
std::string Register::to_string() const {
|
||||
return {to_charp()};
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the register kind.
|
||||
*/
|
||||
Reg::RegisterKind Register::get_kind() const {
|
||||
uint16_t kind = id >> 8;
|
||||
assert(kind < Reg::MAX_KIND);
|
||||
return (Reg::RegisterKind)kind;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the GPR number. Must be a GPR.
|
||||
*/
|
||||
Reg::Gpr Register::get_gpr() const {
|
||||
assert(get_kind() == Reg::GPR);
|
||||
uint16_t kind = id & 0xff;
|
||||
assert(kind < Reg::MAX_GPR);
|
||||
return (Reg::Gpr)(kind);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the FPR number. Must be an FPR.
|
||||
*/
|
||||
uint32_t Register::get_fpr() const {
|
||||
assert(get_kind() == Reg::FPR);
|
||||
uint16_t kind = id & 0xff;
|
||||
assert(kind < 32);
|
||||
return kind;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the VF number. Must be a VF.
|
||||
*/
|
||||
uint32_t Register::get_vf() const {
|
||||
assert(get_kind() == Reg::VF);
|
||||
uint16_t kind = id & 0xff;
|
||||
assert(kind < 32);
|
||||
return kind;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the VI number. Must be a VI.
|
||||
*/
|
||||
uint32_t Register::get_vi() const {
|
||||
assert(get_kind() == Reg::VI);
|
||||
uint16_t kind = id & 0xff;
|
||||
assert(kind < 32);
|
||||
return kind;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the COP0 number. Must be a COP0.
|
||||
*/
|
||||
Reg::Cop0 Register::get_cop0() const {
|
||||
assert(get_kind() == Reg::COP0);
|
||||
uint16_t kind = id & 0xff;
|
||||
assert(kind < Reg::MAX_COP0);
|
||||
return (Reg::Cop0)(kind);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the PCR number. Must be a PCR.
|
||||
*/
|
||||
uint32_t Register::get_pcr() const {
|
||||
assert(get_kind() == Reg::PCR);
|
||||
uint16_t kind = id & 0xff;
|
||||
assert(kind < 2);
|
||||
return kind;
|
||||
}
|
||||
|
||||
/*!
 * Two registers are equal when their packed ids (kind and number) match.
 */
bool Register::operator==(const Register& other) const {
  const bool same_id = (other.id == id);
  return same_id;
}
|
||||
|
||||
/*!
 * Two registers differ when their packed ids (kind and number) differ.
 */
bool Register::operator!=(const Register& other) const {
  return !(id == other.id);
}
|
145
decompiler/Disasm/Register.h
Normal file
145
decompiler/Disasm/Register.h
Normal file
@ -0,0 +1,145 @@
|
||||
/*!
|
||||
* @file Register.h
|
||||
* Representation of an EE register.
|
||||
*/
|
||||
|
||||
#ifndef NEXT_REGISTER_H
|
||||
#define NEXT_REGISTER_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
// Constants naming the register kinds and the individual registers of each kind.
namespace Reg {
// Which register file a register belongs to.
enum RegisterKind {
  GPR = 0,   // EE general purpose registers (these have nicknames, see Gpr)
  FPR = 1,   // EE floating point registers, named f0 - f31
  VF = 2,    // VU0 floating point vector registers as seen from the EE, named vf0 - vf31
  VI = 3,    // VU0 integer registers as seen from the EE: the first 16 are vi00 - vi15,
             // the remaining ones are control registers (see Vi)
  COP0 = 4,  // EE COP0 control registers (32 of them, see Cop0 for their names)
  PCR = 5,   // performance counter registers (PCR0, PCR1)
  MAX_KIND = 6
};

// Nicknames of the general purpose registers.
enum Gpr {
  R0 = 0,   // hardcoded to zero
  AT = 1,   // temp, not used by the GOAL compiler, but used by GOAL's kernel inline assembly
            // (and other places?)
  V0 = 2,   // return, temp
  V1 = 3,   // temp
  A0 = 4,   // arg0, temp
  A1 = 5,   // arg1, temp
  A2 = 6,   // arg2, temp
  A3 = 7,   // arg3, temp
  T0 = 8,   // arg4, temp
  T1 = 9,   // arg5, temp
  T2 = 10,  // arg6, temp
  T3 = 11,  // arg7, temp
  T4 = 12,  // temp
  T5 = 13,  // temp
  T6 = 14,  // temp
  T7 = 15,  // temp
  S0 = 16,  // saved
  S1 = 17,  // saved
  S2 = 18,  // saved
  S3 = 19,  // saved
  S4 = 20,  // saved
  S5 = 21,  // saved
  S6 = 22,  // process pointer
  S7 = 23,  // symbol table
  T8 = 24,  // temp
  T9 = 25,  // function pointer
  K0 = 26,  // reserved
  K1 = 27,  // reserved
  GP = 28,  // saved (C code uses this as a global pointer)
  SP = 29,  // stack pointer
  FP = 30,  // global pointer (address of current function)
  RA = 31,  // return address
  MAX_GPR = 32
};

// Nicknames of the COP0 control registers. INVALIDn marks register numbers without a name.
enum Cop0 {
  INDEX = 0,
  RANDOM = 1,
  ENTRYLO0 = 2,
  ENTRYLO1 = 3,
  CONTEXT = 4,
  PAGEMASK = 5,
  WIRED = 6,
  INVALID7 = 7,
  BADVADDR = 8,
  COUNT = 9,
  ENTRYHI = 10,
  COMPARE = 11,
  COP0_STATUS = 12,
  CAUSE = 13,
  EPC = 14,
  PRID = 15,
  CONFIG = 16,
  INVALID17 = 17,
  INVALID18 = 18,
  INVALID19 = 19,
  INVALID20 = 20,
  INVALID21 = 21,
  INVALID22 = 22,
  BADPADDR = 23,
  DEBUG = 24,
  PERF = 25,
  INVALID26 = 26,
  INVALID27 = 27,
  TAGLO = 28,
  TAGHI = 29,
  ERROREPC = 30,
  INVALID31 = 31,
  MAX_COP0 = 32
};

// Nicknames of the COP2 integer (VI) registers.
// Numbers 0 - 15 are plain vi0 - vi15 and have no nickname, so this enum starts at 16.
enum Vi {
  COP2_STATUS = 16,
  MAC = 17,
  CLIPPING = 18,
  COP2_INVALID3 = 19,
  R = 20,
  I = 21,
  Q = 22,
  COP2_INVALID7 = 23,
  COP2_INVALID8 = 24,
  COP2_INVALID9 = 25,
  TPC = 26,
  CMSAR0 = 27,
  FBRST = 28,
  VPUSTAT = 29,
  COP2_INVALID14 = 30,
  CMSAR1 = 31,
  MAX_COP2 = 32
};
}  // namespace Reg
|
||||
|
||||
// Representation of a register. Uses a 32-bit integer internally.
|
||||
class Register {
|
||||
public:
|
||||
Register() = default;
|
||||
Register(Reg::RegisterKind kind, uint32_t num);
|
||||
const char* to_charp() const;
|
||||
std::string to_string() const;
|
||||
Reg::RegisterKind get_kind() const;
|
||||
Reg::Gpr get_gpr() const;
|
||||
uint32_t get_fpr() const;
|
||||
uint32_t get_vf() const;
|
||||
uint32_t get_vi() const;
|
||||
Reg::Cop0 get_cop0() const;
|
||||
uint32_t get_pcr() const;
|
||||
|
||||
bool operator==(const Register& other) const;
|
||||
bool operator!=(const Register& other) const;
|
||||
|
||||
private:
|
||||
uint16_t id = -1;
|
||||
};
|
||||
|
||||
#endif // NEXT_REGISTER_H
|
51
decompiler/Function/BasicBlocks.cpp
Normal file
51
decompiler/Function/BasicBlocks.cpp
Normal file
@ -0,0 +1,51 @@
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include "BasicBlocks.h"
|
||||
#include "decompiler/ObjectFile/LinkedObjectFile.h"
|
||||
#include "decompiler/Disasm/InstructionMatching.h"
|
||||
|
||||
/*!
|
||||
* Find all basic blocks in a function.
|
||||
* All delay slot instructions are grouped with the branch instruction.
|
||||
* This is done by finding all "dividers", which are after branch delay instructions and before
|
||||
* branch destinations, then sorting them, ignoring duplicates, and creating the blocks.
|
||||
*/
|
||||
std::vector<BasicBlock> find_blocks_in_function(const LinkedObjectFile& file,
|
||||
int seg,
|
||||
const Function& func) {
|
||||
std::vector<BasicBlock> basic_blocks;
|
||||
|
||||
// note - the first word of a function is the "function" type and should go in any basic block
|
||||
std::vector<int> dividers = {0, int(func.instructions.size())};
|
||||
|
||||
for (int i = 0; i < int(func.instructions.size()); i++) {
|
||||
const auto& instr = func.instructions.at(i);
|
||||
const auto& instr_info = instr.get_info();
|
||||
|
||||
if (instr_info.is_branch || instr_info.is_branch_likely) {
|
||||
// make sure the delay slot of this branch is included in the function
|
||||
assert(i + func.start_word < func.end_word - 1);
|
||||
// divider after delay slot
|
||||
dividers.push_back(i + 2);
|
||||
auto label_id = instr.get_label_target();
|
||||
assert(label_id != -1);
|
||||
const auto& label = file.labels.at(label_id);
|
||||
// should only jump to within our own function
|
||||
assert(label.target_segment == seg);
|
||||
assert(label.offset / 4 > func.start_word);
|
||||
assert(label.offset / 4 < func.end_word - 1);
|
||||
dividers.push_back(label.offset / 4 - func.start_word);
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(dividers.begin(), dividers.end());
|
||||
|
||||
for (size_t i = 0; i < dividers.size() - 1; i++) {
|
||||
if (dividers[i] != dividers[i + 1]) {
|
||||
basic_blocks.emplace_back(dividers[i], dividers[i + 1]);
|
||||
assert(dividers[i] >= 0);
|
||||
}
|
||||
}
|
||||
|
||||
return basic_blocks;
|
||||
}
|
23
decompiler/Function/BasicBlocks.h
Normal file
23
decompiler/Function/BasicBlocks.h
Normal file
@ -0,0 +1,23 @@
|
||||
#ifndef JAK_DISASSEMBLER_BASICBLOCKS_H
|
||||
#define JAK_DISASSEMBLER_BASICBLOCKS_H
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "CfgVtx.h"
|
||||
|
||||
class LinkedObjectFile;
|
||||
class Function;
|
||||
|
||||
// A range of instruction words, stored as a start and an end index.
// find_blocks_in_function builds these from neighboring dividers (begin, next divider).
struct BasicBlock {
  BasicBlock(int _start_word, int _end_word) : start_word{_start_word}, end_word{_end_word} {}

  int start_word;
  int end_word;
};
|
||||
|
||||
std::vector<BasicBlock> find_blocks_in_function(const LinkedObjectFile& file,
|
||||
int seg,
|
||||
const Function& func);
|
||||
|
||||
#endif // JAK_DISASSEMBLER_BASICBLOCKS_H
|
1754
decompiler/Function/CfgVtx.cpp
Normal file
1754
decompiler/Function/CfgVtx.cpp
Normal file
File diff suppressed because it is too large
Load Diff
336
decompiler/Function/CfgVtx.h
Normal file
336
decompiler/Function/CfgVtx.h
Normal file
@ -0,0 +1,336 @@
|
||||
#ifndef JAK_DISASSEMBLER_CFGVTX_H
|
||||
#define JAK_DISASSEMBLER_CFGVTX_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include "decompiler/util/LispPrint.h"
|
||||
|
||||
/*!
 * Find the element of v which is equal to old and overwrite it with replace.
 * Asserts if v does not contain exactly one element equal to old.
 */
template <typename T>
void replace_exactly_one_in(std::vector<T>& v, T old, T replace) {
  bool found = false;
  for (auto it = v.begin(); it != v.end(); ++it) {
    if (!(*it == old)) {
      continue;
    }
    // a second match means old was not unique
    assert(!found);
    *it = replace;
    found = true;
  }
  // no match at all is also an error
  assert(found);
}
|
||||
|
||||
/*!
|
||||
* Representation of a vertex in the control flow graph.
|
||||
*
|
||||
* The desired state of the control flow graph is to have a single "top-level" node, with NULL as
|
||||
* its parent. This top level node can then be viewed as the entire control flow for the function.
|
||||
* When the graph is fully understood, the only relation between vertices should be parent-child.
|
||||
* For example, an "if_else" vertex will have a "condition" vertex, "true_case" vertex, and "false
|
||||
* case" vertex as children.
|
||||
*
|
||||
* However, the initial state of the CFG is to have all the vertices be in the top level. When there
|
||||
* are multiple top level vertices, the graph is considered to be "unresolved", as there are
|
||||
* relations between these that are not explained by parent-child control structuring. These
|
||||
* relations are either pred/succ, indicating program control flow, and next/prev indicating code
|
||||
* layout order. These are undesirable because these do not map to high-level program structure.
|
||||
*
|
||||
* The graph attempts to "resolve" itself, meaning these pred/succ relations are destroyed and
|
||||
* replaced with nested control flow. The pred/succ and next/prev relations should only exist at the
|
||||
* top level.
|
||||
*
|
||||
* Once resolved, there will be a single "top level" node containing the entire control flow
|
||||
* structure.
|
||||
*
|
||||
* All CfgVtxs should be created from the ControlFlowGraph::alloc function, which allocates them
|
||||
* from a pool and cleans them up when the ControlFlowGraph is destroyed. This approach avoids
|
||||
* circular reference issues from a referencing counting approach, but does mean that temporary
|
||||
* allocations aren't cleaned up until the entire graph is deleted, but this is probably fine.
|
||||
*
|
||||
* Note - there are two special "top-level" vertices that are always present, called Entry and Exit.
|
||||
* These always exist and don't count toward making the graph unresolved.
|
||||
* These vertices won't be counted in the get_top_level_vertices_count.
|
||||
*
|
||||
* Desired end state of the graph:
|
||||
* Entry -> some-top-level-control-flow-structure -> Exit
|
||||
*/
|
||||
class CfgVtx {
|
||||
public:
|
||||
virtual std::string to_string() = 0; // convert to a single line string for debugging
|
||||
virtual std::shared_ptr<Form> to_form() = 0; // recursive print as LISP form.
|
||||
virtual ~CfgVtx() = default;
|
||||
|
||||
CfgVtx* parent = nullptr; // parent structure, or nullptr if top level
|
||||
CfgVtx* succ_branch = nullptr; // possible successor from branching, or NULL if no branch
|
||||
CfgVtx* succ_ft = nullptr; // possible successor from falling through, or NULL if impossible
|
||||
CfgVtx* next = nullptr; // next code in memory
|
||||
CfgVtx* prev = nullptr; // previous code in memory
|
||||
std::vector<CfgVtx*> pred; // all vertices which have us as succ_branch or succ_ft
|
||||
int uid = -1;
|
||||
|
||||
struct {
|
||||
bool has_branch = false; // does the block end in a branch (any kind)?
|
||||
bool branch_likely = false; // does the block end in a likely branch?
|
||||
bool branch_always = false; // does the branch always get taken?
|
||||
} end_branch;
|
||||
|
||||
// each child class of CfgVtx will define its own children.
|
||||
|
||||
/*!
|
||||
* Do we have s as a successor?
|
||||
*/
|
||||
bool has_succ(CfgVtx* s) const { return succ_branch == s || succ_ft == s; }
|
||||
|
||||
/*!
|
||||
* Do we have p as a predecessor?
|
||||
*/
|
||||
bool has_pred(CfgVtx* p) const {
|
||||
for (auto* x : pred) {
|
||||
if (x == p)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Lazy function for getting all non-null succesors
|
||||
*/
|
||||
std::vector<CfgVtx*> succs() {
|
||||
std::vector<CfgVtx*> result;
|
||||
if (succ_branch) {
|
||||
result.push_back(succ_branch);
|
||||
}
|
||||
if (succ_ft && succ_ft != succ_branch) {
|
||||
result.push_back(succ_ft);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void parent_claim(CfgVtx* new_parent);
|
||||
void replace_pred_and_check(CfgVtx* old_pred, CfgVtx* new_pred);
|
||||
void replace_succ_and_check(CfgVtx* old_succ, CfgVtx* new_succ);
|
||||
void replace_preds_with_and_check(std::vector<CfgVtx*> old_preds, CfgVtx* new_pred);
|
||||
|
||||
std::string links_to_string();
|
||||
};
|
||||
|
||||
/*!
|
||||
* Special Entry vertex representing the beginning of the function
|
||||
*/
|
||||
class EntryVtx : public CfgVtx {
|
||||
public:
|
||||
EntryVtx() = default;
|
||||
std::shared_ptr<Form> to_form() override;
|
||||
std::string to_string() override;
|
||||
};
|
||||
|
||||
/*!
|
||||
* Special Exit vertex representing the end of the function
|
||||
*/
|
||||
class ExitVtx : public CfgVtx {
|
||||
public:
|
||||
std::string to_string() override;
|
||||
std::shared_ptr<Form> to_form() override;
|
||||
};
|
||||
|
||||
/*!
|
||||
* A vertex which represents a single basic block. It has no children.
|
||||
*/
|
||||
class BlockVtx : public CfgVtx {
|
||||
public:
|
||||
explicit BlockVtx(int id) : block_id(id) {}
|
||||
std::string to_string() override;
|
||||
std::shared_ptr<Form> to_form() override;
|
||||
int block_id = -1; // which block are we?
|
||||
bool is_early_exit_block = false; // are we an empty block at the end for early exits to jump to?
|
||||
};
|
||||
|
||||
/*!
|
||||
* A vertex representing a sequence of child vertices which are always represented in order.
|
||||
* Child vertices in here don't set their next/prev pred/succ pointers as this counts as resolved.
|
||||
*/
|
||||
class SequenceVtx : public CfgVtx {
|
||||
public:
|
||||
std::string to_string() override;
|
||||
std::shared_ptr<Form> to_form() override;
|
||||
std::vector<CfgVtx*> seq;
|
||||
};
|
||||
|
||||
/*!
|
||||
* Representing a (cond ((a b) (c d) ... (else z))) structure.
|
||||
* Note that the first condition ("a" in the above example) may "steal" instructions belonging
|
||||
* to an outer scope and these may eventually need to be "unstolen"
|
||||
*/
|
||||
class CondWithElse : public CfgVtx {
|
||||
public:
|
||||
std::string to_string() override;
|
||||
std::shared_ptr<Form> to_form() override;
|
||||
|
||||
struct Entry {
|
||||
Entry() = default;
|
||||
Entry(CfgVtx* _c, CfgVtx* _b) : condition(_c), body(_b) {}
|
||||
CfgVtx* condition = nullptr;
|
||||
CfgVtx* body = nullptr;
|
||||
};
|
||||
|
||||
std::vector<Entry> entries;
|
||||
CfgVtx* else_vtx = nullptr;
|
||||
};
|
||||
|
||||
/*!
|
||||
* Representing a (cond ((a b) (c d) ... )) structure.
|
||||
* Note that the first condition ("a" in the above example) may "steal" instructions belonging
|
||||
* to an outer scope and these may eventually need to be "unstolen"
|
||||
*/
|
||||
class CondNoElse : public CfgVtx {
|
||||
public:
|
||||
std::string to_string() override;
|
||||
std::shared_ptr<Form> to_form() override;
|
||||
|
||||
struct Entry {
|
||||
Entry() = default;
|
||||
Entry(CfgVtx* _c, CfgVtx* _b) : condition(_c), body(_b) {}
|
||||
CfgVtx* condition = nullptr;
|
||||
CfgVtx* body = nullptr;
|
||||
};
|
||||
|
||||
std::vector<Entry> entries;
|
||||
};
|
||||
|
||||
class WhileLoop : public CfgVtx {
|
||||
public:
|
||||
std::string to_string() override;
|
||||
std::shared_ptr<Form> to_form() override;
|
||||
|
||||
CfgVtx* condition = nullptr;
|
||||
CfgVtx* body = nullptr;
|
||||
};
|
||||
|
||||
class UntilLoop : public CfgVtx {
|
||||
public:
|
||||
std::string to_string() override;
|
||||
std::shared_ptr<Form> to_form() override;
|
||||
|
||||
CfgVtx* condition = nullptr;
|
||||
CfgVtx* body = nullptr;
|
||||
};
|
||||
|
||||
class UntilLoop_single : public CfgVtx {
|
||||
public:
|
||||
std::string to_string() override;
|
||||
std::shared_ptr<Form> to_form() override;
|
||||
|
||||
CfgVtx* block = nullptr;
|
||||
};
|
||||
|
||||
class ShortCircuit : public CfgVtx {
|
||||
public:
|
||||
std::string to_string() override;
|
||||
std::shared_ptr<Form> to_form() override;
|
||||
std::vector<CfgVtx*> entries;
|
||||
};
|
||||
|
||||
class InfiniteLoopBlock : public CfgVtx {
|
||||
public:
|
||||
std::string to_string() override;
|
||||
std::shared_ptr<Form> to_form() override;
|
||||
CfgVtx* block;
|
||||
};
|
||||
|
||||
class GotoEnd : public CfgVtx {
|
||||
public:
|
||||
std::string to_string() override;
|
||||
std::shared_ptr<Form> to_form() override;
|
||||
CfgVtx* body = nullptr;
|
||||
CfgVtx* unreachable_block = nullptr;
|
||||
};
|
||||
|
||||
struct BasicBlock;
|
||||
|
||||
/*!
|
||||
* The actual CFG class, which owns all the vertices.
|
||||
*/
|
||||
class ControlFlowGraph {
|
||||
public:
|
||||
ControlFlowGraph();
|
||||
~ControlFlowGraph();
|
||||
|
||||
std::shared_ptr<Form> to_form();
|
||||
std::string to_form_string();
|
||||
std::string to_dot();
|
||||
int get_top_level_vertices_count();
|
||||
bool is_fully_resolved();
|
||||
CfgVtx* get_single_top_level();
|
||||
|
||||
void flag_early_exit(const std::vector<BasicBlock>& blocks);
|
||||
|
||||
const std::vector<BlockVtx*>& create_blocks(int count);
|
||||
void link_fall_through(BlockVtx* first, BlockVtx* second);
|
||||
void link_branch(BlockVtx* first, BlockVtx* second);
|
||||
bool find_cond_w_else();
|
||||
bool find_cond_n_else();
|
||||
|
||||
// bool find_if_else_top_level();
|
||||
bool find_seq_top_level();
|
||||
bool find_while_loop_top_level();
|
||||
bool find_until_loop();
|
||||
bool find_until1_loop();
|
||||
bool find_short_circuits();
|
||||
bool find_goto_end();
|
||||
bool find_infinite_loop();
|
||||
|
||||
/*!
|
||||
* Apply a function f to each top-level vertex.
|
||||
* If f returns false, stops.
|
||||
*/
|
||||
template <typename Func>
|
||||
void for_each_top_level_vtx(Func f) {
|
||||
for (auto* x : m_node_pool) {
|
||||
if (!x->parent && x != entry() && x != exit()) {
|
||||
if (!f(x)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
EntryVtx* entry() { return m_entry; }
|
||||
ExitVtx* exit() { return m_exit; }
|
||||
|
||||
/*!
|
||||
* Allocate and construct a node of the specified type.
|
||||
*/
|
||||
template <typename T, class... Args>
|
||||
T* alloc(Args&&... args) {
|
||||
T* new_obj = new T(std::forward<Args>(args)...);
|
||||
m_node_pool.push_back(new_obj);
|
||||
new_obj->uid = m_uid++;
|
||||
return new_obj;
|
||||
}
|
||||
|
||||
private:
|
||||
// bool compact_one_in_top_level();
|
||||
// bool is_if_else(CfgVtx* b0, CfgVtx* b1, CfgVtx* b2, CfgVtx* b3);
|
||||
bool is_sequence(CfgVtx* b0, CfgVtx* b1);
|
||||
bool is_sequence_of_non_sequences(CfgVtx* b0, CfgVtx* b1);
|
||||
bool is_sequence_of_sequence_and_non_sequence(CfgVtx* b0, CfgVtx* b1);
|
||||
bool is_sequence_of_sequence_and_sequence(CfgVtx* b0, CfgVtx* b1);
|
||||
bool is_sequence_of_non_sequence_and_sequence(CfgVtx* b0, CfgVtx* b1);
|
||||
bool is_while_loop(CfgVtx* b0, CfgVtx* b1, CfgVtx* b2);
|
||||
bool is_until_loop(CfgVtx* b1, CfgVtx* b2);
|
||||
bool is_goto_end_and_unreachable(CfgVtx* b0, CfgVtx* b1);
|
||||
std::vector<BlockVtx*> m_blocks; // all block nodes, in order.
|
||||
std::vector<CfgVtx*> m_node_pool; // all nodes allocated
|
||||
EntryVtx* m_entry; // the entry vertex
|
||||
ExitVtx* m_exit; // the exit vertex
|
||||
int m_uid = 0;
|
||||
};
|
||||
|
||||
class LinkedObjectFile;
|
||||
class Function;
|
||||
std::shared_ptr<ControlFlowGraph> build_cfg(const LinkedObjectFile& file, int seg, Function& func);
|
||||
|
||||
#endif // JAK_DISASSEMBLER_CFGVTX_H
|
552
decompiler/Function/Function.cpp
Normal file
552
decompiler/Function/Function.cpp
Normal file
@ -0,0 +1,552 @@
|
||||
#include <cassert>
|
||||
#include <vector>
|
||||
#include "Function.h"
|
||||
#include "decompiler/Disasm/InstructionMatching.h"
|
||||
#include "decompiler/ObjectFile/LinkedObjectFile.h"
|
||||
#include "decompiler/TypeSystem/TypeInfo.h"
|
||||
|
||||
namespace {
|
||||
std::vector<Register> gpr_backups = {make_gpr(Reg::GP), make_gpr(Reg::S5), make_gpr(Reg::S4),
|
||||
make_gpr(Reg::S3), make_gpr(Reg::S2), make_gpr(Reg::S1),
|
||||
make_gpr(Reg::S0)};
|
||||
|
||||
std::vector<Register> fpr_backups = {make_fpr(30), make_fpr(28), make_fpr(26),
|
||||
make_fpr(24), make_fpr(22), make_fpr(20)};
|
||||
|
||||
// Which GPR should the n-th backup store (counting from 0) save, if the prologue backs up
// `total` GPRs? The table is walked backwards, starting at entry total - 1.
Register get_expected_gpr_backup(int n, int total) {
  assert(total <= int(gpr_backups.size()));
  assert(n < total);
  const int table_idx = total - 1 - n;
  return gpr_backups.at(table_idx);
}
|
||||
|
||||
// Which FPR should the n-th backup store (counting from 0) save, if the prologue backs up
// `total` FPRs? The table is walked backwards, starting at entry total - 1.
Register get_expected_fpr_backup(int n, int total) {
  assert(total <= int(fpr_backups.size()));
  assert(n < total);
  const int table_idx = total - 1 - n;
  return fpr_backups.at(table_idx);
}
|
||||
|
||||
// Round in up to the next multiple of 16 (unchanged if it already is one).
uint32_t align16(uint32_t in) {
  const uint32_t low_bits = 15;
  return (in + low_bits) & ~low_bits;
}
|
||||
|
||||
// Round in up to the next multiple of 8 (unchanged if it already is one).
uint32_t align8(uint32_t in) {
  const uint32_t low_bits = 7;
  return (in + low_bits) & ~low_bits;
}
|
||||
|
||||
// Round in up to the next multiple of 4 (unchanged if it already is one).
uint32_t align4(uint32_t in) {
  const uint32_t low_bits = 3;
  return (in + low_bits) & ~low_bits;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/*!
 * Create a function which covers the given range of words.
 */
Function::Function(int _start_word, int _end_word)
    : start_word(_start_word), end_word(_end_word) {}
|
||||
|
||||
/*!
|
||||
* Remove the function prologue from the first basic block and populate this->prologue with info.
|
||||
*/
|
||||
void Function::analyze_prologue(const LinkedObjectFile& file) {
|
||||
int idx = 1;
|
||||
|
||||
// first we look for daddiu sp, sp, -x to determine how much stack is used
|
||||
if (is_gpr_2_imm_int(instructions.at(idx), InstructionKind::DADDIU, make_gpr(Reg::SP),
|
||||
make_gpr(Reg::SP), {})) {
|
||||
prologue.total_stack_usage = -instructions.at(idx).get_imm_src_int();
|
||||
idx++;
|
||||
} else {
|
||||
prologue.total_stack_usage = 0;
|
||||
}
|
||||
|
||||
// don't include type tag
|
||||
prologue_end = 1;
|
||||
|
||||
// if we use the stack, we may back up some registers onto it
|
||||
if (prologue.total_stack_usage) {
|
||||
// heuristics to detect asm functions
|
||||
{
|
||||
auto& instr = instructions.at(idx);
|
||||
// storing stack pointer on the stack is done by some ASM kernel functions
|
||||
if (instr.kind == InstructionKind::SW && instr.get_src(0).get_reg() == make_gpr(Reg::SP)) {
|
||||
printf("[Warning] %s Suspected ASM function based on this instruction in prologue: %s\n",
|
||||
guessed_name.to_string().c_str(), instr.to_string(file).c_str());
|
||||
warnings += "Flagged as ASM function because of " + instr.to_string(file) + "\n";
|
||||
suspected_asm = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// ra backup is always first
|
||||
if (is_no_link_gpr_store(instructions.at(idx), 8, Register(Reg::GPR, Reg::RA), {},
|
||||
Register(Reg::GPR, Reg::SP))) {
|
||||
prologue.ra_backed_up = true;
|
||||
prologue.ra_backup_offset = get_gpr_store_offset_as_int(instructions.at(idx));
|
||||
assert(prologue.ra_backup_offset == 0);
|
||||
idx++;
|
||||
}
|
||||
|
||||
{
|
||||
auto& instr = instructions.at(idx);
|
||||
|
||||
// storing s7 on the stack is done by interrupt handlers, which we probably don't want to
|
||||
// support
|
||||
if (instr.kind == InstructionKind::SD && instr.get_src(0).get_reg() == make_gpr(Reg::S7)) {
|
||||
printf("[Warning] %s Suspected ASM function based on this instruction in prologue: %s\n",
|
||||
guessed_name.to_string().c_str(), instr.to_string(file).c_str());
|
||||
warnings += "Flagged as ASM function because of " + instr.to_string(file) + "\n";
|
||||
suspected_asm = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// next is fp backup
|
||||
if (is_no_link_gpr_store(instructions.at(idx), 8, Register(Reg::GPR, Reg::FP), {},
|
||||
Register(Reg::GPR, Reg::SP))) {
|
||||
prologue.fp_backed_up = true;
|
||||
prologue.fp_backup_offset = get_gpr_store_offset_as_int(instructions.at(idx));
|
||||
// in Jak 1 like we never backup fp unless ra is also backed up, so the offset is always 8.
|
||||
// but it seems like it could be possible to do one without the other?
|
||||
assert(prologue.fp_backup_offset == 8);
|
||||
idx++;
|
||||
|
||||
// after backing up fp, we always set it to t9.
|
||||
prologue.fp_set = is_gpr_3(instructions.at(idx), InstructionKind::OR, make_gpr(Reg::FP),
|
||||
make_gpr(Reg::T9), make_gpr(Reg::R0));
|
||||
assert(prologue.fp_set);
|
||||
idx++;
|
||||
}
|
||||
|
||||
// next is gpr backups. these are in reverse order, so we should first find the length
|
||||
// GOAL will always do the exact same thing when the same number of gprs needs to be backed up
|
||||
// so we just need to determine the number of GPR backups, and we have all the info we need
|
||||
int n_gpr_backups = 0;
|
||||
int gpr_idx = idx;
|
||||
bool expect_nothing_after_gprs = false;
|
||||
|
||||
while (is_no_link_gpr_store(instructions.at(gpr_idx), 16, {}, {}, make_gpr(Reg::SP))) {
|
||||
auto store_reg = instructions.at(gpr_idx).get_src(0).get_reg();
|
||||
|
||||
// sometimes stack memory is zeroed immediately after gpr backups, and this fools the previous
|
||||
// check.
|
||||
if (store_reg == make_gpr(Reg::R0)) {
|
||||
printf(
|
||||
"[Warning] %s Stack Zeroing Detected in Function::analyze_prologue, prologue may be "
|
||||
"wrong\n",
|
||||
guessed_name.to_string().c_str());
|
||||
warnings += "Stack Zeroing Detected, prologue may be wrong\n";
|
||||
expect_nothing_after_gprs = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// this also happens a few times per game. this a0/r0 check seems to be all that's needed to
|
||||
// avoid false positives here!
|
||||
if (store_reg == make_gpr(Reg::A0)) {
|
||||
suspected_asm = true;
|
||||
printf(
|
||||
"[Warning] %s Suspected ASM function because register $a0 was stored on the stack!\n",
|
||||
guessed_name.to_string().c_str());
|
||||
warnings += "a0 on stack detected, flagging as asm\n";
|
||||
return;
|
||||
}
|
||||
|
||||
n_gpr_backups++;
|
||||
gpr_idx++;
|
||||
}
|
||||
|
||||
if (n_gpr_backups) {
|
||||
prologue.gpr_backup_offset = get_gpr_store_offset_as_int(instructions.at(idx));
|
||||
for (int i = 0; i < n_gpr_backups; i++) {
|
||||
int this_offset = get_gpr_store_offset_as_int(instructions.at(idx + i));
|
||||
auto this_reg = instructions.at(idx + i).get_src(0).get_reg();
|
||||
assert(this_offset == prologue.gpr_backup_offset + 16 * i);
|
||||
if (this_reg != get_expected_gpr_backup(i, n_gpr_backups)) {
|
||||
suspected_asm = true;
|
||||
printf("[Warning] %s Suspected asm function that isn't flagged due to stack store %s\n",
|
||||
guessed_name.to_string().c_str(),
|
||||
instructions.at(idx + i).to_string(file).c_str());
|
||||
warnings += "Suspected asm function due to stack store: " +
|
||||
instructions.at(idx + i).to_string(file) + "\n";
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
prologue.n_gpr_backup = n_gpr_backups;
|
||||
idx = gpr_idx;
|
||||
|
||||
int n_fpr_backups = 0;
|
||||
int fpr_idx = idx;
|
||||
if (!expect_nothing_after_gprs) {
|
||||
// FPR backups
|
||||
while (is_no_ll_fpr_store(instructions.at(fpr_idx), {}, {}, make_gpr(Reg::SP))) {
|
||||
// auto store_reg = instructions.at(gpr_idx).get_src(0).get_reg();
|
||||
n_fpr_backups++;
|
||||
fpr_idx++;
|
||||
}
|
||||
|
||||
if (n_fpr_backups) {
|
||||
prologue.fpr_backup_offset = instructions.at(idx).get_src(1).get_imm();
|
||||
for (int i = 0; i < n_fpr_backups; i++) {
|
||||
int this_offset = instructions.at(idx + i).get_src(1).get_imm();
|
||||
auto this_reg = instructions.at(idx + i).get_src(0).get_reg();
|
||||
assert(this_offset == prologue.fpr_backup_offset + 4 * i);
|
||||
if (this_reg != get_expected_fpr_backup(i, n_fpr_backups)) {
|
||||
suspected_asm = true;
|
||||
printf("[Warning] %s Suspected asm function that isn't flagged due to stack store %s\n",
|
||||
guessed_name.to_string().c_str(),
|
||||
instructions.at(idx + i).to_string(file).c_str());
|
||||
warnings += "Suspected asm function due to stack store: " +
|
||||
instructions.at(idx + i).to_string(file) + "\n";
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
prologue.n_fpr_backup = n_fpr_backups;
|
||||
idx = fpr_idx;
|
||||
|
||||
prologue_start = 1;
|
||||
prologue_end = idx;
|
||||
|
||||
prologue.stack_var_offset = 0;
|
||||
if (prologue.ra_backed_up) {
|
||||
prologue.stack_var_offset = 8;
|
||||
}
|
||||
if (prologue.fp_backed_up) {
|
||||
prologue.stack_var_offset = 16;
|
||||
}
|
||||
|
||||
if (n_gpr_backups == 0 && n_fpr_backups == 0) {
|
||||
prologue.n_stack_var_bytes = prologue.total_stack_usage - prologue.stack_var_offset;
|
||||
} else if (n_gpr_backups == 0) {
|
||||
// fprs only
|
||||
prologue.n_stack_var_bytes = prologue.fpr_backup_offset - prologue.stack_var_offset;
|
||||
} else if (n_fpr_backups == 0) {
|
||||
// gprs only
|
||||
prologue.n_stack_var_bytes = prologue.gpr_backup_offset - prologue.stack_var_offset;
|
||||
} else {
|
||||
// both, use gprs
|
||||
assert(prologue.fpr_backup_offset > prologue.gpr_backup_offset);
|
||||
prologue.n_stack_var_bytes = prologue.gpr_backup_offset - prologue.stack_var_offset;
|
||||
}
|
||||
|
||||
assert(prologue.n_stack_var_bytes >= 0);
|
||||
|
||||
// check that the stack lines up by going in order
|
||||
|
||||
// RA backup
|
||||
int total_stack = 0;
|
||||
if (prologue.ra_backed_up) {
|
||||
total_stack = align8(total_stack);
|
||||
assert(prologue.ra_backup_offset == total_stack);
|
||||
total_stack += 8;
|
||||
}
|
||||
|
||||
if (!prologue.ra_backed_up && prologue.fp_backed_up) {
|
||||
// GOAL does this for an unknown reason.
|
||||
total_stack += 8;
|
||||
}
|
||||
|
||||
// FP backup
|
||||
if (prologue.fp_backed_up) {
|
||||
total_stack = align8(total_stack);
|
||||
assert(prologue.fp_backup_offset == total_stack);
|
||||
total_stack += 8;
|
||||
assert(prologue.fp_set);
|
||||
}
|
||||
|
||||
// Stack Variables
|
||||
if (prologue.n_stack_var_bytes) {
|
||||
// no alignment because we don't know how the stack vars are aligned.
|
||||
// stack var padding counts toward this section.
|
||||
assert(prologue.stack_var_offset == total_stack);
|
||||
total_stack += prologue.n_stack_var_bytes;
|
||||
}
|
||||
|
||||
// GPRS
|
||||
if (prologue.n_gpr_backup) {
|
||||
total_stack = align16(total_stack);
|
||||
assert(prologue.gpr_backup_offset == total_stack);
|
||||
total_stack += 16 * prologue.n_gpr_backup;
|
||||
}
|
||||
|
||||
// FPRS
|
||||
if (prologue.n_fpr_backup) {
|
||||
total_stack = align4(total_stack);
|
||||
assert(prologue.fpr_backup_offset == total_stack);
|
||||
total_stack += 4 * prologue.n_fpr_backup;
|
||||
}
|
||||
|
||||
total_stack = align16(total_stack);
|
||||
|
||||
// End!
|
||||
assert(prologue.total_stack_usage == total_stack);
|
||||
}
|
||||
|
||||
// it's fine to have the entire first basic block be the prologue - you could loop back to the
|
||||
// first instruction past the prologue.
|
||||
assert(basic_blocks.at(0).end_word >= prologue_end);
|
||||
basic_blocks.at(0).start_word = prologue_end;
|
||||
prologue.decoded = true;
|
||||
|
||||
check_epilogue(file);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Print info about the prologue and stack.
|
||||
*/
|
||||
std::string Function::Prologue::to_string(int indent) const {
|
||||
char buff[512];
|
||||
char* buff_ptr = buff;
|
||||
std::string indent_str(indent, ' ');
|
||||
if (!decoded) {
|
||||
return indent_str + ";BAD PROLOGUE";
|
||||
}
|
||||
buff_ptr += sprintf(buff_ptr, "%s;stack: total 0x%02x, fp? %d ra? %d ep? %d", indent_str.c_str(),
|
||||
total_stack_usage, fp_set, ra_backed_up, epilogue_ok);
|
||||
if (n_stack_var_bytes) {
|
||||
buff_ptr += sprintf(buff_ptr, "\n%s;stack_vars: %d bytes at %d", indent_str.c_str(),
|
||||
n_stack_var_bytes, stack_var_offset);
|
||||
}
|
||||
if (n_gpr_backup) {
|
||||
buff_ptr += sprintf(buff_ptr, "\n%s;gprs:", indent_str.c_str());
|
||||
for (int i = 0; i < n_gpr_backup; i++) {
|
||||
buff_ptr += sprintf(buff_ptr, " %s", gpr_backups.at(i).to_string().c_str());
|
||||
}
|
||||
}
|
||||
if (n_fpr_backup) {
|
||||
buff_ptr += sprintf(buff_ptr, "\n%s;fprs:", indent_str.c_str());
|
||||
for (int i = 0; i < n_fpr_backup; i++) {
|
||||
buff_ptr += sprintf(buff_ptr, " %s", fpr_backups.at(i).to_string().c_str());
|
||||
}
|
||||
}
|
||||
return {buff};
|
||||
}
|
||||
|
||||
/*!
|
||||
* Check that the epilogue matches the prologue.
|
||||
*/
|
||||
void Function::check_epilogue(const LinkedObjectFile& file) {
|
||||
(void)file;
|
||||
if (!prologue.decoded || suspected_asm) {
|
||||
printf("not decoded, or suspected asm, skipping epilogue\n");
|
||||
return;
|
||||
}
|
||||
|
||||
// start at the end and move up.
|
||||
int idx = int(instructions.size()) - 1;
|
||||
|
||||
// seek past alignment nops
|
||||
while (is_nop(instructions.at(idx))) {
|
||||
idx--;
|
||||
}
|
||||
|
||||
epilogue_end = idx;
|
||||
// stack restore
|
||||
if (prologue.total_stack_usage) {
|
||||
// hack - sometimes an asm function has a compiler inserted jr ra/daddu sp sp r0 that follows
|
||||
// the "true" return. We really should have this function flagged as asm, but for now, we can
|
||||
// simply skip over the compiler-generated jr ra/daddu sp sp r0.
|
||||
if (is_gpr_3(instructions.at(idx), InstructionKind::DADDU, make_gpr(Reg::SP), make_gpr(Reg::SP),
|
||||
make_gpr(Reg::R0))) {
|
||||
idx--;
|
||||
assert(is_jr_ra(instructions.at(idx)));
|
||||
idx--;
|
||||
printf(
|
||||
"[Warning] %s Double Return Epilogue Hack! This is probably an ASM function in "
|
||||
"disguise\n",
|
||||
guessed_name.to_string().c_str());
|
||||
warnings += "Double Return Epilogue - this is probably an ASM function\n";
|
||||
}
|
||||
// delay slot should be daddiu sp, sp, offset
|
||||
assert(is_gpr_2_imm_int(instructions.at(idx), InstructionKind::DADDIU, make_gpr(Reg::SP),
|
||||
make_gpr(Reg::SP), prologue.total_stack_usage));
|
||||
idx--;
|
||||
} else {
|
||||
// delay slot is always daddu sp, sp, r0...
|
||||
assert(is_gpr_3(instructions.at(idx), InstructionKind::DADDU, make_gpr(Reg::SP),
|
||||
make_gpr(Reg::SP), make_gpr(Reg::R0)));
|
||||
idx--;
|
||||
}
|
||||
|
||||
// jr ra
|
||||
assert(is_jr_ra(instructions.at(idx)));
|
||||
idx--;
|
||||
|
||||
// restore gprs
|
||||
for (int i = 0; i < prologue.n_gpr_backup; i++) {
|
||||
int gpr_idx = prologue.n_gpr_backup - (1 + i);
|
||||
const auto& expected_reg = gpr_backups.at(gpr_idx);
|
||||
auto expected_offset = prologue.gpr_backup_offset + 16 * i;
|
||||
assert(is_no_ll_gpr_load(instructions.at(idx), 16, true, expected_reg, expected_offset,
|
||||
make_gpr(Reg::SP)));
|
||||
idx--;
|
||||
}
|
||||
|
||||
// restore fprs
|
||||
for (int i = 0; i < prologue.n_fpr_backup; i++) {
|
||||
int fpr_idx = prologue.n_fpr_backup - (1 + i);
|
||||
const auto& expected_reg = fpr_backups.at(fpr_idx);
|
||||
auto expected_offset = prologue.fpr_backup_offset + 4 * i;
|
||||
assert(
|
||||
is_no_ll_fpr_load(instructions.at(idx), expected_reg, expected_offset, make_gpr(Reg::SP)));
|
||||
idx--;
|
||||
}
|
||||
|
||||
// restore fp
|
||||
if (prologue.fp_backed_up) {
|
||||
assert(is_no_ll_gpr_load(instructions.at(idx), 8, true, make_gpr(Reg::FP),
|
||||
prologue.fp_backup_offset, make_gpr(Reg::SP)));
|
||||
idx--;
|
||||
}
|
||||
|
||||
// restore ra
|
||||
if (prologue.ra_backed_up) {
|
||||
assert(is_no_ll_gpr_load(instructions.at(idx), 8, true, make_gpr(Reg::RA),
|
||||
prologue.ra_backup_offset, make_gpr(Reg::SP)));
|
||||
idx--;
|
||||
}
|
||||
|
||||
assert(!basic_blocks.empty());
|
||||
assert(idx + 1 >= basic_blocks.back().start_word);
|
||||
basic_blocks.back().end_word = idx + 1;
|
||||
prologue.epilogue_ok = true;
|
||||
epilogue_start = idx + 1;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Look through all blocks in this function for storing the address of a function into a symbol.
|
||||
* This indicates the stored function address belongs to a global function with the same name as
|
||||
* the symbol.
|
||||
*
|
||||
* Updates the guessed_name of the function and updates type_info
|
||||
*/
|
||||
void Function::find_global_function_defs(LinkedObjectFile& file) {
|
||||
int state = 0;
|
||||
int label_id = -1;
|
||||
Register reg;
|
||||
|
||||
for (const auto& instr : instructions) {
|
||||
// look for LUIs always
|
||||
if (instr.kind == InstructionKind::LUI && instr.get_src(0).kind == InstructionAtom::LABEL) {
|
||||
state = 1;
|
||||
reg = instr.get_dst(0).get_reg();
|
||||
label_id = instr.get_src(0).get_label();
|
||||
assert(label_id != -1);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (state == 1) {
|
||||
// Look for ORI
|
||||
if (instr.kind == InstructionKind::ORI && instr.get_src(0).get_reg() == reg &&
|
||||
instr.get_src(1).get_label() == label_id) {
|
||||
state = 2;
|
||||
reg = instr.get_dst(0).get_reg();
|
||||
continue;
|
||||
} else {
|
||||
state = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (state == 2) {
|
||||
// Look for SW
|
||||
if (instr.kind == InstructionKind::SW && instr.get_src(0).get_reg() == reg &&
|
||||
instr.get_src(2).get_reg() == make_gpr(Reg::S7)) {
|
||||
// done!
|
||||
std::string name = instr.get_src(1).get_sym();
|
||||
if (!file.label_points_to_code(label_id)) {
|
||||
// printf("discard as not code: %s\n", name.c_str());
|
||||
} else {
|
||||
auto& func = file.get_function_at_label(label_id);
|
||||
assert(func.guessed_name.empty());
|
||||
func.guessed_name.set_as_global(name);
|
||||
get_type_info().inform_symbol(name, TypeSpec("function"));
|
||||
// todo - inform function.
|
||||
}
|
||||
|
||||
} else {
|
||||
state = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Look through this function to find calls to method-set! which define methods.
|
||||
* Updates the guessed_name of the function and updates type_info.
|
||||
*/
|
||||
void Function::find_method_defs(LinkedObjectFile& file) {
|
||||
int state = 0;
|
||||
int label_id = -1;
|
||||
int method_id = -1;
|
||||
Register lui_reg;
|
||||
std::string type_name;
|
||||
|
||||
for (const auto& instr : instructions) {
|
||||
// look for lw t9, method-set!(s7)
|
||||
if (instr.kind == InstructionKind::LW && instr.get_dst(0).get_reg() == make_gpr(Reg::T9) &&
|
||||
instr.get_src(0).kind == InstructionAtom::IMM_SYM &&
|
||||
instr.get_src(0).get_sym() == "method-set!" &&
|
||||
instr.get_src(1).get_reg() == make_gpr(Reg::S7)) {
|
||||
state = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (state == 1) {
|
||||
// look for lw a0, type-name(s7)
|
||||
if (instr.kind == InstructionKind::LW && instr.get_dst(0).get_reg() == make_gpr(Reg::A0) &&
|
||||
instr.get_src(0).kind == InstructionAtom::IMM_SYM &&
|
||||
instr.get_src(1).get_reg() == make_gpr(Reg::S7)) {
|
||||
type_name = instr.get_src(0).get_sym();
|
||||
state = 2;
|
||||
continue;
|
||||
} else {
|
||||
state = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (state == 2) {
|
||||
// look for addiu a1, r0, x
|
||||
if (instr.kind == InstructionKind::ADDIU && instr.get_dst(0).get_reg() == make_gpr(Reg::A1) &&
|
||||
instr.get_src(0).get_reg() == make_gpr(Reg::R0)) {
|
||||
method_id = instr.get_src(1).get_imm();
|
||||
state = 3;
|
||||
continue;
|
||||
} else {
|
||||
state = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (state == 3) {
|
||||
// look for lui
|
||||
if (instr.kind == InstructionKind::LUI && instr.get_src(0).kind == InstructionAtom::LABEL) {
|
||||
state = 4;
|
||||
lui_reg = instr.get_dst(0).get_reg();
|
||||
label_id = instr.get_src(0).get_label();
|
||||
assert(label_id != -1);
|
||||
continue;
|
||||
} else {
|
||||
state = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (state == 4) {
|
||||
if (instr.kind == InstructionKind::ORI && instr.get_src(0).get_reg() == lui_reg &&
|
||||
instr.get_src(1).get_label() == label_id) {
|
||||
state = 5;
|
||||
lui_reg = instr.get_dst(0).get_reg();
|
||||
continue;
|
||||
} else {
|
||||
state = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (state == 5) {
|
||||
if (instr.kind == InstructionKind::JALR && instr.get_dst(0).get_reg() == make_gpr(Reg::RA) &&
|
||||
instr.get_src(0).get_reg() == make_gpr(Reg::T9)) {
|
||||
auto& func = file.get_function_at_label(label_id);
|
||||
assert(func.guessed_name.empty());
|
||||
func.guessed_name.set_as_method(type_name, method_id);
|
||||
state = 0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
122
decompiler/Function/Function.h
Normal file
122
decompiler/Function/Function.h
Normal file
@ -0,0 +1,122 @@
|
||||
#ifndef NEXT_FUNCTION_H
|
||||
#define NEXT_FUNCTION_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "decompiler/Disasm/Instruction.h"
|
||||
#include "BasicBlocks.h"
|
||||
#include "CfgVtx.h"
|
||||
|
||||
/*!
 * The decompiler's current guess for what a function is called.
 * Starts out UNIDENTIFIED and is updated with one of the set_as_* functions.
 */
struct FunctionName {
  enum class FunctionKind {
    UNIDENTIFIED,  // hasn't been identified yet.
    GLOBAL,        // global named function
    METHOD,
    TOP_LEVEL_INIT,
  } kind = FunctionKind::UNIDENTIFIED;

  std::string function_name;  // only applicable for GLOBAL
  std::string type_name;      // only applicable for METHOD
  int method_id = -1;         // only applicable for METHOD

  /*!
   * Human-readable name: the function name for GLOBAL, "(method <id> <type>)" for METHOD,
   * "(top-level-login)" for TOP_LEVEL_INIT and "(?)" for UNIDENTIFIED.
   */
  std::string to_string() const {
    switch (kind) {
      case FunctionKind::GLOBAL:
        return function_name;
      case FunctionKind::METHOD:
        return "(method " + std::to_string(method_id) + " " + type_name + ")";
      case FunctionKind::TOP_LEVEL_INIT:
        return "(top-level-login)";
      case FunctionKind::UNIDENTIFIED:
        return "(?)";
    }
    // Only reachable if kind holds a value outside the enum. Still return something so builds
    // with asserts disabled don't run off the end of a non-void function.
    assert(false);
    return "(?)";
  }

  /*! True if no name has been assigned yet. */
  bool empty() const { return kind == FunctionKind::UNIDENTIFIED; }

  /*! Mark this as the top-level init function of an object file. */
  void set_as_top_level() { kind = FunctionKind::TOP_LEVEL_INIT; }

  /*! Mark this as a global function with the given name. */
  void set_as_global(std::string name) {
    kind = FunctionKind::GLOBAL;
    function_name = std::move(name);
  }

  /*! Mark this as method number `id` of type `tn`. */
  void set_as_method(std::string tn, int id) {
    kind = FunctionKind::METHOD;
    type_name = std::move(tn);
    method_id = id;
  }

  /*! True for the kinds (GLOBAL, METHOD) whose names are expected to be unique. */
  bool expected_unique() const {
    return kind == FunctionKind::GLOBAL || kind == FunctionKind::METHOD;
  }
};
|
||||
|
||||
class Function {
|
||||
public:
|
||||
Function(int _start_word, int _end_word);
|
||||
void analyze_prologue(const LinkedObjectFile& file);
|
||||
void find_global_function_defs(LinkedObjectFile& file);
|
||||
void find_method_defs(LinkedObjectFile& file);
|
||||
|
||||
int segment = -1;
|
||||
int start_word = -1;
|
||||
int end_word = -1; // not inclusive, but does include padding.
|
||||
|
||||
FunctionName guessed_name;
|
||||
|
||||
bool suspected_asm = false;
|
||||
|
||||
std::vector<Instruction> instructions;
|
||||
std::vector<BasicBlock> basic_blocks;
|
||||
std::shared_ptr<ControlFlowGraph> cfg = nullptr;
|
||||
|
||||
int prologue_start = -1;
|
||||
int prologue_end = -1;
|
||||
|
||||
int epilogue_start = -1;
|
||||
int epilogue_end = -1;
|
||||
|
||||
std::string warnings;
|
||||
|
||||
struct Prologue {
|
||||
bool decoded = false; // have we removed the prologue from basic blocks?
|
||||
int total_stack_usage = -1;
|
||||
|
||||
// ra/fp are treated differently from other register backups
|
||||
bool ra_backed_up = false;
|
||||
int ra_backup_offset = -1;
|
||||
|
||||
bool fp_backed_up = false;
|
||||
int fp_backup_offset = -1;
|
||||
|
||||
bool fp_set = false;
|
||||
|
||||
int n_gpr_backup = 0;
|
||||
int gpr_backup_offset = -1;
|
||||
|
||||
int n_fpr_backup = 0;
|
||||
int fpr_backup_offset = -1;
|
||||
|
||||
int n_stack_var_bytes = 0;
|
||||
int stack_var_offset = -1;
|
||||
|
||||
bool epilogue_ok = false;
|
||||
|
||||
std::string to_string(int indent = 0) const;
|
||||
|
||||
} prologue;
|
||||
|
||||
bool uses_fp_register = false;
|
||||
|
||||
private:
|
||||
void check_epilogue(const LinkedObjectFile& file);
|
||||
};
|
||||
|
||||
#endif // NEXT_FUNCTION_H
|
853
decompiler/ObjectFile/LinkedObjectFile.cpp
Normal file
853
decompiler/ObjectFile/LinkedObjectFile.cpp
Normal file
@ -0,0 +1,853 @@
|
||||
/*!
|
||||
* @file LinkedObjectFile.cpp
|
||||
* An object file's data with linking information included.
|
||||
*/
|
||||
#include "LinkedObjectFile.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <numeric>
|
||||
#include "decompiler/Disasm/InstructionDecode.h"
|
||||
#include "decompiler/config.h"
|
||||
|
||||
/*!
|
||||
* Set the number of segments in this object file.
|
||||
* This can only be done once, and must be done before adding any words.
|
||||
*/
|
||||
void LinkedObjectFile::set_segment_count(int n_segs) {
|
||||
assert(segments == 0);
|
||||
segments = n_segs;
|
||||
words_by_seg.resize(n_segs);
|
||||
label_per_seg_by_offset.resize(n_segs);
|
||||
offset_of_data_zone_by_seg.resize(n_segs);
|
||||
functions_by_seg.resize(n_segs);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Add a single word to the given segment.
|
||||
*/
|
||||
void LinkedObjectFile::push_back_word_to_segment(uint32_t word, int segment) {
|
||||
words_by_seg.at(segment).emplace_back(word);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get a label ID for a label which points to the given offset in the given segment.
|
||||
* Will return an existing label if one exists.
|
||||
*/
|
||||
int LinkedObjectFile::get_label_id_for(int seg, int offset) {
|
||||
auto kv = label_per_seg_by_offset.at(seg).find(offset);
|
||||
if (kv == label_per_seg_by_offset.at(seg).end()) {
|
||||
// create a new label
|
||||
int id = labels.size();
|
||||
Label label;
|
||||
label.target_segment = seg;
|
||||
label.offset = offset;
|
||||
label.name = "L" + std::to_string(id);
|
||||
label_per_seg_by_offset.at(seg)[offset] = id;
|
||||
labels.push_back(label);
|
||||
return id;
|
||||
} else {
|
||||
// return an existing label
|
||||
auto& label = labels.at(kv->second);
|
||||
assert(label.offset == offset);
|
||||
assert(label.target_segment == seg);
|
||||
return kv->second;
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the ID of the label which points to the given offset in the given segment.
|
||||
* Returns -1 if there is no label.
|
||||
*/
|
||||
int LinkedObjectFile::get_label_at(int seg, int offset) const {
|
||||
auto kv = label_per_seg_by_offset.at(seg).find(offset);
|
||||
if (kv == label_per_seg_by_offset.at(seg).end()) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return kv->second;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Does this label point to code? Can point to the middle of a function, or the start of a function.
|
||||
*/
|
||||
bool LinkedObjectFile::label_points_to_code(int label_id) const {
|
||||
auto& label = labels.at(label_id);
|
||||
auto data_start = int(offset_of_data_zone_by_seg.at(label.target_segment)) * 4;
|
||||
return label.offset < data_start;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the function starting at this label, or error if there is none.
|
||||
*/
|
||||
Function& LinkedObjectFile::get_function_at_label(int label_id) {
|
||||
auto& label = labels.at(label_id);
|
||||
for (auto& func : functions_by_seg.at(label.target_segment)) {
|
||||
// + 4 to skip past type tag to the first word, which is were the label points.
|
||||
if (func.start_word * 4 + 4 == label.offset) {
|
||||
return func;
|
||||
}
|
||||
}
|
||||
|
||||
assert(false);
|
||||
return functions_by_seg.front().front(); // to avoid error
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the name of the label.
|
||||
*/
|
||||
std::string LinkedObjectFile::get_label_name(int label_id) const {
|
||||
return labels.at(label_id).name;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Add link information that a word is a pointer to another word.
|
||||
*/
|
||||
bool LinkedObjectFile::pointer_link_word(int source_segment,
|
||||
int source_offset,
|
||||
int dest_segment,
|
||||
int dest_offset) {
|
||||
assert((source_offset % 4) == 0);
|
||||
|
||||
auto& word = words_by_seg.at(source_segment).at(source_offset / 4);
|
||||
assert(word.kind == LinkedWord::PLAIN_DATA);
|
||||
|
||||
if (dest_offset / 4 > (int)words_by_seg.at(dest_segment).size()) {
|
||||
// printf("HACK bad link ignored!\n");
|
||||
return false;
|
||||
}
|
||||
assert(dest_offset / 4 <= (int)words_by_seg.at(dest_segment).size());
|
||||
|
||||
word.kind = LinkedWord::PTR;
|
||||
word.label_id = get_label_id_for(dest_segment, dest_offset);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Add link information that a word is linked to a symbol/type/empty list.
|
||||
*/
|
||||
void LinkedObjectFile::symbol_link_word(int source_segment,
|
||||
int source_offset,
|
||||
const char* name,
|
||||
LinkedWord::Kind kind) {
|
||||
assert((source_offset % 4) == 0);
|
||||
auto& word = words_by_seg.at(source_segment).at(source_offset / 4);
|
||||
// assert(word.kind == LinkedWord::PLAIN_DATA);
|
||||
if (word.kind != LinkedWord::PLAIN_DATA) {
|
||||
printf("bad symbol link word\n");
|
||||
}
|
||||
word.kind = kind;
|
||||
word.symbol_name = name;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Add link information that a word's lower 16 bits are the offset of the given symbol relative to
|
||||
* the symbol table register.
|
||||
*/
|
||||
void LinkedObjectFile::symbol_link_offset(int source_segment, int source_offset, const char* name) {
|
||||
assert((source_offset % 4) == 0);
|
||||
auto& word = words_by_seg.at(source_segment).at(source_offset / 4);
|
||||
assert(word.kind == LinkedWord::PLAIN_DATA);
|
||||
word.kind = LinkedWord::SYM_OFFSET;
|
||||
word.symbol_name = name;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Add link information that a lui/ori pair will load a pointer.
|
||||
*/
|
||||
void LinkedObjectFile::pointer_link_split_word(int source_segment,
|
||||
int source_hi_offset,
|
||||
int source_lo_offset,
|
||||
int dest_segment,
|
||||
int dest_offset) {
|
||||
assert((source_hi_offset % 4) == 0);
|
||||
assert((source_lo_offset % 4) == 0);
|
||||
|
||||
auto& hi_word = words_by_seg.at(source_segment).at(source_hi_offset / 4);
|
||||
auto& lo_word = words_by_seg.at(source_segment).at(source_lo_offset / 4);
|
||||
|
||||
// assert(dest_offset / 4 <= (int)words_by_seg.at(dest_segment).size());
|
||||
assert(hi_word.kind == LinkedWord::PLAIN_DATA);
|
||||
assert(lo_word.kind == LinkedWord::PLAIN_DATA);
|
||||
|
||||
hi_word.kind = LinkedWord::HI_PTR;
|
||||
hi_word.label_id = get_label_id_for(dest_segment, dest_offset);
|
||||
|
||||
lo_word.kind = LinkedWord::LO_PTR;
|
||||
lo_word.label_id = hi_word.label_id;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Rename the labels so they are named L1, L2, ..., in the order of the addresses that they refer
|
||||
* to. Will clear any custom label names.
|
||||
*/
|
||||
uint32_t LinkedObjectFile::set_ordered_label_names() {
|
||||
std::vector<int> indices(labels.size());
|
||||
std::iota(indices.begin(), indices.end(), 0);
|
||||
|
||||
std::sort(indices.begin(), indices.end(), [&](int a, int b) {
|
||||
auto& la = labels.at(a);
|
||||
auto& lb = labels.at(b);
|
||||
if (la.target_segment == lb.target_segment) {
|
||||
return la.offset < lb.offset;
|
||||
}
|
||||
return la.target_segment < lb.target_segment;
|
||||
});
|
||||
|
||||
for (size_t i = 0; i < indices.size(); i++) {
|
||||
auto& label = labels.at(indices[i]);
|
||||
label.name = "L" + std::to_string(i + 1);
|
||||
}
|
||||
|
||||
return labels.size();
|
||||
}
|
||||
|
||||
// Display names for the segments, indexed by segment number. There are only three entries, so
// users assert that the segment count is at most 3 before indexing.
static const char* segment_names[] = {"main segment", "debug segment", "top-level segment"};
|
||||
|
||||
/*!
|
||||
* Print all the words, with link information and labels.
|
||||
*/
|
||||
std::string LinkedObjectFile::print_words() {
|
||||
std::string result;
|
||||
|
||||
assert(segments <= 3);
|
||||
for (int seg = segments; seg-- > 0;) {
|
||||
// segment header
|
||||
result += ";------------------------------------------\n; ";
|
||||
result += segment_names[seg];
|
||||
result += "\n;------------------------------------------\n";
|
||||
|
||||
// print each word in the segment
|
||||
for (size_t i = 0; i < words_by_seg.at(seg).size(); i++) {
|
||||
for (int j = 0; j < 4; j++) {
|
||||
auto label_id = get_label_at(seg, i * 4 + j);
|
||||
if (label_id != -1) {
|
||||
result += labels.at(label_id).name + ":";
|
||||
if (j != 0) {
|
||||
result += " (offset " + std::to_string(j) + ")";
|
||||
}
|
||||
result += "\n";
|
||||
}
|
||||
}
|
||||
|
||||
auto& word = words_by_seg[seg][i];
|
||||
append_word_to_string(result, word);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Add a word's printed representation to the end of a string. Internal helper for print_words.
|
||||
*/
|
||||
void LinkedObjectFile::append_word_to_string(std::string& dest, const LinkedWord& word) const {
|
||||
char buff[128];
|
||||
|
||||
switch (word.kind) {
|
||||
case LinkedWord::PLAIN_DATA:
|
||||
sprintf(buff, " .word 0x%x\n", word.data);
|
||||
break;
|
||||
case LinkedWord::PTR:
|
||||
sprintf(buff, " .word %s\n", labels.at(word.label_id).name.c_str());
|
||||
break;
|
||||
case LinkedWord::SYM_PTR:
|
||||
sprintf(buff, " .symbol %s\n", word.symbol_name.c_str());
|
||||
break;
|
||||
case LinkedWord::TYPE_PTR:
|
||||
sprintf(buff, " .type %s\n", word.symbol_name.c_str());
|
||||
break;
|
||||
case LinkedWord::EMPTY_PTR:
|
||||
sprintf(buff, " .empty-list\n"); // ?
|
||||
break;
|
||||
case LinkedWord::HI_PTR:
|
||||
sprintf(buff, " .ptr-hi 0x%x %s\n", word.data >> 16,
|
||||
labels.at(word.label_id).name.c_str());
|
||||
break;
|
||||
case LinkedWord::LO_PTR:
|
||||
sprintf(buff, " .ptr-lo 0x%x %s\n", word.data >> 16,
|
||||
labels.at(word.label_id).name.c_str());
|
||||
break;
|
||||
case LinkedWord::SYM_OFFSET:
|
||||
sprintf(buff, " .sym-off 0x%x %s\n", word.data >> 16, word.symbol_name.c_str());
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error("nyi");
|
||||
}
|
||||
|
||||
dest += buff;
|
||||
}
|
||||
|
||||
/*!
|
||||
* For each segment, determine where the data area starts. Before the data area is the code area.
|
||||
*/
|
||||
void LinkedObjectFile::find_code() {
|
||||
if (segments == 1) {
|
||||
// single segment object files should never have any code.
|
||||
auto& seg = words_by_seg.front();
|
||||
for (auto& word : seg) {
|
||||
if (!word.symbol_name.empty()) {
|
||||
assert(word.symbol_name != "function");
|
||||
}
|
||||
}
|
||||
offset_of_data_zone_by_seg.at(0) = 0;
|
||||
stats.data_bytes = words_by_seg.front().size() * 4;
|
||||
stats.code_bytes = 0;
|
||||
|
||||
} else if (segments == 3) {
|
||||
// V3 object files will have all the functions, then all the static data. So to find the
|
||||
// divider, we look for the last "function" tag, then find the last jr $ra instruction after
|
||||
// that (plus one for delay slot) and assume that after that is data. Additionally, we check to
|
||||
// make sure that there are no "function" type tags in the data section, although this is
|
||||
// redundant.
|
||||
for (int i = 0; i < segments; i++) {
|
||||
// try to find the last reference to "function":
|
||||
bool found_function = false;
|
||||
size_t function_loc = -1;
|
||||
for (size_t j = words_by_seg.at(i).size(); j-- > 0;) {
|
||||
auto& word = words_by_seg.at(i).at(j);
|
||||
if (word.kind == LinkedWord::TYPE_PTR && word.symbol_name == "function") {
|
||||
function_loc = j;
|
||||
found_function = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (found_function) {
|
||||
// look forward until we find "jr ra"
|
||||
const uint32_t jr_ra = 0x3e00008;
|
||||
bool found_jr_ra = false;
|
||||
size_t jr_ra_loc = -1;
|
||||
|
||||
for (size_t j = function_loc; j < words_by_seg.at(i).size(); j++) {
|
||||
auto& word = words_by_seg.at(i).at(j);
|
||||
if (word.kind == LinkedWord::PLAIN_DATA && word.data == jr_ra) {
|
||||
found_jr_ra = true;
|
||||
jr_ra_loc = j;
|
||||
}
|
||||
}
|
||||
|
||||
assert(found_jr_ra);
|
||||
assert(jr_ra_loc + 1 < words_by_seg.at(i).size());
|
||||
offset_of_data_zone_by_seg.at(i) = jr_ra_loc + 2;
|
||||
|
||||
} else {
|
||||
// no functions
|
||||
offset_of_data_zone_by_seg.at(i) = 0;
|
||||
}
|
||||
|
||||
// add label for debug purposes
|
||||
if (offset_of_data_zone_by_seg.at(i) < words_by_seg.at(i).size()) {
|
||||
auto data_label_id = get_label_id_for(i, 4 * (offset_of_data_zone_by_seg.at(i)));
|
||||
labels.at(data_label_id).name = "L-data-start";
|
||||
}
|
||||
|
||||
// verify there are no functions after the data section starts
|
||||
for (size_t j = offset_of_data_zone_by_seg.at(i); j < words_by_seg.at(i).size(); j++) {
|
||||
auto& word = words_by_seg.at(i).at(j);
|
||||
if (word.kind == LinkedWord::TYPE_PTR && word.symbol_name == "function") {
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
// sizes:
|
||||
stats.data_bytes += 4 * (words_by_seg.at(i).size() - offset_of_data_zone_by_seg.at(i)) * 4;
|
||||
stats.code_bytes += 4 * offset_of_data_zone_by_seg.at(i);
|
||||
}
|
||||
} else {
|
||||
// for files which we couldn't extract link data yet, they will have 0 segments and its ok.
|
||||
assert(segments == 0);
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Find all the functions in each segment.
|
||||
*/
|
||||
void LinkedObjectFile::find_functions() {
|
||||
if (segments == 1) {
|
||||
// it's a v2 file, shouldn't have any functions
|
||||
assert(offset_of_data_zone_by_seg.at(0) == 0);
|
||||
} else {
|
||||
// we assume functions don't have any data in between them, so we use the "function" type tag to
|
||||
// mark the end of the previous function and the start of the next. This means that some
|
||||
// functions will have a few 0x0 words after then for padding (GOAL functions are aligned), but
|
||||
// this is something that the disassembler should handle.
|
||||
for (int seg = 0; seg < segments; seg++) {
|
||||
// start at the end and work backward...
|
||||
int function_end = offset_of_data_zone_by_seg.at(seg);
|
||||
while (function_end > 0) {
|
||||
// back up until we find function type tag
|
||||
int function_tag_loc = function_end;
|
||||
bool found_function_tag_loc = false;
|
||||
for (; function_tag_loc-- > 0;) {
|
||||
auto& word = words_by_seg.at(seg).at(function_tag_loc);
|
||||
if (word.kind == LinkedWord::TYPE_PTR && word.symbol_name == "function") {
|
||||
found_function_tag_loc = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// mark this as a function, and try again from the current function start
|
||||
assert(found_function_tag_loc);
|
||||
stats.function_count++;
|
||||
functions_by_seg.at(seg).emplace_back(function_tag_loc, function_end);
|
||||
function_end = function_tag_loc;
|
||||
}
|
||||
|
||||
std::reverse(functions_by_seg.at(seg).begin(), functions_by_seg.at(seg).end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Run the disassembler on all functions.
|
||||
*/
|
||||
void LinkedObjectFile::disassemble_functions() {
|
||||
for (int seg = 0; seg < segments; seg++) {
|
||||
for (auto& function : functions_by_seg.at(seg)) {
|
||||
for (auto word = function.start_word; word < function.end_word; word++) {
|
||||
// decode!
|
||||
function.instructions.push_back(
|
||||
decode_instruction(words_by_seg.at(seg).at(word), *this, seg, word));
|
||||
if (function.instructions.back().is_valid()) {
|
||||
stats.decoded_ops++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Analyze disassembly for use of the FP register, and add labels for fp-relative data access
|
||||
*/
|
||||
void LinkedObjectFile::process_fp_relative_links() {
|
||||
for (int seg = 0; seg < segments; seg++) {
|
||||
for (auto& function : functions_by_seg.at(seg)) {
|
||||
for (size_t instr_idx = 0; instr_idx < function.instructions.size(); instr_idx++) {
|
||||
// we possibly need to look at three instructions
|
||||
auto& instr = function.instructions[instr_idx];
|
||||
auto* prev_instr = (instr_idx > 0) ? &function.instructions[instr_idx - 1] : nullptr;
|
||||
auto* pprev_instr = (instr_idx > 1) ? &function.instructions[instr_idx - 2] : nullptr;
|
||||
|
||||
// ignore storing FP onto the stack
|
||||
if ((instr.kind == InstructionKind::SD || instr.kind == InstructionKind::SQ) &&
|
||||
instr.get_src(0).get_reg() == Register(Reg::GPR, Reg::FP)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// HACKs
|
||||
if (instr.kind == InstructionKind::PEXTLW) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// search over instruction sources
|
||||
for (int i = 0; i < instr.n_src; i++) {
|
||||
auto& src = instr.src[i];
|
||||
if (src.kind == InstructionAtom::REGISTER // must be reg
|
||||
&& src.get_reg().get_kind() == Reg::GPR // gpr
|
||||
&& src.get_reg().get_gpr() == Reg::FP) { // fp reg.
|
||||
|
||||
stats.n_fp_reg_use++;
|
||||
|
||||
// offset of fp at this instruction.
|
||||
int current_fp = 4 * (function.start_word + 1);
|
||||
function.uses_fp_register = true;
|
||||
|
||||
switch (instr.kind) {
|
||||
// fp-relative load
|
||||
case InstructionKind::LW:
|
||||
case InstructionKind::LWC1:
|
||||
case InstructionKind::LD:
|
||||
// generate pointer to fp-relative data
|
||||
case InstructionKind::DADDIU: {
|
||||
auto& atom = instr.get_imm_src();
|
||||
atom.set_label(get_label_id_for(seg, current_fp + atom.get_imm()));
|
||||
stats.n_fp_reg_use_resolved++;
|
||||
} break;
|
||||
|
||||
// in the case that addiu doesn't have enough range (+/- 2^15), GOAL has two
|
||||
// strategies: 1). use ori + daddu (ori doesn't sign extend, so this lets us go +2^16,
|
||||
// -0) 2). use lui + ori + daddu (can reach anywhere in the address space) It seems
|
||||
// that addu is used to get pointers to floating point values and daddu is used in
|
||||
// other cases. Also, the position of the fp register is swapped between the two.
|
||||
case InstructionKind::DADDU:
|
||||
case InstructionKind::ADDU: {
|
||||
assert(prev_instr);
|
||||
assert(prev_instr->kind == InstructionKind::ORI);
|
||||
int offset_reg_src_id = instr.kind == InstructionKind::DADDU ? 0 : 1;
|
||||
auto offset_reg = instr.get_src(offset_reg_src_id).get_reg();
|
||||
assert(offset_reg == prev_instr->get_dst(0).get_reg());
|
||||
assert(offset_reg == prev_instr->get_src(0).get_reg());
|
||||
auto& atom = prev_instr->get_imm_src();
|
||||
int additional_offset = 0;
|
||||
if (pprev_instr && pprev_instr->kind == InstructionKind::LUI) {
|
||||
assert(pprev_instr->get_dst(0).get_reg() == offset_reg);
|
||||
additional_offset = (1 << 16) * pprev_instr->get_imm_src().get_imm();
|
||||
}
|
||||
atom.set_label(
|
||||
get_label_id_for(seg, current_fp + atom.get_imm() + additional_offset));
|
||||
stats.n_fp_reg_use_resolved++;
|
||||
} break;
|
||||
|
||||
default:
|
||||
printf("unknown fp using op: %s\n", instr.to_string(*this).c_str());
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Print disassembled functions and data segments.
|
||||
*/
|
||||
std::string LinkedObjectFile::print_disassembly() {
|
||||
bool write_hex = get_config().write_hex_near_instructions;
|
||||
std::string result;
|
||||
|
||||
assert(segments <= 3);
|
||||
for (int seg = segments; seg-- > 0;) {
|
||||
// segment header
|
||||
result += ";------------------------------------------\n; ";
|
||||
result += segment_names[seg];
|
||||
result += "\n;------------------------------------------\n\n";
|
||||
|
||||
// functions
|
||||
for (auto& func : functions_by_seg.at(seg)) {
|
||||
result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n";
|
||||
result += "; .function " + func.guessed_name.to_string() + "\n";
|
||||
result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n";
|
||||
result += func.prologue.to_string(2) + "\n";
|
||||
if(!func.warnings.empty()) {
|
||||
result += "Warnings: " + func.warnings + "\n";
|
||||
}
|
||||
|
||||
// print each instruction in the function.
|
||||
bool in_delay_slot = false;
|
||||
|
||||
for (int i = 1; i < func.end_word - func.start_word; i++) {
|
||||
auto label_id = get_label_at(seg, (func.start_word + i) * 4);
|
||||
if (label_id != -1) {
|
||||
result += labels.at(label_id).name + ":\n";
|
||||
}
|
||||
|
||||
for (int j = 1; j < 4; j++) {
|
||||
// assert(get_label_at(seg, (func.start_word + i)*4 + j) == -1);
|
||||
if (get_label_at(seg, (func.start_word + i) * 4 + j) != -1) {
|
||||
result += "BAD OFFSET LABEL: ";
|
||||
result += labels.at(get_label_at(seg, (func.start_word + i) * 4 + j)).name + "\n";
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
auto& instr = func.instructions.at(i);
|
||||
std::string line = " " + instr.to_string(*this);
|
||||
|
||||
if (write_hex) {
|
||||
if (line.length() < 60) {
|
||||
line.append(60 - line.length(), ' ');
|
||||
}
|
||||
result += line;
|
||||
result += " ;;";
|
||||
auto& word = words_by_seg[seg].at(func.start_word + i);
|
||||
append_word_to_string(result, word);
|
||||
} else {
|
||||
result += line + "\n";
|
||||
}
|
||||
|
||||
if (in_delay_slot) {
|
||||
result += "\n";
|
||||
in_delay_slot = false;
|
||||
}
|
||||
|
||||
if (gOpcodeInfo[(int)instr.kind].has_delay_slot) {
|
||||
in_delay_slot = true;
|
||||
}
|
||||
}
|
||||
result += "\n";
|
||||
//
|
||||
// int bid = 0;
|
||||
// for(auto& bblock : func.basic_blocks) {
|
||||
// result += "BLOCK " + std::to_string(bid++)+ "\n";
|
||||
// for(int i = bblock.start_word; i < bblock.end_word; i++) {
|
||||
// if(i >= 0 && i < func.instructions.size()) {
|
||||
// result += func.instructions.at(i).to_string(*this) + "\n";
|
||||
// } else {
|
||||
// result += "BAD BBLOCK INSTR ID " + std::to_string(i);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// hack
|
||||
if(func.cfg && !func.cfg->is_fully_resolved()) {
|
||||
result += func.cfg->to_dot();
|
||||
result += "\n";
|
||||
}
|
||||
if(func.cfg) {
|
||||
result += func.cfg->to_form_string() + "\n";
|
||||
|
||||
// To debug block stuff.
|
||||
/*
|
||||
int bid = 0;
|
||||
for(auto& block : func.basic_blocks) {
|
||||
in_delay_slot = false;
|
||||
result += "B" + std::to_string(bid++) + "\n";
|
||||
for(auto i = block.start_word; i < block.end_word; i++) {
|
||||
auto label_id = get_label_at(seg, (func.start_word + i) * 4);
|
||||
if (label_id != -1) {
|
||||
result += labels.at(label_id).name + ":\n";
|
||||
}
|
||||
auto& instr = func.instructions.at(i);
|
||||
result += " " + instr.to_string(*this) + "\n";
|
||||
if (in_delay_slot) {
|
||||
result += "\n";
|
||||
in_delay_slot = false;
|
||||
}
|
||||
|
||||
if (gOpcodeInfo[(int)instr.kind].has_delay_slot) {
|
||||
in_delay_slot = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
result += "\n\n\n";
|
||||
}
|
||||
|
||||
// print data
|
||||
for (size_t i = offset_of_data_zone_by_seg.at(seg); i < words_by_seg.at(seg).size(); i++) {
|
||||
for (int j = 0; j < 4; j++) {
|
||||
auto label_id = get_label_at(seg, i * 4 + j);
|
||||
if (label_id != -1) {
|
||||
result += labels.at(label_id).name + ":";
|
||||
if (j != 0) {
|
||||
result += " (offset " + std::to_string(j) + ")";
|
||||
}
|
||||
result += "\n";
|
||||
}
|
||||
}
|
||||
|
||||
auto& word = words_by_seg[seg][i];
|
||||
append_word_to_string(result, word);
|
||||
|
||||
if (word.kind == LinkedWord::TYPE_PTR && word.symbol_name == "string") {
|
||||
result += "; " + get_goal_string(seg, i) + "\n";
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Hacky way to get a GOAL string object
|
||||
*/
|
||||
std::string LinkedObjectFile::get_goal_string(int seg, int word_idx) {
|
||||
std::string result = "\"";
|
||||
// next should be the size
|
||||
if (word_idx + 1 >= int(words_by_seg[seg].size())) {
|
||||
return "invalid string!\n";
|
||||
}
|
||||
LinkedWord& size_word = words_by_seg[seg].at(word_idx + 1);
|
||||
if (size_word.kind != LinkedWord::PLAIN_DATA) {
|
||||
// sometimes an array of string pointer triggers this!
|
||||
return "invalid string!\n";
|
||||
}
|
||||
|
||||
// result += "(size " + std::to_string(size_word.data) + "): ";
|
||||
// now characters...
|
||||
for (size_t i = 0; i < size_word.data; i++) {
|
||||
int word_offset = word_idx + 2 + (i / 4);
|
||||
int byte_offset = i % 4;
|
||||
auto& word = words_by_seg[seg].at(word_offset);
|
||||
if (word.kind != LinkedWord::PLAIN_DATA) {
|
||||
return "invalid string! (check me!)\n";
|
||||
}
|
||||
char cword[4];
|
||||
memcpy(cword, &word.data, 4);
|
||||
result += cword[byte_offset];
|
||||
}
|
||||
return result + "\"";
|
||||
}
|
||||
|
||||
/*!
|
||||
* Return true if the object file contains any functions at all.
|
||||
*/
|
||||
bool LinkedObjectFile::has_any_functions() {
|
||||
for (auto& fv : functions_by_seg) {
|
||||
if (!fv.empty())
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Print all scripts in this file.
|
||||
*/
|
||||
std::string LinkedObjectFile::print_scripts() {
|
||||
std::string result;
|
||||
for (int seg = 0; seg < segments; seg++) {
|
||||
std::vector<bool> already_printed(words_by_seg[seg].size(), false);
|
||||
|
||||
// the linked list layout algorithm of GOAL puts the first pair first.
|
||||
// so we want to go in forward order to catch the beginning correctly
|
||||
for (size_t word_idx = 0; word_idx < words_by_seg[seg].size(); word_idx++) {
|
||||
// don't print parts of scripts we've already seen
|
||||
// (note that scripts could share contents, which is supported, this is just for starting
|
||||
// off a script print)
|
||||
if (already_printed[word_idx])
|
||||
continue;
|
||||
|
||||
// check for linked list by looking for anything that accesses this as a pair (offset of 2)
|
||||
auto label_id = get_label_at(seg, 4 * word_idx + 2);
|
||||
if (label_id != -1) {
|
||||
auto& label = labels.at(label_id);
|
||||
if ((label.offset & 7) == 2) {
|
||||
result += to_form_script(seg, word_idx, already_printed)->toStringPretty(0, 100) + "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Is the object pointed to the empty list?
|
||||
*/
|
||||
bool LinkedObjectFile::is_empty_list(int seg, int byte_idx) {
|
||||
assert((byte_idx % 4) == 0);
|
||||
auto& word = words_by_seg.at(seg).at(byte_idx / 4);
|
||||
return word.kind == LinkedWord::EMPTY_PTR;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Convert a linked list to a Form for easy printing.
|
||||
* Note : this takes the address of the car of the pair. which is perhaps a bit confusing
|
||||
* (in GOAL, this would be (&-> obj car))
|
||||
*/
|
||||
std::shared_ptr<Form> LinkedObjectFile::to_form_script(int seg,
|
||||
int word_idx,
|
||||
std::vector<bool>& seen) {
|
||||
// the object to currently print. to start off, create pair from the car address we've been given.
|
||||
int goal_print_obj = word_idx * 4 + 2;
|
||||
|
||||
// resulting form. we can't have a totally empty list (as an empty list looks like a symbol,
|
||||
// so it wouldn't be flagged), so it's safe to make this a pair.
|
||||
auto result = std::make_shared<Form>();
|
||||
result->kind = FormKind::PAIR;
|
||||
|
||||
// the current pair to fill out.
|
||||
auto fill = result;
|
||||
|
||||
// loop until we run out of things to add
|
||||
for (;;) {
|
||||
// check the thing to print is a a pair.
|
||||
if ((goal_print_obj & 7) == 2) {
|
||||
// first convert the car (again, with (&-> obj car))
|
||||
fill->pair[0] = to_form_script_object(seg, goal_print_obj - 2, seen);
|
||||
seen.at(goal_print_obj / 4) = true;
|
||||
|
||||
auto cdr_addr = goal_print_obj + 2;
|
||||
|
||||
if (is_empty_list(seg, cdr_addr)) {
|
||||
// the list has ended!
|
||||
fill->pair[1] = gSymbolTable.getEmptyPair();
|
||||
return result;
|
||||
} else {
|
||||
// cdr object should be aligned.
|
||||
assert((cdr_addr % 4) == 0);
|
||||
auto& cdr_word = words_by_seg.at(seg).at(cdr_addr / 4);
|
||||
// check for proper list
|
||||
if (cdr_word.kind == LinkedWord::PTR && (labels.at(cdr_word.label_id).offset & 7) == 2) {
|
||||
// yes, proper list. add another pair and link it in to the list.
|
||||
goal_print_obj = labels.at(cdr_word.label_id).offset;
|
||||
fill->pair[1] = std::make_shared<Form>();
|
||||
fill->pair[1]->kind = FormKind::PAIR;
|
||||
fill = fill->pair[1];
|
||||
} else {
|
||||
// improper list, put the last thing in and end
|
||||
fill->pair[1] = to_form_script_object(seg, cdr_addr, seen);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// improper list, should be impossible to get here because of earlier checks
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Is the thing pointed to a string?
|
||||
*/
|
||||
bool LinkedObjectFile::is_string(int seg, int byte_idx) {
|
||||
if (byte_idx % 4) {
|
||||
return false; // must be aligned pointer.
|
||||
}
|
||||
int type_tag_ptr = byte_idx - 4;
|
||||
// must fit in segment
|
||||
if (type_tag_ptr < 0 || size_t(type_tag_ptr) >= words_by_seg.at(seg).size() * 4) {
|
||||
return false;
|
||||
}
|
||||
auto& type_word = words_by_seg.at(seg).at(type_tag_ptr / 4);
|
||||
return type_word.kind == LinkedWord::TYPE_PTR && type_word.symbol_name == "string";
|
||||
}
|
||||
|
||||
/*!
|
||||
* Convert a (pointer object) to some nice representation.
|
||||
*/
|
||||
std::shared_ptr<Form> LinkedObjectFile::to_form_script_object(int seg,
|
||||
int byte_idx,
|
||||
std::vector<bool>& seen) {
|
||||
std::shared_ptr<Form> result;
|
||||
|
||||
switch (byte_idx & 7) {
|
||||
case 0:
|
||||
case 4: {
|
||||
auto& word = words_by_seg.at(seg).at(byte_idx / 4);
|
||||
if (word.kind == LinkedWord::SYM_PTR) {
|
||||
// .symbol xxxx
|
||||
result = toForm(word.symbol_name);
|
||||
} else if (word.kind == LinkedWord::PLAIN_DATA) {
|
||||
// .word xxxxx
|
||||
result = toForm(std::to_string(word.data));
|
||||
} else if (word.kind == LinkedWord::PTR) {
|
||||
// might be a sub-list, or some other random pointer
|
||||
auto offset = labels.at(word.label_id).offset;
|
||||
if ((offset & 7) == 2) {
|
||||
// list!
|
||||
result = to_form_script(seg, offset / 4, seen);
|
||||
} else {
|
||||
if (is_string(seg, offset)) {
|
||||
result = toForm(get_goal_string(seg, offset / 4 - 1));
|
||||
} else {
|
||||
// some random pointer, just print the label.
|
||||
result = toForm(labels.at(word.label_id).name);
|
||||
}
|
||||
}
|
||||
} else if (word.kind == LinkedWord::EMPTY_PTR) {
|
||||
result = gSymbolTable.getEmptyPair();
|
||||
} else {
|
||||
std::string debug;
|
||||
append_word_to_string(debug, word);
|
||||
printf("don't know how to print %s\n", debug.c_str());
|
||||
assert(false);
|
||||
}
|
||||
} break;
|
||||
|
||||
case 2: // bad, a pair snuck through.
|
||||
default:
|
||||
// pointers should be aligned!
|
||||
printf("align %d\n", byte_idx & 7);
|
||||
assert(false);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
131
decompiler/ObjectFile/LinkedObjectFile.h
Normal file
131
decompiler/ObjectFile/LinkedObjectFile.h
Normal file
@ -0,0 +1,131 @@
|
||||
/*!
|
||||
* @file LinkedObjectFile.h
|
||||
* An object file's data with linking information included.
|
||||
*/
|
||||
|
||||
#ifndef NEXT_LINKEDOBJECTFILE_H
|
||||
#define NEXT_LINKEDOBJECTFILE_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include "LinkedWord.h"
|
||||
#include "decompiler/Function/Function.h"
|
||||
#include "decompiler/util/LispPrint.h"
|
||||
|
||||
|
||||
/*!
 * A named location inside an object file.
 * The location is a byte offset and is not required to be word aligned.
 */
struct Label {
  std::string name;    // text printed for this label
  int target_segment;  // segment the label points into
  int offset;          // position within that segment, in bytes
};
|
||||
|
||||
/*!
|
||||
* An object file's data with linking information included.
|
||||
*/
|
||||
class LinkedObjectFile {
|
||||
public:
|
||||
LinkedObjectFile() = default;
|
||||
void set_segment_count(int n_segs);
|
||||
void push_back_word_to_segment(uint32_t word, int segment);
|
||||
int get_label_id_for(int seg, int offset);
|
||||
int get_label_at(int seg, int offset) const;
|
||||
bool label_points_to_code(int label_id) const;
|
||||
bool pointer_link_word(int source_segment, int source_offset, int dest_segment, int dest_offset);
|
||||
void pointer_link_split_word(int source_segment, int source_hi_offset, int source_lo_offset, int dest_segment, int dest_offset);
|
||||
void symbol_link_word(int source_segment, int source_offset, const char* name, LinkedWord::Kind kind);
|
||||
void symbol_link_offset(int source_segment, int source_offset, const char* name);
|
||||
Function& get_function_at_label(int label_id);
|
||||
std::string get_label_name(int label_id) const;
|
||||
uint32_t set_ordered_label_names();
|
||||
void find_code();
|
||||
std::string print_words();
|
||||
void find_functions();
|
||||
void disassemble_functions();
|
||||
void process_fp_relative_links();
|
||||
std::string print_scripts();
|
||||
std::string print_disassembly();
|
||||
bool has_any_functions();
|
||||
void append_word_to_string(std::string& dest, const LinkedWord& word) const;
|
||||
|
||||
struct Stats {
|
||||
uint32_t total_code_bytes = 0;
|
||||
uint32_t total_v2_code_bytes = 0;
|
||||
uint32_t total_v2_pointers = 0;
|
||||
uint32_t total_v2_pointer_seeks = 0;
|
||||
uint32_t total_v2_link_bytes = 0;
|
||||
uint32_t total_v2_symbol_links = 0;
|
||||
uint32_t total_v2_symbol_count = 0;
|
||||
|
||||
uint32_t v3_code_bytes = 0;
|
||||
uint32_t v3_pointers = 0;
|
||||
uint32_t v3_split_pointers = 0;
|
||||
uint32_t v3_word_pointers = 0;
|
||||
uint32_t v3_pointer_seeks = 0;
|
||||
uint32_t v3_link_bytes = 0;
|
||||
|
||||
uint32_t v3_symbol_count = 0;
|
||||
uint32_t v3_symbol_link_offset = 0;
|
||||
uint32_t v3_symbol_link_word = 0;
|
||||
|
||||
uint32_t data_bytes = 0;
|
||||
uint32_t code_bytes = 0;
|
||||
|
||||
uint32_t function_count = 0;
|
||||
uint32_t decoded_ops = 0;
|
||||
|
||||
uint32_t n_fp_reg_use = 0;
|
||||
uint32_t n_fp_reg_use_resolved = 0;
|
||||
|
||||
|
||||
void add(const Stats& other) {
|
||||
total_code_bytes += other.total_code_bytes;
|
||||
total_v2_code_bytes += other.total_v2_code_bytes;
|
||||
total_v2_pointers += other.total_v2_pointers;
|
||||
total_v2_pointer_seeks += other.total_v2_pointer_seeks;
|
||||
total_v2_link_bytes += other.total_v2_link_bytes;
|
||||
total_v2_symbol_links += other.total_v2_symbol_links;
|
||||
total_v2_symbol_count += other.total_v2_symbol_count;
|
||||
v3_code_bytes += other.v3_code_bytes;
|
||||
v3_pointers += other.v3_pointers;
|
||||
v3_pointer_seeks += other.v3_pointer_seeks;
|
||||
v3_link_bytes += other.v3_link_bytes;
|
||||
v3_word_pointers += other.v3_word_pointers;
|
||||
v3_split_pointers += other.v3_split_pointers;
|
||||
v3_symbol_count += other.v3_symbol_count;
|
||||
v3_symbol_link_offset += other.v3_symbol_link_offset;
|
||||
v3_symbol_link_word += other.v3_symbol_link_word;
|
||||
data_bytes += other.data_bytes;
|
||||
code_bytes += other.code_bytes;
|
||||
function_count += other.function_count;
|
||||
decoded_ops += other.decoded_ops;
|
||||
n_fp_reg_use += other.n_fp_reg_use;
|
||||
n_fp_reg_use_resolved += other.n_fp_reg_use_resolved;
|
||||
}
|
||||
} stats;
|
||||
|
||||
int segments = 0;
|
||||
std::vector<std::vector<LinkedWord>> words_by_seg;
|
||||
std::vector<uint32_t> offset_of_data_zone_by_seg;
|
||||
std::vector<std::vector<Function>> functions_by_seg;
|
||||
std::vector<Label> labels;
|
||||
|
||||
private:
|
||||
std::shared_ptr<Form> to_form_script(int seg, int word_idx, std::vector<bool>& seen);
|
||||
std::shared_ptr<Form> to_form_script_object(int seg, int byte_idx, std::vector<bool> &seen);
|
||||
bool is_empty_list(int seg, int byte_idx);
|
||||
bool is_string(int seg, int byte_idx);
|
||||
std::string get_goal_string(int seg, int word_idx);
|
||||
|
||||
std::vector<std::unordered_map<int, int>> label_per_seg_by_offset;
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif //NEXT_LINKEDOBJECTFILE_H
|
797
decompiler/ObjectFile/LinkedObjectFileCreation.cpp
Normal file
797
decompiler/ObjectFile/LinkedObjectFileCreation.cpp
Normal file
@ -0,0 +1,797 @@
|
||||
/*!
|
||||
* @file LinkedObjectFileCreation.cpp
|
||||
* Create a LinkedObjectFile from raw object file data.
|
||||
* This implements a decoder for the GOAL linking format.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include "LinkedObjectFileCreation.h"
|
||||
#include "decompiler/config.h"
|
||||
#include "decompiler/TypeSystem/TypeInfo.h"
|
||||
|
||||
// There are three link versions:
// V2 - not really in use anymore, but V4 will reuse logic from it (and the game didn't rename
//      the functions).
// V3 - optimized for code and small stuff. Supports segments (main, debug, top-level).
// V4 - optimized for data (never code) and big stuff; special optimization possible for large
//      V4 objects at the end of a DGO.
// Internally V4 is really just a V2, but with the link data coming after the object data:
// there's a V4 header at the beginning, the object data, and then a V2 header and V2 link data.
|
||||
|
||||
// Leading fields shared by the link headers of V2, V3 and V4 objects.
// For V3/V4 this sits at the very start of the object data.
struct LinkHeaderCommon {
  // for the basic offset; 0 or -1 depending on version
  uint32_t type_tag;
  // length of the link data (exact meaning differs between versions)
  uint32_t length;
  // link version: 2, 3 or 4
  uint16_t version;
};
|
||||
|
||||
// Header that precedes V2 link data.
struct LinkHeaderV2 {
  // always -1
  uint32_t type_tag;
  // length of the link data
  uint32_t length;
  // always 2
  uint32_t version;
};
|
||||
|
||||
// Header at the start of a V4 object. The object data follows it directly, and the V2 link
// data comes after the object data.
struct LinkHeaderV4 {
  // always -1
  uint32_t type_tag;
  // length of the V2 link data found after the object
  uint32_t length;
  // always 4
  uint32_t version;
  // number of bytes of object data before the link data starts
  uint32_t code_size;
};
|
||||
|
||||
// Description of one segment inside a V3 or V5 link header.
struct SegmentInfo {
  // offset of the relocation table
  uint32_t relocs;
  // offset of the segment data
  uint32_t data;
  // size of the segment data (0 if the segment doesn't exist)
  uint32_t size;
  // originally noted as "always 0"; the V5 loader asserts that this is 1
  uint32_t magic;
};
|
||||
|
||||
struct LinkHeaderV3 {
|
||||
uint32_t type_tag; // always 0
|
||||
uint32_t length; // length of link data
|
||||
uint32_t version; // always 3
|
||||
uint32_t segments; // always 3
|
||||
char name[64]; // name of object file
|
||||
SegmentInfo segment_info[3];
|
||||
};
|
||||
|
||||
struct LinkHeaderV5 {
|
||||
uint32_t type_tag; // 0 always 0?
|
||||
uint32_t length_to_get_to_code; // 4 length.. of link data?
|
||||
uint16_t version; // 8
|
||||
uint16_t unknown; // 10
|
||||
uint32_t pad; // 12
|
||||
uint32_t link_length; // 16
|
||||
uint8_t n_segments; // 20
|
||||
char name[59]; // 21 (really??)
|
||||
SegmentInfo segment_info[3];
|
||||
};
|
||||
|
||||
// What a symbol link entry refers to.
enum class SymbolLinkKind {
  // the empty list
  EMPTY_LIST,
  // a type
  TYPE,
  // an ordinary symbol
  SYMBOL
};
|
||||
|
||||
/*!
|
||||
* Handle symbol links for a single symbol in a V2/V4 object file.
|
||||
*/
|
||||
static uint32_t c_symlink2(LinkedObjectFile& f,
|
||||
const std::vector<uint8_t>& data,
|
||||
uint32_t code_ptr_offset,
|
||||
uint32_t link_ptr_offset,
|
||||
SymbolLinkKind kind,
|
||||
const char* name,
|
||||
int seg_id) {
|
||||
get_type_info().inform_symbol_with_no_type_info(name);
|
||||
auto initial_offset = code_ptr_offset;
|
||||
do {
|
||||
auto table_value = data.at(link_ptr_offset);
|
||||
const uint8_t* relocPtr = &data.at(link_ptr_offset);
|
||||
|
||||
// link table has a series of variable-length-encoded integers indicating the seek amount to hit
|
||||
// each reference to the symbol. It ends when the seek is 0, and all references to this symbol
|
||||
// have been patched.
|
||||
uint32_t seek = table_value;
|
||||
uint32_t next_reloc = link_ptr_offset + 1;
|
||||
|
||||
if (seek & 3) {
|
||||
seek = (relocPtr[1] << 8) | table_value;
|
||||
next_reloc = link_ptr_offset + 2;
|
||||
if (seek & 2) {
|
||||
seek = (relocPtr[2] << 16) | seek;
|
||||
next_reloc = link_ptr_offset + 3;
|
||||
if (seek & 1) {
|
||||
seek = (relocPtr[3] << 24) | seek;
|
||||
next_reloc = link_ptr_offset + 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
f.stats.total_v2_symbol_links++;
|
||||
link_ptr_offset = next_reloc;
|
||||
|
||||
code_ptr_offset += (seek & 0xfffffffc);
|
||||
|
||||
// the value of the code gives us more information
|
||||
uint32_t code_value = *(const uint32_t*)(&data.at(code_ptr_offset));
|
||||
if (code_value == 0xffffffff) {
|
||||
// absolute link - replace entire word with a pointer.
|
||||
LinkedWord::Kind word_kind;
|
||||
switch (kind) {
|
||||
case SymbolLinkKind::SYMBOL:
|
||||
word_kind = LinkedWord::SYM_PTR;
|
||||
break;
|
||||
case SymbolLinkKind::EMPTY_LIST:
|
||||
word_kind = LinkedWord::EMPTY_PTR;
|
||||
break;
|
||||
case SymbolLinkKind::TYPE:
|
||||
get_type_info().inform_type(name);
|
||||
word_kind = LinkedWord::TYPE_PTR;
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error("unhandled SymbolLinkKind");
|
||||
}
|
||||
|
||||
f.symbol_link_word(seg_id, code_ptr_offset - initial_offset, name, word_kind);
|
||||
} else {
|
||||
// offset link - replace lower 16 bits with symbol table offset.
|
||||
|
||||
assert((code_value & 0xffff) == 0 || (code_value & 0xffff) == 0xffff);
|
||||
assert(kind == SymbolLinkKind::SYMBOL);
|
||||
// assert(false); // this case does not occur in V2/V4. It does in V3.
|
||||
f.symbol_link_offset(seg_id, code_ptr_offset - initial_offset, name);
|
||||
}
|
||||
|
||||
} while (data.at(link_ptr_offset));
|
||||
|
||||
// seek past terminating 0.
|
||||
return link_ptr_offset + 1;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Handle symbol links for a single symbol in a V3 object file.
|
||||
*/
|
||||
static uint32_t c_symlink3(LinkedObjectFile& f,
|
||||
const std::vector<uint8_t>& data,
|
||||
uint32_t code_ptr,
|
||||
uint32_t link_ptr,
|
||||
SymbolLinkKind kind,
|
||||
const char* name,
|
||||
int seg) {
|
||||
get_type_info().inform_symbol_with_no_type_info(name);
|
||||
auto initial_offset = code_ptr;
|
||||
do {
|
||||
// seek, with a variable length encoding that sucks.
|
||||
uint8_t c;
|
||||
do {
|
||||
c = data.at(link_ptr);
|
||||
link_ptr++;
|
||||
code_ptr += c * 4;
|
||||
} while (c == 0xff);
|
||||
|
||||
// identical logic to symlink 2
|
||||
uint32_t code_value = *(const uint32_t*)(&data.at(code_ptr));
|
||||
if (code_value == 0xffffffff) {
|
||||
f.stats.v3_symbol_link_word++;
|
||||
LinkedWord::Kind word_kind;
|
||||
switch (kind) {
|
||||
case SymbolLinkKind::SYMBOL:
|
||||
word_kind = LinkedWord::SYM_PTR;
|
||||
break;
|
||||
case SymbolLinkKind::EMPTY_LIST:
|
||||
word_kind = LinkedWord::EMPTY_PTR;
|
||||
break;
|
||||
case SymbolLinkKind::TYPE:
|
||||
get_type_info().inform_type(name);
|
||||
word_kind = LinkedWord::TYPE_PTR;
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error("unhandled SymbolLinkKind");
|
||||
}
|
||||
|
||||
f.symbol_link_word(seg, code_ptr - initial_offset, name, word_kind);
|
||||
} else {
|
||||
f.stats.v3_symbol_link_offset++;
|
||||
assert(kind == SymbolLinkKind::SYMBOL);
|
||||
f.symbol_link_offset(seg, code_ptr - initial_offset, name);
|
||||
}
|
||||
|
||||
} while (data.at(link_ptr));
|
||||
return link_ptr + 1;
|
||||
}
|
||||
|
||||
// Round up to the next multiple of 64 (32-bit wrapping arithmetic).
static uint32_t align64(uint32_t in) {
  const uint32_t bumped = in + 63;
  return (bumped >> 6) << 6;
}
|
||||
|
||||
// Round up to the next multiple of 16 (32-bit wrapping arithmetic).
static uint32_t align16(uint32_t in) {
  const uint32_t bumped = in + 15;
  return (bumped >> 4) << 4;
}
|
||||
|
||||
|
||||
/*!
|
||||
* Process link data for a "V4" object file.
|
||||
* In reality a V4 seems to be just a V2 object, but with the link data after the real data.
|
||||
* There's a V4 header at the very beginning, but another V2 header/link data at the end
|
||||
* -----------------------------------------------
|
||||
* | V4 header | data | V2 header | V2 link data |
|
||||
* -----------------------------------------------
|
||||
*/
|
||||
static void link_v4(LinkedObjectFile& f,
|
||||
const std::vector<uint8_t>& data,
|
||||
const std::string& name) {
|
||||
// read the V4 header to find where the link data really is
|
||||
const auto* header = (const LinkHeaderV4*)&data.at(0);
|
||||
uint32_t link_data_offset = header->code_size + sizeof(LinkHeaderV4); // no basic offset
|
||||
|
||||
// code starts immediately after the header
|
||||
uint32_t code_offset = sizeof(LinkHeaderV4);
|
||||
uint32_t code_size = header->code_size;
|
||||
|
||||
f.stats.total_code_bytes += code_size;
|
||||
f.stats.total_v2_code_bytes += code_size;
|
||||
|
||||
// add all code
|
||||
const uint8_t* code_start = &data.at(code_offset);
|
||||
const uint8_t* code_end =
|
||||
&data.at(code_offset + code_size); // safe because link data is after code.
|
||||
assert(((code_end - code_start) % 4) == 0);
|
||||
f.set_segment_count(1);
|
||||
for (auto x = code_start; x < code_end; x += 4) {
|
||||
f.push_back_word_to_segment(*((const uint32_t*)x), 0);
|
||||
}
|
||||
|
||||
// read v2 header after the code
|
||||
const uint8_t* link_data = &data.at(link_data_offset);
|
||||
const auto* link_header_v2 = (const LinkHeaderV2*)(link_data); // subtract off type tag
|
||||
assert(link_header_v2->type_tag == 0xffffffff);
|
||||
assert(link_header_v2->version == 2);
|
||||
assert(link_header_v2->length == header->length);
|
||||
f.stats.total_v2_link_bytes += link_header_v2->length;
|
||||
uint32_t link_ptr_offset = link_data_offset + sizeof(LinkHeaderV2);
|
||||
|
||||
// first "section" of link data is a list of where all the pointer are.
|
||||
if (data.at(link_ptr_offset) == 0) {
|
||||
// there are no pointers.
|
||||
link_ptr_offset++;
|
||||
} else {
|
||||
// there are pointers.
|
||||
// there are a series of variable-length coded integers, indicating where the pointers are, in
|
||||
// the form: seek_amount, number_of_consecutive_pointers, seek_amount,
|
||||
// number_of_consecutive_pointers, ... , 0
|
||||
|
||||
uint32_t code_ptr_offset = code_offset;
|
||||
bool fixing = false; // either seeking or fixing
|
||||
|
||||
while (true) { // loop over entire table
|
||||
while (true) { // loop over current mode (fixing/seeking)
|
||||
// get count from table
|
||||
auto count = data.at(link_ptr_offset);
|
||||
link_ptr_offset++;
|
||||
|
||||
if (!fixing) {
|
||||
// then we are seeking
|
||||
code_ptr_offset += 4 * count;
|
||||
f.stats.total_v2_pointer_seeks++;
|
||||
} else {
|
||||
// then we are fixing consecutive pointers
|
||||
for (uint8_t i = 0; i < count; i++) {
|
||||
if (!f.pointer_link_word(0, code_ptr_offset - code_offset, 0,
|
||||
*((const uint32_t*)(&data.at(code_ptr_offset))))) {
|
||||
printf("WARNING bad link in %s\n", name.c_str());
|
||||
}
|
||||
f.stats.total_v2_pointers++;
|
||||
code_ptr_offset += 4;
|
||||
}
|
||||
}
|
||||
|
||||
// check if we are done with the current integer
|
||||
if (count != 0xff)
|
||||
break;
|
||||
|
||||
// when we "end" an encoded integer on an 0xff, we need an explicit zero byte to change
|
||||
// modes. this handles this special case.
|
||||
if (data.at(link_ptr_offset) == 0) {
|
||||
link_ptr_offset++;
|
||||
fixing = !fixing;
|
||||
}
|
||||
}
|
||||
|
||||
// mode ended, switch
|
||||
fixing = !fixing;
|
||||
|
||||
// we got a zero, that means we're done with pointer fixing.
|
||||
if (data.at(link_ptr_offset) == 0)
|
||||
break;
|
||||
}
|
||||
link_ptr_offset++;
|
||||
}
|
||||
|
||||
// second "section" of link data is a list of symbols to fix up.
|
||||
if (data.at(link_ptr_offset) == 0) {
|
||||
// no symbols
|
||||
} else {
|
||||
while (true) {
|
||||
uint32_t reloc = data.at(link_ptr_offset);
|
||||
link_ptr_offset++;
|
||||
|
||||
const char* s_name;
|
||||
SymbolLinkKind kind;
|
||||
|
||||
if ((reloc & 0x80) == 0) {
|
||||
// it's a symbol
|
||||
if (reloc > 9) {
|
||||
// always happens.
|
||||
link_ptr_offset--;
|
||||
} else {
|
||||
assert(false);
|
||||
}
|
||||
|
||||
s_name = (const char*)(&data.at(link_ptr_offset));
|
||||
kind = SymbolLinkKind::SYMBOL;
|
||||
|
||||
} else {
|
||||
// it's a type
|
||||
kind = SymbolLinkKind::TYPE;
|
||||
uint8_t method_count = reloc & 0x7f;
|
||||
s_name = (const char*)(&data.at(link_ptr_offset));
|
||||
if (method_count == 0) {
|
||||
method_count = 1;
|
||||
// hack which will add 44 methods to _newly created_ types
|
||||
// I assume the thing generating V2 objects didn't know about method counts.
|
||||
// so this was a "safe" backup - if linking a V2 object requires allocating a type.
|
||||
// just be on the safe side.
|
||||
// (see the !symbolValue case in intern_type_from_c)
|
||||
} else {
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
if (std::string("_empty_") == s_name) {
|
||||
assert(kind == SymbolLinkKind::SYMBOL);
|
||||
kind = SymbolLinkKind::EMPTY_LIST;
|
||||
}
|
||||
|
||||
link_ptr_offset += strlen(s_name) + 1;
|
||||
f.stats.total_v2_symbol_count++;
|
||||
link_ptr_offset = c_symlink2(f, data, code_offset, link_ptr_offset, kind, s_name, 0);
|
||||
if (data.at(link_ptr_offset) == 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// check length
|
||||
assert(link_header_v2->length == align64(link_ptr_offset - link_data_offset + 1));
|
||||
while (link_ptr_offset < data.size()) {
|
||||
assert(data.at(link_ptr_offset) == 0);
|
||||
link_ptr_offset++;
|
||||
}
|
||||
}
|
||||
|
||||
/*!
 * Assert that every byte after the first NUL in a fixed-size character field is also zero.
 * str points at the field and size is its length in bytes. The scan never looks beyond
 * `size` bytes, so a field with no terminator is accepted without reading past its end.
 */
static void assert_string_empty_after(const char* str, int size) {
  // find the end of the text, staying inside the field
  int pos = 0;
  while (pos < size && str[pos]) {
    pos++;
  }

  // everything from the terminator to the end of the field must be zero
  for (; pos < size; pos++) {
    assert(!str[pos]);
  }
}
|
||||
|
||||
static void link_v5(LinkedObjectFile& f,
|
||||
const std::vector<uint8_t>& data,
|
||||
const std::string& name) {
|
||||
auto header = (const LinkHeaderV5*)(&data.at(0));
|
||||
if (header->n_segments == 1) {
|
||||
printf("abandon %s!\n", name.c_str());
|
||||
return;
|
||||
}
|
||||
assert(header->type_tag == 0);
|
||||
assert(name == header->name);
|
||||
assert(header->n_segments == 3);
|
||||
assert(header->pad == 0x50);
|
||||
assert(header->length_to_get_to_code - header->link_length == 0x50);
|
||||
|
||||
f.set_segment_count(3);
|
||||
|
||||
// link v3's data size is data.size() - link_length
|
||||
// link v5's data size is data.size() - new_link_length - 0x50.
|
||||
|
||||
// lbp + 4 points to version?
|
||||
// lbp points to 4 past start of header.
|
||||
|
||||
// lbp[1] = version + unknown 16 bit thing.
|
||||
// lbp[3] = link block length (minus 0x50)
|
||||
|
||||
// todo - check this against the code size we actually got.
|
||||
// size_t expected_code_size = data.size() - (header->link_length + 0x50);
|
||||
|
||||
uint32_t data_ptr_offset = header->length_to_get_to_code;
|
||||
|
||||
uint32_t segment_data_offsets[3];
|
||||
uint32_t segment_link_offsets[3];
|
||||
uint32_t segment_link_ends[3];
|
||||
for (int i = 0; i < 3; i++) {
|
||||
segment_data_offsets[i] = data_ptr_offset + header->segment_info[i].data;
|
||||
segment_link_offsets[i] = header->segment_info[i].relocs + 0x50;
|
||||
assert(header->segment_info[i].magic == 1);
|
||||
}
|
||||
|
||||
// check that the data region is filled
|
||||
for (int i = 0; i < 2; i++) {
|
||||
assert(align16(segment_data_offsets[i] + header->segment_info[i].size) ==
|
||||
segment_data_offsets[i + 1]);
|
||||
}
|
||||
assert(align16(segment_data_offsets[2] + header->segment_info[2].size) == data.size());
|
||||
|
||||
// loop over segments (reverse order for now)
|
||||
for (int seg_id = 3; seg_id-- > 0;) {
|
||||
// ?? is this right?
|
||||
if (header->segment_info[seg_id].size == 0)
|
||||
continue;
|
||||
|
||||
auto segment_size = header->segment_info[seg_id].size;
|
||||
f.stats.v3_code_bytes += segment_size;
|
||||
|
||||
// if(gGameVersion == JAK2) {
|
||||
bool adjusted = false;
|
||||
while (segment_size % 4) {
|
||||
segment_size++;
|
||||
adjusted = true;
|
||||
}
|
||||
|
||||
if (adjusted) {
|
||||
printf(
|
||||
"Adjusted the size of segment %d in %s, this is fine, but rare (and may indicate a "
|
||||
"bigger problem if it happens often)\n",
|
||||
seg_id, name.c_str());
|
||||
}
|
||||
// }
|
||||
|
||||
auto base_ptr = segment_data_offsets[seg_id];
|
||||
auto data_ptr = base_ptr - 4;
|
||||
auto link_ptr = segment_link_offsets[seg_id];
|
||||
|
||||
assert((data_ptr % 4) == 0);
|
||||
assert((segment_size % 4) == 0);
|
||||
|
||||
auto code_start = (const uint32_t*)(&data.at(data_ptr + 4));
|
||||
auto code_end = ((const uint32_t*)(&data.at(data_ptr + segment_size))) + 1;
|
||||
for (auto x = code_start; x < code_end; x++) {
|
||||
f.push_back_word_to_segment(*((const uint32_t*)x), seg_id);
|
||||
}
|
||||
bool fixing = false;
|
||||
|
||||
if (data.at(link_ptr)) {
|
||||
// we have pointers
|
||||
while (true) {
|
||||
while (true) {
|
||||
if (!fixing) {
|
||||
// seeking
|
||||
data_ptr += 4 * data.at(link_ptr);
|
||||
f.stats.v3_pointer_seeks++;
|
||||
} else {
|
||||
// fixing.
|
||||
for (uint32_t i = 0; i < data.at(link_ptr); i++) {
|
||||
f.stats.v3_pointers++;
|
||||
uint32_t old_code = *(const uint32_t*)(&data.at(data_ptr));
|
||||
if ((old_code >> 24) == 0) {
|
||||
f.stats.v3_word_pointers++;
|
||||
if (!f.pointer_link_word(seg_id, data_ptr - base_ptr, seg_id, old_code)) {
|
||||
printf("WARNING bad pointer_link_word (2) in %s\n", name.c_str());
|
||||
}
|
||||
} else {
|
||||
f.stats.v3_split_pointers++;
|
||||
auto dest_seg = (old_code >> 8) & 0xf;
|
||||
auto lo_hi_offset = (old_code >> 12) & 0xf;
|
||||
assert(lo_hi_offset);
|
||||
assert(dest_seg < 3);
|
||||
auto offset_upper = old_code & 0xff;
|
||||
// assert(offset_upper == 0);
|
||||
uint32_t low_code = *(const uint32_t*)(&data.at(data_ptr + 4 * lo_hi_offset));
|
||||
uint32_t offset = low_code & 0xffff;
|
||||
if (offset_upper) {
|
||||
// seems to work fine, no need to warn.
|
||||
// printf("WARNING - offset upper is set in %s\n", name.c_str());
|
||||
offset += (offset_upper << 16);
|
||||
}
|
||||
f.pointer_link_split_word(seg_id, data_ptr - base_ptr,
|
||||
data_ptr + 4 * lo_hi_offset - base_ptr, dest_seg, offset);
|
||||
}
|
||||
data_ptr += 4;
|
||||
}
|
||||
}
|
||||
|
||||
if (data.at(link_ptr) != 0xff)
|
||||
break;
|
||||
link_ptr++;
|
||||
if (data.at(link_ptr) == 0) {
|
||||
link_ptr++;
|
||||
fixing = !fixing;
|
||||
}
|
||||
}
|
||||
|
||||
link_ptr++;
|
||||
fixing = !fixing;
|
||||
if (data.at(link_ptr) == 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
link_ptr++;
|
||||
|
||||
if (data.at(link_ptr)) {
|
||||
auto sub_link_ptr = link_ptr;
|
||||
|
||||
while (true) {
|
||||
auto reloc = data.at(sub_link_ptr);
|
||||
auto next_link_ptr = sub_link_ptr + 1;
|
||||
link_ptr = next_link_ptr;
|
||||
|
||||
if ((reloc & 0x80) == 0) {
|
||||
link_ptr = sub_link_ptr + 3; //
|
||||
const char* sname = (const char*)(&data.at(link_ptr));
|
||||
link_ptr += strlen(sname) + 1;
|
||||
// todo segment data offsets...
|
||||
|
||||
if (std::string("_empty_") == sname) {
|
||||
link_ptr = c_symlink2(f, data, segment_data_offsets[seg_id], link_ptr,
|
||||
SymbolLinkKind::EMPTY_LIST, sname, seg_id);
|
||||
} else {
|
||||
link_ptr = c_symlink2(f, data, segment_data_offsets[seg_id], link_ptr,
|
||||
SymbolLinkKind::SYMBOL, sname, seg_id);
|
||||
}
|
||||
} else if ((reloc & 0x3f) == 0x3f) {
|
||||
assert(false); // todo, does this ever get hit?
|
||||
} else {
|
||||
int n_methods_base = reloc & 0x3f;
|
||||
int n_methods = n_methods_base * 4;
|
||||
if (n_methods_base) {
|
||||
n_methods += 3;
|
||||
}
|
||||
link_ptr += 2; // ghidra misses some aliasing here and would have you think this is +1!
|
||||
const char* sname = (const char*)(&data.at(link_ptr));
|
||||
link_ptr += strlen(sname) + 1;
|
||||
link_ptr = c_symlink2(f, data, segment_data_offsets[seg_id], link_ptr,
|
||||
SymbolLinkKind::TYPE, sname, seg_id);
|
||||
}
|
||||
|
||||
sub_link_ptr = link_ptr;
|
||||
if (!data.at(sub_link_ptr))
|
||||
break;
|
||||
}
|
||||
}
|
||||
segment_link_ends[seg_id] = link_ptr;
|
||||
}
|
||||
|
||||
assert(segment_link_offsets[0] == 128);
|
||||
|
||||
if (header->segment_info[0].size) {
|
||||
assert(segment_link_ends[0] + 1 == segment_link_offsets[1]);
|
||||
} else {
|
||||
assert(segment_link_offsets[0] + 2 == segment_link_offsets[1]);
|
||||
}
|
||||
|
||||
if (header->segment_info[1].size) {
|
||||
assert(segment_link_ends[1] + 1 == segment_link_offsets[2]);
|
||||
} else {
|
||||
assert(segment_link_offsets[1] + 2 == segment_link_offsets[2]);
|
||||
}
|
||||
|
||||
assert(align16(segment_link_ends[2] + 2) == segment_data_offsets[0]);
|
||||
}
|
||||
|
||||
/*!
 * Decode a version 3 link header and build the three segments of a LinkedObjectFile.
 * For each non-empty segment (visited from segment 2 down to 0) this:
 *  - copies the segment's words into f,
 *  - walks the pointer link table, alternating between "seek" and "fix" runs,
 *  - walks the symbol/type link table, handing each name to c_symlink3.
 * The layout assumptions are checked with asserts throughout.
 * @param f    object file being built
 * @param data raw object file bytes, starting with a LinkHeaderV3
 * @param name expected object name, must match the name stored in the header
 */
static void link_v3(LinkedObjectFile& f,
                    const std::vector<uint8_t>& data,
                    const std::string& name) {
  auto header = (const LinkHeaderV3*)(&data.at(0));
  assert(name == header->name);
  assert(header->segments == 3);

  f.set_segment_count(3);
  assert_string_empty_after(header->name, 64);

  for (int i = 0; i < 3; i++) {
    assert(header->segment_info[i].magic == 0);
    //    printf(" [%d] %d %d %d %d\n", i, header->segment_info[i].size,
    //           header->segment_info[i].data, header->segment_info[i].magic,
    //           header->segment_info[i].relocs);
  }

  f.stats.v3_link_bytes += header->length;
  uint32_t data_ptr_offset = header->length;

  uint32_t segment_data_offsets[3];
  uint32_t segment_link_offsets[3];
  // Zero-initialized: an empty segment is skipped below and never gets an end recorded.
  uint32_t segment_link_ends[3] = {0, 0, 0};
  for (int i = 0; i < 3; i++) {
    segment_data_offsets[i] = data_ptr_offset + header->segment_info[i].data;
    segment_link_offsets[i] = header->segment_info[i].relocs;
  }

  // check that the data region is filled
  for (int i = 0; i < 2; i++) {
    assert(align16(segment_data_offsets[i] + header->segment_info[i].size) ==
           segment_data_offsets[i + 1]);
  }
  assert(align16(segment_data_offsets[2] + header->segment_info[2].size) == data.size());

  // todo - check link region is filled.

  // loop over segments (reverse order for now)
  for (int seg_id = 3; seg_id-- > 0;) {
    // ?? is this right?
    if (header->segment_info[seg_id].size == 0)
      continue;

    auto segment_size = header->segment_info[seg_id].size;
    f.stats.v3_code_bytes += segment_size;

    // HACK!
    // why is this a thing?
    // HACK!
    if (get_config().game_version == 1 && name == "level-h" && seg_id == 0) {
      segment_size++;
    }

    if (get_config().game_version == 2) {
      // round the size up to a whole number of words
      bool adjusted = false;
      while (segment_size % 4) {
        segment_size++;
        adjusted = true;
      }

      if (adjusted) {
        printf(
            "Adjusted the size of segment %d in %s, this is fine, but rare (and may indicate a "
            "bigger problem if it happens often)\n",
            seg_id, name.c_str());
      }
    }

    auto base_ptr = segment_data_offsets[seg_id];
    // data_ptr starts one word before the segment; the first seek moves it forward.
    auto data_ptr = base_ptr - 4;
    auto link_ptr = segment_link_offsets[seg_id];

    assert((data_ptr % 4) == 0);
    assert((segment_size % 4) == 0);

    // copy every word of the segment into the linked object file
    auto code_start = (const uint32_t*)(&data.at(data_ptr + 4));
    auto code_end = ((const uint32_t*)(&data.at(data_ptr + segment_size))) + 1;
    for (auto x = code_start; x < code_end; x++) {
      f.push_back_word_to_segment(*((const uint32_t*)x), seg_id);
    }
    bool fixing = false;

    if (data.at(link_ptr)) {
      // we have pointers
      while (true) {
        while (true) {
          if (!fixing) {
            // seeking
            data_ptr += 4 * data.at(link_ptr);
            f.stats.v3_pointer_seeks++;
          } else {
            // fixing.
            for (uint32_t i = 0; i < data.at(link_ptr); i++) {
              f.stats.v3_pointers++;
              uint32_t old_code = *(const uint32_t*)(&data.at(data_ptr));
              if ((old_code >> 24) == 0) {
                // upper byte clear: the whole word is linked as a pointer
                f.stats.v3_word_pointers++;
                if (!f.pointer_link_word(seg_id, data_ptr - base_ptr, seg_id, old_code)) {
                  printf("WARNING bad pointer_link_word (2) in %s\n", name.c_str());
                }
              } else {
                // otherwise the pointer is split across this word and a later one
                f.stats.v3_split_pointers++;
                auto dest_seg = (old_code >> 8) & 0xf;
                auto lo_hi_offset = (old_code >> 12) & 0xf;
                assert(lo_hi_offset);
                assert(dest_seg < 3);
                auto offset_upper = old_code & 0xff;
                // assert(offset_upper == 0);
                uint32_t low_code = *(const uint32_t*)(&data.at(data_ptr + 4 * lo_hi_offset));
                uint32_t offset = low_code & 0xffff;
                if (offset_upper) {
                  // seems to work fine, no need to warn.
                  // printf("WARNING - offset upper is set in %s\n", name.c_str());
                  offset += (offset_upper << 16);
                }
                f.pointer_link_split_word(seg_id, data_ptr - base_ptr,
                                          data_ptr + 4 * lo_hi_offset - base_ptr, dest_seg, offset);
              }
              data_ptr += 4;
            }
          }

          // a count of 0xff means the run continues in the next table byte
          if (data.at(link_ptr) != 0xff)
            break;
          link_ptr++;
          if (data.at(link_ptr) == 0) {
            link_ptr++;
            fixing = !fixing;
          }
        }

        link_ptr++;
        fixing = !fixing;
        if (data.at(link_ptr) == 0)
          break;
      }
    }

    link_ptr++;

    // symbol / type links: runs until a zero byte is found
    while (data.at(link_ptr)) {
      auto reloc = data.at(link_ptr);
      SymbolLinkKind kind;
      link_ptr++;

      const char* s_name = nullptr;
      if ((reloc & 0x80) == 0) {
        // it's a symbol
        kind = SymbolLinkKind::SYMBOL;
        // the byte just read is the first character of the name, so step back onto it
        link_ptr--;
        s_name = (const char*)(&data.at(link_ptr));
      } else {
        // methods todo

        s_name = (const char*)(&data.at(link_ptr));
        get_type_info().inform_type_method_count(s_name, reloc & 0x7f);
        kind = SymbolLinkKind::TYPE;
      }

      if (std::string("_empty_") == s_name) {
        assert(kind == SymbolLinkKind::SYMBOL);
        kind = SymbolLinkKind::EMPTY_LIST;
      }

      link_ptr += strlen(s_name) + 1;
      f.stats.v3_symbol_count++;
      link_ptr = c_symlink3(f, data, base_ptr, link_ptr, kind, s_name, seg_id);
    }
    segment_link_ends[seg_id] = link_ptr;
  }

  assert(segment_link_offsets[0] == 128);

  if (header->segment_info[0].size) {
    assert(segment_link_ends[0] + 1 == segment_link_offsets[1]);
  } else {
    assert(segment_link_offsets[0] + 2 == segment_link_offsets[1]);
  }

  if (header->segment_info[1].size) {
    assert(segment_link_ends[1] + 1 == segment_link_offsets[2]);
  } else {
    assert(segment_link_offsets[1] + 2 == segment_link_offsets[2]);
  }

  // Only meaningful if segment 2 was actually processed; otherwise no end was recorded for it.
  // NOTE(review): the expected layout for an empty segment 2 is not known here - confirm.
  if (header->segment_info[2].size) {
    assert(align16(segment_link_ends[2] + 2) == segment_data_offsets[0]);
  }
}
|
||||
|
||||
/*!
|
||||
* Main function to generate LinkedObjectFiles from raw object data.
|
||||
*/
|
||||
LinkedObjectFile to_linked_object_file(const std::vector<uint8_t>& data, const std::string& name) {
|
||||
LinkedObjectFile result;
|
||||
const auto* header = (const LinkHeaderCommon*)&data.at(0);
|
||||
|
||||
// use appropriate linker
|
||||
if (header->version == 3) {
|
||||
assert(header->type_tag == 0);
|
||||
link_v3(result, data, name);
|
||||
} else if (header->version == 4) {
|
||||
assert(header->type_tag == 0xffffffff);
|
||||
link_v4(result, data, name);
|
||||
} else if (header->version == 5) {
|
||||
link_v5(result, data, name);
|
||||
} else {
|
||||
assert(false);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
14
decompiler/ObjectFile/LinkedObjectFileCreation.h
Normal file
14
decompiler/ObjectFile/LinkedObjectFileCreation.h
Normal file
@ -0,0 +1,14 @@
|
||||
/*!
|
||||
* @file LinkedObjectFileCreation.h
|
||||
* Create a LinkedObjectFile from raw object file data.
|
||||
* This implements a decoder for the GOAL linking format.
|
||||
*/
|
||||
|
||||
#ifndef NEXT_LINKEDOBJECTFILECREATION_H
|
||||
#define NEXT_LINKEDOBJECTFILECREATION_H
|
||||
|
||||
#include "LinkedObjectFile.h"
|
||||
|
||||
LinkedObjectFile to_linked_object_file(const std::vector<uint8_t>& data, const std::string& name);
|
||||
|
||||
#endif //NEXT_LINKEDOBJECTFILECREATION_H
|
33
decompiler/ObjectFile/LinkedWord.h
Normal file
33
decompiler/ObjectFile/LinkedWord.h
Normal file
@ -0,0 +1,33 @@
|
||||
/*!
|
||||
* @file LinkedWord.h
|
||||
* A word (4 bytes), possibly with some linking info.
|
||||
*/
|
||||
|
||||
#ifndef JAK2_DISASSEMBLER_LINKEDWORD_H
|
||||
#define JAK2_DISASSEMBLER_LINKEDWORD_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
/*!
 * One 4-byte word of an object file, plus whatever linking information is attached to it.
 */
class LinkedWord {
 public:
  explicit LinkedWord(uint32_t word) : data(word) {}

  // What the word represents once linking information has been applied.
  enum Kind {
    PLAIN_DATA,  // just plain data
    PTR,         // pointer to a location
    HI_PTR,      // lower 16-bits of this data are the upper 16 bits of a pointer
    LO_PTR,      // lower 16-bits of this data are the lower 16 bits of a pointer
    SYM_PTR,     // this is a pointer to a symbol
    EMPTY_PTR,   // this is a pointer to the empty list
    SYM_OFFSET,  // this is an offset of a symbol in the symbol table
    TYPE_PTR     // this is a pointer to a type
  };

  // Words start out as plain data.
  Kind kind = PLAIN_DATA;

  // The raw 32-bit value.
  uint32_t data = 0;

  // -1 until a label is assigned.
  int label_id = -1;

  // Empty unless a symbol name is attached.
  std::string symbol_name;
};
|
||||
|
||||
#endif // JAK2_DISASSEMBLER_LINKEDWORD_H
|
512
decompiler/ObjectFile/ObjectFileDB.cpp
Normal file
512
decompiler/ObjectFile/ObjectFileDB.cpp
Normal file
@ -0,0 +1,512 @@
|
||||
/*!
|
||||
* @file ObjectFileDB.cpp
|
||||
* A "database" of object files found in DGO files.
|
||||
* Eliminates duplicate object files, and also assigns unique names to all object files
|
||||
* (there may be different object files with the same name sometimes)
|
||||
*/
|
||||
|
||||
#include "ObjectFileDB.h"
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include "LinkedObjectFileCreation.h"
|
||||
#include "decompiler/config.h"
|
||||
#include "third-party/minilzo/minilzo.h"
|
||||
#include "decompiler/util/BinaryReader.h"
|
||||
#include "decompiler/util/FileIO.h"
|
||||
#include "decompiler/util/Timer.h"
|
||||
#include "decompiler/Function/BasicBlocks.h"
|
||||
|
||||
/*!
|
||||
* Get a unique name for this object file.
|
||||
*/
|
||||
std::string ObjectFileRecord::to_unique_name() const {
|
||||
return name + "-v" + std::to_string(version);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Build an object file DB for the given list of DGOs.
|
||||
*/
|
||||
ObjectFileDB::ObjectFileDB(const std::vector<std::string>& _dgos) {
|
||||
Timer timer;
|
||||
|
||||
printf("- Initializing ObjectFileDB...\n");
|
||||
for (auto& dgo : _dgos) {
|
||||
get_objs_from_dgo(dgo);
|
||||
}
|
||||
|
||||
printf("ObjectFileDB Initialized:\n");
|
||||
printf(" total dgos: %ld\n", _dgos.size());
|
||||
printf(" total data: %d bytes\n", stats.total_dgo_bytes);
|
||||
printf(" total objs: %d\n", stats.total_obj_files);
|
||||
printf(" unique objs: %d\n", stats.unique_obj_files);
|
||||
printf(" unique data: %d bytes\n", stats.unique_obj_bytes);
|
||||
printf(" total %.1f ms (%.3f MB/sec, %.3f obj/sec)\n", timer.getMs(),
|
||||
stats.total_dgo_bytes / ((1u << 20u) * timer.getSeconds()),
|
||||
stats.total_obj_files / timer.getSeconds());
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
// Header for a DGO file. The same layout is also read in front of each object inside the DGO.
// It is read directly from the file bytes, so its layout must not change.
struct DgoHeader {
  uint32_t size;  // number of objects (DGO header) or byte size of the object (object header)
  char name[60];  // null-terminated name, zero padded to the end of the field
};

// Catch accidental layout changes or unexpected padding at compile time.
static_assert(sizeof(DgoHeader) == 64, "DgoHeader must be exactly 64 bytes");
|
||||
|
||||
namespace {
|
||||
/*!
 * Assert false if the char[] has non-null data after the null terminated string.
 * Used to sanity check the sizes of strings in DGO/object file headers.
 * The search for the terminator never looks past the end of the field, so a name that fills
 * the whole field without a terminator does not cause an out-of-bounds read.
 * @param str  start of the fixed-size character field
 * @param size size of the field in bytes
 */
void assert_string_empty_after(const char* str, int size) {
  int i = 0;

  // skip over the string itself, stopping at the end of the field
  while (i < size && str[i]) {
    i++;
  }

  // everything from the terminator to the end of the field must be zero
  for (; i < size; i++) {
    assert(!str[i]);
  }
}
|
||||
} // namespace
|
||||
|
||||
constexpr int MAX_CHUNK_SIZE = 0x8000;
|
||||
/*!
|
||||
* Load the objects stored in the given DGO into the ObjectFileDB
|
||||
*/
|
||||
void ObjectFileDB::get_objs_from_dgo(const std::string& filename) {
|
||||
auto dgo_data = read_binary_file(filename);
|
||||
stats.total_dgo_bytes += dgo_data.size();
|
||||
|
||||
const char jak2_header[] = "oZlB";
|
||||
bool is_jak2 = true;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (jak2_header[i] != dgo_data[i]) {
|
||||
is_jak2 = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_jak2) {
|
||||
if (lzo_init() != LZO_E_OK) {
|
||||
assert(false);
|
||||
}
|
||||
BinaryReader compressed_reader(dgo_data);
|
||||
// seek past oZlB
|
||||
compressed_reader.ffwd(4);
|
||||
auto decompressed_size = compressed_reader.read<uint32_t>();
|
||||
std::vector<uint8_t> decompressed_data;
|
||||
decompressed_data.resize(decompressed_size);
|
||||
size_t output_offset = 0;
|
||||
while (true) {
|
||||
// seek past alignment bytes and read the next chunk size
|
||||
uint32_t chunk_size = 0;
|
||||
while (!chunk_size) {
|
||||
chunk_size = compressed_reader.read<uint32_t>();
|
||||
}
|
||||
|
||||
if (chunk_size < MAX_CHUNK_SIZE) {
|
||||
lzo_uint bytes_written;
|
||||
auto lzo_rv =
|
||||
lzo1x_decompress(compressed_reader.here(), chunk_size,
|
||||
decompressed_data.data() + output_offset, &bytes_written, nullptr);
|
||||
assert(lzo_rv == LZO_E_OK);
|
||||
compressed_reader.ffwd(chunk_size);
|
||||
output_offset += bytes_written;
|
||||
} else {
|
||||
// nope - sometimes chunk_size is bigger than MAX, but we should still use max.
|
||||
// assert(chunk_size == MAX_CHUNK_SIZE);
|
||||
memcpy(decompressed_data.data() + output_offset, compressed_reader.here(), MAX_CHUNK_SIZE);
|
||||
compressed_reader.ffwd(MAX_CHUNK_SIZE);
|
||||
output_offset += MAX_CHUNK_SIZE;
|
||||
}
|
||||
|
||||
if (output_offset >= decompressed_size)
|
||||
break;
|
||||
while (compressed_reader.get_seek() % 4) {
|
||||
compressed_reader.ffwd(1);
|
||||
}
|
||||
}
|
||||
dgo_data = decompressed_data;
|
||||
}
|
||||
|
||||
BinaryReader reader(dgo_data);
|
||||
auto header = reader.read<DgoHeader>();
|
||||
|
||||
auto dgo_base_name = base_name(filename);
|
||||
assert(header.name == dgo_base_name);
|
||||
assert_string_empty_after(header.name, 60);
|
||||
|
||||
// get all obj files...
|
||||
for (uint32_t i = 0; i < header.size; i++) {
|
||||
auto obj_header = reader.read<DgoHeader>();
|
||||
assert(reader.bytes_left() >= obj_header.size);
|
||||
assert_string_empty_after(obj_header.name, 60);
|
||||
|
||||
add_obj_from_dgo(obj_header.name, reader.here(), obj_header.size, dgo_base_name);
|
||||
reader.ffwd(obj_header.size);
|
||||
}
|
||||
|
||||
// check we're at the end
|
||||
assert(0 == reader.bytes_left());
|
||||
}
|
||||
|
||||
/*!
|
||||
* Add an object file to the ObjectFileDB
|
||||
*/
|
||||
void ObjectFileDB::add_obj_from_dgo(const std::string& obj_name,
|
||||
uint8_t* obj_data,
|
||||
uint32_t obj_size,
|
||||
const std::string& dgo_name) {
|
||||
stats.total_obj_files++;
|
||||
|
||||
auto hash = crc32(obj_data, obj_size);
|
||||
|
||||
// first, check to see if we already got it...
|
||||
for (auto& e : obj_files_by_name[obj_name]) {
|
||||
if (e.data.size() == obj_size && e.record.hash == hash) {
|
||||
// already got it!
|
||||
e.reference_count++;
|
||||
auto rec = e.record;
|
||||
obj_files_by_dgo[dgo_name].push_back(rec);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// nope, have to add a new one.
|
||||
ObjectFileData data;
|
||||
data.data.resize(obj_size);
|
||||
memcpy(data.data.data(), obj_data, obj_size);
|
||||
data.record.hash = hash;
|
||||
data.record.name = obj_name;
|
||||
if (obj_files_by_name[obj_name].empty()) {
|
||||
// if this is the first time we've seen this object file name, add it in the order.
|
||||
obj_file_order.push_back(obj_name);
|
||||
}
|
||||
data.record.version = obj_files_by_name[obj_name].size();
|
||||
obj_files_by_dgo[dgo_name].push_back(data.record);
|
||||
obj_files_by_name[obj_name].emplace_back(std::move(data));
|
||||
stats.unique_obj_files++;
|
||||
stats.unique_obj_bytes += obj_size;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Generate a listing of what object files go in which dgos
|
||||
*/
|
||||
std::string ObjectFileDB::generate_dgo_listing() {
|
||||
std::string result = ";; DGO File Listing\n\n";
|
||||
std::vector<std::string> dgo_names;
|
||||
for (auto& kv : obj_files_by_dgo) {
|
||||
dgo_names.push_back(kv.first);
|
||||
}
|
||||
|
||||
std::sort(dgo_names.begin(), dgo_names.end());
|
||||
|
||||
for (const auto& name : dgo_names) {
|
||||
result += "(\"" + name + "\"\n";
|
||||
for (auto& obj : obj_files_by_dgo[name]) {
|
||||
result += " " + obj.name + " :version " + std::to_string(obj.version) + "\n";
|
||||
}
|
||||
result += " )\n\n";
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Process all of the linking data of all objects.
|
||||
*/
|
||||
void ObjectFileDB::process_link_data() {
|
||||
printf("- Processing Link Data...\n");
|
||||
Timer process_link_timer;
|
||||
|
||||
LinkedObjectFile::Stats combined_stats;
|
||||
|
||||
for_each_obj([&](ObjectFileData& obj) {
|
||||
obj.linked_data = to_linked_object_file(obj.data, obj.record.name);
|
||||
combined_stats.add(obj.linked_data.stats);
|
||||
});
|
||||
|
||||
printf("Processed Link Data:\n");
|
||||
printf(" code %d bytes\n", combined_stats.total_code_bytes);
|
||||
printf(" v2 code %d bytes\n", combined_stats.total_v2_code_bytes);
|
||||
printf(" v2 link data %d bytes\n", combined_stats.total_v2_link_bytes);
|
||||
printf(" v2 pointers %d\n", combined_stats.total_v2_pointers);
|
||||
printf(" v2 pointer seeks %d\n", combined_stats.total_v2_pointer_seeks);
|
||||
printf(" v2 symbols %d\n", combined_stats.total_v2_symbol_count);
|
||||
printf(" v2 symbol links %d\n", combined_stats.total_v2_symbol_links);
|
||||
|
||||
printf(" v3 code %d bytes\n", combined_stats.v3_code_bytes);
|
||||
printf(" v3 link data %d bytes\n", combined_stats.v3_link_bytes);
|
||||
printf(" v3 pointers %d\n", combined_stats.v3_pointers);
|
||||
printf(" split %d\n", combined_stats.v3_split_pointers);
|
||||
printf(" word %d\n", combined_stats.v3_word_pointers);
|
||||
printf(" v3 pointer seeks %d\n", combined_stats.v3_pointer_seeks);
|
||||
printf(" v3 symbols %d\n", combined_stats.v3_symbol_count);
|
||||
printf(" v3 offset symbol links %d\n", combined_stats.v3_symbol_link_offset);
|
||||
printf(" v3 word symbol links %d\n", combined_stats.v3_symbol_link_word);
|
||||
|
||||
printf(" total %.3f ms\n", process_link_timer.getMs());
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
/*!
|
||||
* Process all of the labels generated from linking and give them reasonable names.
|
||||
*/
|
||||
void ObjectFileDB::process_labels() {
|
||||
printf("- Processing Labels...\n");
|
||||
Timer process_label_timer;
|
||||
uint32_t total = 0;
|
||||
for_each_obj([&](ObjectFileData& obj) { total += obj.linked_data.set_ordered_label_names(); });
|
||||
|
||||
printf("Processed Labels:\n");
|
||||
printf(" total %d labels\n", total);
|
||||
printf(" total %.3f ms\n", process_label_timer.getMs());
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
/*!
|
||||
* Dump object files and their linking data to text files for debugging
|
||||
*/
|
||||
void ObjectFileDB::write_object_file_words(const std::string& output_dir, bool dump_v3_only) {
|
||||
if (dump_v3_only) {
|
||||
printf("- Writing object file dumps (v3 only)...\n");
|
||||
} else {
|
||||
printf("- Writing object file dumps (all)...\n");
|
||||
}
|
||||
|
||||
Timer timer;
|
||||
uint32_t total_bytes = 0, total_files = 0;
|
||||
|
||||
for_each_obj([&](ObjectFileData& obj) {
|
||||
if (obj.linked_data.segments == 3 || !dump_v3_only) {
|
||||
auto file_text = obj.linked_data.print_words();
|
||||
auto file_name = combine_path(output_dir, obj.record.to_unique_name() + ".txt");
|
||||
total_bytes += file_text.size();
|
||||
write_text_file(file_name, file_text);
|
||||
total_files++;
|
||||
}
|
||||
});
|
||||
|
||||
printf("Wrote object file dumps:\n");
|
||||
printf(" total %d files\n", total_files);
|
||||
printf(" total %.3f MB\n", total_bytes / ((float)(1u << 20u)));
|
||||
printf(" total %.3f ms (%.3f MB/sec)\n", timer.getMs(),
|
||||
total_bytes / ((1u << 20u) * timer.getSeconds()));
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
/*!
|
||||
* Dump disassembly for object files containing code. Data zones will also be dumped.
|
||||
*/
|
||||
void ObjectFileDB::write_disassembly(const std::string& output_dir,
|
||||
bool disassemble_objects_without_functions) {
|
||||
printf("- Writing functions...\n");
|
||||
Timer timer;
|
||||
uint32_t total_bytes = 0, total_files = 0;
|
||||
|
||||
for_each_obj([&](ObjectFileData& obj) {
|
||||
if (obj.linked_data.has_any_functions() || disassemble_objects_without_functions) {
|
||||
auto file_text = obj.linked_data.print_disassembly();
|
||||
auto file_name = combine_path(output_dir, obj.record.to_unique_name() + ".func");
|
||||
total_bytes += file_text.size();
|
||||
write_text_file(file_name, file_text);
|
||||
total_files++;
|
||||
}
|
||||
});
|
||||
|
||||
printf("Wrote functions dumps:\n");
|
||||
printf(" total %d files\n", total_files);
|
||||
printf(" total %.3f MB\n", total_bytes / ((float)(1u << 20u)));
|
||||
printf(" total %.3f ms (%.3f MB/sec)\n", timer.getMs(),
|
||||
total_bytes / ((1u << 20u) * timer.getSeconds()));
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
/*!
|
||||
* Find code/data zones, identify functions, and disassemble
|
||||
*/
|
||||
void ObjectFileDB::find_code() {
|
||||
printf("- Finding code in object files...\n");
|
||||
LinkedObjectFile::Stats combined_stats;
|
||||
Timer timer;
|
||||
|
||||
for_each_obj([&](ObjectFileData& obj) {
|
||||
// printf("fc %s\n", obj.record.to_unique_name().c_str());
|
||||
obj.linked_data.find_code();
|
||||
obj.linked_data.find_functions();
|
||||
obj.linked_data.disassemble_functions();
|
||||
|
||||
if (get_config().game_version == 1 || obj.record.to_unique_name() != "effect-control-v0") {
|
||||
obj.linked_data.process_fp_relative_links();
|
||||
} else {
|
||||
printf("skipping process_fp_relative_links in %s\n", obj.record.to_unique_name().c_str());
|
||||
}
|
||||
|
||||
auto& obj_stats = obj.linked_data.stats;
|
||||
if (obj_stats.code_bytes / 4 > obj_stats.decoded_ops) {
|
||||
printf("Failed to decode all in %s (%d / %d)\n", obj.record.to_unique_name().c_str(),
|
||||
obj_stats.decoded_ops, obj_stats.code_bytes / 4);
|
||||
}
|
||||
combined_stats.add(obj.linked_data.stats);
|
||||
});
|
||||
|
||||
printf("Found code:\n");
|
||||
printf(" code %.3f MB\n", combined_stats.code_bytes / (float)(1 << 20));
|
||||
printf(" data %.3f MB\n", combined_stats.data_bytes / (float)(1 << 20));
|
||||
printf(" functions: %d\n", combined_stats.function_count);
|
||||
printf(" fp uses resolved: %d / %d (%.3f %%)\n", combined_stats.n_fp_reg_use_resolved,
|
||||
combined_stats.n_fp_reg_use,
|
||||
100.f * (float)combined_stats.n_fp_reg_use_resolved / combined_stats.n_fp_reg_use);
|
||||
auto total_ops = combined_stats.code_bytes / 4;
|
||||
printf(" decoded %d / %d (%.3f %%)\n", combined_stats.decoded_ops, total_ops,
|
||||
100.f * (float)combined_stats.decoded_ops / total_ops);
|
||||
printf(" total %.3f ms\n", timer.getMs());
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
/*!
|
||||
* Finds and writes all scripts into a file named all_scripts.lisp.
|
||||
* Doesn't change any state in ObjectFileDB.
|
||||
*/
|
||||
void ObjectFileDB::find_and_write_scripts(const std::string& output_dir) {
|
||||
printf("- Finding scripts in object files...\n");
|
||||
Timer timer;
|
||||
std::string all_scripts;
|
||||
|
||||
for_each_obj([&](ObjectFileData& obj) {
|
||||
auto scripts = obj.linked_data.print_scripts();
|
||||
if (!scripts.empty()) {
|
||||
all_scripts += ";--------------------------------------\n";
|
||||
all_scripts += "; " + obj.record.to_unique_name() + "\n";
|
||||
all_scripts += ";---------------------------------------\n";
|
||||
all_scripts += scripts;
|
||||
}
|
||||
});
|
||||
|
||||
auto file_name = combine_path(output_dir, "all_scripts.lisp");
|
||||
write_text_file(file_name, all_scripts);
|
||||
|
||||
printf("Found scripts:\n");
|
||||
printf(" total %.3f ms\n", timer.getMs());
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void ObjectFileDB::analyze_functions() {
|
||||
printf("- Analyzing Functions...\n");
|
||||
Timer timer;
|
||||
|
||||
int total_functions = 0;
|
||||
int resolved_cfg_functions = 0;
|
||||
const auto& config = get_config();
|
||||
|
||||
{
|
||||
timer.start();
|
||||
for_each_obj([&](ObjectFileData& data) {
|
||||
if (data.linked_data.segments == 3) {
|
||||
// the top level segment should have a single function
|
||||
assert(data.linked_data.functions_by_seg.at(2).size() == 1);
|
||||
|
||||
auto& func = data.linked_data.functions_by_seg.at(2).front();
|
||||
assert(func.guessed_name.empty());
|
||||
func.guessed_name.set_as_top_level();
|
||||
func.find_global_function_defs(data.linked_data);
|
||||
func.find_method_defs(data.linked_data);
|
||||
}
|
||||
});
|
||||
|
||||
// check for function uniqueness.
|
||||
std::unordered_set<std::string> unique_names;
|
||||
std::unordered_map<std::string, std::unordered_set<std::string>> duplicated_functions;
|
||||
|
||||
for_each_function([&](Function& func, int segment_id, ObjectFileData& data) {
|
||||
(void)segment_id;
|
||||
auto name = func.guessed_name.to_string();
|
||||
if (func.guessed_name.expected_unique()) {
|
||||
if(unique_names.find(name) != unique_names.end()) {
|
||||
duplicated_functions[name].insert(data.record.to_unique_name());
|
||||
}
|
||||
|
||||
unique_names.insert(name);
|
||||
}
|
||||
|
||||
if (config.asm_functions_by_name.find(name) != config.asm_functions_by_name.end()) {
|
||||
func.warnings += "flagged as asm by config\n";
|
||||
func.suspected_asm = true;
|
||||
}
|
||||
});
|
||||
|
||||
for_each_function([&](Function& func, int segment_id, ObjectFileData& data) {
|
||||
(void)segment_id;
|
||||
auto name = func.guessed_name.to_string();
|
||||
if(func.guessed_name.expected_unique()) {
|
||||
if(duplicated_functions.find(name) != duplicated_functions.end()) {
|
||||
duplicated_functions[name].insert(data.record.to_unique_name());
|
||||
func.warnings += "this function exists in multiple non-identical object files";
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// for(const auto& kv : duplicated_functions) {
|
||||
// printf("Function %s is found in non-identical object files:\n", kv.first.c_str());
|
||||
// for(const auto& obj : kv.second) {
|
||||
// printf(" %s\n", obj.c_str());
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
int total_nontrivial_functions = 0;
|
||||
int total_resolved_nontrivial_functions = 0;
|
||||
int total_named_functions = 0;
|
||||
|
||||
std::map<int, std::vector<std::string>> unresolved_by_length;
|
||||
if (get_config().find_basic_blocks) {
|
||||
timer.start();
|
||||
int total_basic_blocks = 0;
|
||||
for_each_function([&](Function& func, int segment_id, ObjectFileData& data) {
|
||||
auto blocks = find_blocks_in_function(data.linked_data, segment_id, func);
|
||||
total_basic_blocks += blocks.size();
|
||||
func.basic_blocks = blocks;
|
||||
|
||||
if(!func.suspected_asm) {
|
||||
func.analyze_prologue(data.linked_data);
|
||||
func.cfg = build_cfg(data.linked_data, segment_id, func);
|
||||
total_functions++;
|
||||
if (func.cfg->is_fully_resolved()) {
|
||||
resolved_cfg_functions++;
|
||||
}
|
||||
} else {
|
||||
resolved_cfg_functions++;
|
||||
}
|
||||
|
||||
|
||||
if(func.basic_blocks.size() > 1 && !func.suspected_asm) {
|
||||
total_nontrivial_functions++;
|
||||
if(func.cfg->is_fully_resolved()) {
|
||||
total_resolved_nontrivial_functions++;
|
||||
} else {
|
||||
if(!func.guessed_name.empty()) {
|
||||
unresolved_by_length[func.end_word - func.start_word].push_back(func.guessed_name.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(!func.guessed_name.empty()) {
|
||||
total_named_functions++;
|
||||
}
|
||||
});
|
||||
|
||||
printf("Found %d functions (%d with nontrivial cfgs)\n", total_functions, total_nontrivial_functions);
|
||||
printf("Named %d/%d functions (%.2f%%)\n", total_named_functions, total_functions, 100.f * float(total_named_functions) / float(total_functions));
|
||||
printf("Found %d basic blocks in %.3f ms\n", total_basic_blocks, timer.getMs());
|
||||
printf(" %d/%d functions passed cfg analysis stage (%.2f%%)\n", resolved_cfg_functions, total_functions,
|
||||
100.f * float(resolved_cfg_functions) / float(total_functions));
|
||||
printf(" %d/%d nontrivial cfg's resolved (%.2f%%)\n", total_resolved_nontrivial_functions, total_nontrivial_functions,
|
||||
100.f * float(total_resolved_nontrivial_functions) / float(total_nontrivial_functions));
|
||||
|
||||
for(auto& kv : unresolved_by_length) {
|
||||
printf("LEN %d\n", kv.first);
|
||||
for(auto& x : kv.second) {
|
||||
printf(" %s\n", x.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
105
decompiler/ObjectFile/ObjectFileDB.h
Normal file
105
decompiler/ObjectFile/ObjectFileDB.h
Normal file
@ -0,0 +1,105 @@
|
||||
/*!
|
||||
* @file ObjectFileDB.h
|
||||
* A "database" of object files found in DGO files.
|
||||
* Eliminates duplicate object files, and also assigns unique names to all object files
|
||||
* (there may be different object files with the same name sometimes)
|
||||
*/
|
||||
|
||||
#ifndef JAK2_DISASSEMBLER_OBJECTFILEDB_H
|
||||
#define JAK2_DISASSEMBLER_OBJECTFILEDB_H
|
||||
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include "LinkedObjectFile.h"
|
||||
|
||||
/*!
|
||||
* A "record" which can be used to identify an object file.
|
||||
*/
|
||||
struct ObjectFileRecord {
  std::string name;
  int version{-1};   // stays -1 until a version is assigned
  uint32_t hash{0};  // stays 0 until a hash is assigned

  // Builds the unique name for this record (defined in the .cpp file).
  std::string to_unique_name() const;
};
|
||||
|
||||
/*!
|
||||
* All of the data for a single object file
|
||||
*/
|
||||
struct ObjectFileData {
|
||||
std::vector<uint8_t> data; // raw bytes
|
||||
LinkedObjectFile linked_data; // data including linking annotations
|
||||
ObjectFileRecord record; // name
|
||||
uint32_t reference_count = 0; // number of times its used.
|
||||
};
|
||||
|
||||
class ObjectFileDB {
|
||||
public:
|
||||
ObjectFileDB(const std::vector<std::string>& _dgos);
|
||||
std::string generate_dgo_listing();
|
||||
void process_link_data();
|
||||
void process_labels();
|
||||
void find_code();
|
||||
void find_and_write_scripts(const std::string& output_dir);
|
||||
|
||||
void write_object_file_words(const std::string& output_dir, bool dump_v3_only);
|
||||
void write_disassembly(const std::string& output_dir, bool disassemble_objects_without_functions);
|
||||
void analyze_functions();
|
||||
|
||||
private:
|
||||
void get_objs_from_dgo(const std::string& filename);
|
||||
void add_obj_from_dgo(const std::string& obj_name,
|
||||
uint8_t* obj_data,
|
||||
uint32_t obj_size,
|
||||
const std::string& dgo_name);
|
||||
|
||||
/*!
|
||||
* Apply f to all ObjectFileData's. Does it in the right order.
|
||||
*/
|
||||
template <typename Func>
|
||||
void for_each_obj(Func f) {
|
||||
assert(obj_files_by_name.size() == obj_file_order.size());
|
||||
for(const auto& name : obj_file_order) {
|
||||
for(auto& obj : obj_files_by_name.at(name)) {
|
||||
f(obj);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Apply f to all functions
|
||||
* takes (Function, segment, linked_data)
|
||||
* Does it in the right order.
|
||||
*/
|
||||
template <typename Func>
|
||||
void for_each_function(Func f) {
|
||||
for_each_obj([&](ObjectFileData& data) {
|
||||
// printf("IN %s\n", data.record.to_unique_name().c_str());
|
||||
for (int i = 0; i < int(data.linked_data.segments); i++) {
|
||||
// printf("seg %d\n", i);
|
||||
int fn = 0;
|
||||
for (auto& goal_func : data.linked_data.functions_by_seg.at(i)) {
|
||||
// printf("fn %d\n", fn);
|
||||
f(goal_func, i, data);
|
||||
fn++;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Danger: after adding all object files, we assume that the vector never reallocates.
|
||||
std::unordered_map<std::string, std::vector<ObjectFileData>> obj_files_by_name;
|
||||
std::unordered_map<std::string, std::vector<ObjectFileRecord>> obj_files_by_dgo;
|
||||
|
||||
std::vector<std::string> obj_file_order;
|
||||
|
||||
struct {
|
||||
uint32_t total_dgo_bytes = 0;
|
||||
uint32_t total_obj_files = 0;
|
||||
uint32_t unique_obj_files = 0;
|
||||
uint32_t unique_obj_bytes = 0;
|
||||
} stats;
|
||||
};
|
||||
|
||||
#endif // JAK2_DISASSEMBLER_OBJECTFILEDB_H
|
189
decompiler/README.md
Normal file
189
decompiler/README.md
Normal file
@ -0,0 +1,189 @@
|
||||
How to use
|
||||
-----------
|
||||
Compile (Linux):
|
||||
```
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
make -j
|
||||
cd ..
|
||||
```
|
||||
|
||||
After compiling:
|
||||
First create a folder for the output and create a folder for the input. Add all of the CGO/DGO files into the input folder.
|
||||
```
|
||||
build/jak_disassembler config/jak1_ntsc_black_label.jsonc in_folder/ out_folder/
|
||||
```
|
||||
|
||||
|
||||
Notes
|
||||
--------
|
||||
The `config` folder has settings for the disassembly. Currently Jak 2 and Jak 3 are not as well supported as Jak 1.
|
||||
|
||||
|
||||
# Procedure
|
||||
|
||||
## ObjectFileDB
|
||||
The `ObjectFileDB` tracks unique object files. The games have a lot of duplicated object files, and object files with the same names but different contents, so `ObjectFileDB` is used to create a unique name for each unique object file. It generates a file named `dgo.txt` which maps its names to the original name and which DGO files it appears in. The `ObjectFileDB` extracts all object files from a DGO file, decompressing the DGO first if needed. (note: Jak 2 demo DGOs do not decompress properly). Each object file has a number of segments, which the game can load to separate places. Sometimes there is just a single "data" segment, and other times there are three segments:
|
||||
|
||||
- `top-level` is executed at the end of the linking process, then discarded and goes in a special temporary heap
|
||||
- `main` is loaded and linked onto the specified heap
|
||||
- `debug` is loaded and linked onto the debug heap
|
||||
|
||||
|
||||
## `ObjectFileDB::process_link_data`
|
||||
This function breaks the object file's data into segments and processes the link data. The data is stored as a sequence of `LinkedObjectWord`s, which contain extra data from the link. The `LinkedObjectWord`s are stored by segment in a `LinkedObjectFile`, which also contains a list of `Label`s that allow `LinkedObjectWord`s to refer to other `LinkedObjectWord`s. Note that a `Label` can have a byte-offset into a word, which GOAL uses to load non-4-byte-aligned bytes and halfwords, and also to represent a `pair` object.
|
||||
|
||||
## `ObjectFileDB::find_code`
|
||||
This function looks through the `LinkedObjectFile`s and splits each segment into data and code zones.
|
||||
|
||||
The only files with code zones are from object files with three segments, and the code always comes first. The end of the code zone is found by looking for the last GOAL `function` object, then finding the end of this object by looking one word past the last `jr ra` instruction. This assumes that the last function in each segment doesn't have an extra inline assembly `jr ra` somewhere in the middle, but functions with multiple `jr ra`'s are extremely rare (and not generated by the GOAL compiler without the use of inline assembly), so this seems like a safe assumption for now.
|
||||
|
||||
The code zones are scanned for GOAL `function` types, which are in front of every GOAL function, and used to create `Functions`. Each `Function` is disassembled into EE Instructions, which also adds `Label`s for branch instructions, and can also contain linking data when appropriate. The final step is to look for instructions which use the `fp` register to reference static data, and insert the appropriate `Label`s. GOAL uses the following `fp` relative addressing modes:
|
||||
|
||||
- `lw`, `lwc1`, `ld` relative to the `fp` register to load static data.
|
||||
- `daddiu` to create a pointer to fp-relative data within +/- `2^15` bytes
|
||||
- Sequence of `ori`, `daddu` to generate a pointer that reaches within `+2^16` bytes
|
||||
- Sequence of `lui`, `ori`, `daddu` to generate any 32-bit offset from `fp`.
|
||||
|
||||
The last two are only found in very large object files, and GOALDIS doesn't handle these.
|
||||
|
||||
The `fp` register is set with this sequence. The function prologue only sets `fp` if it is needed in the function.
|
||||
|
||||
```
|
||||
;; goal function call, t9 contains the function address
|
||||
jalr ra, t9
|
||||
sll v0, ra, 0
|
||||
|
||||
;; example goal function prologue:
|
||||
daddiu sp, sp, -16
|
||||
sd ra, 0(sp)
|
||||
sd fp, 8(sp)
|
||||
or fp, t9, r0
|
||||
```
|
||||
|
||||
Note: there are a few hacks to avoid generating labels when `fp` is used as a temporary register in inline assembly. These include ignoring stores/loads of `fp` from the stack (the kernel does this to suspend and resume a thread), ignoring `fp` when used with the `PEXTLW` instruction, and totally skipping this step for a single object file in Jak 2 (`effect-control`).
|
||||
|
||||
## `ObjectFileDB::process_labels`
|
||||
This step simply renames labels with `L1`, `L2`, .... It should happen before any custom label naming as it will overwrite all label names.
|
||||
|
||||
## `ObjectFileDB::find_and_write_scripts`
|
||||
Looks for static linked lists and attempts to print them. Doesn't support printing everything, but can print nested lists, strings, numbers, and symbols.
|
||||
|
||||
## `ObjectFileDB::write_object_file_words`
|
||||
Dumps words in each segment like `hexdump`. There's an option to only run this on `v3` object files, which contain data, as opposed to `v2` which are typically large data.
|
||||
|
||||
## `ObjectFileDB::write_disassembly`
|
||||
Like `write_object_file_words`, but code is replaced with disassembly. There's a config option to avoid running this on object files with no functions, as these are usually large data files which are uninteresting to view as a binary dump and slow to dump.
|
||||
|
||||
## Basic Block Finding
|
||||
Look at branch instructions and their destinations to find all basic blocks. Implemented in `find_blocks_in_function` as part of `analyze_functions`. This works for Jak 1, 2 and 3.
|
||||
|
||||
## Analyze Functions Prologues and Epilogues
|
||||
This will help us find stack variables and make sure that the prologue/epilogue are ignored by the statement generation.
|
||||
|
||||
A "full" prologue looks like this:
|
||||
```
|
||||
daddiu sp, sp, -208
|
||||
sd ra, 0(sp)
|
||||
sd fp, 8(sp)
|
||||
or fp, t9, r0 ;; set fp to the address of this function
|
||||
sq s3, 128(sp)
|
||||
sq s4, 144(sp)
|
||||
sq s5, 160(sp)
|
||||
sq gp, 176(sp)
|
||||
swc1 f26, 192(sp)
|
||||
swc1 f28, 196(sp)
|
||||
swc1 f30, 200(sp)
|
||||
```
|
||||
GOAL will leave out instructions that aren't needed. This prologue is "decoded" into:
|
||||
|
||||
```
|
||||
Total stack usage: 0xd0 bytes
|
||||
$fp set? : yes
|
||||
$ra set? : yes
|
||||
Stack variables : yes, 112 bytes at sp + 16
|
||||
Saved gprs: gp s5 s4 s3
|
||||
Saved fprs: f30 f28 f26
|
||||
```
|
||||
A similar process is done for the epilogue, and it is checked against the prologue.
|
||||
|
||||
The prologue is removed from the first basic block and the epilogue + alignment padding is removed from the last one.
|
||||
|
||||
# Documentation of Planned Steps that are not implemented
|
||||
Currently the focus is to get these working for Jak 1. But it shouldn't be much extra work to support Jak 2/3.
|
||||
|
||||
|
||||
## Guess Function Names (to be implemented)
|
||||
|
||||
When possible, we should guess function names. It's not always possible because GOAL supports anonymous lambda functions, like for example:
|
||||
|
||||
```
|
||||
(lambda ((x int) (y int)) (+ x y))
|
||||
```
|
||||
|
||||
which will generate a GOAL `function` object without a name.
|
||||
|
||||
But these are pretty uncommon, and the majority of GOAL functions are
|
||||
|
||||
- Normal functions, which are stored into a `symbol` with the same name as the function
|
||||
- Methods, which are stored into the method table of their `type` with the `method-set!` function. Sadly we can't get the name of methods, but we can get their ID (to figure out the inheritance hierarchy) and what type they are defined for.
|
||||
- State handlers / behaviors (not yet fully understood)
|
||||
- Virtual state handlers / behaviors (not yet fully understood)
|
||||
|
||||
Currently the state/behavior stuff isn't well understood, or used in the early initialization of the game, so name guessing won't worry about this for now.
|
||||
|
||||
## Guess Types (to be implemented)
|
||||
|
||||
The majority of GOAL types have a compiler-generated `inspect` method which prints their fields. We should detect these methods in the previous function name guessing step, and then read through them to determine the data layout of the type.
|
||||
|
||||
|
||||
## Control Flow Analysis
|
||||
|
||||
The basic blocks should be built into a graph and annotated with control flow patterns, like `if`, `cond`, `and`, and various loops. To do this, register liveness will be determined for each instruction.
|
||||
|
||||
## Conversion to Statements
|
||||
|
||||
Instructions (or sequences of instructions that should not be separated) should be converted into `Statement`s, which represent something like `(add! r1 r2 r3)`. The registers should be mapped to variables, using as many variables as possible, as we don't know at this point if a register will be holding the same GOAL variable at different instructions.
|
||||
|
||||
## Type propagation
|
||||
`Variable`s should get types determined by arguments of the function, which should then be propagated to other `Statement`s in the function, and can then refine the argument types of other functions. This process should be repeated until things stop changing.
|
||||
|
||||
## Variable declaration
|
||||
Variables which are actually the same variable will be merged. The point at which variables are first defined/declared will be determined based on liveness and then expanded to come up with a scope nesting that doesn't cross control flow boundaries.
|
||||
|
||||
|
||||
## Statement -> S-Expression map tree
|
||||
Due to the simple single-pass design of the GOAL compiler, we build a tree which represents how Statements can be combined to eliminate variables. As an extremely simple example:
|
||||
|
||||
```
|
||||
(set! r1 thing1)
|
||||
(set! r2 thing2)
|
||||
(add-int! r4 r2 r3)
|
||||
(mult-int! r1 r4)
|
||||
```
|
||||
can be collapsed to
|
||||
```
|
||||
(* thing1 (+ thing2 r3))
|
||||
```
|
||||
|
||||
But
|
||||
```
|
||||
(set! r2 thing2)
|
||||
(add-int! r4 r2 r3)
|
||||
(set! r1 thing1)
|
||||
(mult-int! r1 r4)
|
||||
```
|
||||
can be collapsed to
|
||||
```
|
||||
(let ((temp0 (+ thing2 r3)))
|
||||
(* thing1 temp0)
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
and this difference will actually reflect the difference in how the code was originally written! This is a huge advantage over existing decompilers, which will be unable to tell the subtle difference between the two.
|
||||
|
||||
|
||||
## Macro pattern matching
|
||||
Lots of GOAL language features are implemented with macros, so once the s-expression nesting is recovered, we can pattern match to undo macros very precisely.
|
1
decompiler/TypeSystem/GoalFunction.cpp
Normal file
1
decompiler/TypeSystem/GoalFunction.cpp
Normal file
@ -0,0 +1 @@
|
||||
#include "GoalFunction.h"
|
15
decompiler/TypeSystem/GoalFunction.h
Normal file
15
decompiler/TypeSystem/GoalFunction.h
Normal file
@ -0,0 +1,15 @@
|
||||
#ifndef JAK_DISASSEMBLER_GOALFUNCTION_H
|
||||
#define JAK_DISASSEMBLER_GOALFUNCTION_H
|
||||
|
||||
/*!
 * Placeholder class: it currently declares no members.
 */
class GoalFunction {
 public:
  // Disabled for now - a possible classification of functions:
  //   enum Kind {
  //     GLOBAL_FUNCTION,
  //     ANON_FUNCTION,
  //     METHOD,
  //     BEHAVIOR,
  //     UNKNOWN
  //   };
};
|
||||
|
||||
#endif // JAK_DISASSEMBLER_GOALFUNCTION_H
|
1
decompiler/TypeSystem/GoalSymbol.cpp
Normal file
1
decompiler/TypeSystem/GoalSymbol.cpp
Normal file
@ -0,0 +1 @@
|
||||
#include "GoalSymbol.h"
|
38
decompiler/TypeSystem/GoalSymbol.h
Normal file
38
decompiler/TypeSystem/GoalSymbol.h
Normal file
@ -0,0 +1,38 @@
|
||||
#ifndef JAK_DISASSEMBLER_GOALSYMBOL_H
|
||||
#define JAK_DISASSEMBLER_GOALSYMBOL_H
|
||||
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include "TypeSpec.h"
|
||||
|
||||
class GoalSymbol {
|
||||
public:
|
||||
GoalSymbol() = default;
|
||||
explicit GoalSymbol(std::string name) : m_name(std::move(name)) {}
|
||||
GoalSymbol(std::string name, TypeSpec ts) : m_name(std::move(name)), m_type(std::move(ts)) {
|
||||
m_has_type_info = true;
|
||||
}
|
||||
|
||||
bool has_type_info() const {
|
||||
return m_has_type_info;
|
||||
}
|
||||
|
||||
void set_type(TypeSpec ts) {
|
||||
if(m_has_type_info) {
|
||||
if(ts != m_type) {
|
||||
printf("symbol %s %s -> %s", m_name.c_str(), m_type.to_string().c_str(), ts.to_string().c_str());
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
m_has_type_info = true;
|
||||
m_type = std::move(ts);
|
||||
}
|
||||
|
||||
private:
|
||||
std::string m_name;
|
||||
TypeSpec m_type;
|
||||
bool m_has_type_info = false;
|
||||
};
|
||||
|
||||
#endif // JAK_DISASSEMBLER_GOALSYMBOL_H
|
13
decompiler/TypeSystem/GoalType.cpp
Normal file
13
decompiler/TypeSystem/GoalType.cpp
Normal file
@ -0,0 +1,13 @@
|
||||
#include "GoalType.h"
|
||||
|
||||
void GoalType::set_methods(int n) {
|
||||
if (m_method_count_set) {
|
||||
if (m_method_count != n) {
|
||||
printf("Type %s had %d methods, set_methods tried to change it to %d\n", m_name.c_str(),
|
||||
m_method_count, n);
|
||||
}
|
||||
} else {
|
||||
m_method_count = n;
|
||||
m_method_count_set = true;
|
||||
}
|
||||
}
|
27
decompiler/TypeSystem/GoalType.h
Normal file
27
decompiler/TypeSystem/GoalType.h
Normal file
@ -0,0 +1,27 @@
|
||||
#ifndef JAK_DISASSEMBLER_GOALTYPE_H
|
||||
#define JAK_DISASSEMBLER_GOALTYPE_H
|
||||
|
||||
#include <string>
|
||||
|
||||
/*!
 * A named type, plus flags recording how much is known about it so far.
 */
class GoalType {
 public:
  GoalType() = default;
  GoalType(std::string name) : m_name(std::move(name)) {}

  // Returns the m_has_info flag (false by default).
  bool has_info() const { return m_has_info; }

  // True once set_methods has stored a method count.
  bool has_method_count() const { return m_method_count_set; }

  void set_methods(int n);

 private:
  std::string m_name;
  bool m_has_info = false;
  bool m_method_count_set = false;
  int m_method_count = -1;  // -1 until set_methods stores a value
};
|
||||
|
||||
#endif // JAK_DISASSEMBLER_GOALTYPE_H
|
98
decompiler/TypeSystem/TypeInfo.cpp
Normal file
98
decompiler/TypeSystem/TypeInfo.cpp
Normal file
@ -0,0 +1,98 @@
|
||||
#include "TypeInfo.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
namespace {
|
||||
TypeInfo gTypeInfo;
|
||||
}
|
||||
|
||||
/*!
 * Start with "type" registered both as a type and as a symbol (with no type info).
 */
TypeInfo::TypeInfo() {
  m_types["type"] = GoalType("type");
  m_symbols["type"] = GoalSymbol("type");
}
|
||||
|
||||
/*!
 * Access the file-local TypeInfo instance.
 */
TypeInfo& get_type_info() {
  return gTypeInfo;
}
|
||||
|
||||
std::string TypeInfo::get_summary() {
|
||||
int total_symbols = 0;
|
||||
int syms_with_type_info = 0;
|
||||
for (const auto& kv : m_symbols) {
|
||||
total_symbols++;
|
||||
if (kv.second.has_type_info()) {
|
||||
syms_with_type_info++;
|
||||
}
|
||||
}
|
||||
|
||||
int total_types = 0;
|
||||
int types_with_info = 0;
|
||||
int types_with_method_count = 0;
|
||||
for (const auto& kv : m_types) {
|
||||
total_types++;
|
||||
if (kv.second.has_info()) {
|
||||
types_with_info++;
|
||||
}
|
||||
if (kv.second.has_method_count()) {
|
||||
types_with_method_count++;
|
||||
}
|
||||
}
|
||||
|
||||
char buffer[1024];
|
||||
sprintf(buffer,
|
||||
"TypeInfo Summary\n"
|
||||
" Total Symbols: %d\n"
|
||||
" with type info: %d (%.2f%%)\n"
|
||||
" Total Types: %d\n"
|
||||
" with info: %d (%.2f%%)\n"
|
||||
" with method count: %d (%.2f%%)\n",
|
||||
total_symbols, syms_with_type_info, 100.f * float(syms_with_type_info) / float(total_symbols),
|
||||
total_types, types_with_info, 100.f * float(types_with_info) / float(total_types),
|
||||
types_with_method_count, 100.f * float(types_with_method_count) / float(total_types));
|
||||
|
||||
return {buffer};
|
||||
}
|
||||
|
||||
/*!
|
||||
* inform TypeInfo that there is a symbol with this name.
|
||||
* Provides no type info - if some is already known there is no change.
|
||||
*/
|
||||
void TypeInfo::inform_symbol_with_no_type_info(const std::string& name) {
|
||||
if (m_symbols.find(name) == m_symbols.end()) {
|
||||
// only add it if we haven't seen this already.
|
||||
GoalSymbol sym(name);
|
||||
m_symbols[name] = sym;
|
||||
}
|
||||
}
|
||||
|
||||
void TypeInfo::inform_symbol(const std::string &name, TypeSpec type) {
|
||||
inform_symbol_with_no_type_info(name);
|
||||
m_symbols.at(name).set_type(std::move(type));
|
||||
}
|
||||
|
||||
void TypeInfo::inform_type(const std::string& name) {
|
||||
if (m_types.find(name) == m_types.end()) {
|
||||
GoalType typ(name);
|
||||
m_types[name] = typ;
|
||||
}
|
||||
inform_symbol(name, TypeSpec("type"));
|
||||
}
|
||||
|
||||
void TypeInfo::inform_type_method_count(const std::string& name, int methods) {
|
||||
// create type and symbol
|
||||
inform_type(name);
|
||||
m_types.at(name).set_methods(methods);
|
||||
}
|
||||
|
||||
std::string TypeInfo::get_all_symbols_debug() {
|
||||
std::string result = "const char* all_syms[" + std::to_string(m_symbols.size()) + "] = {";
|
||||
for(auto& x : m_symbols) {
|
||||
result += "\"" + x.first + "\",";
|
||||
}
|
||||
if(!result.empty()) {
|
||||
result.pop_back();
|
||||
}
|
||||
return result + "};";
|
||||
}
|
30
decompiler/TypeSystem/TypeInfo.h
Normal file
30
decompiler/TypeSystem/TypeInfo.h
Normal file
@ -0,0 +1,30 @@
|
||||
#ifndef JAK_DISASSEMBLER_TYPEINFO_H
|
||||
#define JAK_DISASSEMBLER_TYPEINFO_H
|
||||
|
||||
#include <unordered_map>
|
||||
#include "GoalType.h"
|
||||
#include "GoalFunction.h"
|
||||
#include "GoalSymbol.h"
|
||||
|
||||
class TypeInfo {
|
||||
public:
|
||||
TypeInfo();
|
||||
|
||||
void inform_symbol(const std::string& name, TypeSpec type);
|
||||
void inform_symbol_with_no_type_info(const std::string& name);
|
||||
void inform_type(const std::string& name);
|
||||
void inform_type_method_count(const std::string& name, int methods);
|
||||
|
||||
std::string get_summary();
|
||||
std::string get_all_symbols_debug();
|
||||
|
||||
private:
|
||||
std::unordered_map<std::string, GoalType> m_types;
|
||||
std::unordered_map<std::string, GoalFunction> m_global_functions;
|
||||
std::unordered_map<std::string, GoalSymbol> m_symbols;
|
||||
};
|
||||
|
||||
TypeInfo& get_type_info();
|
||||
void init_type_info();
|
||||
|
||||
#endif // JAK_DISASSEMBLER_TYPEINFO_H
|
51
decompiler/TypeSystem/TypeSpec.cpp
Normal file
51
decompiler/TypeSystem/TypeSpec.cpp
Normal file
@ -0,0 +1,51 @@
|
||||
#include "TypeSpec.h"
|
||||
|
||||
std::string TypeSpec::to_string() const {
|
||||
if (m_args.empty()) {
|
||||
return m_base_type;
|
||||
} else {
|
||||
std::string result = "(";
|
||||
result += m_base_type;
|
||||
for (const auto& x : m_args) {
|
||||
result += " ";
|
||||
result += x.to_string();
|
||||
}
|
||||
result += ")";
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<Form> TypeSpec::to_form() const {
|
||||
if (m_args.empty()) {
|
||||
return toForm(m_base_type);
|
||||
} else {
|
||||
std::vector<std::shared_ptr<Form>> all;
|
||||
all.push_back(toForm(m_base_type));
|
||||
for (const auto& x : m_args) {
|
||||
all.push_back(x.to_form());
|
||||
}
|
||||
return buildList(all);
|
||||
}
|
||||
}
|
||||
|
||||
/*!
 * Two TypeSpecs are equal when their base types match and their argument lists have
 * the same length and are pairwise equal.
 */
bool TypeSpec::operator==(const TypeSpec& other) const {
  // vector equality checks the sizes first and then compares element by element
  return m_base_type == other.m_base_type && m_args == other.m_args;
}
|
||||
|
||||
/*!
 * Exact negation of operator==.
 */
bool TypeSpec::operator!=(const TypeSpec& other) const {
  const bool same = operator==(other);
  return !same;
}
|
25
decompiler/TypeSystem/TypeSpec.h
Normal file
25
decompiler/TypeSystem/TypeSpec.h
Normal file
@ -0,0 +1,25 @@
|
||||
#ifndef JAK_DISASSEMBLER_TYPESPEC_H
|
||||
#define JAK_DISASSEMBLER_TYPESPEC_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "decompiler/util/LispPrint.h"
|
||||
|
||||
class TypeSpec {
|
||||
public:
|
||||
TypeSpec() = default;
|
||||
explicit TypeSpec(std::string base_type) : m_base_type(std::move(base_type)) { }
|
||||
TypeSpec(std::string base_type, std::vector<TypeSpec> args) : m_base_type(std::move(base_type)), m_args(std::move(args)) { }
|
||||
|
||||
std::string to_string() const;
|
||||
std::shared_ptr<Form> to_form() const;
|
||||
|
||||
bool operator==(const TypeSpec& other) const;
|
||||
bool operator!=(const TypeSpec& other) const;
|
||||
|
||||
private:
|
||||
std::string m_base_type;
|
||||
std::vector<TypeSpec> m_args;
|
||||
};
|
||||
|
||||
#endif // JAK_DISASSEMBLER_TYPESPEC_H
|
32
decompiler/config.cpp
Normal file
32
decompiler/config.cpp
Normal file
@ -0,0 +1,32 @@
|
||||
#include "config.h"
|
||||
#include "third-party/json.hpp"
|
||||
#include "util/FileIO.h"
|
||||
|
||||
// The configuration object that set_config() fills in and get_config() hands out.
Config gConfig;

/*!
 * Access the global configuration.
 */
Config& get_config() {
  return gConfig;
}
|
||||
|
||||
void set_config(const std::string& path_to_config_file) {
|
||||
auto config_str = read_text_file(path_to_config_file);
|
||||
// to ignore comments in json, which may be useful
|
||||
auto cfg = nlohmann::json::parse(config_str, nullptr, true, true);
|
||||
|
||||
gConfig.game_version = cfg.at("game_version").get<int>();
|
||||
gConfig.dgo_names = cfg.at("dgo_names").get<std::vector<std::string>>();
|
||||
gConfig.write_disassembly = cfg.at("write_disassembly").get<bool>();
|
||||
gConfig.write_hexdump = cfg.at("write_hexdump").get<bool>();
|
||||
gConfig.write_scripts = cfg.at("write_scripts").get<bool>();
|
||||
gConfig.write_hexdump_on_v3_only = cfg.at("write_hexdump_on_v3_only").get<bool>();
|
||||
gConfig.disassemble_objects_without_functions =
|
||||
cfg.at("disassemble_objects_without_functions").get<bool>();
|
||||
gConfig.find_basic_blocks = cfg.at("find_basic_blocks").get<bool>();
|
||||
gConfig.write_hex_near_instructions = cfg.at("write_hex_near_instructions").get<bool>();
|
||||
|
||||
std::vector<std::string> asm_functions_by_name =
|
||||
cfg.at("asm_functions_by_name").get<std::vector<std::string>>();
|
||||
for (const auto& x : asm_functions_by_name) {
|
||||
gConfig.asm_functions_by_name.insert(x);
|
||||
}
|
||||
}
|
25
decompiler/config.h
Normal file
25
decompiler/config.h
Normal file
@ -0,0 +1,25 @@
|
||||
#ifndef JAK2_DISASSEMBLER_CONFIG_H
|
||||
#define JAK2_DISASSEMBLER_CONFIG_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <unordered_set>
|
||||
|
||||
/*!
 * Settings loaded by set_config(). Each member corresponds to the config file key of
 * the same name.
 */
struct Config {
  int game_version{-1};  // -1 until a config file is loaded
  std::vector<std::string> dgo_names;

  // output / analysis switches, all off by default
  bool write_disassembly{false};
  bool write_hexdump{false};
  bool write_scripts{false};
  bool write_hexdump_on_v3_only{false};
  bool disassemble_objects_without_functions{false};
  bool find_basic_blocks{false};
  bool write_hex_near_instructions{false};

  std::unordered_set<std::string> asm_functions_by_name;
  // ...
};
|
||||
|
||||
Config& get_config();
|
||||
void set_config(const std::string& path_to_config_file);
|
||||
|
||||
#endif // JAK2_DISASSEMBLER_CONFIG_H
|
37
decompiler/config/jak1_ntsc_black_label.jsonc
Normal file
37
decompiler/config/jak1_ntsc_black_label.jsonc
Normal file
@ -0,0 +1,37 @@
|
||||
|
||||
|
||||
{
|
||||
"game_version":1,
|
||||
// the order here matters. KERNEL and GAME should go first
|
||||
"dgo_names":["CGO/KERNEL.CGO"
|
||||
, "CGO/GAME.CGO", "CGO/ENGINE.CGO"
|
||||
, "CGO/ART.CGO", "DGO/BEA.DGO", "DGO/CIT.DGO", "CGO/COMMON.CGO", "DGO/DAR.DGO", "DGO/DEM.DGO",
|
||||
"DGO/FIN.DGO", "DGO/INT.DGO", "DGO/JUB.DGO", "DGO/JUN.DGO", "CGO/JUNGLE.CGO", "CGO/L1.CGO", "DGO/FIC.DGO",
|
||||
"DGO/LAV.DGO", "DGO/MAI.DGO", "CGO/MAINCAVE.CGO", "DGO/MIS.DGO", "DGO/OGR.DGO", "CGO/RACERP.CGO", "DGO/ROB.DGO", "DGO/ROL.DGO",
|
||||
"DGO/SNO.DGO", "DGO/SUB.DGO", "DGO/SUN.DGO", "CGO/SUNKEN.CGO", "DGO/SWA.DGO", "DGO/TIT.DGO", "DGO/TRA.DGO", "DGO/VI1.DGO",
|
||||
"DGO/VI2.DGO", "DGO/VI3.DGO", "CGO/VILLAGEP.CGO", "CGO/WATER-AN.CGO"],
|
||||
|
||||
"write_disassembly":true,
|
||||
"write_hex_near_instructions":false,
|
||||
// if false, skips disassembling object files without functions, as these are usually large and not interesting yet.
|
||||
"disassemble_objects_without_functions":false,
|
||||
|
||||
// to write out data of each object file
|
||||
"write_hexdump":false,
|
||||
// to write out hexdump on the v3 only, to avoid the huge level data files
|
||||
"write_hexdump_on_v3_only":true,
|
||||
|
||||
// to write out "scripts", which are currently just all the linked lists found
|
||||
"write_scripts":false,
|
||||
|
||||
// Experimental Stuff
|
||||
"find_basic_blocks":true,
|
||||
|
||||
"asm_functions_by_name":[
|
||||
// gcommon
|
||||
"ash", "abs", "min", "max", "collide-do-primitives", "draw-bones-check-longest-edge-asm",
|
||||
"sp-launch-particles-var", "(method 15 collide-shape-prim-mesh)", "(method 15 collide-shape-prim-sphere)",
|
||||
"(method 45 collide-shape)", "cam-layout-save-cam-trans", "kernel-copy-function", "dma-sync-hang", "generic-no-light-dproc", "dma-sync-fast", "bsp-camera-asm",
|
||||
"generic-none-dma-wait", "unpack-comp-rle", "level-remap-texture", "(method 10 collide-edge-hold-list)"
|
||||
]
|
||||
}
|
43
decompiler/config/jak2_ntsc_v1.jsonc
Normal file
43
decompiler/config/jak2_ntsc_v1.jsonc
Normal file
@ -0,0 +1,43 @@
|
||||
{
|
||||
"game_version":2,
|
||||
|
||||
"dgo_names":["ART.CGO", "ATE.DGO", "ATO.DGO", "CAB.DGO", "CAP.DGO", "CAS.DGO", "CASCITY.DGO", "CASEXT.DGO",
|
||||
"CFA.DGO", "CFB.DGO", "CGA.DGO", "CGB.DGO", "CGC.DGO", "CIA.DGO", "CIB.DGO", "CMA.DGO",
|
||||
"CMB.DGO", "COA.DGO", "COB.DGO", "COMMON.CGO", "CPA.DGO", "CPO.DGO", "CTA.DGO", "CTB.DGO",
|
||||
"CTC.DGO", "CTYASHA.DGO", "CTYKORA.DGO", "CWI.DGO", "D3A.DGO", "D3B.DGO", "DEMO.DGO", "DG1.DGO",
|
||||
"DMI.DGO", "DRB.DGO", "DRI.DGO", "DRILLMTN.DGO", "ENGINE.CGO", "FDA.DGO", "FDB.DGO", "FEA.DGO",
|
||||
"FEB.DGO", "FOB.DGO", "FOR.DGO", "FORDUMPC.DGO", "FORDUMPD.DGO", "FRA.DGO", "FRB.DGO", "GAME.CGO",
|
||||
"GARAGE.DGO", "GGA.DGO", "HALFPIPE.DGO", "HIDEOUT.DGO", "HIPHOG.DGO", "INTROCST.DGO", "KERNEL.CGO", "KIOSK.DGO",
|
||||
"LASHGRD.DGO", "LASHTHRN.DGO", "LBBUSH.DGO", "LBOMBBOT.DGO", "LBRNERMK.DGO", "LCGUARD.DGO", "LCITYLOW.DGO", "LDJAKBRN.DGO",
|
||||
"LERBRNGD.DGO", "LERLCHAL.DGO", "LERLTESS.DGO", "LERROL.DGO", "LGARCSTA.DGO", "LGUARD.DGO", "LHELLDOG.DGO", "LHIPOUT.DGO",
|
||||
"LINTCSTB.DGO", "LJAKDAX.DGO", "LJKDXASH.DGO", "LKEIRIFT.DGO", "LKIDDOGE.DGO", "LMEETBRT.DGO", "LOUTCSTB.DGO", "LPACKAGE.DGO",
|
||||
"LPORTRUN.DGO", "LPOWER.DGO", "LPROTECT.DGO", "LPRSNCST.DGO", "LPRTRACE.DGO", "LRACEBB.DGO", "LRACEBF.DGO", "LRACECB.DGO",
|
||||
"LRACECF.DGO", "LRACEDB.DGO", "LRACEDF.DGO", "LRACELIT.DGO", "LSACK.DGO", "LSAMERGD.DGO", "LSHUTTLE.DGO", "LSMYSBRT.DGO",
|
||||
"LTENTOB.DGO", "LTENTOUT.DGO", "LTESS.DGO", "LTHRNOUT.DGO", "LTRNKRKD.DGO", "LTRNTESS.DGO", "LTRNYSAM.DGO", "LWHACK.DGO",
|
||||
"LWIDEA.DGO", "LWIDEB.DGO", "LWIDEC.DGO", "LWIDESTA.DGO", "LYSAMSAM.DGO", "LYSKDCD.DGO", "MCN.DGO", "MTN.DGO",
|
||||
"MTX.DGO", "NEB.DGO", "NES.DGO", "NESTT.DGO", "ONINTENT.DGO", "ORACLE.DGO", "OUTROCST.DGO", "PAC.DGO",
|
||||
"PAE.DGO", "PALBOSS.DGO", "PALOUT.DGO", "PAR.DGO", "PAS.DGO", "PORTWALL.DGO", "PRI.DGO", "RUI.DGO",
|
||||
"SAG.DGO", "SEB.DGO", "SEW.DGO", "SKA.DGO", "STA.DGO", "STADBLMP.DGO", "STB.DGO", "STC.DGO",
|
||||
"STD.DGO", "STR.DGO", "SWB.DGO", "SWE.DGO", "TBO.DGO", "THR.DGO", "TITLE.DGO", "TOA.DGO",
|
||||
"TOB.DGO", "TOC.DGO", "TOD.DGO", "TOE.DGO", "TOMBEXT.DGO", "UNB.DGO", "UND.DGO", "VI1.DGO",
|
||||
"VIN.DGO"],
|
||||
|
||||
// to write out disassembled functions in .func files
|
||||
"write_disassembly":true,
|
||||
"write_hex_near_instructions":false,
|
||||
// if false, skips disassembling object files without functions, as these are usually large and not interesting yet.
|
||||
"disassemble_objects_without_functions":false,
|
||||
|
||||
// to write out data of each object file
|
||||
"write_hexdump":false,
|
||||
// to write out hexdump on the v3 only, to avoid the huge level data files
|
||||
"write_hexdump_on_v3_only":true,
|
||||
|
||||
// to write out "scripts", which are currently just all the linked lists found
|
||||
"write_scripts":true,
|
||||
|
||||
|
||||
|
||||
// Experimental Stuff
|
||||
"find_basic_blocks":true
|
||||
}
|
56
decompiler/config/jak3_ntsc.jsonc
Normal file
56
decompiler/config/jak3_ntsc.jsonc
Normal file
@ -0,0 +1,56 @@
|
||||
{
|
||||
"game_version":3,
|
||||
"dgo_names":["ARENACST.DGO", "ART.CGO", "CFA.DGO", "CFB.DGO", "CGB.DGO", "CIA.DGO", "CIB.DGO", "CITYCAST.DGO",
|
||||
"COMBA.DGO", "COMBB.DGO", "COMBC.DGO", "COMBD.DGO", "COMBE.DGO", "COMBN.DGO", "COMBX.DGO", "COMMON.CGO",
|
||||
"CPO.DGO", "CTA.DGO", "CTB.DGO", "CTC.DGO", "CTYCARA.DGO", "CTYCARB.DGO", "CTYCARC.DGO", "CTYCARKG.DGO",
|
||||
"CTYPEPA.DGO", "CTYPEPB.DGO", "CTYPEPC.DGO", "CTYPESA.DGO", "CTYPESB.DGO", "CTYPESC.DGO", "CWI.DGO", "DESA.DGO",
|
||||
"DESB.DGO", "DESBATTL.DGO", "DESBCST.DGO", "DESBOSS1.DGO", "DESBOSS2.DGO", "DESC.DGO", "DESCHASE.DGO", "DESD.DGO",
|
||||
"DESE.DGO", "DESERROL.DGO", "DESF.DGO", "DESG.DGO", "DESH.DGO", "DESHOVER.DGO", "DESHUNT.DGO", "DESINTER.DGO",
|
||||
"DESJUMP.DGO", "DESLIZ.DGO", "DESOASIS.DGO", "DESRACE1.DGO", "DESRACE2.DGO", "DESRALLY.DGO", "DESRESC.DGO", "DESRESCC.DGO",
|
||||
"DESRESCG.DGO", "DESTRACK.DGO", "DESW.DGO", "DST.DGO", "ENGINE.CGO", "FACB.DGO", "FACC.DGO", "FACD.DGO",
|
||||
"FACTORYA.DGO", "FREECAST.DGO", "FREEHQ.DGO", "FRSTA.DGO", "FRSTB.DGO", "FRSTX.DGO", "GAME.CGO", "GGA.DGO",
|
||||
"GRIDCST.DGO", "GUNGAME1.DGO", "GUNGAME2.DGO", "HALFPIPE.DGO", "HGA.DGO", "HGB.DGO", "HHG.DGO", "INTPALRF.DGO",
|
||||
"INTROCST.DGO", "INTTITLE.DGO", "IPF.DGO", "KERNEL.CGO", "LASHELIN.DGO", "LBBRING1.DGO", "LBBRING2.DGO", "LBBRING3.DGO",
|
||||
"LBBRING4.DGO", "LBBRING5.DGO", "LBBRING6.DGO", "LBBSDRP1.DGO", "LBBSDRP2.DGO", "LBBSDRP3.DGO", "LBBSPID.DGO", "LBBSPIRT.DGO",
|
||||
"LBBSPRT2.DGO", "LBBSPRT3.DGO", "LBBTCHA1.DGO", "LBBTCHA2.DGO", "LBBTCHA3.DGO", "LBIPED.DGO", "LBLOWCST.DGO", "LBLOWTKG.DGO",
|
||||
"LBLOWTMH.DGO", "LBOMBBOT.DGO", "LCITYSML.DGO", "LCTYASS.DGO", "LCTYBLOW.DGO", "LCTYDEST.DGO", "LCTYHIJK.DGO", "LCTYPALT.DGO",
|
||||
"LCTYPATK.DGO", "LCTYPROT.DGO", "LCTYSNPR.DGO", "LDAMKLEV.DGO", "LDAMPECK.DGO", "LDAMPKSM.DGO", "LDAMSIG.DGO", "LDAX.DGO",
|
||||
"LDESGCST.DGO", "LDMPCKGN.DGO", "LERROL.DGO", "LFACB.DGO", "LFACCAR.DGO", "LFACCITY.DGO", "LFACO.DGO", "LFACRM1.DGO",
|
||||
"LFACRM2.DGO", "LFACTORY.DGO", "LFORM.DGO", "LFORP.DGO", "LFORRING.DGO", "LFREEOUT.DGO", "LGUNNORM.DGO", "LGUNRNC.DGO",
|
||||
"LJAK.DGO", "LJAKC.DGO", "LJAKCKLV.DGO", "LJAKKLEV.DGO", "LJAKNDAX.DGO", "LJAKSIG.DGO", "LJINX.DGO", "LJKCDMKL.DGO",
|
||||
"LJKDMPK.DGO", "LJKDXVIN.DGO", "LJKFEET.DGO", "LJNDKLEV.DGO", "LKEIRA.DGO", "LKLEEVER.DGO", "LMECH.DGO", "LMHCA.DGO",
|
||||
"LMHCB.DGO", "LNSTCST.DGO", "LNSTOA.DGO", "LNSTOBB.DGO", "LNSTOBC.DGO", "LONINSIM.DGO", "LOUTRO.DGO", "LOUTRO2.DGO",
|
||||
"LOUTRO3.DGO", "LPATK.DGO", "LPATKCS.DGO", "LPRECC.DGO", "LPRENME.DGO", "LPTRL.DGO", "LSAMOS.DGO", "LSEEMWCA.DGO",
|
||||
"LSIG.DGO", "LSIGJAKC.DGO", "LSIGKLV.DGO", "LSNKWHLS.DGO", "LTNFXHIP.DGO", "LTNJXHIP.DGO", "LTORN.DGO", "LTORNJNX.DGO",
|
||||
"LTORNSAM.DGO", "LTOWA.DGO", "LTOWB.DGO", "LTOWCITY.DGO", "LTRTWHLS.DGO", "LVINCST.DGO", "LWASBBV.DGO", "LWASSIG.DGO",
|
||||
"LWLANDM.DGO", "LWSTDPCK.DGO", "MHCA.DGO", "MHCB.DGO", "MHCTYCST.DGO", "MIA.DGO", "MIB.DGO", "MIC.DGO",
|
||||
"MINED.DGO", "MINEE.DGO", "MUSEUM.DGO", "MUSEUM2.DGO", "MUSEUM3.DGO", "MUSEUM3B.DGO", "MUSEUM4.DGO", "MUSEUM4B.DGO",
|
||||
"NSA.DGO", "NSB.DGO", "OASISCST.DGO", "ONINTENT.DGO", "OUTCAST3.DGO", "OUTROCST.DGO", "POWERGD.DGO", "PRECA.DGO",
|
||||
"PRECB.DGO", "PRECC.DGO", "PRECD.DGO", "RAILA.DGO", "RAILB.DGO", "RAILB2.DGO", "RAILC.DGO", "RAILCST.DGO",
|
||||
"RAILD.DGO", "RAILE.DGO", "RAILF.DGO", "RAILX.DGO", "RBCT.DGO", "RUBA.DGO", "RUBA2.DGO", "RUBB.DGO",
|
||||
"RUBC.DGO", "SEA.DGO", "SEB.DGO", "SEC.DGO", "SED.DGO", "SEE.DGO", "SEF.DGO", "SEG.DGO",
|
||||
"SEH.DGO", "SEI.DGO", "SEJ.DGO", "SEK.DGO", "SEL.DGO", "SEM.DGO", "SEN.DGO", "SEO.DGO",
|
||||
"SLUMBSET.DGO", "STA.DGO", "STAA.DGO", "STB.DGO", "TEMA.DGO", "TEMB.DGO", "TEMC.DGO", "TEMD.DGO",
|
||||
"TEMP.DGO", "TEMPLEE.DGO", "TEMX.DGO", "TITLE.DGO", "TOWB.DGO", "TOWERA.DGO", "TOWERC.DGO", "TOWERCST.DGO",
|
||||
"VIN.DGO", "VOCA.DGO", "VOCX.DGO", "WARPCAST.DGO", "WASALL.DGO", "WASCAST.DGO", "WASCHASE.DGO", "WASDEFEN.DGO",
|
||||
"WASLEAPR.DGO", "WASPALA.DGO", "WASPGAME.DGO", "WASSEEM.DGO", "WASSTADA.DGO", "WASSTADB.DGO", "WASSTADC.DGO", "WCA.DGO",
|
||||
"WCASEEM.DGO", "WCB.DGO", "WIN.DGO", "WSD.DGO", "WWD.DGO"],
|
||||
|
||||
// to write out disassembled functions in .func files
|
||||
"write_disassembly":true,
|
||||
"write_hex_near_instructions":false,
|
||||
// if false, skips disassembling object files without functions, as these are usually large and not interesting yet.
|
||||
"disassemble_objects_without_functions":false,
|
||||
|
||||
// to write out data of each object file
|
||||
"write_hexdump":false,
|
||||
// to write out hexdump on the v3 only, to avoid the huge level data files
|
||||
"write_hexdump_on_v3_only":true,
|
||||
|
||||
// to write out "scripts", which are currently just all the linked lists found
|
||||
"write_scripts":true,
|
||||
|
||||
|
||||
// Experimental Stuff
|
||||
"find_basic_blocks":true
|
||||
}
|
53
decompiler/main.cpp
Normal file
53
decompiler/main.cpp
Normal file
@ -0,0 +1,53 @@
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "ObjectFile/ObjectFileDB.h"
|
||||
#include "config.h"
|
||||
#include "util/FileIO.h"
|
||||
#include "TypeSystem/TypeInfo.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
printf("Jak Disassembler\n");
|
||||
init_crc();
|
||||
init_opcode_info();
|
||||
|
||||
if (argc != 4) {
|
||||
printf("usage: jak_disassembler <config_file> <in_folder> <out_folder>\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
set_config(argv[1]);
|
||||
std::string in_folder = argv[2];
|
||||
std::string out_folder = argv[3];
|
||||
|
||||
std::vector<std::string> dgos;
|
||||
for (const auto& dgo_name : get_config().dgo_names) {
|
||||
dgos.push_back(combine_path(in_folder, dgo_name));
|
||||
}
|
||||
|
||||
ObjectFileDB db(dgos);
|
||||
write_text_file(combine_path(out_folder, "dgo.txt"), db.generate_dgo_listing());
|
||||
|
||||
db.process_link_data();
|
||||
db.find_code();
|
||||
db.process_labels();
|
||||
|
||||
if (get_config().write_scripts) {
|
||||
db.find_and_write_scripts(out_folder);
|
||||
}
|
||||
|
||||
if (get_config().write_hexdump) {
|
||||
db.write_object_file_words(out_folder, get_config().write_hexdump_on_v3_only);
|
||||
}
|
||||
|
||||
db.analyze_functions();
|
||||
|
||||
if (get_config().write_disassembly) {
|
||||
db.write_disassembly(out_folder, get_config().disassemble_objects_without_functions);
|
||||
}
|
||||
|
||||
printf("%s\n", get_type_info().get_summary().c_str());
|
||||
// printf("%d\n", InstructionKind::EE_OP_MAX);
|
||||
// printf("%s\n", get_type_info().get_all_symbols_debug().c_str());
|
||||
return 0;
|
||||
}
|
28
decompiler/scripts/create_dgo_name_list.py
Executable file
28
decompiler/scripts/create_dgo_name_list.py
Executable file
@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import os
|
||||
|
||||
# Create a dgo_names = ["...."] json config entry text for a folder of DGOs.
|
||||
def main():
    """Print a ``"dgo_names":[...]`` JSON config entry for a folder of DGOs.

    The folder comes from the single positional command-line argument. File
    names matching ``*.*GO`` are sorted and written eight per line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(dest='folder', help='folder containing dgos')
    args = parser.parse_args()
    files = sorted(os.path.basename(x) for x in glob.glob(os.path.join(args.folder, "*.*GO")))

    # Separators are emitted *before* every entry except the first, so there is
    # never a trailing ", " (or line break) to strip afterwards. Stripping a
    # fixed two characters was wrong when the count was a multiple of eight and
    # when the folder held no matching files.
    pieces = []
    for index, file in enumerate(files):
        if index > 0:
            pieces.append(", ")
            if index % 8 == 0:
                pieces.append("\n ")
        pieces.append("\"" + file + "\"")

    dgo_names = "\"dgo_names\":[" + "".join(pieces) + "]\n"
    print(dgo_names)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
48
decompiler/util/BinaryReader.h
Normal file
48
decompiler/util/BinaryReader.h
Normal file
@ -0,0 +1,48 @@
|
||||
#ifndef JAK_V2_BINARYREADER_H
|
||||
#define JAK_V2_BINARYREADER_H
|
||||
|
||||
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>
|
||||
|
||||
/*!
 * Sequential reader over a byte buffer owned by the caller.
 * The reader keeps a pointer to the buffer and a seek offset; it never copies or frees the buffer,
 * so the buffer must outlive the reader.
 */
class BinaryReader {
 public:
  BinaryReader(uint8_t* _buffer, uint32_t _size) : buffer(_buffer), size(_size) {}

  explicit BinaryReader(std::vector<uint8_t>& _buffer)
      : buffer(_buffer.data()), size(static_cast<uint32_t>(_buffer.size())) {}

  /*!
   * Read a T at the current seek position and advance past it.
   * The bytes are copied out with memcpy, so the read is valid at any alignment.
   * T is expected to be a plain-data type that can be default constructed.
   */
  template <typename T>
  T read() {
    assert(seek + sizeof(T) <= size);
    T obj;
    std::memcpy(&obj, buffer + seek, sizeof(T));
    seek += sizeof(T);
    return obj;
  }

  /*!
   * Skip forward by amount bytes. Asserts that the new position is still inside the buffer.
   */
  void ffwd(int amount) {
    seek += amount;
    assert(seek <= size);
  }

  /*!
   * Number of bytes between the seek position and the end of the buffer.
   */
  uint32_t bytes_left() const { return size - seek; }

  /*!
   * Pointer to the byte at the current seek position.
   */
  uint8_t* here() { return buffer + seek; }

  /*!
   * Current offset from the start of the buffer, in bytes.
   */
  uint32_t get_seek() const { return seek; }

 private:
  uint8_t* buffer;
  uint32_t size;
  uint32_t seek = 0;
};
|
||||
|
||||
|
||||
#endif //JAK_V2_BINARYREADER_H
|
82
decompiler/util/FileIO.cpp
Normal file
82
decompiler/util/FileIO.cpp
Normal file
@ -0,0 +1,82 @@
|
||||
#include "FileIO.h"
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <cassert>
|
||||
|
||||
/*!
 * Read the whole file at path in text mode and return its contents.
 * Nothing is reported if the file cannot be opened; the result is then an empty string.
 */
std::string read_text_file(const std::string& path) {
  std::ifstream input(path);
  std::stringstream contents;
  contents << input.rdbuf();
  return contents.str();
}
|
||||
|
||||
/*!
 * Join parent and child with a single '/' between them. No normalization is done.
 */
std::string combine_path(const std::string& parent, const std::string& child) {
  std::string joined(parent);
  joined += "/";
  joined += child;
  return joined;
}
|
||||
|
||||
/*!
 * Read an entire file into a byte vector.
 * Throws std::runtime_error if the file cannot be opened, its size cannot be determined, or the
 * contents cannot be read. An empty file yields an empty vector.
 */
std::vector<uint8_t> read_binary_file(const std::string& filename) {
  // The stream closes itself on every exit path, including the throws below.
  std::ifstream file(filename, std::ios::binary | std::ios::ate);
  if (!file) throw std::runtime_error("File " + filename + " cannot be opened");

  // Opened at the end, so the current position is the file length.
  const std::streamoff len = file.tellg();
  if (len < 0) throw std::runtime_error("File " + filename + " cannot be read");
  file.seekg(0, std::ios::beg);

  std::vector<uint8_t> data;
  data.resize(static_cast<size_t>(len));

  // Skip the read for empty files: there is nothing to fetch and data.data() may be null.
  if (len > 0 && !file.read(reinterpret_cast<char*>(data.data()), len)) {
    throw std::runtime_error("File " + filename + " cannot be read");
  }

  return data;
}
|
||||
|
||||
/*!
 * Return the part of filename after its last '/' separator.
 * The final character is never treated as a separator, so "dir/" is returned unchanged.
 * If no separator is found the whole name is returned. filename must not be empty.
 */
std::string base_name(const std::string& filename) {
  assert(!filename.empty());

  // A one-character name has nothing before its final character to search.
  if (filename.size() < 2) {
    return filename;
  }

  // Search backwards, starting one position before the final character.
  const size_t slash = filename.rfind('/', filename.size() - 2);
  if (slash == std::string::npos) {
    return filename;
  }
  return filename.substr(slash + 1);
}
|
||||
|
||||
// Set once init_crc() has filled crc_table; crc32() asserts on it.
static bool sInitCrc = false;
// One precomputed remainder per possible byte value.
static uint32_t crc_table[0x100];

/*!
 * Build the CRC lookup table for polynomial 0x04c11db7. Must be called before crc32().
 */
void init_crc() {
  const uint32_t polynomial = 0x04c11db7u;
  for (uint32_t byte = 0; byte < 0x100; byte++) {
    uint32_t remainder = byte << 24u;
    uint32_t bits_left = 8;
    while (bits_left-- > 0) {
      const bool top_bit_set = (remainder & 0x80000000u) != 0;
      remainder <<= 1u;
      if (top_bit_set) {
        remainder ^= polynomial;
      }
    }
    crc_table[byte] = remainder;
  }
  sInitCrc = true;
}

/*!
 * Compute the checksum of size bytes starting at data. The running value starts at zero and the
 * final value is bit-inverted before being returned.
 */
uint32_t crc32(const uint8_t* data, size_t size) {
  assert(sInitCrc);
  uint32_t crc = 0;
  const uint8_t* const end = data + size;
  for (const uint8_t* cursor = data; cursor != end; ++cursor) {
    crc = crc_table[crc >> 24u] ^ ((crc << 8u) | *cursor);
  }
  return ~crc;
}

/*!
 * Convenience overload: checksum of all bytes in a vector.
 */
uint32_t crc32(const std::vector<uint8_t>& data) {
  const uint8_t* bytes = data.data();
  return crc32(bytes, data.size());
}
|
||||
|
||||
/*!
 * Write text followed by a newline to file_name, replacing any existing contents.
 * Prints a message and throws std::runtime_error if the file cannot be opened.
 */
void write_text_file(const std::string& file_name, const std::string& text) {
  FILE* out = fopen(file_name.c_str(), "w");
  if (out == nullptr) {
    printf("Failed to fopen %s\n", file_name.c_str());
    throw std::runtime_error("Failed to open file");
  }
  fputs(text.c_str(), out);
  fputc('\n', out);
  fclose(out);
}
|
17
decompiler/util/FileIO.h
Normal file
17
decompiler/util/FileIO.h
Normal file
@ -0,0 +1,17 @@
|
||||
#ifndef JAK_V2_FILEIO_H
|
||||
#define JAK_V2_FILEIO_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
std::string read_text_file(const std::string& path);
|
||||
std::string combine_path(const std::string& parent, const std::string& child);
|
||||
std::vector<uint8_t> read_binary_file(const std::string& filename);
|
||||
std::string base_name(const std::string& filename);
|
||||
void write_text_file(const std::string& file_name, const std::string& text);
|
||||
|
||||
void init_crc();
|
||||
uint32_t crc32(const uint8_t* data, size_t size);
|
||||
uint32_t crc32(const std::vector<uint8_t>& data);
|
||||
|
||||
#endif //JAK_V2_FILEIO_H
|
514
decompiler/util/LispPrint.cpp
Normal file
514
decompiler/util/LispPrint.cpp
Normal file
@ -0,0 +1,514 @@
|
||||
#include "LispPrint.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
//////// HACK - the symbol table now looks up by string, which is wasteful because it stores
// every string twice (once as the map key and once as the interned value).
// It should probably just be removed.
|
||||
|
||||
/*!
|
||||
* String interning
|
||||
*/
|
||||
std::string* SymbolTable::intern(const std::string& str) {
|
||||
if (map.find(str) == map.end()) {
|
||||
auto* new_string = new std::string(str);
|
||||
map[str] = new_string;
|
||||
return new_string;
|
||||
} else {
|
||||
return map[str];
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Global interned string table
|
||||
*/
|
||||
SymbolTable gSymbolTable;
|
||||
|
||||
/*!
 * Create the table and the single shared Form that represents the empty list.
 */
SymbolTable::SymbolTable() : empty_pair(std::make_shared<Form>()) {
  empty_pair->kind = FormKind::EMPTY_LIST;
}
|
||||
|
||||
/*!
 * Free every string stored in the map.
 */
SymbolTable::~SymbolTable() {
  for (auto entry = map.begin(); entry != map.end(); ++entry) {
    delete entry->second;
  }
}
|
||||
|
||||
/*!
|
||||
* Convert a form to a one-line string.
|
||||
*/
|
||||
std::string Form::toStringSimple() {
|
||||
std::string result;
|
||||
buildStringSimple(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
void Form::buildStringSimple(std::string &str) {
|
||||
std::vector<FormToken> tokens;
|
||||
toTokenList(tokens);
|
||||
for(auto& token : tokens) {
|
||||
switch(token.kind) {
|
||||
case TokenKind::WHITESPACE:
|
||||
str.push_back(' ');
|
||||
break;
|
||||
case TokenKind::SYMBOL:
|
||||
str.append(*token.str);
|
||||
break;
|
||||
case TokenKind::OPEN_PAREN:
|
||||
str.push_back('(');
|
||||
break;
|
||||
case TokenKind::DOT:
|
||||
str.push_back('.');
|
||||
break;
|
||||
case TokenKind::CLOSE_PAREN:
|
||||
str.push_back(')');
|
||||
break;
|
||||
case TokenKind::EMPTY_PAIR:
|
||||
str.append("()");
|
||||
break;
|
||||
case TokenKind::SPECIAL_SYMBOL:
|
||||
str.append(*token.str);
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error("buildStringSimple unknown token kind");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Form::toTokenList(std::vector<FormToken> &tokens) {
|
||||
switch(kind) {
|
||||
case FormKind::SYMBOL:
|
||||
tokens.emplace_back(TokenKind::SYMBOL, symbol);
|
||||
break;
|
||||
case FormKind::PAIR:
|
||||
{
|
||||
tokens.emplace_back(TokenKind::OPEN_PAREN);
|
||||
Form* toPrint = this;
|
||||
for(;;) {
|
||||
if(toPrint->kind == FormKind::PAIR) {
|
||||
toPrint->pair[0]->toTokenList(tokens); // print CAR
|
||||
toPrint = toPrint->pair[1].get();
|
||||
if(toPrint->kind == FormKind::EMPTY_LIST) {
|
||||
tokens.emplace_back(TokenKind::CLOSE_PAREN);
|
||||
return;
|
||||
} else {
|
||||
tokens.emplace_back(TokenKind::WHITESPACE);
|
||||
}
|
||||
} else { // not a proper list!
|
||||
tokens.emplace_back(TokenKind::DOT);
|
||||
tokens.emplace_back(TokenKind::WHITESPACE);
|
||||
toPrint->toTokenList(tokens);
|
||||
tokens.emplace_back(TokenKind::CLOSE_PAREN);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case FormKind::EMPTY_LIST:
|
||||
tokens.emplace_back(TokenKind::EMPTY_PAIR);
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error("unhandled form type in buildSimpleString");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////
|
||||
// Pretty Printer
|
||||
///////////////////
|
||||
|
||||
/*!
|
||||
* Linked list node representing a token in the output (whitespace, paren, newline, etc)
|
||||
*/
|
||||
struct PrettyPrinterNode {
|
||||
FormToken* tok = nullptr; // if we aren't a newline, we will have a token.
|
||||
int line = -1; // line that token occurs on. undef for newlines
|
||||
int lineIndent = -1; // indent of line. only valid for first token in the line
|
||||
int offset = -1; // offset of beginning of token from left margin
|
||||
int specialIndentDelta = 0;
|
||||
bool is_line_separator = false; // true if line separator (not a token)
|
||||
PrettyPrinterNode *next = nullptr, *prev = nullptr; // linked list
|
||||
PrettyPrinterNode *paren = nullptr; // pointer to open paren if in parens. open paren points to close and vice versa
|
||||
explicit PrettyPrinterNode(FormToken& _tok) {
|
||||
tok = &_tok;
|
||||
}
|
||||
PrettyPrinterNode() = default;
|
||||
};
|
||||
|
||||
/*!
|
||||
* Splice in a line break after the given node, it there isn't one already and if it isn't the last node.
|
||||
*/
|
||||
static void insertNewlineAfter(PrettyPrinterNode* node, int specialIndentDelta) {
|
||||
if(node->next && !node->next->is_line_separator) {
|
||||
auto* nl = new PrettyPrinterNode;
|
||||
auto* next = node->next;
|
||||
node->next = nl;
|
||||
nl->prev = node;
|
||||
nl->next = next;
|
||||
next->prev = nl;
|
||||
nl->is_line_separator = true;
|
||||
nl->specialIndentDelta = specialIndentDelta;
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Splice in a line break before the given node, if there isn't one already and if it isn't the first node.
|
||||
*/
|
||||
static void insertNewlineBefore(PrettyPrinterNode* node, int specialIndentDelta) {
|
||||
if(node->prev && !node->prev->is_line_separator) {
|
||||
auto* nl = new PrettyPrinterNode;
|
||||
auto* prev = node->prev;
|
||||
prev->next = nl;
|
||||
nl->prev = prev;
|
||||
nl->next = node;
|
||||
node->prev = nl;
|
||||
nl->is_line_separator = true;
|
||||
nl->specialIndentDelta = specialIndentDelta;
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Break a list across multiple lines. This is the fundamental reducing operation of this algorithm
|
||||
*/
|
||||
static void breakList(PrettyPrinterNode* leftParen) {
|
||||
assert(!leftParen->is_line_separator);
|
||||
assert(leftParen->tok->kind == TokenKind::OPEN_PAREN);
|
||||
auto* rp = leftParen->paren;
|
||||
assert(rp->tok->kind == TokenKind::CLOSE_PAREN);
|
||||
|
||||
for(auto* n = leftParen->next; n && n != rp; n = n->next) {
|
||||
if(!n->is_line_separator) {
|
||||
if(n->tok->kind == TokenKind::OPEN_PAREN) {
|
||||
n = n->paren;
|
||||
assert(n->tok->kind == TokenKind::CLOSE_PAREN);
|
||||
insertNewlineAfter(n, 0);
|
||||
} else if(n->tok->kind != TokenKind::WHITESPACE) {
|
||||
assert(n->tok->kind != TokenKind::CLOSE_PAREN);
|
||||
insertNewlineAfter(n, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Compute proper line numbers, offsets, and indents for a list of tokens with newlines
|
||||
* Will add newlines for close parens if needed.
|
||||
*/
|
||||
static PrettyPrinterNode* propagatePretty(PrettyPrinterNode* list, int line_length) {
|
||||
// propagate line numbers
|
||||
PrettyPrinterNode* rv = nullptr;
|
||||
int line = list->line;
|
||||
for(auto* n = list; n; n = n->next) {
|
||||
if(n->is_line_separator) {
|
||||
line++;
|
||||
} else {
|
||||
n->line = line;
|
||||
// add the weird newline.
|
||||
if(n->tok->kind == TokenKind::CLOSE_PAREN) {
|
||||
if(n->line != n->paren->line) {
|
||||
if(n->prev && !n->prev->is_line_separator) {
|
||||
insertNewlineBefore(n, 0);
|
||||
line++;
|
||||
}
|
||||
if(n->next && !n->next->is_line_separator) {
|
||||
insertNewlineAfter(n, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// compute offsets and indents
|
||||
std::vector<int> indentStack;
|
||||
indentStack.push_back(0);
|
||||
int offset = 0;
|
||||
PrettyPrinterNode* line_start = list;
|
||||
bool previous_line_sep = false;
|
||||
for(auto* n = list; n; n = n->next) {
|
||||
if(n->is_line_separator) {
|
||||
previous_line_sep = true;
|
||||
offset = indentStack.back() += n->specialIndentDelta;
|
||||
} else {
|
||||
if(previous_line_sep) {
|
||||
line_start = n;
|
||||
n->lineIndent = offset;
|
||||
previous_line_sep = false;
|
||||
}
|
||||
|
||||
n->offset = offset;
|
||||
offset += n->tok->toString().length();
|
||||
if(offset > line_length && !rv) rv = line_start;
|
||||
if(n->tok->kind == TokenKind::OPEN_PAREN) {
|
||||
if(!n->prev || n->prev->is_line_separator) {
|
||||
indentStack.push_back(offset + 1);
|
||||
} else {
|
||||
indentStack.push_back(offset - 1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if(n->tok->kind == TokenKind::CLOSE_PAREN) {
|
||||
indentStack.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the token on the start of the next line. nullptr if we're the last line.
|
||||
*/
|
||||
static PrettyPrinterNode* getNextLine(PrettyPrinterNode* start) {
|
||||
assert(!start->is_line_separator);
|
||||
int line = start->line;
|
||||
for(;;) {
|
||||
if(start->is_line_separator || start->line == line) {
|
||||
if(start->next)
|
||||
start = start->next;
|
||||
else
|
||||
return nullptr;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return start;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the next open paren on the current line (can start in the middle of line, not inclusive of start)
|
||||
* nullptr if there's no open parens on the rest of this line.
|
||||
*/
|
||||
static PrettyPrinterNode* getNextListOnLine(PrettyPrinterNode* start) {
|
||||
int line = start->line;
|
||||
assert(!start->is_line_separator);
|
||||
if(!start->next || start->next->is_line_separator) return nullptr;
|
||||
start = start->next;
|
||||
while(!start->is_line_separator && start->line == line) {
|
||||
if(start->tok->kind == TokenKind::OPEN_PAREN) return start;
|
||||
if(!start->next) return nullptr;
|
||||
start = start->next;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the first open paren on the current line (can start in the middle of line, inclusive of start)
|
||||
* nullptr if there's no open parens on the rest of this line
|
||||
*/
|
||||
static PrettyPrinterNode* getFirstListOnLine(PrettyPrinterNode* start) {
|
||||
int line = start->line;
|
||||
assert(!start->is_line_separator);
|
||||
while(!start->is_line_separator && start->line == line) {
|
||||
if(start->tok->kind == TokenKind::OPEN_PAREN) return start;
|
||||
if(!start->next) return nullptr;
|
||||
start = start->next;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the first token on the first line which exceeds the max length
|
||||
*/
|
||||
static PrettyPrinterNode* getFirstBadLine(PrettyPrinterNode* start, int line_length) {
|
||||
assert(!start->is_line_separator);
|
||||
int currentLine = start->line;
|
||||
auto* currentLineNode = start;
|
||||
for(;;) {
|
||||
if(start->is_line_separator) {
|
||||
assert(start->next);
|
||||
start = start->next;
|
||||
} else {
|
||||
if(start->line != currentLine) {
|
||||
currentLine = start->line;
|
||||
currentLineNode = start;
|
||||
}
|
||||
if(start->offset > line_length) {
|
||||
return currentLineNode;
|
||||
}
|
||||
if(!start->next) {
|
||||
return nullptr;
|
||||
}
|
||||
start = start->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Break insertion algorithm.
|
||||
*/
|
||||
static void insertBreaksAsNeeded(PrettyPrinterNode* head, int line_length) {
|
||||
PrettyPrinterNode* last_line_complete = nullptr;
|
||||
PrettyPrinterNode* line_to_start_line_search = head;
|
||||
|
||||
// loop over lines
|
||||
for(;;) {
|
||||
|
||||
// compute lines as needed
|
||||
propagatePretty(head, line_length);
|
||||
|
||||
// search for a bad line starting at the last line we fixed
|
||||
PrettyPrinterNode* candidate_line = getFirstBadLine(line_to_start_line_search, line_length);
|
||||
// if we got the same line we started on, this means we couldn't fix it.
|
||||
if(candidate_line == last_line_complete) {
|
||||
candidate_line = nullptr; // so we say our candidate was bad and try to find another
|
||||
PrettyPrinterNode* next_line = getNextLine(line_to_start_line_search);
|
||||
if(next_line) {
|
||||
candidate_line = getFirstBadLine(next_line, line_length);
|
||||
}
|
||||
}
|
||||
if(!candidate_line) break;
|
||||
|
||||
// okay, we have a line which needs fixing.
|
||||
assert(!candidate_line->prev || candidate_line->prev->is_line_separator);
|
||||
PrettyPrinterNode* form_to_start = getFirstListOnLine(candidate_line);
|
||||
for(;;) {
|
||||
if(!form_to_start) {
|
||||
printf("pretty printer has failed. Fix the bug or increase the the line length.\n");
|
||||
assert(false);
|
||||
}
|
||||
breakList(form_to_start);
|
||||
propagatePretty(head, line_length);
|
||||
if(getFirstBadLine(candidate_line, line_length) != candidate_line) {
|
||||
break;
|
||||
}
|
||||
|
||||
form_to_start = getNextListOnLine(form_to_start);
|
||||
if(!form_to_start) break;
|
||||
|
||||
}
|
||||
|
||||
|
||||
last_line_complete = candidate_line;
|
||||
line_to_start_line_search = candidate_line;
|
||||
}
|
||||
}
|
||||
|
||||
/*!
 * Insert breaks that depend on the meaning of a form instead of on line length.
 * Currently only "deftype" is handled: a break is added after the close paren of the next list
 * on the same line as the deftype symbol.
 */
static void insertSpecialBreaks(PrettyPrinterNode* node) {
  for (PrettyPrinterNode* cursor = node; cursor != nullptr; cursor = cursor->next) {
    if (cursor->is_line_separator || cursor->tok->kind != TokenKind::SYMBOL) {
      continue;
    }
    if (*cursor->tok->str != "deftype") {
      continue;
    }

    PrettyPrinterNode* list_after = getNextListOnLine(cursor);
    if (list_after) {
      insertNewlineAfter(list_after->paren, 0);
    }
  }
}
|
||||
|
||||
std::string Form::toStringPretty(int indent, int line_length) {
|
||||
(void)indent;
|
||||
(void)line_length;
|
||||
std::vector<FormToken> tokens;
|
||||
toTokenList(tokens);
|
||||
assert(!tokens.empty());
|
||||
std::string pretty;
|
||||
|
||||
// build linked list of nodes
|
||||
PrettyPrinterNode* head = new PrettyPrinterNode(tokens[0]);
|
||||
PrettyPrinterNode* node = head;
|
||||
head->line = 0;
|
||||
head->offset = 0;
|
||||
head->lineIndent = 0;
|
||||
int offset = head->tok->toString().length();
|
||||
for(size_t i = 1; i < tokens.size(); i++) {
|
||||
node->next = new PrettyPrinterNode(tokens[i]);
|
||||
node->next->prev = node;
|
||||
node = node->next;
|
||||
node->line = 0;
|
||||
node->offset = offset;
|
||||
offset += node->tok->toString().length();
|
||||
node->lineIndent = 0;
|
||||
}
|
||||
|
||||
// attach parens.
|
||||
std::vector<PrettyPrinterNode*> parenStack;
|
||||
parenStack.push_back(nullptr);
|
||||
for(PrettyPrinterNode* n = head; n; n = n->next) {
|
||||
if(n->tok->kind == TokenKind::OPEN_PAREN) {
|
||||
parenStack.push_back(n);
|
||||
} else if(n->tok->kind == TokenKind::CLOSE_PAREN) {
|
||||
n->paren = parenStack.back();
|
||||
parenStack.back()->paren = n;
|
||||
parenStack.pop_back();
|
||||
} else {
|
||||
n->paren = parenStack.back();
|
||||
}
|
||||
}
|
||||
assert(parenStack.size() == 1);
|
||||
assert(!parenStack.back());
|
||||
|
||||
insertSpecialBreaks(head);
|
||||
propagatePretty(head, line_length);
|
||||
insertBreaksAsNeeded(head, line_length);
|
||||
|
||||
|
||||
// write to string
|
||||
bool newline_prev = true;
|
||||
for(PrettyPrinterNode* n = head; n; n = n->next) {
|
||||
if(n->is_line_separator){
|
||||
pretty.push_back('\n');
|
||||
newline_prev = true;
|
||||
} else {
|
||||
if(newline_prev) {
|
||||
pretty.append(n->lineIndent, ' ');
|
||||
newline_prev = false;
|
||||
if(n->tok->kind == TokenKind::WHITESPACE) continue;
|
||||
}
|
||||
pretty.append(n->tok->toString());
|
||||
}
|
||||
}
|
||||
|
||||
for(;;) {
|
||||
if(!head) break;
|
||||
auto* next = head->next;
|
||||
delete head;
|
||||
head = next;
|
||||
}
|
||||
|
||||
|
||||
return pretty;
|
||||
}
|
||||
|
||||
std::shared_ptr<Form> toForm(const std::string& str) {
|
||||
auto f = std::make_shared<Form>();
|
||||
f->kind = FormKind::SYMBOL;
|
||||
f->symbol = gSymbolTable.intern(str);
|
||||
return f;
|
||||
}
|
||||
|
||||
std::shared_ptr<Form> buildList(std::shared_ptr<Form> form) {
|
||||
auto f = std::make_shared<Form>();
|
||||
f->kind = FormKind::PAIR;
|
||||
f->pair[0] = form;
|
||||
f->pair[1] = gSymbolTable.getEmptyPair();
|
||||
return f;
|
||||
}
|
||||
|
||||
std::shared_ptr<Form> buildList(const std::string& str) {
|
||||
return buildList(toForm(str));
|
||||
}
|
||||
|
||||
std::shared_ptr<Form> buildList(std::shared_ptr<Form>* forms, int count) {
|
||||
auto f = std::make_shared<Form>();
|
||||
f->kind = FormKind::PAIR;
|
||||
f->pair[0] = forms[0];
|
||||
if(count - 1) {
|
||||
f->pair[1] = buildList(forms + 1, count - 1);
|
||||
} else {
|
||||
f->pair[1] = gSymbolTable.getEmptyPair();
|
||||
}
|
||||
|
||||
return f;
|
||||
}
|
||||
|
||||
std::shared_ptr<Form> buildList(std::vector<std::shared_ptr<Form>>& forms) {
|
||||
if(forms.empty()) {
|
||||
return gSymbolTable.getEmptyPair();
|
||||
}
|
||||
return buildList(forms.data(), forms.size());
|
||||
}
|
142
decompiler/util/LispPrint.h
Normal file
142
decompiler/util/LispPrint.h
Normal file
@ -0,0 +1,142 @@
|
||||
#ifndef JAK2_DISASSEMBLER_LISPPRINT_H
|
||||
#define JAK2_DISASSEMBLER_LISPPRINT_H
|
||||
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
/*!
 * The kind of value a Form holds.
 */
enum class FormKind {
  SYMBOL,          // a symbol; the name is stored in Form::symbol
  HEX_NUMBER,      // number kinds and STRING: declared here, usage not visible in this header
  DECIMAL_NUMBER,
  BINARY_NUMBER,
  SIGNED_NUMBER,
  STRING,
  EMPTY_LIST,      // the empty list
  PAIR             // a pair of forms, stored in Form::pair
};
|
||||
|
||||
/*!
 * The kinds of token that make up the text of a form.
 */
enum class TokenKind {
  WHITESPACE,     // printed as a single space
  SYMBOL,         // printed as the token's string
  OPEN_PAREN,     // printed as "("
  DOT,            // printed as "."
  CLOSE_PAREN,    // printed as ")"
  EMPTY_PAIR,     // printed as "()"
  SPECIAL_SYMBOL  // printed as the token's string
};
|
||||
|
||||
/*!
|
||||
* Token in a text representation
|
||||
*/
|
||||
struct FormToken {
|
||||
explicit FormToken(TokenKind _kind, std::string* _str = nullptr) : kind(_kind), str(_str) {}
|
||||
|
||||
TokenKind kind;
|
||||
union {
|
||||
std::string* str;
|
||||
};
|
||||
|
||||
std::string toString() {
|
||||
std::string s;
|
||||
switch (kind) {
|
||||
case TokenKind::WHITESPACE:
|
||||
s.push_back(' ');
|
||||
break;
|
||||
case TokenKind::SYMBOL:
|
||||
s.append(*str);
|
||||
break;
|
||||
case TokenKind::OPEN_PAREN:
|
||||
s.push_back('(');
|
||||
break;
|
||||
case TokenKind::DOT:
|
||||
s.push_back('.');
|
||||
break;
|
||||
case TokenKind::CLOSE_PAREN:
|
||||
s.push_back(')');
|
||||
break;
|
||||
case TokenKind::EMPTY_PAIR:
|
||||
s.append("()");
|
||||
break;
|
||||
case TokenKind::SPECIAL_SYMBOL:
|
||||
s.append(*str);
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error("toString unknown token kind");
|
||||
}
|
||||
return s;
|
||||
}
|
||||
};
|
||||
|
||||
/*!
|
||||
* S-Expression Form
|
||||
*/
|
||||
class Form {
|
||||
public:
|
||||
FormKind kind;
|
||||
|
||||
std::string* symbol;
|
||||
std::shared_ptr<Form> pair[2];
|
||||
|
||||
std::string toStringSimple();
|
||||
std::string toStringPretty(int indent = 0, int line_length = 80);
|
||||
void toTokenList(std::vector<FormToken>& tokens);
|
||||
|
||||
private:
|
||||
void buildStringSimple(std::string& str);
|
||||
};
|
||||
|
||||
/*!
|
||||
* Symbol table to reduce the number of strings everywhere.
|
||||
*/
|
||||
class SymbolTable {
|
||||
public:
|
||||
SymbolTable();
|
||||
std::string* intern(const std::string& str);
|
||||
~SymbolTable();
|
||||
std::shared_ptr<Form> getEmptyPair() { return empty_pair; }
|
||||
|
||||
private:
|
||||
std::unordered_map<std::string, std::string*> map;
|
||||
std::shared_ptr<Form> empty_pair;
|
||||
};
|
||||
|
||||
/*!
|
||||
* Global symbol table used for the compiler/decompiler
|
||||
*/
|
||||
extern SymbolTable gSymbolTable;
|
||||
|
||||
std::shared_ptr<Form> toForm(const std::string& str); //
|
||||
|
||||
std::shared_ptr<Form> buildList(const std::string& str);
|
||||
std::shared_ptr<Form> buildList(std::shared_ptr<Form> form);
|
||||
std::shared_ptr<Form> buildList(std::vector<std::shared_ptr<Form>>& forms);
|
||||
std::shared_ptr<Form> buildList(std::shared_ptr<Form>* forms, int count);
|
||||
|
||||
template <typename... Args>
|
||||
std::shared_ptr<Form> buildList(const std::string& str, Args... rest) {
|
||||
auto f = std::make_shared<Form>();
|
||||
f->kind = FormKind::PAIR;
|
||||
f->pair[0] = toForm(str);
|
||||
f->pair[1] = buildList(rest...);
|
||||
return f;
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
std::shared_ptr<Form> buildList(std::shared_ptr<Form> car, Args... rest) {
|
||||
auto f = std::make_shared<Form>();
|
||||
f->kind = FormKind::PAIR;
|
||||
f->pair[0] = car;
|
||||
f->pair[1] = buildList(rest...);
|
||||
return f;
|
||||
}
|
||||
|
||||
#endif // JAK2_DISASSEMBLER_LISPPRINT_H
|
54
decompiler/util/Timer.cpp
Normal file
54
decompiler/util/Timer.cpp
Normal file
@ -0,0 +1,54 @@
|
||||
#include "Timer.h"
|
||||
|
||||
#ifdef _WIN32
#include <Windows.h>
#define MS_PER_SEC 1000ULL  // MS = milliseconds
#define US_PER_MS 1000ULL   // US = microseconds
#define HNS_PER_US 10ULL    // HNS = hundred-nanoseconds (e.g., 1 hns = 100 ns)
#define NS_PER_US 1000ULL

#define HNS_PER_SEC (MS_PER_SEC * US_PER_MS * HNS_PER_US)
#define NS_PER_HNS (100ULL)  // NS = nanoseconds
#define NS_PER_SEC (MS_PER_SEC * US_PER_MS * NS_PER_US)

/*!
 * Windows replacement for clock_gettime(CLOCK_MONOTONIC, ...), built on the performance counter.
 *
 * @param tv  receives the current counter value split into whole seconds and nanoseconds.
 * @return 0 on success; -1 with errno set to ENOTSUP if the counter frequency reads as zero.
 *
 * The split uses integer division and remainder so tv_sec and tv_nsec always describe the same
 * instant. Deriving both from a double rounds them independently: tv_sec can truncate while
 * tv_nsec rounds up and wraps to 0, which makes the reported time step backwards.
 */
int Timer::clock_gettime_monotonic(struct timespec* tv) {
  // Cached across calls; a static is zero-initialized, so zero means "not queried yet".
  static LARGE_INTEGER ticksPerSec;

  if (!ticksPerSec.QuadPart) {
    QueryPerformanceFrequency(&ticksPerSec);
    if (!ticksPerSec.QuadPart) {
      errno = ENOTSUP;
      return -1;
    }
  }

  LARGE_INTEGER ticks;
  QueryPerformanceCounter(&ticks);

  const ULONGLONG frequency = (ULONGLONG)ticksPerSec.QuadPart;
  const ULONGLONG count = (ULONGLONG)ticks.QuadPart;

  tv->tv_sec = (time_t)(count / frequency);
  // (count % frequency) < frequency, so the product fits in 64 bits for any frequency below
  // roughly 18 GHz.
  tv->tv_nsec = (long)((count % frequency) * NS_PER_SEC / frequency);

  return 0;
}
#endif
|
||||
|
||||
/*!
 * Record the current monotonic time in _startTime.
 * Only Linux and Windows have a branch here; on any other platform _startTime is left untouched.
 */
void Timer::start() {
#if defined(__linux__)
  clock_gettime(CLOCK_MONOTONIC, &_startTime);
#elif defined(_WIN32)
  clock_gettime_monotonic(&_startTime);
#endif
}
|
||||
|
||||
int64_t Timer::getNs() {
|
||||
struct timespec now = {};
|
||||
#ifdef __linux__
|
||||
clock_gettime(CLOCK_MONOTONIC, &now);
|
||||
#elif _WIN32
|
||||
clock_gettime_monotonic(&now);
|
||||
#endif
|
||||
return (int64_t)(now.tv_nsec - _startTime.tv_nsec) +
|
||||
1000000000 * (now.tv_sec - _startTime.tv_sec);
|
||||
}
|
47
decompiler/util/Timer.h
Normal file
47
decompiler/util/Timer.h
Normal file
@ -0,0 +1,47 @@
|
||||
#ifndef JAK_V2_TIMER_H
|
||||
#define JAK_V2_TIMER_H
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <ctime>
|
||||
|
||||
/*!
 * Stopwatch-style timer: remembers a start time and reports the time elapsed since then.
 */
class Timer {
 public:
  /*!
   * Construct and start timer
   */
  explicit Timer() { start(); }

#ifdef _WIN32
  /*!
   * Windows-only helper that fills `tv` with the current monotonic time.
   */
  int clock_gettime_monotonic(struct timespec* tv);
#endif

  /*!
   * Start (or restart) the timer
   */
  void start();

  /*!
   * Get milliseconds elapsed
   */
  double getMs() { return static_cast<double>(getNs()) / 1.0e6; }

  /*!
   * Get microseconds elapsed
   */
  double getUs() { return static_cast<double>(getNs()) / 1.0e3; }

  /*!
   * Get nanoseconds elapsed
   */
  int64_t getNs();

  /*!
   * Get seconds elapsed
   */
  double getSeconds() { return static_cast<double>(getNs()) / 1.0e9; }

  // Time captured by the most recent start(); zero-initialized until then.
  struct timespec _startTime = {};
};
|
||||
|
||||
#endif // JAK_V2_TIMER_H
|
2
decompiler_out/.gitignore
vendored
Normal file
2
decompiler_out/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
*
|
||||
!.gitignore
|
Loading…
x
Reference in New Issue
Block a user