mirror of
https://github.com/open-goal/jak-project.git
synced 2024-11-23 06:09:57 +00:00
[Decompiler - New IR] Add AtomicOp (#181)
* wip decompiler ir * add AtomicOp stuff * fix windows build and warnings * add instruction parser * include * make minilzo shared * odr fix * a * fix merge conflicts * move decompiler into namespace * update the code coverage to include the decompiler * add demo test * add register use test to example test
This commit is contained in:
parent
3331e9cd00
commit
5093b97cda
@ -1,35 +1,59 @@
|
||||
add_executable(decompiler
|
||||
main.cpp
|
||||
ObjectFile/ObjectFileDB.cpp
|
||||
Disasm/Instruction.cpp
|
||||
Disasm/InstructionDecode.cpp
|
||||
Disasm/OpcodeInfo.cpp
|
||||
Disasm/Register.cpp
|
||||
ObjectFile/LinkedObjectFileCreation.cpp
|
||||
ObjectFile/LinkedObjectFile.cpp
|
||||
Function/Function.cpp
|
||||
config.cpp
|
||||
util/DecompilerTypeSystem.cpp
|
||||
Function/BasicBlocks.cpp
|
||||
Disasm/InstructionMatching.cpp
|
||||
Function/CfgVtx.cpp
|
||||
IR/BasicOpBuilder.cpp
|
||||
IR/CfgBuilder.cpp
|
||||
IR/IR.cpp
|
||||
Function/TypeInspector.cpp
|
||||
data/tpage.cpp
|
||||
add_library(
|
||||
decomp
|
||||
SHARED
|
||||
data/game_count.cpp
|
||||
data/game_text.cpp
|
||||
data/StrFileReader.cpp
|
||||
data/game_count.cpp
|
||||
Function/TypeAnalysis.cpp
|
||||
IR/IR_TypeAnalysis.cpp
|
||||
util/TP_Type.cpp
|
||||
Function/RegUsage.cpp
|
||||
data/tpage.cpp
|
||||
|
||||
Disasm/Instruction.cpp
|
||||
Disasm/InstructionDecode.cpp
|
||||
Disasm/InstructionMatching.cpp
|
||||
Disasm/InstructionParser.cpp
|
||||
Disasm/OpcodeInfo.cpp
|
||||
Disasm/Register.cpp
|
||||
|
||||
Function/BasicBlocks.cpp
|
||||
Function/CfgVtx.cpp
|
||||
Function/ExpressionBuilder.cpp
|
||||
Function/ExpressionStack.cpp
|
||||
IR/IR_ExpressionStack.cpp)
|
||||
Function/Function.cpp
|
||||
Function/RegUsage.cpp
|
||||
Function/TypeAnalysis.cpp
|
||||
Function/TypeInspector.cpp
|
||||
|
||||
IR/BasicOpBuilder.cpp
|
||||
IR/CfgBuilder.cpp
|
||||
IR/IR.cpp
|
||||
IR/IR_ExpressionStack.cpp
|
||||
IR/IR_TypeAnalysis.cpp
|
||||
|
||||
IR2/AtomicOp.cpp
|
||||
IR2/AtomicOpBuilder.cpp
|
||||
IR2/Env.cpp
|
||||
|
||||
ObjectFile/LinkedObjectFile.cpp
|
||||
ObjectFile/LinkedObjectFileCreation.cpp
|
||||
ObjectFile/ObjectFileDB.cpp
|
||||
|
||||
util/DecompilerTypeSystem.cpp
|
||||
util/TP_Type.cpp
|
||||
|
||||
config.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(decomp
|
||||
minilzo
|
||||
common
|
||||
fmt
|
||||
)
|
||||
|
||||
add_executable(decompiler
|
||||
main.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(decompiler
|
||||
common
|
||||
decomp
|
||||
common
|
||||
minilzo
|
||||
fmt)
|
||||
|
15
decompiler/Disasm/DecompilerLabel.h
Normal file
15
decompiler/Disasm/DecompilerLabel.h
Normal file
@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace decompiler {
/*!
 * A label to a location in an object file.
 * Doesn't have to be word aligned.
 *
 * The integer fields are zero-initialized so that a default-constructed label never carries
 * indeterminate values.
 */
struct DecompilerLabel {
  std::string name;        // text of the label, as printed in disassembly
  int target_segment = 0;  // segment the label points into
  int offset = 0;          // in bytes
};
}  // namespace decompiler
|
@ -8,17 +8,18 @@
|
||||
#include "decompiler/ObjectFile/LinkedObjectFile.h"
|
||||
#include <cassert>
|
||||
|
||||
namespace decompiler {
|
||||
/*!
|
||||
* Convert atom to a string for disassembly.
|
||||
*/
|
||||
std::string InstructionAtom::to_string(const LinkedObjectFile& file) const {
|
||||
std::string InstructionAtom::to_string(const std::vector<DecompilerLabel>& labels) const {
|
||||
switch (kind) {
|
||||
case REGISTER:
|
||||
return reg.to_string();
|
||||
case IMM:
|
||||
return std::to_string(imm);
|
||||
case LABEL:
|
||||
return file.get_label_name(label_id);
|
||||
return labels.at(label_id).name;
|
||||
case VU_ACC:
|
||||
return "acc";
|
||||
case VU_Q:
|
||||
@ -115,6 +116,25 @@ bool InstructionAtom::is_link_or_label() const {
|
||||
return kind == IMM_SYM || kind == LABEL;
|
||||
}
|
||||
|
||||
/*!
 * Compare two atoms. Atoms of different kinds are never equal; atoms of the same kind are
 * compared by the single payload field that kind uses.
 */
bool InstructionAtom::operator==(const InstructionAtom& other) const {
  if (kind != other.kind) {
    return false;
  }
  switch (kind) {
    case REGISTER:
      return reg == other.reg;
    case IMM:
      return imm == other.imm;
    case IMM_SYM:
      // symbol atoms store their value in sym
      return sym == other.sym;
    case LABEL:
      return label_id == other.label_id;
    case VU_ACC:
    case VU_Q:
      // compared by kind alone
      return true;
    default:
      assert(false);
      // keep a defined result when asserts are compiled out
      return false;
  }
}
|
||||
|
||||
/*!
|
||||
* Convert just the name of the opcode to a string, omitting src/dst, but including
|
||||
* suffixes (interlock, broadcasts and destination)
|
||||
@ -169,7 +189,7 @@ std::string Instruction::op_name_to_string() const {
|
||||
/*!
|
||||
* Convert entire instruction to a string.
|
||||
*/
|
||||
std::string Instruction::to_string(const LinkedObjectFile& file) const {
|
||||
std::string Instruction::to_string(const std::vector<DecompilerLabel>& labels) const {
|
||||
auto& info = gOpcodeInfo[(int)kind];
|
||||
auto result = op_name_to_string();
|
||||
|
||||
@ -178,33 +198,33 @@ std::string Instruction::to_string(const LinkedObjectFile& file) const {
|
||||
assert(n_dst == 0);
|
||||
assert(n_src == 3);
|
||||
result += " ";
|
||||
result += src[0].to_string(file);
|
||||
result += src[0].to_string(labels);
|
||||
result += ", ";
|
||||
result += src[1].to_string(file);
|
||||
result += src[1].to_string(labels);
|
||||
result += "(";
|
||||
result += src[2].to_string(file);
|
||||
result += src[2].to_string(labels);
|
||||
result += ")";
|
||||
} else if (info.is_load) {
|
||||
assert(n_dst == 1);
|
||||
assert(n_src == 2);
|
||||
result += " ";
|
||||
result += dst[0].to_string(file);
|
||||
result += dst[0].to_string(labels);
|
||||
result += ", ";
|
||||
result += src[0].to_string(file);
|
||||
result += src[0].to_string(labels);
|
||||
result += "(";
|
||||
result += src[1].to_string(file);
|
||||
result += src[1].to_string(labels);
|
||||
result += ")";
|
||||
} else {
|
||||
// for instructions that aren't a store or load, the dest/sources are comma separated.
|
||||
bool end_comma = false;
|
||||
|
||||
for (uint8_t i = 0; i < n_dst; i++) {
|
||||
result += " " + dst[i].to_string(file) + ",";
|
||||
result += " " + dst[i].to_string(labels) + ",";
|
||||
end_comma = true;
|
||||
}
|
||||
|
||||
for (uint8_t i = 0; i < n_src; i++) {
|
||||
result += " " + src[i].to_string(file) + ",";
|
||||
result += " " + src[i].to_string(labels) + ",";
|
||||
end_comma = true;
|
||||
}
|
||||
|
||||
@ -312,3 +332,25 @@ int Instruction::get_label_target() const {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*!
 * Two instructions are equal when their opcode, operand counts, COP2 extra fields and every
 * used destination/source atom match.
 */
bool Instruction::operator==(const Instruction& other) const {
  const bool same_shape = kind == other.kind && n_src == other.n_src && n_dst == other.n_dst &&
                          cop2_dest == other.cop2_dest && cop2_bc == other.cop2_bc &&
                          il == other.il;
  if (!same_shape) {
    return false;
  }

  // only the first n_dst / n_src slots are meaningful
  int slot = 0;
  while (slot < n_dst) {
    if (dst[slot] != other.dst[slot]) {
      return false;
    }
    slot++;
  }

  slot = 0;
  while (slot < n_src) {
    if (src[slot] != other.src[slot]) {
      return false;
    }
    slot++;
  }

  return true;
}
|
||||
} // namespace decompiler
|
@ -9,10 +9,12 @@
|
||||
#ifndef NEXT_INSTRUCTION_H
|
||||
#define NEXT_INSTRUCTION_H
|
||||
|
||||
#include <vector>
|
||||
#include "OpcodeInfo.h"
|
||||
#include "Register.h"
|
||||
|
||||
class LinkedObjectFile;
|
||||
namespace decompiler {
|
||||
struct DecompilerLabel;
|
||||
|
||||
constexpr int MAX_INSTRUCTION_SOURCE = 3;
|
||||
constexpr int MAX_INTRUCTION_DEST = 1;
|
||||
@ -41,7 +43,7 @@ struct InstructionAtom {
|
||||
int get_label() const;
|
||||
std::string get_sym() const;
|
||||
|
||||
std::string to_string(const LinkedObjectFile& file) const;
|
||||
std::string to_string(const std::vector<DecompilerLabel>& labels) const;
|
||||
|
||||
bool is_link_or_label() const;
|
||||
bool is_reg() const { return kind == REGISTER; }
|
||||
@ -51,11 +53,13 @@ struct InstructionAtom {
|
||||
|
||||
bool is_reg(Register r) const { return kind == REGISTER && reg == r; }
|
||||
|
||||
bool operator==(const InstructionAtom& other) const;
|
||||
bool operator!=(const InstructionAtom& other) const { return !((*this) == other); }
|
||||
|
||||
private:
|
||||
int32_t imm;
|
||||
int label_id;
|
||||
Register reg;
|
||||
|
||||
std::string sym;
|
||||
};
|
||||
|
||||
@ -66,7 +70,7 @@ class Instruction {
|
||||
InstructionKind kind = InstructionKind::UNKNOWN;
|
||||
|
||||
std::string op_name_to_string() const;
|
||||
std::string to_string(const LinkedObjectFile& file) const;
|
||||
std::string to_string(const std::vector<DecompilerLabel>& labels) const;
|
||||
bool is_valid() const;
|
||||
|
||||
void add_src(InstructionAtom& a);
|
||||
@ -89,10 +93,13 @@ class Instruction {
|
||||
|
||||
int get_label_target() const;
|
||||
|
||||
bool operator==(const Instruction& other) const;
|
||||
bool operator!=(const Instruction& other) const { return !((*this) == other); }
|
||||
|
||||
// extra fields for some COP2 instructions.
|
||||
uint8_t cop2_dest = 0xff; // 0xff indicates "don't print dest"
|
||||
uint8_t cop2_bc = 0xff; // 0xff indicates "don't print bc"
|
||||
uint8_t il = 0xff; // 0xff indicates "don't print il"
|
||||
};
|
||||
|
||||
} // namespace decompiler
|
||||
#endif // NEXT_INSTRUCTION_H
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <cassert>
|
||||
#include "decompiler/ObjectFile/LinkedObjectFile.h"
|
||||
|
||||
namespace decompiler {
|
||||
// utility class to extract fields of an opcode.
|
||||
struct OpcodeFields {
|
||||
OpcodeFields(uint32_t _data) : data(_data) {}
|
||||
@ -1171,3 +1172,4 @@ Instruction decode_instruction(LinkedWord& word, LinkedObjectFile& file, int seg
|
||||
|
||||
return i;
|
||||
}
|
||||
} // namespace decompiler
|
@ -11,9 +11,10 @@
|
||||
|
||||
#include "Instruction.h"
|
||||
|
||||
namespace decompiler {
|
||||
class LinkedWord;
|
||||
class LinkedObjectFile;
|
||||
|
||||
Instruction decode_instruction(LinkedWord& word, LinkedObjectFile& file, int seg_id, int word_id);
|
||||
|
||||
} // namespace decompiler
|
||||
#endif // NEXT_INSTRUCTIONDECODE_H
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <cassert>
|
||||
#include "InstructionMatching.h"
|
||||
|
||||
namespace decompiler {
|
||||
/*!
|
||||
* Check if the given instruction stores a GPR with the specified parameters.
|
||||
*/
|
||||
@ -348,3 +349,4 @@ bool is_always_branch(const Instruction& instr) {
|
||||
|
||||
return false;
|
||||
}
|
||||
} // namespace decompiler
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "Instruction.h"
|
||||
#include "decompiler/util/MatchParam.h"
|
||||
|
||||
namespace decompiler {
|
||||
bool is_no_link_gpr_store(const Instruction& instr,
|
||||
MatchParam<int> size,
|
||||
MatchParam<Register> src,
|
||||
@ -56,5 +57,5 @@ Register make_fpr(int fpr);
|
||||
|
||||
bool is_branch(const Instruction& instr, MatchParam<bool> likely);
|
||||
bool is_always_branch(const Instruction& instr);
|
||||
|
||||
} // namespace decompiler
|
||||
#endif // JAK_DISASSEMBLER_INSTRUCTIONMATCHING_H
|
||||
|
307
decompiler/Disasm/InstructionParser.cpp
Normal file
307
decompiler/Disasm/InstructionParser.cpp
Normal file
@ -0,0 +1,307 @@
|
||||
#include <cassert>
|
||||
#include <algorithm>
|
||||
#include <stdexcept>
|
||||
#include "common/common_types.h"
|
||||
#include "InstructionParser.h"
|
||||
|
||||
namespace decompiler {
|
||||
/*!
 * Build the opcode-name -> InstructionKind lookup used when parsing text.
 */
InstructionParser::InstructionParser() {
  // the table below is read from gOpcodeInfo, so it must be filled in first
  init_opcode_info();

  // We only support a subset of the total instructions. These are commonly used and don't have
  // strange formatting.
  const InstructionKind supported_kinds[] = {
      InstructionKind::DADDIU, InstructionKind::ADDIU,  InstructionKind::SLTI,
      InstructionKind::SLTIU,  InstructionKind::SB,     InstructionKind::SH,
      InstructionKind::SW,     InstructionKind::SD,     InstructionKind::SQ,
      InstructionKind::LB,     InstructionKind::LBU,    InstructionKind::LH,
      InstructionKind::LHU,    InstructionKind::LW,     InstructionKind::LWU,
      InstructionKind::LD,     InstructionKind::LQ,     InstructionKind::LDR,
      InstructionKind::LDL,    InstructionKind::LWL,    InstructionKind::LWR,
      InstructionKind::DADDU,  InstructionKind::SUBU,   InstructionKind::ADDU,
      InstructionKind::DSUBU,  InstructionKind::MULT3,  InstructionKind::MULTU3,
      InstructionKind::AND,    InstructionKind::OR,     InstructionKind::NOR,
      InstructionKind::XOR,    InstructionKind::MOVN,   InstructionKind::MOVZ,
      InstructionKind::SLT,    InstructionKind::SLTU,   InstructionKind::SLL,
      InstructionKind::SRA,    InstructionKind::SRL,    InstructionKind::DSLL,
      InstructionKind::DSLL32, InstructionKind::DSRA,   InstructionKind::DSRA32,
      InstructionKind::DSRL,   InstructionKind::DSRL32, InstructionKind::DSRAV,
      InstructionKind::SLLV,   InstructionKind::DSLLV,  InstructionKind::DSRLV,
      InstructionKind::DIV,    InstructionKind::DIVU,   InstructionKind::ORI,
      InstructionKind::XORI,   InstructionKind::ANDI,   InstructionKind::LUI,
      InstructionKind::JALR,   InstructionKind::JR,     InstructionKind::LWC1,
      InstructionKind::SWC1,   InstructionKind::ADDS,   InstructionKind::SUBS,
      InstructionKind::MULS,   InstructionKind::DIVS,   InstructionKind::MINS,
      InstructionKind::MAXS,   InstructionKind::MADDS,  InstructionKind::MSUBS,
      InstructionKind::RSQRTS, InstructionKind::ABSS,   InstructionKind::NEGS,
      InstructionKind::CVTSW,  InstructionKind::CVTWS,  InstructionKind::MOVS,
      InstructionKind::SQRTS,  InstructionKind::CLTS,   InstructionKind::CLES,
      InstructionKind::CEQS,   InstructionKind::BC1F,   InstructionKind::BC1T,
      InstructionKind::BEQ,    InstructionKind::BNE,    InstructionKind::BEQL,
      InstructionKind::BNEL,   InstructionKind::BC1FL,  InstructionKind::BC1TL,
      InstructionKind::BLTZ,   InstructionKind::BGEZ,   InstructionKind::BLEZ,
      InstructionKind::BGTZ,   InstructionKind::BLTZL,  InstructionKind::BGTZL,
      InstructionKind::BGEZL};

  int inserted = 0;
  for (auto op : supported_kinds) {
    auto& op_info = gOpcodeInfo[int(op)];
    if (!op_info.defined) {
      continue;
    }
    m_opcode_name_lookup[op_info.name] = int(op);
    inserted++;
  }

  // if two kinds shared a name, the map would be smaller than the number of insertions
  assert(inserted == int(m_opcode_name_lookup.size()));
}
|
||||
|
||||
namespace {
|
||||
/*!
 * Pop the text before the first space off the front of instr and return it.
 * The space itself is consumed. If there is no space, the whole string is returned and instr
 * is left empty.
 */
std::string get_until_space(std::string& instr) {
  assert(!instr.empty());
  const auto space_pos = instr.find(' ');
  // substr clamps npos to "rest of string", which is exactly the no-space case
  std::string token = instr.substr(0, space_pos);
  if (space_pos == std::string::npos) {
    instr.clear();
  } else {
    instr.erase(0, space_pos + 1);
  }
  return token;
}
|
||||
|
||||
std::string get_comma_separated(std::string& instr) {
|
||||
assert(!instr.empty());
|
||||
auto arg = get_until_space(instr);
|
||||
if (instr.empty()) {
|
||||
assert(arg.back() != ',');
|
||||
} else {
|
||||
assert(arg.back() == ',');
|
||||
arg.pop_back();
|
||||
}
|
||||
return arg;
|
||||
}
|
||||
|
||||
/*!
 * Pop everything before the first '(' off the front of instr and return it.
 * The '(' itself is left at the front of instr.
 * Asserts (and throws std::runtime_error when asserts are compiled out) if there is no '('.
 */
std::string get_before_paren(std::string& instr) {
  const auto paren_pos = instr.find('(');
  if (paren_pos == std::string::npos) {
    assert(false);
    // never fall off the end of a value-returning function in release builds
    throw std::runtime_error("InstructionParser expected '(' in " + instr);
  }
  std::string result = instr.substr(0, paren_pos);
  instr.erase(0, paren_pos);
  return result;
}
|
||||
|
||||
/*!
 * instr must start with '('. Return the text between that '(' and the first ')', and remove
 * everything up to and including the ')' from instr.
 * Asserts (and throws std::runtime_error when asserts are compiled out) if there is no ')'.
 */
std::string get_in_paren(std::string& instr) {
  assert(instr.length() > 2);
  assert(instr.front() == '(');
  const auto close_pos = instr.find(')');
  if (close_pos == std::string::npos) {
    assert(false);
    // never fall off the end of a value-returning function in release builds
    throw std::runtime_error("InstructionParser expected ')' in " + instr);
  }
  // skip the opening paren, stop just before the closing one
  std::string result = instr.substr(1, close_pos - 1);
  instr.erase(0, close_pos + 1);
  return result;
}
|
||||
|
||||
/*!
 * True if the entire string is consumed when parsed as a base-10 integer by strtol.
 */
bool is_integer(const std::string& str) {
  assert(!str.empty());
  const char* const text = str.c_str();
  char* parse_stop = nullptr;
  std::strtol(text, &parse_stop, 10);
  // strtol stops at the first character it can't use; anything left over means "not a number"
  return parse_stop == text + str.length();
}
|
||||
|
||||
/*!
 * Convert a base-10 integer string to an int. The whole string must be a valid integer.
 */
int parse_integer(const std::string& str) {
  assert(!str.empty());
  const char* const text = str.c_str();
  char* parse_stop = nullptr;
  const long parsed = std::strtol(text, &parse_stop, 10);
  assert(parse_stop == text + str.length());
  return static_cast<int>(parsed);
}
|
||||
|
||||
/*!
 * Split str at each '\n'. Empty lines between newlines are kept; text after the final newline
 * is only added if it is non-empty.
 */
std::vector<std::string> string_to_lines(const std::string& str) {
  std::vector<std::string> lines;
  std::string::size_type line_start = 0;
  for (auto newline = str.find('\n'); newline != std::string::npos;
       newline = str.find('\n', line_start)) {
    lines.push_back(str.substr(line_start, newline - line_start));
    line_start = newline + 1;
  }
  // trailing text with no newline after it
  if (line_start < str.length()) {
    lines.push_back(str.substr(line_start));
  }
  return lines;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/*!
 * Parse a single instruction such as "daddu a0, s7, r0" or "lw v0, 12(sp)".
 * The opcode name selects an entry of gOpcodeInfo, and its decode steps determine how each
 * following operand is read. Branch targets are resolved by name against labels.
 * Throws std::runtime_error if the opcode is not in the supported set.
 */
Instruction InstructionParser::parse_single_instruction(
    std::string str,
    const std::vector<DecompilerLabel>& labels) {
  auto opcode_name = get_until_space(str);
  auto entry = m_opcode_name_lookup.find(opcode_name);
  if (entry == m_opcode_name_lookup.end()) {
    throw std::runtime_error("InstructionParser cannot handle opcode " + opcode_name);
  }

  Instruction instr;
  instr.kind = InstructionKind(entry->second);
  auto& info = gOpcodeInfo[entry->second];
  // loads/stores print their last two operands as "offset(base)" instead of "a, b"
  const bool is_memory_op = info.is_store || info.is_load;

  for (u8 i = 0; i < info.step_count; i++) {
    auto& step = info.steps[i];

    // attach a finished atom to the side of the instruction this step describes
    auto attach = [&](InstructionAtom& atom) {
      if (step.is_src) {
        instr.add_src(atom);
      } else {
        instr.add_dst(atom);
      }
    };

    switch (step.decode) {
      case DecodeType::GPR: {
        // the base register of a load/store is the one in parentheses
        std::string gpr_name =
            (is_memory_op && i == 2) ? get_in_paren(str) : get_comma_separated(str);

        Register reg(gpr_name);
        assert(reg.get_kind() == Reg::GPR);
        InstructionAtom atom;
        atom.set_reg(reg);
        attach(atom);
      } break;

      case DecodeType::FPR: {
        auto fpr_name = get_comma_separated(str);
        Register reg(fpr_name);
        assert(reg.get_kind() == Reg::FPR);
        InstructionAtom atom;
        atom.set_reg(reg);
        attach(atom);
      } break;

      case DecodeType::IMM: {
        InstructionAtom atom;
        // the offset of a load/store is the number before the paren
        std::string imm_text =
            (is_memory_op && i == 1) ? get_before_paren(str) : get_comma_separated(str);

        // anything that isn't a plain integer is kept as a symbol
        if (is_integer(imm_text)) {
          auto value = parse_integer(imm_text);
          atom.set_imm(value);
        } else {
          atom.set_sym(imm_text);
        }
        attach(atom);
      } break;

      case DecodeType::BRANCH_TARGET: {
        auto label_name = get_comma_separated(str);
        auto found = std::find_if(labels.begin(), labels.end(), [&](const DecompilerLabel& l) {
          return l.name == label_name;
        });
        assert(found != labels.end());
        auto idx = found - labels.begin();
        InstructionAtom atom;
        atom.set_label(idx);
        attach(atom);
      } break;

      default:
        assert(false);
    }
  }

  // every character of the input must have been consumed by some decode step
  assert(str.empty());
  return instr;
}
|
||||
|
||||
/*!
 * Parse a multi-line listing of labels and instructions.
 *
 * Leading spaces on each line are ignored, and blank (empty or all-space) lines are skipped.
 * A line whose first character is 'L' is a label and must end with ':'; every other line is
 * an instruction that occupies 4 bytes.
 *
 * This is done in two passes so that an instruction can branch to a label defined after it.
 */
ParsedProgram InstructionParser::parse_program(const std::string& str) {
  ParsedProgram program;
  auto lines = string_to_lines(str);

  // first pass: strip indentation and collect labels with their byte offsets
  int byte_offset = 0;
  for (auto& line : lines) {
    const auto first_char = line.find_first_not_of(' ');
    if (first_char == std::string::npos) {
      // Empty or whitespace-only. Clear it so that neither pass mistakes it for an
      // instruction (which would shift label offsets and then fail to parse).
      line.clear();
      continue;
    }
    line.erase(0, first_char);

    if (line.front() != 'L') {
      // an instruction: only its size matters in this pass
      byte_offset += 4;
      continue;
    }

    if (line.back() == ':') {
      line.pop_back();
    } else {
      assert(false);
    }
    DecompilerLabel label;
    label.target_segment = 0;
    label.offset = byte_offset;
    label.name = line;
    program.labels.push_back(label);
  }

  // second pass: all labels are known now, so instructions can be parsed
  for (auto& line : lines) {
    if (!line.empty() && line.front() != 'L') {
      program.instructions.push_back(parse_single_instruction(line, program.labels));
    }
  }

  return program;
}
|
||||
|
||||
std::string ParsedProgram::print() {
|
||||
std::string result;
|
||||
|
||||
int offset = 0;
|
||||
for (auto& instr : instructions) {
|
||||
for (auto& label : labels) {
|
||||
if (label.offset == offset) {
|
||||
result += label.name;
|
||||
result += ":\n";
|
||||
}
|
||||
}
|
||||
result += ' ';
|
||||
result += ' ';
|
||||
result += instr.to_string(labels);
|
||||
result += '\n';
|
||||
offset += 4;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
} // namespace decompiler
|
29
decompiler/Disasm/InstructionParser.h
Normal file
29
decompiler/Disasm/InstructionParser.h
Normal file
@ -0,0 +1,29 @@
|
||||
/*!
|
||||
* The InstructionParser converts a string like "daddu a0, s7, r0" into an Instruction.
|
||||
* It is used to generate test sequences of instructions for decompiler algorithms.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include "Instruction.h"
|
||||
#include "DecompilerLabel.h"
|
||||
|
||||
namespace decompiler {
|
||||
struct ParsedProgram {
|
||||
std::vector<DecompilerLabel> labels;
|
||||
std::vector<Instruction> instructions;
|
||||
std::string print();
|
||||
};
|
||||
|
||||
class InstructionParser {
|
||||
public:
|
||||
InstructionParser();
|
||||
Instruction parse_single_instruction(std::string str, const std::vector<DecompilerLabel>& labels);
|
||||
ParsedProgram parse_program(const std::string& str);
|
||||
|
||||
private:
|
||||
std::unordered_map<std::string, int> m_opcode_name_lookup;
|
||||
};
|
||||
} // namespace decompiler
|
@ -6,8 +6,13 @@
|
||||
#include "OpcodeInfo.h"
|
||||
#include <cassert>
|
||||
|
||||
namespace decompiler {
|
||||
OpcodeInfo gOpcodeInfo[(uint32_t)InstructionKind::EE_OP_MAX];
|
||||
|
||||
namespace {
|
||||
bool opcodes_initialized = false;
|
||||
}
|
||||
|
||||
typedef InstructionKind IK;
|
||||
typedef FieldType FT;
|
||||
typedef DecodeType DT;
|
||||
@ -130,6 +135,9 @@ static OpcodeInfo& cd_dacc_svfs_svft(OpcodeInfo& info) {
|
||||
}
|
||||
|
||||
void init_opcode_info() {
|
||||
if (opcodes_initialized) {
|
||||
return;
|
||||
}
|
||||
gOpcodeInfo[0].name = ";; ??????";
|
||||
|
||||
// RT, RS, SIMM
|
||||
@ -444,6 +452,7 @@ void init_opcode_info() {
|
||||
// for the UNKNOWN op which shouldn't be valid.
|
||||
total_count--;
|
||||
assert(total_count == valid_count);
|
||||
opcodes_initialized = true;
|
||||
}
|
||||
|
||||
void OpcodeInfo::step(DecodeStep& s) {
|
||||
@ -501,4 +510,5 @@ OpcodeInfo& OpcodeInfo::dst_vf(FieldType field) {
|
||||
|
||||
OpcodeInfo& OpcodeInfo::dst_vi(FieldType field) {
|
||||
return dst(field, DT::VI);
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -10,6 +10,7 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace decompiler {
|
||||
enum class InstructionKind {
|
||||
UNKNOWN,
|
||||
|
||||
@ -342,12 +343,12 @@ struct OpcodeInfo {
|
||||
OpcodeInfo& dst_vf(FieldType field);
|
||||
OpcodeInfo& dst_vi(FieldType field);
|
||||
|
||||
uint8_t step_count;
|
||||
uint8_t step_count = 0;
|
||||
DecodeStep steps[MAX_DECODE_STEPS];
|
||||
};
|
||||
|
||||
extern OpcodeInfo gOpcodeInfo[(uint32_t)InstructionKind::EE_OP_MAX];
|
||||
|
||||
void init_opcode_info();
|
||||
|
||||
} // namespace decompiler
|
||||
#endif // NEXT_OPCODEINFO_H
|
||||
|
@ -7,6 +7,24 @@
|
||||
#include <cassert>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace decompiler {
|
||||
namespace Reg {
|
||||
// register which may hold GOAL local variables
|
||||
|
||||
// clang-format off
|
||||
const bool allowed_local_gprs[Reg::MAX_GPR] = {
|
||||
false /*R0*/, false /*AT*/, true /*V0*/, true /*V1*/,
|
||||
true /*A0*/, true /*A1*/, true /*A2*/, true /*A3*/,
|
||||
true /*T0*/, true /*T1*/, true /*T2*/, true /*T3*/,
|
||||
true /*T4*/, true /*T5*/, true /*T6*/, true /*T7*/,
|
||||
true /*S0*/, true /*S1*/, true /*S2*/, true /*S3*/,
|
||||
true /*S4*/, true /*S5*/, false /*S6*/, false /*S7*/,
|
||||
true /*T8*/, true /*T9*/, false /*K0*/, false /*K1*/,
|
||||
true /*GP*/, true /*SP*/, false /*FP*/, false /*RA*/
|
||||
};
|
||||
// clang-format on
|
||||
} // namespace Reg
|
||||
|
||||
////////////////////////////
|
||||
// Register Name Constants
|
||||
////////////////////////////
|
||||
@ -233,4 +251,5 @@ bool Register::operator==(const Register& other) const {
|
||||
|
||||
bool Register::operator!=(const Register& other) const {
|
||||
return id != other.id;
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -11,6 +11,7 @@
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
namespace decompiler {
|
||||
// Namespace for register name constants
|
||||
namespace Reg {
|
||||
enum RegisterKind {
|
||||
@ -120,6 +121,9 @@ enum Vi {
|
||||
CMSAR1 = 31,
|
||||
MAX_COP2 = 32
|
||||
};
|
||||
|
||||
const extern bool allowed_local_gprs[Reg::MAX_GPR];
|
||||
|
||||
} // namespace Reg
|
||||
|
||||
// Representation of a register. Uses a 16-bit integer (uint16_t id) internally.
|
||||
@ -148,5 +152,5 @@ class Register {
|
||||
private:
|
||||
uint16_t id = -1;
|
||||
};
|
||||
|
||||
} // namespace decompiler
|
||||
#endif // NEXT_REGISTER_H
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include "decompiler/ObjectFile/LinkedObjectFile.h"
|
||||
#include "decompiler/Disasm/InstructionMatching.h"
|
||||
|
||||
namespace decompiler {
|
||||
/*!
|
||||
* Find all basic blocks in a function.
|
||||
* All delay slot instructions are grouped with the branch instruction.
|
||||
@ -48,4 +49,5 @@ std::vector<BasicBlock> find_blocks_in_function(const LinkedObjectFile& file,
|
||||
}
|
||||
|
||||
return basic_blocks;
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -7,6 +7,7 @@
|
||||
#include "decompiler/util/DecompilerTypeSystem.h"
|
||||
#include "decompiler/util/TP_Type.h"
|
||||
|
||||
namespace decompiler {
|
||||
class LinkedObjectFile;
|
||||
class Function;
|
||||
|
||||
@ -48,3 +49,4 @@ struct BlockTopologicalSort {
|
||||
std::vector<BasicBlock> find_blocks_in_function(const LinkedObjectFile& file,
|
||||
int seg,
|
||||
const Function& func);
|
||||
} // namespace decompiler
|
@ -5,6 +5,7 @@
|
||||
#include "CfgVtx.h"
|
||||
#include "Function.h"
|
||||
|
||||
namespace decompiler {
|
||||
/////////////////////////////////////////
|
||||
/// CfgVtx
|
||||
/////////////////////////////////////////
|
||||
@ -1912,3 +1913,4 @@ std::shared_ptr<ControlFlowGraph> build_cfg(const LinkedObjectFile& file, int se
|
||||
|
||||
return cfg;
|
||||
}
|
||||
} // namespace decompiler
|
@ -11,6 +11,7 @@ namespace goos {
|
||||
class Object;
|
||||
}
|
||||
|
||||
namespace decompiler {
|
||||
/*!
|
||||
* In v, find an item equal to old, and replace it with replace.
|
||||
* Will throw an error if there is not exactly one thing equal to old.
|
||||
@ -351,5 +352,5 @@ class ControlFlowGraph {
|
||||
class LinkedObjectFile;
|
||||
class Function;
|
||||
std::shared_ptr<ControlFlowGraph> build_cfg(const LinkedObjectFile& file, int seg, Function& func);
|
||||
|
||||
} // namespace decompiler
|
||||
#endif // JAK_DISASSEMBLER_CFGVTX_H
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include "decompiler/IR/IR.h"
|
||||
#include "ExpressionStack.h"
|
||||
|
||||
namespace decompiler {
|
||||
namespace {
|
||||
bool expressionize_begin(IR_Begin* begin, LinkedObjectFile& file) {
|
||||
ExpressionStack stack;
|
||||
@ -55,4 +56,5 @@ bool Function::build_expression(LinkedObjectFile& file) {
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -1,6 +1,7 @@
|
||||
#include "third-party/fmt/core.h"
|
||||
#include "ExpressionStack.h"
|
||||
|
||||
namespace decompiler {
|
||||
std::string ExpressionStack::StackEntry::print(LinkedObjectFile& file) {
|
||||
return fmt::format("d: {} s: {} | {} <- {}", display, sequence_point,
|
||||
destination.has_value() ? destination.value().to_charp() : "N/A",
|
||||
@ -107,4 +108,5 @@ ExpressionStack::StackEntry& ExpressionStack::get_display_stack_top() {
|
||||
}
|
||||
}
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -6,6 +6,7 @@
|
||||
#include "decompiler/Disasm/Register.h"
|
||||
#include "decompiler/util/TP_Type.h"
|
||||
|
||||
namespace decompiler {
|
||||
/*!
|
||||
* An ExpressionStack is used to track partial expressions when rebuilding the tree structure of
|
||||
* GOAL code. Linear sequences of operations are added onto the expression stack.
|
||||
@ -33,4 +34,5 @@ class ExpressionStack {
|
||||
|
||||
bool display_stack_empty();
|
||||
StackEntry& get_display_stack_top();
|
||||
};
|
||||
};
|
||||
} // namespace decompiler
|
@ -8,6 +8,7 @@
|
||||
#include "TypeInspector.h"
|
||||
#include "decompiler/IR/IR.h"
|
||||
|
||||
namespace decompiler {
|
||||
namespace {
|
||||
std::vector<Register> gpr_backups = {make_gpr(Reg::GP), make_gpr(Reg::S5), make_gpr(Reg::S4),
|
||||
make_gpr(Reg::S3), make_gpr(Reg::S2), make_gpr(Reg::S1),
|
||||
@ -70,8 +71,8 @@ void Function::analyze_prologue(const LinkedObjectFile& file) {
|
||||
// storing stack pointer on the stack is done by some ASM kernel functions
|
||||
if (instr.kind == InstructionKind::SW && instr.get_src(0).get_reg() == make_gpr(Reg::SP)) {
|
||||
printf("[Warning] %s Suspected ASM function based on this instruction in prologue: %s\n",
|
||||
guessed_name.to_string().c_str(), instr.to_string(file).c_str());
|
||||
warnings += ";; Flagged as ASM function because of " + instr.to_string(file) + "\n";
|
||||
guessed_name.to_string().c_str(), instr.to_string(file.labels).c_str());
|
||||
warnings += ";; Flagged as ASM function because of " + instr.to_string(file.labels) + "\n";
|
||||
suspected_asm = true;
|
||||
return;
|
||||
}
|
||||
@ -93,8 +94,8 @@ void Function::analyze_prologue(const LinkedObjectFile& file) {
|
||||
// support
|
||||
if (instr.kind == InstructionKind::SD && instr.get_src(0).get_reg() == make_gpr(Reg::S7)) {
|
||||
lg::warn("{} Suspected ASM function based on this instruction in prologue: {}\n",
|
||||
guessed_name.to_string(), instr.to_string(file));
|
||||
warnings += ";; Flagged as ASM function because of " + instr.to_string(file) + "\n";
|
||||
guessed_name.to_string(), instr.to_string(file.labels));
|
||||
warnings += ";; Flagged as ASM function because of " + instr.to_string(file.labels) + "\n";
|
||||
suspected_asm = true;
|
||||
return;
|
||||
}
|
||||
@ -164,9 +165,9 @@ void Function::analyze_prologue(const LinkedObjectFile& file) {
|
||||
suspected_asm = true;
|
||||
printf("[Warning] %s Suspected asm function that isn't flagged due to stack store %s\n",
|
||||
guessed_name.to_string().c_str(),
|
||||
instructions.at(idx + i).to_string(file).c_str());
|
||||
instructions.at(idx + i).to_string(file.labels).c_str());
|
||||
warnings += ";; Suspected asm function due to stack store: " +
|
||||
instructions.at(idx + i).to_string(file) + "\n";
|
||||
instructions.at(idx + i).to_string(file.labels) + "\n";
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -194,9 +195,9 @@ void Function::analyze_prologue(const LinkedObjectFile& file) {
|
||||
suspected_asm = true;
|
||||
printf("[Warning] %s Suspected asm function that isn't flagged due to stack store %s\n",
|
||||
guessed_name.to_string().c_str(),
|
||||
instructions.at(idx + i).to_string(file).c_str());
|
||||
instructions.at(idx + i).to_string(file.labels).c_str());
|
||||
warnings += ";; Suspected asm function due to stack store: " +
|
||||
instructions.at(idx + i).to_string(file) + "\n";
|
||||
instructions.at(idx + i).to_string(file.labels) + "\n";
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -643,7 +644,7 @@ void Function::find_type_defs(LinkedObjectFile& file, DecompilerTypeSystem& dts)
|
||||
// done!
|
||||
// fmt::print("Got type {} parent {}\n", type_name, parent_type);
|
||||
dts.add_type_parent(type_name, parent_type);
|
||||
Label flag_label = file.labels.at(label_idx);
|
||||
DecompilerLabel flag_label = file.labels.at(label_idx);
|
||||
u64 word = file.read_data_word(flag_label);
|
||||
flag_label.offset += 4;
|
||||
u64 word2 = file.read_data_word(flag_label);
|
||||
@ -744,4 +745,5 @@ BlockTopologicalSort Function::bb_topo_sort() {
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -15,6 +15,7 @@
|
||||
#include "common/type_system/TypeSpec.h"
|
||||
#include "decompiler/config.h"
|
||||
|
||||
namespace decompiler {
|
||||
class DecompilerTypeSystem;
|
||||
class IR_Atomic;
|
||||
class IR;
|
||||
@ -158,5 +159,5 @@ class Function {
|
||||
std::unordered_map<int, int> instruction_to_basic_op;
|
||||
std::unordered_map<int, int> basic_op_to_instruction;
|
||||
};
|
||||
|
||||
} // namespace decompiler
|
||||
#endif // NEXT_FUNCTION_H
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include "Function.h"
|
||||
#include "decompiler/IR/IR.h"
|
||||
|
||||
namespace decompiler {
|
||||
namespace {
|
||||
bool in_set(RegSet& set, const Register& obj) {
|
||||
return set.find(obj) != set.end();
|
||||
@ -170,4 +171,5 @@ void Function::run_reg_usage() {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -3,6 +3,7 @@
|
||||
#include "third-party/fmt/core.h"
|
||||
#include "decompiler/config.h"
|
||||
|
||||
namespace decompiler {
|
||||
namespace {
|
||||
TypeState construct_initial_typestate(const TypeSpec& f_ts) {
|
||||
TypeState result;
|
||||
@ -132,4 +133,5 @@ bool Function::run_type_analysis(const TypeSpec& my_type,
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -8,6 +8,7 @@
|
||||
#include "common/type_system/deftype.h"
|
||||
#include "decompiler/IR/IR.h"
|
||||
|
||||
namespace decompiler {
|
||||
namespace {
|
||||
struct FieldPrint {
|
||||
char format = '\0';
|
||||
@ -843,4 +844,5 @@ std::string TypeInspectorResult::print_as_deftype() {
|
||||
result.append(")\n");
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -8,10 +8,12 @@
|
||||
#include <vector>
|
||||
#include "common/common_types.h"
|
||||
|
||||
class Field;
|
||||
|
||||
namespace decompiler {
|
||||
class Function;
|
||||
class DecompilerTypeSystem;
|
||||
class LinkedObjectFile;
|
||||
class Field;
|
||||
|
||||
struct TypeInspectorResult {
|
||||
bool success = false;
|
||||
@ -34,3 +36,4 @@ TypeInspectorResult inspect_inspect_method(Function& inspect,
|
||||
const std::string& type_name,
|
||||
DecompilerTypeSystem& dts,
|
||||
LinkedObjectFile& file);
|
||||
} // namespace decompiler
|
@ -11,9 +11,11 @@
|
||||
#include "decompiler/Function/Function.h"
|
||||
#include "decompiler/Function/BasicBlocks.h"
|
||||
#include "decompiler/Disasm/InstructionMatching.h"
|
||||
#include "decompiler/ObjectFile/LinkedObjectFile.h"
|
||||
#include "decompiler/IR/IR.h"
|
||||
#include "common/symbols.h"
|
||||
|
||||
namespace decompiler {
|
||||
namespace {
|
||||
|
||||
///////////////////////////////
|
||||
@ -135,7 +137,7 @@ std::shared_ptr<IR_Atomic> to_asm_automatic(const std::string& str, Instruction&
|
||||
}
|
||||
|
||||
if (instr.n_src >= 3) {
|
||||
result->src1 = instr_atom_to_ir(instr.get_src(2), idx);
|
||||
result->src2 = instr_atom_to_ir(instr.get_src(2), idx);
|
||||
}
|
||||
|
||||
result->set_reg_info();
|
||||
@ -2520,7 +2522,7 @@ void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjec
|
||||
// everything failed
|
||||
if (!result) {
|
||||
// temp hack for debug:
|
||||
printf("Instruction -> BasicOp failed on %s\n", i.to_string(*file).c_str());
|
||||
printf("Instruction -> BasicOp failed on %s\n", i.to_string(file->labels).c_str());
|
||||
func->add_basic_op(std::make_shared<IR_Failed_Atomic>(), instr, instr + 1);
|
||||
} else {
|
||||
if (!func->contains_asm_ops && dynamic_cast<IR_AsmOp*>(result.get())) {
|
||||
@ -2536,3 +2538,4 @@ void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjec
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -6,8 +6,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace decompiler {
|
||||
class Function;
|
||||
struct BasicBlock;
|
||||
class LinkedObjectFile;
|
||||
|
||||
void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjectFile* file);
|
||||
void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjectFile* file);
|
||||
} // namespace decompiler
|
@ -7,6 +7,7 @@
|
||||
#include "decompiler/Disasm/InstructionMatching.h"
|
||||
#include "decompiler/IR/IR.h"
|
||||
|
||||
namespace decompiler {
|
||||
namespace {
|
||||
|
||||
std::shared_ptr<IR> cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx);
|
||||
@ -1278,3 +1279,4 @@ std::shared_ptr<IR> build_cfg_ir(Function& function,
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -2,9 +2,11 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace decompiler {
|
||||
class IR;
|
||||
class Function;
|
||||
class LinkedObjectFile;
|
||||
class ControlFlowGraph;
|
||||
|
||||
std::shared_ptr<IR> build_cfg_ir(Function& function, ControlFlowGraph& cfg, LinkedObjectFile& file);
|
||||
std::shared_ptr<IR> build_cfg_ir(Function& function, ControlFlowGraph& cfg, LinkedObjectFile& file);
|
||||
} // namespace decompiler
|
@ -3,6 +3,7 @@
|
||||
#include "common/goos/PrettyPrinter.h"
|
||||
#include "third-party/fmt/core.h"
|
||||
|
||||
namespace decompiler {
|
||||
// hack to print out reverse deref paths on loads to help with debugging load stuff.
|
||||
bool enable_hack_load_path_print = false;
|
||||
// hack to print (begin x) as x to make debug output easier to read.
|
||||
@ -1273,4 +1274,5 @@ goos::Object IR_Break::to_form(const LinkedObjectFile& file) const {
|
||||
void IR_Break::get_children(std::vector<std::shared_ptr<IR>>* output) const {
|
||||
output->push_back(return_code);
|
||||
output->push_back(dead_code);
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -11,14 +11,15 @@
|
||||
#include "decompiler/util/DecompilerTypeSystem.h"
|
||||
#include "decompiler/util/TP_Type.h"
|
||||
|
||||
class LinkedObjectFile;
|
||||
class DecompilerTypeSystem;
|
||||
class ExpressionStack;
|
||||
|
||||
namespace goos {
|
||||
class Object;
|
||||
}
|
||||
|
||||
namespace decompiler {
|
||||
class LinkedObjectFile;
|
||||
class DecompilerTypeSystem;
|
||||
class ExpressionStack;
|
||||
|
||||
class IR {
|
||||
public:
|
||||
virtual goos::Object to_form(const LinkedObjectFile& file) const = 0;
|
||||
@ -765,5 +766,5 @@ class IR_Break : public virtual IR {
|
||||
goos::Object to_form(const LinkedObjectFile& file) const override;
|
||||
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
|
||||
};
|
||||
|
||||
} // namespace decompiler
|
||||
#endif // JAK_IR_H
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include "IR.h"
|
||||
#include "decompiler/Function/ExpressionStack.h"
|
||||
|
||||
namespace decompiler {
|
||||
bool IR_Set_Atomic::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) {
|
||||
// first determine the type of the set.
|
||||
switch (kind) {
|
||||
@ -448,4 +449,5 @@ bool IR_FloatMath1::update_from_stack(const std::unordered_set<Register, Registe
|
||||
LinkedObjectFile& file) {
|
||||
update_from_stack_helper(&arg, consume, stack, file);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -5,6 +5,7 @@
|
||||
#include "decompiler/util/TP_Type.h"
|
||||
#include "decompiler/ObjectFile/LinkedObjectFile.h"
|
||||
|
||||
namespace decompiler {
|
||||
namespace {
|
||||
// bool is_plain_type(const TP_Type& type, const TypeSpec& ts) {
|
||||
// return type.as_typespec() == ts;
|
||||
@ -945,4 +946,5 @@ TP_Type IR_CMoveF::get_expression_type(const TypeState& input,
|
||||
(void)file;
|
||||
(void)dts;
|
||||
return TP_Type::make_from_typespec(TypeSpec("symbol"));
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
1127
decompiler/IR2/AtomicOp.cpp
Normal file
1127
decompiler/IR2/AtomicOp.cpp
Normal file
File diff suppressed because it is too large
Load Diff
544
decompiler/IR2/AtomicOp.h
Normal file
544
decompiler/IR2/AtomicOp.h
Normal file
@ -0,0 +1,544 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <optional>
|
||||
#include <cassert>
|
||||
#include "common/goos/Object.h"
|
||||
#include "decompiler/Disasm/Register.h"
|
||||
#include "decompiler/Disasm/Instruction.h"
|
||||
#include "Env.h"
|
||||
|
||||
namespace decompiler {
|
||||
class Expr;
|
||||
|
||||
/*!
|
||||
* A "Variable" represents a register at a given instruction index.
|
||||
* The register can either be a GOAL local variable or a GOAL register used in inline assembly.
|
||||
* Because OpenGOAL's registers don't one-to-one map to GOAL registers, GOAL "inline assembly
|
||||
* registers" will become OpenGOAL variables, and are treated similarly to variables in
|
||||
* decompilation.
|
||||
*
|
||||
* In the earlier parts of decompilation, this just behaves like a register in all cases.
|
||||
* But in later parts registers can be mapped to real local variables with types. A variable can
|
||||
* look itself up in an environment to determine what "local variable" it is.
|
||||
*
|
||||
* Note: a variable is _not_ allowed to be R0, AT, S7, K0, K1, FP, or RA by default, as these
|
||||
* can never hold normal GOAL locals. Inline assembly may use these, but you must set the allow_all
|
||||
* flag to true in the constructor of Variable to indicate this is what you really want.
|
||||
*
|
||||
* Note: access to the process pointer (s6) is handled as a variable. As a result, you may always
|
||||
* use s6 as a variable.
|
||||
*/
|
||||
class Variable {
|
||||
public:
|
||||
enum class Mode : u8 {
|
||||
READ, // represents value of the variable at the beginning of the instruction
|
||||
WRITE // represents value of the variable at the end of the instruction
|
||||
};
|
||||
|
||||
Variable() = default;
|
||||
Variable(Mode mode, Register reg, int atomic_idx, bool allow_all = false);
|
||||
|
||||
enum class Print {
|
||||
AS_REG, // print as a PS2 register name
|
||||
FULL, // print as a register name, plus an index, plus read or write
|
||||
AS_VARIABLE, // print local variable name, error if impossible
|
||||
AUTOMATIC, // print as variable, but if that's not possible print as reg.
|
||||
};
|
||||
|
||||
std::string to_string(const Env* env, Print mode = Print::AUTOMATIC) const;
|
||||
|
||||
bool operator==(const Variable& other) const;
|
||||
bool operator!=(const Variable& other) const;
|
||||
|
||||
const Register& reg() const { return m_reg; }
|
||||
Mode mode() const { return m_mode; }
|
||||
int idx() const { return m_atomic_idx; }
|
||||
|
||||
private:
|
||||
Mode m_mode = Mode::READ; // do we represent a read or a write?
|
||||
Register m_reg; // the EE register
|
||||
int m_atomic_idx = -1; // the index in the function's list of AtomicOps
|
||||
};
|
||||
|
||||
/*!
|
||||
* An atomic operation represents a single operation from the point of view of the IR2 system.
|
||||
* Each IR2 op is one or more instructions.
|
||||
* Each function can be represented as a list of AtomicOps. These are stored in exactly the same
|
||||
* order as the instructions appear.
|
||||
*
|
||||
* The AtomicOps use SimpleAtom and SimpleExpression. These are extremely limited versions of
|
||||
* the full IR2 expression system, but are much easier to work with because they are less general
|
||||
* and can't be nested infinitely. They also have features specific to the AtomicOp system that are
|
||||
* not required for full expressions. The full expression system will later convert these into the
|
||||
* more complicated expressions.
|
||||
*
|
||||
* The types of AtomicOp are:
|
||||
* ConditionalMoveFalseOp
|
||||
* CallOp
|
||||
* SpecialOp
|
||||
* BranchOp
|
||||
* LoadVarOp
|
||||
* StoreOp
|
||||
* SetVarConditionOp
|
||||
* AsmOp
|
||||
* SetVarExprOp
|
||||
* AsmOp
|
||||
*/
|
||||
class AtomicOp {
|
||||
public:
|
||||
explicit AtomicOp(int my_idx);
|
||||
std::string to_string(const std::vector<DecompilerLabel>& labels, const Env* env);
|
||||
virtual goos::Object to_form(const std::vector<DecompilerLabel>& labels,
|
||||
const Env* env) const = 0;
|
||||
virtual bool operator==(const AtomicOp& other) const = 0;
|
||||
bool operator!=(const AtomicOp& other) const;
|
||||
|
||||
// determine if this is a (set! <var> thing) form. These will be handled differently in expression
|
||||
// building.
|
||||
virtual bool is_variable_set() const = 0;
|
||||
|
||||
// determine if this is a GOAL "sequence point".
|
||||
// non-sequence point instructions may be out of order from the point of view of the expression
|
||||
// stack.
|
||||
virtual bool is_sequence_point() const = 0;
|
||||
|
||||
// get the variable being set by this operation. Only call this if is_variable_set returns true.
|
||||
virtual Variable get_set_destination() const = 0;
|
||||
|
||||
// get the value of the variable being set, as an expression. Only call this if is_variable_set
|
||||
// returns true.
|
||||
virtual std::unique_ptr<Expr> get_set_source_as_expr() const = 0;
|
||||
|
||||
// convert me to an expression. If I'm a set!, this will produce a (set! x y), which may be
|
||||
// undesirable when expression stacking.
|
||||
virtual std::unique_ptr<Expr> get_as_expr() const = 0;
|
||||
|
||||
// figure out what registers are read and written in this AtomicOp and update read_regs,
|
||||
// write_regs, and clobber_regs. It's expected that these have duplicates if a register appears
|
||||
// in the original instructions multiple times. Ex: "and v0, v1, v1" would end up putting v1 in
|
||||
// read twice.
|
||||
virtual void update_register_info() = 0;
|
||||
|
||||
const std::vector<Register>& read_regs() { return m_read_regs; }
|
||||
const std::vector<Register>& write_regs() { return m_write_regs; }
|
||||
const std::vector<Register>& clobber_regs() { return m_clobber_regs; }
|
||||
|
||||
protected:
|
||||
int m_my_idx = -1;
|
||||
|
||||
// the register values that are read (at the start of this op)
|
||||
std::vector<Register> m_read_regs;
|
||||
// the registers that have actual values written into them (at the end of this op)
|
||||
std::vector<Register> m_write_regs;
|
||||
// the registers which have junk written into them.
|
||||
std::vector<Register> m_clobber_regs;
|
||||
};
|
||||
|
||||
/*!
|
||||
* A SimpleAtom has a value. In some cases it can be set.
|
||||
*/
|
||||
class SimpleAtom {
|
||||
public:
|
||||
enum class Kind : u8 {
|
||||
VARIABLE,
|
||||
INTEGER_CONSTANT,
|
||||
SYMBOL_PTR,
|
||||
SYMBOL_VAL,
|
||||
EMPTY_LIST,
|
||||
STATIC_ADDRESS,
|
||||
INVALID
|
||||
};
|
||||
|
||||
SimpleAtom() = default;
|
||||
static SimpleAtom make_var(const Variable& var);
|
||||
static SimpleAtom make_sym_ptr(const std::string& name);
|
||||
static SimpleAtom make_sym_val(const std::string& name);
|
||||
static SimpleAtom make_empty_list();
|
||||
static SimpleAtom make_int_constant(s64 value);
|
||||
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const;
|
||||
|
||||
bool is_var() const { return m_kind == Kind::VARIABLE; }
|
||||
const Variable& var() const {
|
||||
assert(is_var());
|
||||
return m_variable;
|
||||
}
|
||||
bool is_int() const { return m_kind == Kind::INTEGER_CONSTANT; };
|
||||
bool is_sym_ptr() const { return m_kind == Kind::SYMBOL_PTR; };
|
||||
bool is_sym_val() const { return m_kind == Kind::SYMBOL_VAL; };
|
||||
bool is_empty_list() const { return m_kind == Kind::EMPTY_LIST; };
|
||||
bool is_static_addr() const { return m_kind == Kind::STATIC_ADDRESS; };
|
||||
bool operator==(const SimpleAtom& other) const;
|
||||
bool operator!=(const SimpleAtom& other) const { return !((*this) == other); }
|
||||
void get_regs(std::vector<Register>* out) const;
|
||||
|
||||
private:
|
||||
Kind m_kind = Kind::INVALID;
|
||||
std::string m_string; // for symbol ptr and symbol val
|
||||
s64 m_int = 0; // for integer constant and static address label id
|
||||
Variable m_variable;
|
||||
};
|
||||
|
||||
/*!
|
||||
* A "simple expression" can be used within an AtomicOp.
|
||||
* AtomicOps are often made up of very few instructions, so these expressions are quite simple and
|
||||
* can't nest. There is an "operation" and some arguments. There are no side effects of a
|
||||
* SimpleExpression. The side effects will be captured by the AtomicOp.
|
||||
*
|
||||
* Note - there is an expression kind called identity which takes one argument and uses that
|
||||
* argument as an expression.
|
||||
*/
|
||||
class SimpleExpression {
|
||||
public:
|
||||
enum class Kind : u8 {
|
||||
INVALID,
|
||||
IDENTITY,
|
||||
DIV_S,
|
||||
MUL_S,
|
||||
ADD_S,
|
||||
SUB_S,
|
||||
MIN_S,
|
||||
MAX_S,
|
||||
FLOAT_TO_INT,
|
||||
INT_TO_FLOAT,
|
||||
ABS_S,
|
||||
NEG_S,
|
||||
SQRT_S,
|
||||
ADD,
|
||||
SUB,
|
||||
MUL_SIGNED,
|
||||
DIV_SIGNED,
|
||||
MOD_SIGNED,
|
||||
DIV_UNSIGNED,
|
||||
MOD_UNSIGNED,
|
||||
OR,
|
||||
AND,
|
||||
NOR,
|
||||
XOR,
|
||||
LEFT_SHIFT,
|
||||
RIGHT_SHIFT_ARITH,
|
||||
RIGHT_SHIFT_LOGIC,
|
||||
MUL_UNSIGNED,
|
||||
NOT,
|
||||
NEG
|
||||
};
|
||||
|
||||
// how many arguments?
|
||||
int args() const { return n_args; }
|
||||
const SimpleAtom& get_arg(int idx) const {
|
||||
assert(idx < args());
|
||||
return m_args[idx];
|
||||
}
|
||||
Kind kind() const { return m_kind; }
|
||||
|
||||
SimpleExpression(Kind kind, const SimpleAtom& arg0);
|
||||
SimpleExpression(Kind kind, const SimpleAtom& arg0, const SimpleAtom& arg1);
|
||||
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const;
|
||||
bool operator==(const SimpleExpression& other) const;
|
||||
bool is_identity() const { return m_kind == Kind::IDENTITY; }
|
||||
void get_regs(std::vector<Register>* out) const;
|
||||
|
||||
private:
|
||||
Kind m_kind = Kind::INVALID;
|
||||
SimpleAtom m_args[2];
|
||||
s8 n_args = -1;
|
||||
};
|
||||
|
||||
/*!
|
||||
* Set a variable equal to a Simple Expression
|
||||
*/
|
||||
class SetVarOp : public AtomicOp {
|
||||
public:
|
||||
SetVarOp(const Variable& dst, const SimpleExpression& src, int my_idx)
|
||||
: AtomicOp(my_idx), m_dst(dst), m_src(src) {
|
||||
assert(my_idx == dst.idx());
|
||||
}
|
||||
virtual goos::Object to_form(const std::vector<DecompilerLabel>& labels,
|
||||
const Env* env) const override;
|
||||
bool operator==(const AtomicOp& other) const override;
|
||||
bool is_variable_set() const override;
|
||||
bool is_sequence_point() const override;
|
||||
Variable get_set_destination() const override;
|
||||
std::unique_ptr<Expr> get_set_source_as_expr() const override;
|
||||
std::unique_ptr<Expr> get_as_expr() const override;
|
||||
void update_register_info() override;
|
||||
|
||||
private:
|
||||
Variable m_dst;
|
||||
SimpleExpression m_src;
|
||||
};
|
||||
|
||||
/*!
|
||||
* An AsmOp represents a single inline assembly instruction. This is used when the BasicOpBuilder
|
||||
* pass decides that an instruction could not have been generated from high-level GOAL code, and
|
||||
* instead must be due to inline assembly.
|
||||
*
|
||||
* Each AsmOp stores the instruction it uses, as well as "Variable"s for each register used.
|
||||
*/
|
||||
class AsmOp : public AtomicOp {
|
||||
public:
|
||||
AsmOp(Instruction instr, int my_idx);
|
||||
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const override;
|
||||
bool operator==(const AtomicOp& other) const override;
|
||||
bool is_variable_set() const override;
|
||||
bool is_sequence_point() const override;
|
||||
Variable get_set_destination() const override;
|
||||
std::unique_ptr<Expr> get_set_source_as_expr() const override;
|
||||
std::unique_ptr<Expr> get_as_expr() const override;
|
||||
void update_register_info() override;
|
||||
|
||||
private:
|
||||
Instruction m_instr;
|
||||
std::optional<Variable> m_dst;
|
||||
std::optional<Variable> m_src[3];
|
||||
};
|
||||
|
||||
/*!
|
||||
* A condition represents something that can generate a 0 or 1 based on a check or comparison.
|
||||
* This can be used as a branch condition in BranchOp
|
||||
* This can be used as a condition in an SetVarConditionOp, which sets a variable to a GOAL boolean.
|
||||
* Sometimes a SetVarConditionOp gets spread across many many instructions, in which case it is
|
||||
* not correctly detected here.
|
||||
*/
|
||||
class IR2_Condition {
|
||||
public:
|
||||
enum class Kind {
|
||||
NOT_EQUAL,
|
||||
EQUAL,
|
||||
LESS_THAN_SIGNED,
|
||||
GREATER_THAN_SIGNED,
|
||||
LEQ_SIGNED,
|
||||
GEQ_SIGNED,
|
||||
GREATER_THAN_ZERO_SIGNED,
|
||||
LEQ_ZERO_SIGNED,
|
||||
LESS_THAN_ZERO,
|
||||
GEQ_ZERO_SIGNED,
|
||||
LESS_THAN_UNSIGNED,
|
||||
GREATER_THAN_UNSIGNED,
|
||||
LEQ_UNSIGNED,
|
||||
GEQ_UNSIGNED,
|
||||
ZERO,
|
||||
NONZERO,
|
||||
FALSE,
|
||||
TRUTHY,
|
||||
ALWAYS,
|
||||
NEVER,
|
||||
FLOAT_EQUAL,
|
||||
FLOAT_NOT_EQUAL,
|
||||
FLOAT_LESS_THAN,
|
||||
FLOAT_GEQ,
|
||||
FLOAT_LEQ,
|
||||
FLOAT_GREATER_THAN,
|
||||
INVALID
|
||||
};
|
||||
|
||||
explicit IR2_Condition(Kind kind);
|
||||
IR2_Condition(Kind kind, const Variable& src0);
|
||||
IR2_Condition(Kind kind, const Variable& src0, const Variable& src1);
|
||||
|
||||
void invert();
|
||||
bool operator==(const IR2_Condition& other) const;
|
||||
bool operator!=(const IR2_Condition& other) const { return !((*this) == other); }
|
||||
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const;
|
||||
void get_regs(std::vector<Register>* out) const;
|
||||
|
||||
private:
|
||||
Kind m_kind = Kind::INVALID;
|
||||
Variable m_src[2];
|
||||
};
|
||||
|
||||
/*!
|
||||
* Set a variable to a GOAL boolean, based off of a condition.
|
||||
*/
|
||||
class SetVarConditionOp : public AtomicOp {
|
||||
public:
|
||||
SetVarConditionOp(Variable dst, IR2_Condition condition, int my_idx);
|
||||
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const override;
|
||||
bool operator==(const AtomicOp& other) const override;
|
||||
bool is_variable_set() const override;
|
||||
bool is_sequence_point() const override;
|
||||
Variable get_set_destination() const override;
|
||||
std::unique_ptr<Expr> get_set_source_as_expr() const override;
|
||||
std::unique_ptr<Expr> get_as_expr() const override;
|
||||
void update_register_info() override;
|
||||
|
||||
private:
|
||||
Variable m_dst;
|
||||
IR2_Condition m_condition;
|
||||
};
|
||||
|
||||
/*!
|
||||
* Store an Atom into a memory location.
|
||||
* Note - this is _not_ considered a set! form because you are not setting the value of a
|
||||
* register which can be expression-compacted.
|
||||
*/
|
||||
class StoreOp : public AtomicOp {
|
||||
public:
|
||||
StoreOp(SimpleExpression addr, SimpleAtom value, int my_idx);
|
||||
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const override;
|
||||
bool operator==(const AtomicOp& other) const override;
|
||||
bool is_variable_set() const override;
|
||||
bool is_sequence_point() const override;
|
||||
Variable get_set_destination() const override;
|
||||
std::unique_ptr<Expr> get_set_source_as_expr() const override;
|
||||
std::unique_ptr<Expr> get_as_expr() const override;
|
||||
void update_register_info() override;
|
||||
|
||||
private:
|
||||
SimpleExpression m_addr;
|
||||
SimpleAtom m_value;
|
||||
};
|
||||
|
||||
/*!
|
||||
* Load a value into a variable.
|
||||
* This is treated as a set! form.
|
||||
*/
|
||||
class LoadVarOp : public AtomicOp {
|
||||
public:
|
||||
LoadVarOp(Variable dst, SimpleExpression src, int my_idx);
|
||||
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const override;
|
||||
bool operator==(const AtomicOp& other) const override;
|
||||
bool is_variable_set() const override;
|
||||
bool is_sequence_point() const override;
|
||||
Variable get_set_destination() const override;
|
||||
std::unique_ptr<Expr> get_set_source_as_expr() const override;
|
||||
std::unique_ptr<Expr> get_as_expr() const override;
|
||||
void update_register_info() override;
|
||||
|
||||
private:
|
||||
Variable m_dst;
|
||||
SimpleExpression m_src;
|
||||
};
|
||||
|
||||
/*!
|
||||
* This represents one of the possible instructions that can go in a branch delay slot.
|
||||
* These will be "absorbed" into higher level structures, but for the purpose of printing AtomicOps,
|
||||
* it will be nice to have these print like expressions.
|
||||
*
|
||||
* These are always part of the branch op.
|
||||
*/
|
||||
class IR2_BranchDelay {
|
||||
public:
|
||||
enum class Kind {
|
||||
NOP,
|
||||
SET_REG_FALSE,
|
||||
SET_REG_TRUE,
|
||||
SET_REG_REG,
|
||||
SET_BINTEGER,
|
||||
SET_PAIR,
|
||||
DSLLV,
|
||||
NEGATE
|
||||
};
|
||||
|
||||
explicit IR2_BranchDelay(Kind kind);
|
||||
IR2_BranchDelay(Kind kind, Variable var0);
|
||||
IR2_BranchDelay(Kind kind, Variable var0, Variable var1);
|
||||
IR2_BranchDelay(Kind kind, Variable var0, Variable var1, Variable var2);
|
||||
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const;
|
||||
bool operator==(const IR2_BranchDelay& other) const;
|
||||
void get_regs(std::vector<Register>* write, std::vector<Register>* read) const;
|
||||
|
||||
private:
|
||||
std::optional<Variable> m_var[3];
|
||||
Kind m_kind;
|
||||
};
|
||||
|
||||
/*!
|
||||
* This represents a combination of a condition + a branch + the branch delay slot.
|
||||
* This is considered as a single operation.
|
||||
*/
|
||||
class BranchOp : public AtomicOp {
|
||||
public:
|
||||
BranchOp(bool likely,
|
||||
IR2_Condition condition,
|
||||
int label,
|
||||
IR2_BranchDelay branch_delay,
|
||||
int my_idx);
|
||||
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const override;
|
||||
bool operator==(const AtomicOp& other) const override;
|
||||
bool is_variable_set() const override;
|
||||
bool is_sequence_point() const override;
|
||||
Variable get_set_destination() const override;
|
||||
std::unique_ptr<Expr> get_set_source_as_expr() const override;
|
||||
std::unique_ptr<Expr> get_as_expr() const override;
|
||||
void update_register_info() override;
|
||||
|
||||
private:
|
||||
bool m_likely = false;
|
||||
IR2_Condition m_condition;
|
||||
int m_label = -1;
|
||||
IR2_BranchDelay m_branch_delay;
|
||||
};
|
||||
|
||||
/*!
|
||||
* A "special" op has no arguments.
|
||||
* The kinds are NOP, BREAK, and SUSPEND.
|
||||
*/
|
||||
class SpecialOp : public AtomicOp {
|
||||
public:
|
||||
enum class Kind {
|
||||
NOP,
|
||||
BREAK,
|
||||
SUSPEND,
|
||||
};
|
||||
|
||||
SpecialOp(Kind kind, int my_idx);
|
||||
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const override;
|
||||
bool operator==(const AtomicOp& other) const override;
|
||||
bool is_variable_set() const override;
|
||||
bool is_sequence_point() const override;
|
||||
Variable get_set_destination() const override;
|
||||
std::unique_ptr<Expr> get_set_source_as_expr() const override;
|
||||
std::unique_ptr<Expr> get_as_expr() const override;
|
||||
void update_register_info() override;
|
||||
|
||||
private:
|
||||
Kind m_kind;
|
||||
};
|
||||
|
||||
/*!
|
||||
* Represents a function call.
|
||||
* This has so many special cases and exceptions that it is separate from SpecialOp.
|
||||
*/
|
||||
class CallOp : public AtomicOp {
|
||||
public:
|
||||
CallOp(int my_idx);
|
||||
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const override;
|
||||
bool operator==(const AtomicOp& other) const override;
|
||||
bool is_variable_set() const override;
|
||||
bool is_sequence_point() const override;
|
||||
Variable get_set_destination() const override;
|
||||
std::unique_ptr<Expr> get_set_source_as_expr() const override;
|
||||
std::unique_ptr<Expr> get_as_expr() const override;
|
||||
void update_register_info() override;
|
||||
};
|
||||
|
||||
/*!
|
||||
* Unfortunately the original GOAL compiler does something weird when compiling (zero? x) or (not
|
||||
* (zero? x)) when the result needs to be stored in a GOAL boolean (not in a branch condition). It
|
||||
* first does a (set! result #t), then (possibly) a bunch of code to evaluate x, then does a
|
||||
* conditional move (movn/movz). As a result, we can't recognize this as a Condition in the
|
||||
* AtomicOp pass. Instead we'll recognize it as a (set! result #t) .... (cmove result flag) where
|
||||
* flag is checked to be 0 or not. It's weird because all of the other similar cases get this
|
||||
* right.
|
||||
*
|
||||
* Note - this isn't considered a variable set. It's "conditional set" so it needs to be
|
||||
* handled separately. Unfortunately.
|
||||
*/
|
||||
class ConditionalMoveFalseOp : public AtomicOp {
|
||||
public:
|
||||
ConditionalMoveFalseOp(Variable dst, Variable src, bool on_zero, int my_idx);
|
||||
goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const override;
|
||||
bool operator==(const AtomicOp& other) const override;
|
||||
bool is_variable_set() const override;
|
||||
bool is_sequence_point() const override;
|
||||
Variable get_set_destination() const override;
|
||||
std::unique_ptr<Expr> get_set_source_as_expr() const override;
|
||||
std::unique_ptr<Expr> get_as_expr() const override;
|
||||
void update_register_info() override;
|
||||
|
||||
private:
|
||||
Variable m_dst, m_src;
|
||||
bool m_on_zero;
|
||||
};
|
||||
} // namespace decompiler
|
141
decompiler/IR2/AtomicOpBuilder.cpp
Normal file
141
decompiler/IR2/AtomicOpBuilder.cpp
Normal file
@ -0,0 +1,141 @@
|
||||
#include "AtomicOpBuilder.h"
|
||||
#include "common/log/log.h"
|
||||
#include "decompiler/Function/BasicBlocks.h"
|
||||
#include "decompiler/Function/Function.h"
|
||||
|
||||
namespace decompiler {
|
||||
|
||||
namespace {
|
||||
|
||||
/*!
 * Build the WRITE-mode Variable for register reg at atomic op index idx.
 */
Variable make_dst_var(Register reg, int idx) {
  const auto mode = Variable::Mode::WRITE;
  return Variable(mode, reg, idx);
}
|
||||
|
||||
/*!
 * Build the READ-mode Variable for register reg at atomic op index idx.
 */
Variable make_src_var(Register reg, int idx) {
  const auto mode = Variable::Mode::READ;
  return Variable(mode, reg, idx);
}
|
||||
|
||||
/*!
 * Build a variable SimpleAtom that reads register reg at atomic op index idx.
 */
SimpleAtom make_src_atom(Register reg, int idx) {
  const Variable source = make_src_var(reg, idx);
  return SimpleAtom::make_var(source);
}
|
||||
|
||||
/*!
|
||||
* Convert a single instruction in the form instr dest_reg, src_reg, src_reg
|
||||
* to an atomic op of (set! dst_reg (op src_reg src_reg))
|
||||
* Like daddu a0, a1, a2
|
||||
*/
|
||||
void make_3reg_op(const Instruction& instr,
|
||||
SimpleExpression::Kind kind,
|
||||
int idx,
|
||||
std::unique_ptr<AtomicOp>& result) {
|
||||
auto dst = make_dst_var(instr.get_dst(0).get_reg(), idx);
|
||||
auto src0 = make_src_atom(instr.get_src(0).get_reg(), idx);
|
||||
auto src1 = make_src_atom(instr.get_src(1).get_reg(), idx);
|
||||
result = std::make_unique<SetVarOp>(dst, SimpleExpression(kind, src0, src1), idx);
|
||||
}
|
||||
|
||||
/*!
 * Convert a single AND instruction to an atomic op. Always succeeds.
 */
bool convert_and_1(const Instruction& i0, int idx, std::unique_ptr<AtomicOp>& result) {
  // and reg, reg, reg:
  constexpr auto op_kind = SimpleExpression::Kind::AND;
  make_3reg_op(i0, op_kind, idx, result);
  return true;
}
|
||||
|
||||
bool convert_1(const Instruction& i0, int idx, std::unique_ptr<AtomicOp>& result) {
|
||||
switch (i0.kind) {
|
||||
case InstructionKind::AND:
|
||||
return convert_and_1(i0, idx, result);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/*!
|
||||
* Convert an entire basic block and add the results to a FunctionAtomicOps
|
||||
* @param block_id : the index of the block
|
||||
* @param begin : the start of the instructions for the block
|
||||
* @param end : the end of the instructions for the block
|
||||
* @param container : the container to add to
|
||||
*/
|
||||
void convert_block_to_atomic_ops(int begin_idx,
|
||||
std::vector<Instruction>::const_iterator begin,
|
||||
std::vector<Instruction>::const_iterator end,
|
||||
const std::vector<DecompilerLabel>& labels,
|
||||
FunctionAtomicOps* container) {
|
||||
container->block_id_to_first_atomic_op.push_back(container->ops.size());
|
||||
for (auto& instr = begin; instr < end;) {
|
||||
// how many instructions can we look at, at most?
|
||||
int n_instr = end - instr;
|
||||
// how many instructions did we use?
|
||||
int length = 0;
|
||||
// what is the index of the atomic op we would add
|
||||
int op_idx = int(container->ops.size());
|
||||
|
||||
bool converted = false;
|
||||
std::unique_ptr<AtomicOp> op;
|
||||
|
||||
if (n_instr >= 4) {
|
||||
// try 4 instructions
|
||||
}
|
||||
|
||||
if (!converted && n_instr >= 3) {
|
||||
// try 3 instructions
|
||||
}
|
||||
|
||||
if (!converted && n_instr >= 2) {
|
||||
// try 2 instructions
|
||||
}
|
||||
|
||||
if (!converted) {
|
||||
// try 1 instruction
|
||||
if (convert_1(*instr, op_idx, op)) {
|
||||
converted = true;
|
||||
length = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!converted) {
|
||||
// try assembly fallback.
|
||||
}
|
||||
|
||||
if (!converted) {
|
||||
// failed!
|
||||
lg::die("Failed to convert instruction {} to an atomic op", instr->to_string(labels));
|
||||
}
|
||||
|
||||
assert(converted && length && op);
|
||||
// add mappings:
|
||||
container->atomic_op_to_instruction[container->ops.size()] = begin_idx;
|
||||
for (int i = 0; i < length; i++) {
|
||||
container->instruction_to_basic_op[begin_idx + i] = container->ops.size();
|
||||
}
|
||||
// add
|
||||
op->update_register_info();
|
||||
container->ops.emplace_back(std::move(op));
|
||||
instr += length;
|
||||
}
|
||||
container->block_id_to_end_atomic_op.push_back(container->ops.size());
|
||||
}
|
||||
|
||||
/*!
 * Convert every basic block of a function to atomic ops.
 * Blocks without instructions get -1 for both their first and end op index, so the
 * per-block vectors always have one entry per basic block.
 * @param func   : the function whose basic_blocks and instructions are converted
 * @param labels : labels, passed through to the per-block conversion
 * @return the ops and mappings for the whole function
 */
FunctionAtomicOps convert_function_to_atomic_ops(const Function& func,
                                                 const std::vector<DecompilerLabel>& labels) {
  FunctionAtomicOps result;
  const auto first_instr = func.instructions.begin();

  for (const auto& block : func.basic_blocks) {
    const bool has_instructions = block.end_word > block.start_word;
    if (!has_instructions) {
      // nothing to convert, but keep the block-indexed vectors aligned
      result.block_id_to_first_atomic_op.push_back(-1);
      result.block_id_to_end_atomic_op.push_back(-1);
      continue;
    }

    // we should only consider the blocks which actually have instructions:
    auto block_begin = first_instr + block.start_word;
    auto block_end = first_instr + block.end_word;
    convert_block_to_atomic_ops(block.start_word, block_begin, block_end, labels, &result);
  }

  assert(func.basic_blocks.size() == result.block_id_to_end_atomic_op.size());
  assert(func.basic_blocks.size() == result.block_id_to_first_atomic_op.size());
  return result;
}
|
||||
} // namespace decompiler
|
47
decompiler/IR2/AtomicOpBuilder.h
Normal file
47
decompiler/IR2/AtomicOpBuilder.h
Normal file
@ -0,0 +1,47 @@
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include "AtomicOp.h"
|
||||
|
||||
namespace decompiler {
|
||||
class Function;
|
||||
struct BasicBlock;
|
||||
class LinkedObjectFile;
|
||||
|
||||
/*!
|
||||
* A collection of Atomic Ops in a function
|
||||
*/
|
||||
struct FunctionAtomicOps {
|
||||
// the actual ops, stored in the correct order
|
||||
std::vector<std::unique_ptr<AtomicOp>> ops;
|
||||
|
||||
// mappings from instructions to atomic ops and back
|
||||
std::unordered_map<int, int> instruction_to_basic_op;
|
||||
std::unordered_map<int, int> atomic_op_to_instruction;
|
||||
|
||||
// map from basic block to the index of the first op
|
||||
std::vector<int> block_id_to_first_atomic_op;
|
||||
// map from basic block to the index of the last op + 1
|
||||
std::vector<int> block_id_to_end_atomic_op;
|
||||
};
|
||||
|
||||
/*!
|
||||
* Convert an entire basic block and add the results to a FunctionAtomicOps.
|
||||
* Updates the mapping between blocks, instructions, and atomic ops as needed
|
||||
* @param begin_idx : the index of the first instruction for the block
|
||||
* @param begin : the start of the instructions for the block
|
||||
* @param end : the end of the instructions for the block
|
||||
* @param labels : label names for the function, used for error prints on failed conversions
|
||||
* @param container : the container to add to
|
||||
*/
|
||||
void convert_block_to_atomic_ops(int begin_idx,
|
||||
std::vector<Instruction>::const_iterator begin,
|
||||
std::vector<Instruction>::const_iterator end,
|
||||
const std::vector<DecompilerLabel>& labels,
|
||||
FunctionAtomicOps* container);
|
||||
|
||||
/*!
|
||||
* Convert an entire function to AtomicOps
|
||||
*/
|
||||
FunctionAtomicOps convert_function_to_atomic_ops(const Function& func,
|
||||
const std::vector<DecompilerLabel>& labels);
|
||||
} // namespace decompiler
|
10
decompiler/IR2/Env.cpp
Normal file
10
decompiler/IR2/Env.cpp
Normal file
@ -0,0 +1,10 @@
|
||||
#include <stdexcept>
|
||||
#include "Env.h"
|
||||
|
||||
namespace decompiler {
|
||||
/*!
 * Placeholder: looking up variable names is not implemented yet.
 * Both arguments are ignored and std::runtime_error is always thrown.
 */
std::string Env::get_variable_name(Register reg, int atomic_idx) const {
  // mark the parameters as used until there is a real implementation
  static_cast<void>(reg);
  static_cast<void>(atomic_idx);
  throw std::runtime_error("Env::get_variable_name not yet implemented.");
}
|
||||
} // namespace decompiler
|
21
decompiler/IR2/Env.h
Normal file
21
decompiler/IR2/Env.h
Normal file
@ -0,0 +1,21 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include "decompiler/Disasm/Register.h"
|
||||
|
||||
namespace decompiler {
|
||||
/*!
|
||||
* An "environment" for a single function.
|
||||
* This contains data for an entire function, like which registers are live when, the types of
|
||||
* values in registers, and local variable names. This does not actually store IR itself, just
|
||||
* shared data that all IR can look at. The concept is somewhat similar to Env in the compiler.
|
||||
*/
|
||||
class Env {
|
||||
public:
|
||||
bool has_local_vars() const { return m_has_local_vars; }
|
||||
std::string get_variable_name(Register reg, int atomic_idx) const;
|
||||
|
||||
private:
|
||||
bool m_has_local_vars = false;
|
||||
};
|
||||
} // namespace decompiler
|
8
decompiler/IR2/IR2.h
Normal file
8
decompiler/IR2/IR2.h
Normal file
@ -0,0 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
namespace decompiler {
|
||||
class IR2 {
|
||||
public:
|
||||
private:
|
||||
};
|
||||
} // namespace decompiler
|
@ -16,6 +16,7 @@
|
||||
#include "common/log/log.h"
|
||||
#include "common/goos/PrettyPrinter.h"
|
||||
|
||||
namespace decompiler {
|
||||
/*!
|
||||
* Set the number of segments in this object file.
|
||||
* This can only be done once, and must be done before adding any words.
|
||||
@ -45,7 +46,7 @@ int LinkedObjectFile::get_label_id_for(int seg, int offset) {
|
||||
if (kv == label_per_seg_by_offset.at(seg).end()) {
|
||||
// create a new label
|
||||
int id = labels.size();
|
||||
Label label;
|
||||
DecompilerLabel label;
|
||||
label.target_segment = seg;
|
||||
label.offset = offset;
|
||||
label.name = "L" + std::to_string(id);
|
||||
@ -498,7 +499,7 @@ void LinkedObjectFile::process_fp_relative_links() {
|
||||
} break;
|
||||
|
||||
default:
|
||||
printf("unknown fp using op: %s\n", instr.to_string(*this).c_str());
|
||||
printf("unknown fp using op: %s\n", instr.to_string(labels).c_str());
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
@ -544,7 +545,7 @@ std::string LinkedObjectFile::to_asm_json(const std::string& obj_file_name) {
|
||||
}
|
||||
auto& instr = func.instructions.at(i);
|
||||
op["id"] = i;
|
||||
op["asm_op"] = instr.to_string(*this);
|
||||
op["asm_op"] = instr.to_string(labels);
|
||||
|
||||
if (func.has_basic_ops() && func.instr_starts_basic_op(i)) {
|
||||
op["basic_op"] = func.get_basic_op_at_instr(i)->print(*this);
|
||||
@ -608,7 +609,7 @@ std::string LinkedObjectFile::print_function_disassembly(Function& func,
|
||||
}
|
||||
|
||||
auto& instr = func.instructions.at(i);
|
||||
std::string line = " " + instr.to_string(*this);
|
||||
std::string line = " " + instr.to_string(labels);
|
||||
|
||||
if (write_hex) {
|
||||
if (line.length() < 60) {
|
||||
@ -1053,14 +1054,15 @@ goos::Object LinkedObjectFile::to_form_script_object(int seg,
|
||||
return result;
|
||||
}
|
||||
|
||||
u32 LinkedObjectFile::read_data_word(const Label& label) {
|
||||
u32 LinkedObjectFile::read_data_word(const DecompilerLabel& label) {
|
||||
assert(0 == (label.offset % 4));
|
||||
auto& word = words_by_seg.at(label.target_segment).at(label.offset / 4);
|
||||
assert(word.kind == LinkedWord::Kind::PLAIN_DATA);
|
||||
return word.data;
|
||||
}
|
||||
|
||||
std::string LinkedObjectFile::get_goal_string_by_label(const Label& label) const {
|
||||
std::string LinkedObjectFile::get_goal_string_by_label(const DecompilerLabel& label) const {
|
||||
assert(0 == (label.offset % 4));
|
||||
return get_goal_string(label.target_segment, (label.offset / 4) - 1, false);
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -14,19 +14,11 @@
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include "LinkedWord.h"
|
||||
#include "decompiler/Disasm/DecompilerLabel.h"
|
||||
#include "decompiler/Function/Function.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
/*!
|
||||
* A label to a location in this object file.
|
||||
* Doesn't have to be word aligned.
|
||||
*/
|
||||
struct Label {
|
||||
std::string name;
|
||||
int target_segment;
|
||||
int offset; // in bytes
|
||||
};
|
||||
|
||||
namespace decompiler {
|
||||
/*!
|
||||
* An object file's data with linking information included.
|
||||
*/
|
||||
@ -69,8 +61,8 @@ class LinkedObjectFile {
|
||||
const std::string& extra_name);
|
||||
std::string print_asm_function_disassembly(const std::string& my_name);
|
||||
|
||||
u32 read_data_word(const Label& label);
|
||||
std::string get_goal_string_by_label(const Label& label) const;
|
||||
u32 read_data_word(const DecompilerLabel& label);
|
||||
std::string get_goal_string_by_label(const DecompilerLabel& label) const;
|
||||
|
||||
struct Stats {
|
||||
uint32_t total_code_bytes = 0;
|
||||
@ -131,7 +123,7 @@ class LinkedObjectFile {
|
||||
std::vector<std::vector<LinkedWord>> words_by_seg;
|
||||
std::vector<uint32_t> offset_of_data_zone_by_seg;
|
||||
std::vector<std::vector<Function>> functions_by_seg;
|
||||
std::vector<Label> labels;
|
||||
std::vector<DecompilerLabel> labels;
|
||||
|
||||
private:
|
||||
goos::Object to_form_script(int seg, int word_idx, std::vector<bool>& seen);
|
||||
@ -142,5 +134,6 @@ class LinkedObjectFile {
|
||||
|
||||
std::vector<std::unordered_map<int, int>> label_per_seg_by_offset;
|
||||
};
|
||||
} // namespace decompiler
|
||||
|
||||
#endif // NEXT_LINKEDOBJECTFILE_H
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "decompiler/util/DecompilerTypeSystem.h"
|
||||
#include "common/link_types.h"
|
||||
|
||||
namespace decompiler {
|
||||
// There are three link versions:
|
||||
// V2 - not really in use anymore, but V4 will reuse logic from it (and the game didn't rename the
|
||||
// functions) V3 - optimized for code and small stuff. Supports segments (main, debug, top-level) V4
|
||||
@ -819,3 +820,4 @@ LinkedObjectFile to_linked_object_file(const std::vector<uint8_t>& data,
|
||||
|
||||
return result;
|
||||
}
|
||||
} // namespace decompiler
|
@ -11,9 +11,11 @@
|
||||
|
||||
#include "LinkedObjectFile.h"
|
||||
|
||||
namespace decompiler {
|
||||
class DecompilerTypeSystem;
|
||||
LinkedObjectFile to_linked_object_file(const std::vector<uint8_t>& data,
|
||||
const std::string& name,
|
||||
DecompilerTypeSystem& dts);
|
||||
} // namespace decompiler
|
||||
|
||||
#endif // NEXT_LINKEDOBJECTFILECREATION_H
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
namespace decompiler {
|
||||
class LinkedWord {
|
||||
public:
|
||||
explicit LinkedWord(uint32_t _data) : data(_data) {}
|
||||
@ -31,5 +32,6 @@ class LinkedWord {
|
||||
int label_id = -1;
|
||||
std::string symbol_name;
|
||||
};
|
||||
} // namespace decompiler
|
||||
|
||||
#endif // JAK2_DISASSEMBLER_LINKEDWORD_H
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "common/log/log.h"
|
||||
#include "third-party/json.hpp"
|
||||
|
||||
namespace decompiler {
|
||||
namespace {
|
||||
std::string strip_dgo_extension(const std::string& x) {
|
||||
auto ext = x.substr(x.length() - 4, 4);
|
||||
@ -715,7 +716,7 @@ void ObjectFileDB::process_tpages() {
|
||||
100.f * float(success) / float(total), timer.getMs());
|
||||
}
|
||||
|
||||
std::string ObjectFileDB::process_game_text() {
|
||||
std::string ObjectFileDB::process_game_text_files() {
|
||||
lg::info("- Finding game text...");
|
||||
std::string text_string = "COMMON";
|
||||
Timer timer;
|
||||
@ -727,7 +728,7 @@ std::string ObjectFileDB::process_game_text() {
|
||||
for_each_obj([&](ObjectFileData& data) {
|
||||
if (data.name_in_dgo.substr(1) == text_string) {
|
||||
file_count++;
|
||||
auto statistics = ::process_game_text(data);
|
||||
auto statistics = process_game_text(data);
|
||||
string_count += statistics.total_text;
|
||||
char_count += statistics.total_chars;
|
||||
if (text_by_language_by_id.find(statistics.language) != text_by_language_by_id.end()) {
|
||||
@ -743,7 +744,7 @@ std::string ObjectFileDB::process_game_text() {
|
||||
return write_game_text(text_by_language_by_id);
|
||||
}
|
||||
|
||||
std::string ObjectFileDB::process_game_count() {
|
||||
std::string ObjectFileDB::process_game_count_file() {
|
||||
lg::info("- Finding game count file...");
|
||||
bool found = false;
|
||||
std::string result;
|
||||
@ -752,7 +753,7 @@ std::string ObjectFileDB::process_game_count() {
|
||||
if (data.name_in_dgo == "game-cnt") {
|
||||
assert(!found);
|
||||
found = true;
|
||||
result = write_game_count(::process_game_count(data));
|
||||
result = write_game_count(process_game_count(data));
|
||||
}
|
||||
});
|
||||
|
||||
@ -1125,4 +1126,5 @@ void ObjectFileDB::dump_raw_objects(const std::string& output_dir) {
|
||||
auto dest = output_dir + "/" + data.to_unique_name();
|
||||
file_util::write_binary_file(dest, data.data.data(), data.data.size());
|
||||
});
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -18,6 +18,7 @@
|
||||
#include "decompiler/util/DecompilerTypeSystem.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace decompiler {
|
||||
/*!
|
||||
* A "record" which can be used to identify an object file.
|
||||
*/
|
||||
@ -67,8 +68,8 @@ class ObjectFileDB {
|
||||
void analyze_functions();
|
||||
void process_tpages();
|
||||
void analyze_expressions();
|
||||
std::string process_game_count();
|
||||
std::string process_game_text();
|
||||
std::string process_game_count_file();
|
||||
std::string process_game_text_files();
|
||||
|
||||
ObjectFileData& lookup_record(const ObjectFileRecord& rec);
|
||||
DecompilerTypeSystem dts;
|
||||
@ -148,5 +149,6 @@ class ObjectFileDB {
|
||||
uint32_t unique_obj_bytes = 0;
|
||||
} stats;
|
||||
};
|
||||
} // namespace decompiler
|
||||
|
||||
#endif // JAK2_DISASSEMBLER_OBJECTFILEDB_H
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include "third-party/json.hpp"
|
||||
#include "common/util/FileUtil.h"
|
||||
|
||||
namespace decompiler {
|
||||
Config gConfig;
|
||||
|
||||
Config& get_config() {
|
||||
@ -101,3 +102,4 @@ void set_config(const std::string& path_to_config_file) {
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -9,6 +9,7 @@
|
||||
#include <unordered_map>
|
||||
#include "decompiler/Disasm/Register.h"
|
||||
|
||||
namespace decompiler {
|
||||
struct TypeHint {
|
||||
Register reg;
|
||||
std::string type_name;
|
||||
@ -46,5 +47,6 @@ struct Config {
|
||||
|
||||
Config& get_config();
|
||||
void set_config(const std::string& path_to_config_file);
|
||||
} // namespace decompiler
|
||||
|
||||
#endif // JAK2_DISASSEMBLER_CONFIG_H
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "common/common_types.h"
|
||||
#include "decompiler/ObjectFile/LinkedWord.h"
|
||||
|
||||
namespace decompiler {
|
||||
class LinkedWordReader {
|
||||
public:
|
||||
explicit LinkedWordReader(const std::vector<LinkedWord>* words) : m_words(words) {}
|
||||
@ -37,4 +38,5 @@ class LinkedWordReader {
|
||||
private:
|
||||
const std::vector<LinkedWord>* m_words = nullptr;
|
||||
u32 m_offset = 0;
|
||||
};
|
||||
};
|
||||
} // namespace decompiler
|
@ -10,6 +10,7 @@
|
||||
#include "game/common/str_rpc_types.h"
|
||||
#include "StrFileReader.h"
|
||||
|
||||
namespace decompiler {
|
||||
StrFileReader::StrFileReader(const std::string& file_path) {
|
||||
auto data = file_util::read_binary_file(file_path);
|
||||
assert(data.size() >= SECTOR_SIZE); // must have at least the header sector
|
||||
@ -178,4 +179,5 @@ std::string StrFileReader::get_full_name(const std::string& short_name) const {
|
||||
assert(strcmp(iso_name_1, iso_name_2) == 0);
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -9,6 +9,7 @@
|
||||
#include <vector>
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace decompiler {
|
||||
class StrFileReader {
|
||||
public:
|
||||
explicit StrFileReader(const std::string& file_path);
|
||||
@ -19,3 +20,4 @@ class StrFileReader {
|
||||
private:
|
||||
std::vector<std::vector<u8>> m_chunks;
|
||||
};
|
||||
} // namespace decompiler
|
@ -3,6 +3,7 @@
|
||||
#include "game_count.h"
|
||||
#include "LinkedWordReader.h"
|
||||
|
||||
namespace decompiler {
|
||||
GameCountResult process_game_count(ObjectFileData& data) {
|
||||
GameCountResult result;
|
||||
auto& words = data.linked_data.words_by_seg.at(0);
|
||||
@ -37,4 +38,5 @@ std::string write_game_count(const GameCountResult& result) {
|
||||
str += fmt::format("(:unknown-1 {} :unknown-2 {})\n", result.mystery_data[0],
|
||||
result.mystery_data[1]);
|
||||
return str;
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <vector>
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace decompiler {
|
||||
struct GameCountResult {
|
||||
struct CountInfo {
|
||||
s32 money_count;
|
||||
@ -15,4 +16,5 @@ struct GameCountResult {
|
||||
|
||||
struct ObjectFileData;
|
||||
GameCountResult process_game_count(ObjectFileData& data);
|
||||
std::string write_game_count(const GameCountResult& result);
|
||||
std::string write_game_count(const GameCountResult& result);
|
||||
} // namespace decompiler
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "decompiler/ObjectFile/ObjectFileDB.h"
|
||||
#include "common/goos/Reader.h"
|
||||
|
||||
namespace decompiler {
|
||||
namespace {
|
||||
template <typename T>
|
||||
T get_word(const LinkedWord& word) {
|
||||
@ -17,7 +18,7 @@ T get_word(const LinkedWord& word) {
|
||||
return result;
|
||||
}
|
||||
|
||||
Label get_label(ObjectFileData& data, const LinkedWord& word) {
|
||||
DecompilerLabel get_label(ObjectFileData& data, const LinkedWord& word) {
|
||||
assert(word.kind == LinkedWord::PTR);
|
||||
return data.linked_data.labels.at(word.label_id);
|
||||
}
|
||||
@ -159,4 +160,5 @@ std::string write_game_text(
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace decompiler {
|
||||
struct ObjectFileData;
|
||||
|
||||
struct GameTextResult {
|
||||
@ -13,4 +14,5 @@ struct GameTextResult {
|
||||
|
||||
GameTextResult process_game_text(ObjectFileData& data);
|
||||
std::string write_game_text(
|
||||
const std::unordered_map<int, std::unordered_map<int, std::string>>& data);
|
||||
const std::unordered_map<int, std::unordered_map<int, std::string>>& data);
|
||||
} // namespace decompiler
|
@ -20,6 +20,7 @@
|
||||
#include "decompiler/ObjectFile/ObjectFileDB.h"
|
||||
#include "third-party/fmt/core.h"
|
||||
|
||||
namespace decompiler {
|
||||
namespace {
|
||||
|
||||
/*!
|
||||
@ -314,7 +315,7 @@ struct Texture {
|
||||
u32 packed_info_words[9];
|
||||
};
|
||||
|
||||
Label name_label;
|
||||
DecompilerLabel name_label;
|
||||
std::string name;
|
||||
u32 size;
|
||||
float uv_dist;
|
||||
@ -342,7 +343,7 @@ struct Texture {
|
||||
* Unclear what the segments really are, maybe you could split up big tpages if needed?
|
||||
*/
|
||||
struct TexturePageSegment {
|
||||
Label block_data_label;
|
||||
DecompilerLabel block_data_label;
|
||||
u32 size = 0xffffffff;
|
||||
u32 dest = 0xffffffff;
|
||||
std::string print_debug() const {
|
||||
@ -379,10 +380,10 @@ struct FileInfo {
|
||||
* GOAL texture-page type.
|
||||
*/
|
||||
struct TexturePage {
|
||||
Label info_label;
|
||||
DecompilerLabel info_label;
|
||||
FileInfo info;
|
||||
|
||||
Label name_label;
|
||||
DecompilerLabel name_label;
|
||||
std::string name;
|
||||
|
||||
u32 id = 0xffffffff;
|
||||
@ -392,7 +393,7 @@ struct TexturePage {
|
||||
TexturePageSegment segments[3];
|
||||
u32 pad[16] = {};
|
||||
// data...
|
||||
std::vector<Label> data;
|
||||
std::vector<DecompilerLabel> data;
|
||||
std::vector<Texture> textures;
|
||||
|
||||
std::string print_debug() const {
|
||||
@ -423,7 +424,7 @@ struct TexturePage {
|
||||
* Convert a label to the offset (words) in the object segment.
|
||||
* If basic is set, gives you a pointer to the beginning of the memory, if the thing is a basic.
|
||||
*/
|
||||
int label_to_word_offset(Label l, bool basic) {
|
||||
int label_to_word_offset(DecompilerLabel l, bool basic) {
|
||||
assert((l.offset & 3) == 0);
|
||||
int result = l.offset / 4;
|
||||
if (basic) {
|
||||
@ -441,7 +442,7 @@ bool is_type_tag(const LinkedWord& word, const std::string& type) {
|
||||
return word.kind == LinkedWord::TYPE_PTR && word.symbol_name == type;
|
||||
}
|
||||
|
||||
Label get_label(ObjectFileData& data, const LinkedWord& word) {
|
||||
DecompilerLabel get_label(ObjectFileData& data, const LinkedWord& word) {
|
||||
assert(word.kind == LinkedWord::PTR);
|
||||
return data.linked_data.labels.at(word.label_id);
|
||||
}
|
||||
@ -905,3 +906,4 @@ TPageResultStats process_tpage(ObjectFileData& data) {
|
||||
}
|
||||
return stats;
|
||||
}
|
||||
} // namespace decompiler
|
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
namespace decompiler {
|
||||
struct ObjectFileData;
|
||||
|
||||
struct TPageResultStats {
|
||||
@ -7,4 +8,5 @@ struct TPageResultStats {
|
||||
int successful_textures = 0;
|
||||
};
|
||||
|
||||
TPageResultStats process_tpage(ObjectFileData& data);
|
||||
TPageResultStats process_tpage(ObjectFileData& data);
|
||||
} // namespace decompiler
|
@ -7,6 +7,7 @@
|
||||
#include "common/util/FileUtil.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
using namespace decompiler;
|
||||
lg::set_file(file_util::get_file_path({"log/decompiler.txt"}));
|
||||
lg::set_file_level(lg::level::info);
|
||||
lg::set_stdout_level(lg::level::info);
|
||||
@ -67,7 +68,7 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
|
||||
if (get_config().process_game_text) {
|
||||
auto result = db.process_game_text();
|
||||
auto result = db.process_game_text_files();
|
||||
file_util::write_text_file(file_util::get_file_path({"assets", "game_text.txt"}), result);
|
||||
}
|
||||
|
||||
@ -76,7 +77,7 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
|
||||
if (get_config().process_game_count) {
|
||||
auto result = db.process_game_count();
|
||||
auto result = db.process_game_count_file();
|
||||
file_util::write_text_file(file_util::get_file_path({"assets", "game_count.txt"}), result);
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "common/log/log.h"
|
||||
#include "TP_Type.h"
|
||||
|
||||
namespace decompiler {
|
||||
DecompilerTypeSystem::DecompilerTypeSystem() {
|
||||
ts.add_builtin_types();
|
||||
}
|
||||
@ -322,4 +323,5 @@ int DecompilerTypeSystem::get_format_arg_count(const TP_Type& type) {
|
||||
} else {
|
||||
return type.get_format_string_arg_count();
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -5,7 +5,8 @@
|
||||
#include "decompiler/Disasm/Register.h"
|
||||
#include "common/goos/Reader.h"
|
||||
|
||||
struct TP_Type;
|
||||
namespace decompiler {
|
||||
class TP_Type;
|
||||
struct TypeState;
|
||||
|
||||
class DecompilerTypeSystem {
|
||||
@ -54,5 +55,6 @@ class DecompilerTypeSystem {
|
||||
private:
|
||||
goos::Reader m_reader;
|
||||
};
|
||||
} // namespace decompiler
|
||||
|
||||
#endif // JAK_DECOMPILERTYPESYSTEM_H
|
||||
|
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
namespace decompiler {
|
||||
template <typename T>
|
||||
struct MatchParam {
|
||||
MatchParam() { is_wildcard = true; }
|
||||
@ -15,4 +16,5 @@ struct MatchParam {
|
||||
|
||||
bool operator==(const T& other) const { return is_wildcard || (value == other); }
|
||||
bool operator!=(const T& other) const { return !(*this == other); }
|
||||
};
|
||||
};
|
||||
} // namespace decompiler
|
@ -1,6 +1,7 @@
|
||||
#include "TP_Type.h"
|
||||
#include "third-party/fmt/core.h"
|
||||
|
||||
namespace decompiler {
|
||||
std::string TypeState::print_gpr_masked(u32 mask) const {
|
||||
std::string result;
|
||||
for (int i = 0; i < 32; i++) {
|
||||
@ -117,3 +118,4 @@ TypeSpec TP_Type::typespec() const {
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
@ -5,83 +5,7 @@
|
||||
#include "common/common_types.h"
|
||||
#include "decompiler/Disasm/Register.h"
|
||||
|
||||
// struct TP_Type {
|
||||
// enum Kind {
|
||||
// OBJECT_OF_TYPE,
|
||||
// TYPE_OBJECT,
|
||||
// FALSE,
|
||||
// NONE,
|
||||
// PRODUCT,
|
||||
// OBJ_PLUS_PRODUCT,
|
||||
// PARTIAL_METHOD_TABLE_ACCESS, // type + method_number * 4
|
||||
// METHOD_NEW_OF_OBJECT,
|
||||
// STRING
|
||||
// } kind = NONE;
|
||||
// // in the case that we are type_object, just store the type name in a single arg ts.
|
||||
// TypeSpec ts;
|
||||
// int multiplier;
|
||||
// std::string str_data;
|
||||
//
|
||||
// TP_Type() = default;
|
||||
// explicit TP_Type(const TypeSpec& _ts) {
|
||||
// kind = OBJECT_OF_TYPE;
|
||||
// ts = _ts;
|
||||
// }
|
||||
//
|
||||
// TP_Type simplify() const;
|
||||
// std::string print() const;
|
||||
//
|
||||
// bool is_object_of_type() const { return kind == TYPE_OBJECT || ts == TypeSpec("type"); }
|
||||
//
|
||||
// TypeSpec as_typespec() const {
|
||||
// switch (kind) {
|
||||
// case OBJECT_OF_TYPE:
|
||||
// return ts;
|
||||
// case TYPE_OBJECT:
|
||||
// return TypeSpec("type");
|
||||
// case FALSE:
|
||||
// return TypeSpec("symbol");
|
||||
// case NONE:
|
||||
// return TypeSpec("none");
|
||||
// case PRODUCT:
|
||||
// case METHOD_NEW_OF_OBJECT:
|
||||
// return ts;
|
||||
// default:
|
||||
// assert(false);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// static TP_Type make_partial_method_table_access(TypeSpec ts) {
|
||||
// TP_Type result;
|
||||
// result.kind = PARTIAL_METHOD_TABLE_ACCESS;
|
||||
// result.ts = std::move(ts);
|
||||
// return result;
|
||||
// }
|
||||
//
|
||||
// static TP_Type make_type_object(const std::string& name) {
|
||||
// TP_Type result;
|
||||
// result.kind = TYPE_OBJECT;
|
||||
// result.ts = TypeSpec(name);
|
||||
// return result;
|
||||
// }
|
||||
//
|
||||
// static TP_Type make_string_object(const std::string& str) {
|
||||
// TP_Type result;
|
||||
// result.kind = STRING;
|
||||
// result.ts = TypeSpec("string");
|
||||
// result.str_data = str;
|
||||
// return result;
|
||||
// }
|
||||
//
|
||||
// static TP_Type make_none() {
|
||||
// TP_Type result;
|
||||
// result.kind = NONE;
|
||||
// return result;
|
||||
// }
|
||||
//
|
||||
// bool operator==(const TP_Type& other) const;
|
||||
//};
|
||||
|
||||
namespace decompiler {
|
||||
/*!
|
||||
* A TP_Type is a specialized typespec used in the type propagation algorithm.
|
||||
* It is basically a normal typespec plus some optional information.
|
||||
@ -267,4 +191,5 @@ struct TypeState {
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
} // namespace decompiler
|
@ -18,12 +18,14 @@ add_executable(goalc-test
|
||||
test_pretty_print.cpp
|
||||
test_zydis.cpp
|
||||
goalc/test_goal_kernel.cpp
|
||||
decompiler/test_AtomicOpBuilder.cpp
|
||||
decompiler/test_InstructionParser.cpp
|
||||
${GOALC_TEST_FRAMEWORK_SOURCES}
|
||||
${GOALC_TEST_CASES})
|
||||
|
||||
enable_testing()
|
||||
|
||||
target_link_libraries(goalc-test common runtime compiler gtest Zydis)
|
||||
target_link_libraries(goalc-test common runtime compiler gtest decomp Zydis)
|
||||
|
||||
IF (WIN32)
|
||||
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
||||
@ -36,5 +38,5 @@ if(UNIX AND CMAKE_COMPILER_IS_GNUCXX AND CODE_COVERAGE)
|
||||
setup_target_for_coverage_lcov(NAME goalc-test_coverage
|
||||
EXECUTABLE goalc-test --gtest_color=yes
|
||||
DEPENDENCIES goalc-test
|
||||
EXCLUDE "third-party/*" "/usr/include/*" "decompiler/*")
|
||||
EXCLUDE "third-party/*" "/usr/include/*")
|
||||
endif()
|
||||
|
57
test/decompiler/test_AtomicOpBuilder.cpp
Normal file
57
test/decompiler/test_AtomicOpBuilder.cpp
Normal file
@ -0,0 +1,57 @@
|
||||
#include "gtest/gtest.h"
|
||||
#include "decompiler/IR2/AtomicOp.h"
|
||||
#include "decompiler/IR2/AtomicOpBuilder.h"
|
||||
#include "decompiler/Disasm/InstructionParser.h"
|
||||
|
||||
using namespace decompiler;
|
||||
TEST(DecompilerAtomicOpBuilder, Example) {
|
||||
InstructionParser parser;
|
||||
|
||||
// some MIPS instructions. Can be a sequence of instructions, possibly with labels.
|
||||
std::string input_program =
|
||||
"and v0, v1, a3\n"
|
||||
"and a1, a2, a2";
|
||||
|
||||
// convert to Instructions:
|
||||
ParsedProgram prg = parser.parse_program(input_program);
|
||||
|
||||
// this verifies we can convert from a string to an instruction, and back to a string again.
|
||||
// the instruction printer will add two leading spaces and a newline.
|
||||
EXPECT_EQ(prg.print(), " and v0, v1, a3\n and a1, a2, a2\n");
|
||||
|
||||
// next, set up a test environment for the conversion. The FunctionAtomicOps will hold
|
||||
// the result of the conversion
|
||||
FunctionAtomicOps container;
|
||||
|
||||
// treat the entire program as a single basic block, and convert!
|
||||
convert_block_to_atomic_ops(0, prg.instructions.begin(), prg.instructions.end(), prg.labels,
|
||||
&container);
|
||||
|
||||
// we should get back two and operations, one per instruction:
|
||||
EXPECT_EQ(2, container.ops.size());
|
||||
|
||||
// for now, we create an empty environment. The environment will be used in the future to
|
||||
// rename registers to variables, but for now, we just leave it empty and the printing will
|
||||
// use register names
|
||||
Env env;
|
||||
|
||||
// check that we get the right result:
|
||||
EXPECT_EQ(container.ops.at(0)->to_string(prg.labels, &env), "(set! v0 (logand v1 a3))");
|
||||
EXPECT_EQ(container.ops.at(1)->to_string(prg.labels, &env), "(set! a1 (logand a2 a2))");
|
||||
|
||||
// check that the registers read/written are identified for the first op (and v0, v1, a3)
|
||||
auto& first_op = container.ops.at(0);
|
||||
|
||||
// two registers read (v1 and a3)
|
||||
EXPECT_EQ(first_op->read_regs().size(), 2);
|
||||
// one register written (v0)
|
||||
EXPECT_EQ(first_op->write_regs().size(), 1);
|
||||
// no clobber registers (register which ends up with a garbage value in it)
|
||||
EXPECT_EQ(first_op->clobber_regs().size(), 0);
|
||||
|
||||
// the ordering of the two read registers doesn't matter. It happens to be in the same order
|
||||
// as the opcode here, but it may not always be the case.
|
||||
EXPECT_EQ(first_op->read_regs().at(0).to_string(), "v1");
|
||||
EXPECT_EQ(first_op->read_regs().at(1).to_string(), "a3");
|
||||
EXPECT_EQ(first_op->write_regs().at(0).to_string(), "v0");
|
||||
}
|
42
test/decompiler/test_InstructionParser.cpp
Normal file
42
test/decompiler/test_InstructionParser.cpp
Normal file
@ -0,0 +1,42 @@
|
||||
#include "gtest/gtest.h"
|
||||
#include "decompiler/Disasm/InstructionParser.h"
|
||||
#include "decompiler/Disasm/DecompilerLabel.h"
|
||||
|
||||
using namespace decompiler;
|
||||
|
||||
// Round-trip test for single instructions: every string in `ops` is parsed with
// parse_single_instruction() and then printed with to_string(); the printed text
// must be identical to the input text.
TEST(DecompilerInstructionParser, SimpleTest) {
|
||||
InstructionParser parser;
|
||||
// the instruction strings to round-trip, one instruction per string.
std::vector<std::string> ops = {"daddu a0, a1, a2", "addu r0, t7, s6", "daddiu r0, at, #t",
|
||||
"addiu t2, t3, 12", "slti v1, a3, -23", "sltiu s3, s4, 3",
|
||||
"sb v1, 12(a1)", "sh s7, sym(s6)", "sd s2, -12(s2)",
|
||||
"lw s3, 12(s7)", "lwu t2, sym(s7)", "add.s f0, f1, f2",
|
||||
"beq r0, r0, L312"};
|
||||
|
||||
// labels handed to both the parser and the printer. The last op ("beq") names L312.
// NOTE(review): the meaning of the two numeric fields (1, 2) is not visible from this
// test - confirm against DecompilerLabel.
std::vector<DecompilerLabel> labels;
|
||||
labels.push_back(DecompilerLabel{"L311", 1, 2});
|
||||
labels.push_back(DecompilerLabel{"L312", 1, 2});
|
||||
labels.push_back(DecompilerLabel{"L313", 1, 2});
|
||||
// parse each op and check that printing it gives back the original string.
for (auto& op : ops) {
|
||||
auto instr = parser.parse_single_instruction(op, labels);
|
||||
EXPECT_EQ(op, instr.to_string(labels));
|
||||
}
|
||||
}
|
||||
|
||||
// Round-trip test for a whole program without labels: parse_program() is given three
// newline-separated instructions and print() on the result must reproduce the input
// string exactly (the comparison is on the full text, including whitespace and newlines).
TEST(DecompilerInstructionParser, ProgramNoLabels) {
|
||||
InstructionParser parser;
|
||||
std::string program = " daddu a0, a1, a2\n sh s7, sym(s6)\n sb v1, 12(a1)\n";
|
||||
auto result = parser.parse_program(program);
|
||||
EXPECT_EQ(result.print(), program);
|
||||
}
|
||||
|
||||
// Round-trip test for a program that contains labels: the input defines L100 and L102
// on their own lines and has a branch ("beq") that names L102. print() on the parsed
// result must reproduce the input string exactly.
TEST(DecompilerInstructionParser, ProgramLabels) {
|
||||
InstructionParser parser;
|
||||
// label definitions are written as "<name>:" lines in the program text.
std::string program =
|
||||
"L100:\n"
|
||||
" daddu v0, v1, v0\n"
|
||||
" beq at, r0, L102\n"
|
||||
"L102:\n"
|
||||
" jr ra\n";
|
||||
auto result = parser.parse_program(program);
|
||||
EXPECT_EQ(result.print(), program);
|
||||
}
|
2
third-party/fmt/CMakeLists.txt
vendored
2
third-party/fmt/CMakeLists.txt
vendored
@ -6,4 +6,4 @@ endif (UNIX)
|
||||
|
||||
include_directories(../)
|
||||
add_library(fmt SHARED format.cc)
|
||||
target_compile_definitions(fmt PRIVATE FMT_EXPORT INTERFACE FMT_SHARED)
|
||||
target_compile_definitions(fmt PRIVATE FMT_EXPORT INTERFACE FMT_SHARED PUBLIC FMT_SHARED)
|
1
third-party/minilzo/CMakeLists.txt
vendored
1
third-party/minilzo/CMakeLists.txt
vendored
@ -1,2 +1,3 @@
|
||||
add_library(minilzo
|
||||
SHARED
|
||||
minilzo.c)
|
Loading…
Reference in New Issue
Block a user