[Decompiler - New IR] Add AtomicOp (#181)

* wip decompiler ir

* add AtomicOp stuff

* fix windows build and warnings

* add instruction parser

* include

* make minilzo shared

* odr fix

* a

* fix merge conflicts

* move decompiler into namespace

* update the code coverage to include the decompiler

* add demo test

* add register use test to example test
This commit is contained in:
water111 2021-01-06 20:04:15 -05:00 committed by GitHub
parent 3331e9cd00
commit 5093b97cda
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
71 changed files with 2676 additions and 210 deletions

View File

@ -1,35 +1,59 @@
add_executable(decompiler
main.cpp
ObjectFile/ObjectFileDB.cpp
Disasm/Instruction.cpp
Disasm/InstructionDecode.cpp
Disasm/OpcodeInfo.cpp
Disasm/Register.cpp
ObjectFile/LinkedObjectFileCreation.cpp
ObjectFile/LinkedObjectFile.cpp
Function/Function.cpp
config.cpp
util/DecompilerTypeSystem.cpp
Function/BasicBlocks.cpp
Disasm/InstructionMatching.cpp
Function/CfgVtx.cpp
IR/BasicOpBuilder.cpp
IR/CfgBuilder.cpp
IR/IR.cpp
Function/TypeInspector.cpp
data/tpage.cpp
add_library(
decomp
SHARED
data/game_count.cpp
data/game_text.cpp
data/StrFileReader.cpp
data/game_count.cpp
Function/TypeAnalysis.cpp
IR/IR_TypeAnalysis.cpp
util/TP_Type.cpp
Function/RegUsage.cpp
data/tpage.cpp
Disasm/Instruction.cpp
Disasm/InstructionDecode.cpp
Disasm/InstructionMatching.cpp
Disasm/InstructionParser.cpp
Disasm/OpcodeInfo.cpp
Disasm/Register.cpp
Function/BasicBlocks.cpp
Function/CfgVtx.cpp
Function/ExpressionBuilder.cpp
Function/ExpressionStack.cpp
IR/IR_ExpressionStack.cpp)
Function/Function.cpp
Function/RegUsage.cpp
Function/TypeAnalysis.cpp
Function/TypeInspector.cpp
IR/BasicOpBuilder.cpp
IR/CfgBuilder.cpp
IR/IR.cpp
IR/IR_ExpressionStack.cpp
IR/IR_TypeAnalysis.cpp
IR2/AtomicOp.cpp
IR2/AtomicOpBuilder.cpp
IR2/Env.cpp
ObjectFile/LinkedObjectFile.cpp
ObjectFile/LinkedObjectFileCreation.cpp
ObjectFile/ObjectFileDB.cpp
util/DecompilerTypeSystem.cpp
util/TP_Type.cpp
config.cpp
)
target_link_libraries(decomp
minilzo
common
fmt
)
add_executable(decompiler
main.cpp
)
target_link_libraries(decompiler
common
decomp
common
minilzo
fmt)

View File

@ -0,0 +1,15 @@
#pragma once
#include <string>
namespace decompiler {
/*!
 * A label to a location in an object file.
 * Doesn't have to be word aligned.
 *
 * The integer members are value-initialized so that a default-constructed label never
 * contains indeterminate values.
 */
struct DecompilerLabel {
  std::string name;
  int target_segment = 0;
  int offset = 0;  // in bytes
};
} // namespace decompiler

View File

@ -8,17 +8,18 @@
#include "decompiler/ObjectFile/LinkedObjectFile.h"
#include <cassert>
namespace decompiler {
/*!
* Convert atom to a string for disassembly.
*/
std::string InstructionAtom::to_string(const LinkedObjectFile& file) const {
std::string InstructionAtom::to_string(const std::vector<DecompilerLabel>& labels) const {
switch (kind) {
case REGISTER:
return reg.to_string();
case IMM:
return std::to_string(imm);
case LABEL:
return file.get_label_name(label_id);
return labels.at(label_id).name;
case VU_ACC:
return "acc";
case VU_Q:
@ -115,6 +116,25 @@ bool InstructionAtom::is_link_or_label() const {
return kind == IMM_SYM || kind == LABEL;
}
/*!
 * Compare two atoms. Atoms of different kinds are never equal; atoms of the same kind are
 * equal when the value stored for that kind matches.
 */
bool InstructionAtom::operator==(const InstructionAtom& other) const {
  if (kind != other.kind) {
    return false;
  }
  switch (kind) {
    case REGISTER:
      return reg == other.reg;
    case IMM:
      return imm == other.imm;
    case IMM_SYM:
      // symbol atoms are compared by the symbol name
      return sym == other.sym;
    case LABEL:
      return label_id == other.label_id;
    case VU_ACC:
    case VU_Q:
      // nothing beyond the kind is compared for these
      return true;
    default:
      assert(false);
      // with asserts disabled, treat atoms of an unexpected kind as unequal instead of
      // running off the end of the function.
      return false;
  }
}
/*!
* Convert just the name of the opcode to a string, omitting src/dst, but including
* suffixes (interlock, broadcasts and destination)
@ -169,7 +189,7 @@ std::string Instruction::op_name_to_string() const {
/*!
* Convert entire instruction to a string.
*/
std::string Instruction::to_string(const LinkedObjectFile& file) const {
std::string Instruction::to_string(const std::vector<DecompilerLabel>& labels) const {
auto& info = gOpcodeInfo[(int)kind];
auto result = op_name_to_string();
@ -178,33 +198,33 @@ std::string Instruction::to_string(const LinkedObjectFile& file) const {
assert(n_dst == 0);
assert(n_src == 3);
result += " ";
result += src[0].to_string(file);
result += src[0].to_string(labels);
result += ", ";
result += src[1].to_string(file);
result += src[1].to_string(labels);
result += "(";
result += src[2].to_string(file);
result += src[2].to_string(labels);
result += ")";
} else if (info.is_load) {
assert(n_dst == 1);
assert(n_src == 2);
result += " ";
result += dst[0].to_string(file);
result += dst[0].to_string(labels);
result += ", ";
result += src[0].to_string(file);
result += src[0].to_string(labels);
result += "(";
result += src[1].to_string(file);
result += src[1].to_string(labels);
result += ")";
} else {
// for instructions that aren't a store or load, the dest/sources are comma separated.
bool end_comma = false;
for (uint8_t i = 0; i < n_dst; i++) {
result += " " + dst[i].to_string(file) + ",";
result += " " + dst[i].to_string(labels) + ",";
end_comma = true;
}
for (uint8_t i = 0; i < n_src; i++) {
result += " " + src[i].to_string(file) + ",";
result += " " + src[i].to_string(labels) + ",";
end_comma = true;
}
@ -312,3 +332,25 @@ int Instruction::get_label_target() const {
}
return result;
}
/*!
 * Two instructions are equal when they have the same opcode kind, the same number of
 * sources and destinations, the same COP2 extra fields, and pairwise-equal atoms.
 */
bool Instruction::operator==(const Instruction& other) const {
  const bool same_shape = kind == other.kind && n_src == other.n_src && n_dst == other.n_dst;
  const bool same_extras =
      cop2_dest == other.cop2_dest && cop2_bc == other.cop2_bc && il == other.il;
  if (!same_shape || !same_extras) {
    return false;
  }

  // destinations first, then sources; only the used slots are compared.
  int slot = 0;
  while (slot < n_dst) {
    if (dst[slot] != other.dst[slot]) {
      return false;
    }
    ++slot;
  }

  slot = 0;
  while (slot < n_src) {
    if (src[slot] != other.src[slot]) {
      return false;
    }
    ++slot;
  }

  return true;
}
} // namespace decompiler

View File

@ -9,10 +9,12 @@
#ifndef NEXT_INSTRUCTION_H
#define NEXT_INSTRUCTION_H
#include <vector>
#include "OpcodeInfo.h"
#include "Register.h"
class LinkedObjectFile;
namespace decompiler {
struct DecompilerLabel;
constexpr int MAX_INSTRUCTION_SOURCE = 3;
constexpr int MAX_INTRUCTION_DEST = 1;
@ -41,7 +43,7 @@ struct InstructionAtom {
int get_label() const;
std::string get_sym() const;
std::string to_string(const LinkedObjectFile& file) const;
std::string to_string(const std::vector<DecompilerLabel>& labels) const;
bool is_link_or_label() const;
bool is_reg() const { return kind == REGISTER; }
@ -51,11 +53,13 @@ struct InstructionAtom {
bool is_reg(Register r) const { return kind == REGISTER && reg == r; }
bool operator==(const InstructionAtom& other) const;
bool operator!=(const InstructionAtom& other) const { return !((*this) == other); }
private:
int32_t imm;
int label_id;
Register reg;
std::string sym;
};
@ -66,7 +70,7 @@ class Instruction {
InstructionKind kind = InstructionKind::UNKNOWN;
std::string op_name_to_string() const;
std::string to_string(const LinkedObjectFile& file) const;
std::string to_string(const std::vector<DecompilerLabel>& labels) const;
bool is_valid() const;
void add_src(InstructionAtom& a);
@ -89,10 +93,13 @@ class Instruction {
int get_label_target() const;
bool operator==(const Instruction& other) const;
bool operator!=(const Instruction& other) const { return !((*this) == other); }
// extra fields for some COP2 instructions.
uint8_t cop2_dest = 0xff; // 0xff indicates "don't print dest"
uint8_t cop2_bc = 0xff; // 0xff indicates "don't print bc"
uint8_t il = 0xff; // 0xff indicates "don't print il"
};
} // namespace decompiler
#endif // NEXT_INSTRUCTION_H

View File

@ -8,6 +8,7 @@
#include <cassert>
#include "decompiler/ObjectFile/LinkedObjectFile.h"
namespace decompiler {
// utility class to extract fields of an opcode.
struct OpcodeFields {
OpcodeFields(uint32_t _data) : data(_data) {}
@ -1171,3 +1172,4 @@ Instruction decode_instruction(LinkedWord& word, LinkedObjectFile& file, int seg
return i;
}
} // namespace decompiler

View File

@ -11,9 +11,10 @@
#include "Instruction.h"
namespace decompiler {
class LinkedWord;
class LinkedObjectFile;
Instruction decode_instruction(LinkedWord& word, LinkedObjectFile& file, int seg_id, int word_id);
} // namespace decompiler
#endif // NEXT_INSTRUCTIONDECODE_H

View File

@ -6,6 +6,7 @@
#include <cassert>
#include "InstructionMatching.h"
namespace decompiler {
/*!
* Check if the given instruction stores a GPR with the specified parameters.
*/
@ -348,3 +349,4 @@ bool is_always_branch(const Instruction& instr) {
return false;
}
} // namespace decompiler

View File

@ -11,6 +11,7 @@
#include "Instruction.h"
#include "decompiler/util/MatchParam.h"
namespace decompiler {
bool is_no_link_gpr_store(const Instruction& instr,
MatchParam<int> size,
MatchParam<Register> src,
@ -56,5 +57,5 @@ Register make_fpr(int fpr);
bool is_branch(const Instruction& instr, MatchParam<bool> likely);
bool is_always_branch(const Instruction& instr);
} // namespace decompiler
#endif // JAK_DISASSEMBLER_INSTRUCTIONMATCHING_H

View File

@ -0,0 +1,307 @@
#include <cassert>
#include <algorithm>
#include <stdexcept>
#include "common/common_types.h"
#include "InstructionParser.h"
namespace decompiler {
/*!
 * Build the opcode-name lookup table used by the parser.
 */
InstructionParser::InstructionParser() {
  init_opcode_info();

  // Only a subset of all instructions is supported: the commonly used ones that don't have
  // strange formatting.
  const InstructionKind supported_kinds[] = {
      InstructionKind::DADDIU, InstructionKind::ADDIU,  InstructionKind::SLTI,
      InstructionKind::SLTIU,  InstructionKind::SB,     InstructionKind::SH,
      InstructionKind::SW,     InstructionKind::SD,     InstructionKind::SQ,
      InstructionKind::LB,     InstructionKind::LBU,    InstructionKind::LH,
      InstructionKind::LHU,    InstructionKind::LW,     InstructionKind::LWU,
      InstructionKind::LD,     InstructionKind::LQ,     InstructionKind::LDR,
      InstructionKind::LDL,    InstructionKind::LWL,    InstructionKind::LWR,
      InstructionKind::DADDU,  InstructionKind::SUBU,   InstructionKind::ADDU,
      InstructionKind::DSUBU,  InstructionKind::MULT3,  InstructionKind::MULTU3,
      InstructionKind::AND,    InstructionKind::OR,     InstructionKind::NOR,
      InstructionKind::XOR,    InstructionKind::MOVN,   InstructionKind::MOVZ,
      InstructionKind::SLT,    InstructionKind::SLTU,   InstructionKind::SLL,
      InstructionKind::SRA,    InstructionKind::SRL,    InstructionKind::DSLL,
      InstructionKind::DSLL32, InstructionKind::DSRA,   InstructionKind::DSRA32,
      InstructionKind::DSRL,   InstructionKind::DSRL32, InstructionKind::DSRAV,
      InstructionKind::SLLV,   InstructionKind::DSLLV,  InstructionKind::DSRLV,
      InstructionKind::DIV,    InstructionKind::DIVU,   InstructionKind::ORI,
      InstructionKind::XORI,   InstructionKind::ANDI,   InstructionKind::LUI,
      InstructionKind::JALR,   InstructionKind::JR,     InstructionKind::LWC1,
      InstructionKind::SWC1,   InstructionKind::ADDS,   InstructionKind::SUBS,
      InstructionKind::MULS,   InstructionKind::DIVS,   InstructionKind::MINS,
      InstructionKind::MAXS,   InstructionKind::MADDS,  InstructionKind::MSUBS,
      InstructionKind::RSQRTS, InstructionKind::ABSS,   InstructionKind::NEGS,
      InstructionKind::CVTSW,  InstructionKind::CVTWS,  InstructionKind::MOVS,
      InstructionKind::SQRTS,  InstructionKind::CLTS,   InstructionKind::CLES,
      InstructionKind::CEQS,   InstructionKind::BC1F,   InstructionKind::BC1T,
      InstructionKind::BEQ,    InstructionKind::BNE,    InstructionKind::BEQL,
      InstructionKind::BNEL,   InstructionKind::BC1FL,  InstructionKind::BC1TL,
      InstructionKind::BLTZ,   InstructionKind::BGEZ,   InstructionKind::BLEZ,
      InstructionKind::BGTZ,   InstructionKind::BLTZL,  InstructionKind::BGTZL,
      InstructionKind::BGEZL};

  int registered = 0;
  for (const InstructionKind op : supported_kinds) {
    const int op_index = int(op);
    const auto& op_info = gOpcodeInfo[op_index];
    if (!op_info.defined) {
      // opcodes without a definition in the opcode table are skipped
      continue;
    }
    m_opcode_name_lookup[op_info.name] = op_index;
    ++registered;
  }

  // every registered opcode must have produced a distinct entry in the map
  assert(registered == int(m_opcode_name_lookup.size()));
}
namespace {
/*!
 * Remove and return the text up to (not including) the first space of instr.
 * The space itself is also consumed. If there is no space, the whole string is returned
 * and instr is left empty. instr must not be empty.
 */
std::string get_until_space(std::string& instr) {
  assert(!instr.empty());

  const auto space_pos = instr.find(' ');
  if (space_pos == std::string::npos) {
    // no separator: hand back everything and leave the input empty
    std::string whole;
    whole.swap(instr);
    return whole;
  }

  std::string token = instr.substr(0, space_pos);
  instr.erase(0, space_pos + 1);
  return token;
}
/*!
 * Remove and return the next argument of a ", " separated argument list.
 * Every argument except the last one is expected to end with a comma, which is stripped
 * from the returned value. instr must not be empty.
 */
std::string get_comma_separated(std::string& instr) {
  assert(!instr.empty());

  std::string token = get_until_space(instr);
  const bool is_last_argument = instr.empty();
  if (is_last_argument) {
    assert(token.back() != ',');
    return token;
  }

  assert(token.back() == ',');
  token.pop_back();
  return token;
}
/*!
 * Remove and return the text that precedes the first '(' of instr.
 * The '(' is left in instr so the parenthesized part can be read next.
 * Throws std::runtime_error if instr contains no '('.
 */
std::string get_before_paren(std::string& instr) {
  const auto paren_pos = instr.find('(');
  if (paren_pos == std::string::npos) {
    // malformed input: report it instead of falling off the end of the function when
    // asserts are compiled out.
    throw std::runtime_error("InstructionParser expected '(' in \"" + instr + "\"");
  }

  auto result = instr.substr(0, paren_pos);
  instr.erase(0, paren_pos);
  return result;
}
/*!
 * Remove a leading "(...)" group from instr and return the text between the parentheses.
 * instr must start with '(' and be more than two characters long.
 * Throws std::runtime_error if instr does not start with such a group or has no ')'.
 */
std::string get_in_paren(std::string& instr) {
  if (instr.length() <= 2 || instr.front() != '(') {
    throw std::runtime_error("InstructionParser expected a parenthesized argument in \"" +
                             instr + "\"");
  }

  const auto close_pos = instr.find(')');
  if (close_pos == std::string::npos) {
    throw std::runtime_error("InstructionParser expected ')' in \"" + instr + "\"");
  }

  // skip the '(' at index 0; close_pos >= 1 because index 0 is '('.
  auto result = instr.substr(1, close_pos - 1);
  // erase through the ')'; this leaves instr empty when ')' was the last character.
  instr.erase(0, close_pos + 1);
  return result;
}
/*!
 * Check whether the whole string is consumed when read as a base-10 integer by strtol.
 * str must not be empty.
 */
bool is_integer(const std::string& str) {
  assert(!str.empty());

  const char* const begin = str.c_str();
  char* stop = nullptr;
  std::strtol(begin, &stop, 10);

  // the string is an integer only if parsing stopped exactly at its end
  return stop == begin + str.length();
}
/*!
 * Convert a base-10 integer string to an int. The whole string must be a number.
 * str must not be empty.
 * Throws std::out_of_range if the value does not fit in an int (instead of silently
 * truncating it) and std::invalid_argument if no number can be read.
 */
int parse_integer(const std::string& str) {
  assert(!str.empty());
  std::size_t consumed = 0;
  const int result = std::stoi(str, &consumed, 10);
  assert(consumed == str.length());
  return result;
}
/*!
 * Split a string on '\n'. The newline characters are not included in the lines.
 * Empty lines between newlines are kept; text after the final newline is only added
 * when it is non-empty.
 */
std::vector<std::string> string_to_lines(const std::string& str) {
  std::vector<std::string> lines;
  std::string::size_type cursor = 0;

  for (auto newline = str.find('\n', cursor); newline != std::string::npos;
       newline = str.find('\n', cursor)) {
    lines.push_back(str.substr(cursor, newline - cursor));
    cursor = newline + 1;
  }

  // whatever follows the last newline (or the whole string if there was none)
  if (cursor < str.length()) {
    lines.push_back(str.substr(cursor));
  }
  return lines;
}
} // namespace
/*!
 * Parse a single instruction, like "daddu a0, s7, r0", into an Instruction.
 * The opcode name is looked up in the supported-opcode table and the arguments are read
 * following the decode steps of that opcode. For loads and stores, the second and third
 * arguments are read in the "offset(base)" form.
 *
 * @param str    the instruction text, with no leading whitespace.
 * @param labels labels that branch targets are resolved against, by name.
 * @return the parsed instruction.
 * Throws std::runtime_error if the opcode is not supported or a branch target names a label
 * that is not in labels.
 */
Instruction InstructionParser::parse_single_instruction(
    std::string str,
    const std::vector<DecompilerLabel>& labels) {
  auto name = get_until_space(str);
  auto lookup = m_opcode_name_lookup.find(name);
  if (lookup == m_opcode_name_lookup.end()) {
    throw std::runtime_error("InstructionParser cannot handle opcode " + name);
  }

  Instruction instr;
  instr.kind = InstructionKind(lookup->second);
  auto& info = gOpcodeInfo[lookup->second];
  const bool is_memory_op = info.is_store || info.is_load;

  // every decode step yields one atom, which is either a source or a destination.
  auto add_atom = [&instr](bool is_src, InstructionAtom& atom) {
    if (is_src) {
      instr.add_src(atom);
    } else {
      instr.add_dst(atom);
    }
  };

  for (u8 i = 0; i < info.step_count; i++) {
    auto& step = info.steps[i];
    switch (step.decode) {
      case DecodeType::GPR: {
        // the base register of a load/store is written inside parentheses.
        std::string gpr_name;
        if (is_memory_op && i == 2) {
          gpr_name = get_in_paren(str);
        } else {
          gpr_name = get_comma_separated(str);
        }
        Register reg(gpr_name);
        assert(reg.get_kind() == Reg::GPR);
        InstructionAtom atom;
        atom.set_reg(reg);
        add_atom(step.is_src, atom);
      } break;

      case DecodeType::FPR: {
        auto reg_name = get_comma_separated(str);
        Register reg(reg_name);
        assert(reg.get_kind() == Reg::FPR);
        InstructionAtom atom;
        atom.set_reg(reg);
        add_atom(step.is_src, atom);
      } break;

      case DecodeType::IMM: {
        InstructionAtom atom;
        std::string atom_str;
        if (is_memory_op && i == 1) {
          // the offset of a load/store is the text before the paren
          atom_str = get_before_paren(str);
        } else {
          atom_str = get_comma_separated(str);
        }

        // anything that isn't a plain integer is stored as a symbol name.
        if (is_integer(atom_str)) {
          atom.set_imm(parse_integer(atom_str));
        } else {
          atom.set_sym(atom_str);
        }
        add_atom(step.is_src, atom);
      } break;

      case DecodeType::BRANCH_TARGET: {
        auto label = get_comma_separated(str);
        auto f = std::find_if(labels.begin(), labels.end(),
                              [&](const DecompilerLabel& l) { return l.name == label; });
        if (f == labels.end()) {
          // reject unknown labels in all build types; otherwise the atom would refer to an
          // index past the end of the label list when asserts are disabled.
          throw std::runtime_error("InstructionParser cannot find label " + label);
        }
        InstructionAtom atom;
        atom.set_label(int(f - labels.begin()));
        add_atom(step.is_src, atom);
      } break;

      default:
        assert(false);
    }
  }

  // all of the text should have been consumed by the decode steps.
  assert(str.empty());
  return instr;
}
/*!
 * Parse a multi-line program made of labels and instructions, one per line.
 * Leading spaces are ignored. A line starting with 'L' is a label and must end with ':';
 * every other non-blank line is an instruction. Each instruction advances the byte offset
 * by 4, and a label records the byte offset of the instruction that follows it.
 * Blank lines, including lines containing only spaces, are skipped.
 *
 * @param str the program text, lines separated by '\n'.
 * @return the labels and instructions of the program.
 */
ParsedProgram InstructionParser::parse_program(const std::string& str) {
  ParsedProgram program;
  auto lines = string_to_lines(str);

  int byte_offset = 0;
  // first pass: strip leading spaces and collect labels, so that instructions can refer to
  // labels that are defined after them.
  for (auto& line : lines) {
    const auto first_char = line.find_first_not_of(' ');
    if (first_char == std::string::npos) {
      // empty or only spaces: clear it so the second pass skips it too, and don't count it
      // as an instruction.
      line.clear();
      continue;
    }
    line.erase(0, first_char);

    if (line.front() == 'L') {
      if (line.back() == ':') {
        line.pop_back();
      } else {
        assert(false);
      }
      DecompilerLabel label;
      label.target_segment = 0;
      label.offset = byte_offset;
      label.name = line;
      program.labels.push_back(label);
    } else {
      byte_offset += 4;
    }
  }

  // second pass: parse the instructions now that all labels are known.
  for (auto& line : lines) {
    if (!line.empty() && line.front() != 'L') {
      program.instructions.push_back(parse_single_instruction(line, program.labels));
    }
  }
  return program;
}
/*!
 * Convert the program back to text. Each instruction is printed on its own line, indented
 * by two spaces, and is preceded by a "name:" line for every label whose byte offset is
 * the offset of that instruction (instructions are 4 bytes each). Labels located just
 * past the last instruction are printed at the end, so they are not lost.
 */
std::string ParsedProgram::print() {
  std::string result;

  // append a "name:" line for each label located at the given byte offset.
  auto append_labels_at = [&](int at_offset) {
    for (auto& label : labels) {
      if (label.offset == at_offset) {
        result += label.name;
        result += ":\n";
      }
    }
  };

  int offset = 0;
  for (auto& instr : instructions) {
    append_labels_at(offset);
    result += "  ";
    result += instr.to_string(labels);
    result += '\n';
    offset += 4;
  }

  // labels that point to the end of the program
  append_labels_at(offset);
  return result;
}
} // namespace decompiler

View File

@ -0,0 +1,29 @@
/*!
* The InstructionParser converts a string like "daddu a0, s7, r0" into an Instruction.
* It is used to generate test sequences of instructions for decompiler algorithms.
*/
#pragma once
#include <string>
#include <unordered_map>
#include "Instruction.h"
#include "DecompilerLabel.h"
namespace decompiler {
/*!
 * The result of parsing a program: the labels it defines and its instructions, in order.
 */
struct ParsedProgram {
// labels defined in the program; instructions refer to these by index.
std::vector<DecompilerLabel> labels;
// the instructions, in program order.
std::vector<Instruction> instructions;
// convert back to text: one instruction per line, with "name:" lines for labels.
std::string print();
};
/*!
 * Parser for a subset of instructions, written in the same text format that
 * Instruction::to_string produces.
 */
class InstructionParser {
public:
// sets up the table of supported opcode names.
InstructionParser();
// parse one instruction. Branch targets are looked up by name in labels.
// throws std::runtime_error if the opcode is not supported.
Instruction parse_single_instruction(std::string str, const std::vector<DecompilerLabel>& labels);
// parse a program with one label ("L...:") or instruction per line.
ParsedProgram parse_program(const std::string& str);
private:
// maps opcode name to the integer value of its InstructionKind.
std::unordered_map<std::string, int> m_opcode_name_lookup;
};
} // namespace decompiler

View File

@ -6,8 +6,13 @@
#include "OpcodeInfo.h"
#include <cassert>
namespace decompiler {
OpcodeInfo gOpcodeInfo[(uint32_t)InstructionKind::EE_OP_MAX];
namespace {
bool opcodes_initialized = false;
}
typedef InstructionKind IK;
typedef FieldType FT;
typedef DecodeType DT;
@ -130,6 +135,9 @@ static OpcodeInfo& cd_dacc_svfs_svft(OpcodeInfo& info) {
}
void init_opcode_info() {
if (opcodes_initialized) {
return;
}
gOpcodeInfo[0].name = ";; ??????";
// RT, RS, SIMM
@ -444,6 +452,7 @@ void init_opcode_info() {
// for the UNKNOWN op which shouldn't be valid.
total_count--;
assert(total_count == valid_count);
opcodes_initialized = true;
}
void OpcodeInfo::step(DecodeStep& s) {
@ -501,4 +510,5 @@ OpcodeInfo& OpcodeInfo::dst_vf(FieldType field) {
OpcodeInfo& OpcodeInfo::dst_vi(FieldType field) {
return dst(field, DT::VI);
}
}
} // namespace decompiler

View File

@ -10,6 +10,7 @@
#include <string>
namespace decompiler {
enum class InstructionKind {
UNKNOWN,
@ -342,12 +343,12 @@ struct OpcodeInfo {
OpcodeInfo& dst_vf(FieldType field);
OpcodeInfo& dst_vi(FieldType field);
uint8_t step_count;
uint8_t step_count = 0;
DecodeStep steps[MAX_DECODE_STEPS];
};
extern OpcodeInfo gOpcodeInfo[(uint32_t)InstructionKind::EE_OP_MAX];
void init_opcode_info();
} // namespace decompiler
#endif // NEXT_OPCODEINFO_H

View File

@ -7,6 +7,24 @@
#include <cassert>
#include <stdexcept>
namespace decompiler {
namespace Reg {
// Table of the general purpose registers which may hold GOAL local variables.
// There is one entry per GPR; the comment next to each entry names the register it is for.
// clang-format off
const bool allowed_local_gprs[Reg::MAX_GPR] = {
false /*R0*/, false /*AT*/, true /*V0*/, true /*V1*/,
true /*A0*/, true /*A1*/, true /*A2*/, true /*A3*/,
true /*T0*/, true /*T1*/, true /*T2*/, true /*T3*/,
true /*T4*/, true /*T5*/, true /*T6*/, true /*T7*/,
true /*S0*/, true /*S1*/, true /*S2*/, true /*S3*/,
true /*S4*/, true /*S5*/, false /*S6*/, false /*S7*/,
true /*T8*/, true /*T9*/, false /*K0*/, false /*K1*/,
true /*GP*/, true /*SP*/, false /*FP*/, false /*RA*/
};
// clang-format on
} // namespace Reg
////////////////////////////
// Register Name Constants
////////////////////////////
@ -233,4 +251,5 @@ bool Register::operator==(const Register& other) const {
bool Register::operator!=(const Register& other) const {
return id != other.id;
}
}
} // namespace decompiler

View File

@ -11,6 +11,7 @@
#include <cstdint>
#include <string>
namespace decompiler {
// Namespace for register name constants
namespace Reg {
enum RegisterKind {
@ -120,6 +121,9 @@ enum Vi {
CMSAR1 = 31,
MAX_COP2 = 32
};
const extern bool allowed_local_gprs[Reg::MAX_GPR];
} // namespace Reg
// Representation of a register. Uses a 32-bit integer internally.
@ -148,5 +152,5 @@ class Register {
private:
uint16_t id = -1;
};
} // namespace decompiler
#endif // NEXT_REGISTER_H

View File

@ -4,6 +4,7 @@
#include "decompiler/ObjectFile/LinkedObjectFile.h"
#include "decompiler/Disasm/InstructionMatching.h"
namespace decompiler {
/*!
* Find all basic blocks in a function.
* All delay slot instructions are grouped with the branch instruction.
@ -48,4 +49,5 @@ std::vector<BasicBlock> find_blocks_in_function(const LinkedObjectFile& file,
}
return basic_blocks;
}
}
} // namespace decompiler

View File

@ -7,6 +7,7 @@
#include "decompiler/util/DecompilerTypeSystem.h"
#include "decompiler/util/TP_Type.h"
namespace decompiler {
class LinkedObjectFile;
class Function;
@ -48,3 +49,4 @@ struct BlockTopologicalSort {
std::vector<BasicBlock> find_blocks_in_function(const LinkedObjectFile& file,
int seg,
const Function& func);
} // namespace decompiler

View File

@ -5,6 +5,7 @@
#include "CfgVtx.h"
#include "Function.h"
namespace decompiler {
/////////////////////////////////////////
/// CfgVtx
/////////////////////////////////////////
@ -1912,3 +1913,4 @@ std::shared_ptr<ControlFlowGraph> build_cfg(const LinkedObjectFile& file, int se
return cfg;
}
} // namespace decompiler

View File

@ -11,6 +11,7 @@ namespace goos {
class Object;
}
namespace decompiler {
/*!
* In v, find an item equal to old, and replace it with replace.
* Will throw an error if there is not exactly one thing equal to old.
@ -351,5 +352,5 @@ class ControlFlowGraph {
class LinkedObjectFile;
class Function;
std::shared_ptr<ControlFlowGraph> build_cfg(const LinkedObjectFile& file, int seg, Function& func);
} // namespace decompiler
#endif // JAK_DISASSEMBLER_CFGVTX_H

View File

@ -2,6 +2,7 @@
#include "decompiler/IR/IR.h"
#include "ExpressionStack.h"
namespace decompiler {
namespace {
bool expressionize_begin(IR_Begin* begin, LinkedObjectFile& file) {
ExpressionStack stack;
@ -55,4 +56,5 @@ bool Function::build_expression(LinkedObjectFile& file) {
}
return true;
}
}
} // namespace decompiler

View File

@ -1,6 +1,7 @@
#include "third-party/fmt/core.h"
#include "ExpressionStack.h"
namespace decompiler {
std::string ExpressionStack::StackEntry::print(LinkedObjectFile& file) {
return fmt::format("d: {} s: {} | {} <- {}", display, sequence_point,
destination.has_value() ? destination.value().to_charp() : "N/A",
@ -107,4 +108,5 @@ ExpressionStack::StackEntry& ExpressionStack::get_display_stack_top() {
}
}
assert(false);
}
}
} // namespace decompiler

View File

@ -6,6 +6,7 @@
#include "decompiler/Disasm/Register.h"
#include "decompiler/util/TP_Type.h"
namespace decompiler {
/*!
* An ExpressionStack is used to track partial expressions when rebuilding the tree structure of
* GOAL code. Linear sequences of operations are added onto the expression stack.
@ -33,4 +34,5 @@ class ExpressionStack {
bool display_stack_empty();
StackEntry& get_display_stack_top();
};
};
} // namespace decompiler

View File

@ -8,6 +8,7 @@
#include "TypeInspector.h"
#include "decompiler/IR/IR.h"
namespace decompiler {
namespace {
std::vector<Register> gpr_backups = {make_gpr(Reg::GP), make_gpr(Reg::S5), make_gpr(Reg::S4),
make_gpr(Reg::S3), make_gpr(Reg::S2), make_gpr(Reg::S1),
@ -70,8 +71,8 @@ void Function::analyze_prologue(const LinkedObjectFile& file) {
// storing stack pointer on the stack is done by some ASM kernel functions
if (instr.kind == InstructionKind::SW && instr.get_src(0).get_reg() == make_gpr(Reg::SP)) {
printf("[Warning] %s Suspected ASM function based on this instruction in prologue: %s\n",
guessed_name.to_string().c_str(), instr.to_string(file).c_str());
warnings += ";; Flagged as ASM function because of " + instr.to_string(file) + "\n";
guessed_name.to_string().c_str(), instr.to_string(file.labels).c_str());
warnings += ";; Flagged as ASM function because of " + instr.to_string(file.labels) + "\n";
suspected_asm = true;
return;
}
@ -93,8 +94,8 @@ void Function::analyze_prologue(const LinkedObjectFile& file) {
// support
if (instr.kind == InstructionKind::SD && instr.get_src(0).get_reg() == make_gpr(Reg::S7)) {
lg::warn("{} Suspected ASM function based on this instruction in prologue: {}\n",
guessed_name.to_string(), instr.to_string(file));
warnings += ";; Flagged as ASM function because of " + instr.to_string(file) + "\n";
guessed_name.to_string(), instr.to_string(file.labels));
warnings += ";; Flagged as ASM function because of " + instr.to_string(file.labels) + "\n";
suspected_asm = true;
return;
}
@ -164,9 +165,9 @@ void Function::analyze_prologue(const LinkedObjectFile& file) {
suspected_asm = true;
printf("[Warning] %s Suspected asm function that isn't flagged due to stack store %s\n",
guessed_name.to_string().c_str(),
instructions.at(idx + i).to_string(file).c_str());
instructions.at(idx + i).to_string(file.labels).c_str());
warnings += ";; Suspected asm function due to stack store: " +
instructions.at(idx + i).to_string(file) + "\n";
instructions.at(idx + i).to_string(file.labels) + "\n";
return;
}
}
@ -194,9 +195,9 @@ void Function::analyze_prologue(const LinkedObjectFile& file) {
suspected_asm = true;
printf("[Warning] %s Suspected asm function that isn't flagged due to stack store %s\n",
guessed_name.to_string().c_str(),
instructions.at(idx + i).to_string(file).c_str());
instructions.at(idx + i).to_string(file.labels).c_str());
warnings += ";; Suspected asm function due to stack store: " +
instructions.at(idx + i).to_string(file) + "\n";
instructions.at(idx + i).to_string(file.labels) + "\n";
return;
}
}
@ -643,7 +644,7 @@ void Function::find_type_defs(LinkedObjectFile& file, DecompilerTypeSystem& dts)
// done!
// fmt::print("Got type {} parent {}\n", type_name, parent_type);
dts.add_type_parent(type_name, parent_type);
Label flag_label = file.labels.at(label_idx);
DecompilerLabel flag_label = file.labels.at(label_idx);
u64 word = file.read_data_word(flag_label);
flag_label.offset += 4;
u64 word2 = file.read_data_word(flag_label);
@ -744,4 +745,5 @@ BlockTopologicalSort Function::bb_topo_sort() {
}
return result;
}
}
} // namespace decompiler

View File

@ -15,6 +15,7 @@
#include "common/type_system/TypeSpec.h"
#include "decompiler/config.h"
namespace decompiler {
class DecompilerTypeSystem;
class IR_Atomic;
class IR;
@ -158,5 +159,5 @@ class Function {
std::unordered_map<int, int> instruction_to_basic_op;
std::unordered_map<int, int> basic_op_to_instruction;
};
} // namespace decompiler
#endif // NEXT_FUNCTION_H

View File

@ -1,6 +1,7 @@
#include "Function.h"
#include "decompiler/IR/IR.h"
namespace decompiler {
namespace {
bool in_set(RegSet& set, const Register& obj) {
return set.find(obj) != set.end();
@ -170,4 +171,5 @@ void Function::run_reg_usage() {
}
}
}
}
}
} // namespace decompiler

View File

@ -3,6 +3,7 @@
#include "third-party/fmt/core.h"
#include "decompiler/config.h"
namespace decompiler {
namespace {
TypeState construct_initial_typestate(const TypeSpec& f_ts) {
TypeState result;
@ -132,4 +133,5 @@ bool Function::run_type_analysis(const TypeSpec& my_type,
}
return true;
}
}
} // namespace decompiler

View File

@ -8,6 +8,7 @@
#include "common/type_system/deftype.h"
#include "decompiler/IR/IR.h"
namespace decompiler {
namespace {
struct FieldPrint {
char format = '\0';
@ -843,4 +844,5 @@ std::string TypeInspectorResult::print_as_deftype() {
result.append(")\n");
return result;
}
}
} // namespace decompiler

View File

@ -8,10 +8,12 @@
#include <vector>
#include "common/common_types.h"
class Field;
namespace decompiler {
class Function;
class DecompilerTypeSystem;
class LinkedObjectFile;
class Field;
struct TypeInspectorResult {
bool success = false;
@ -34,3 +36,4 @@ TypeInspectorResult inspect_inspect_method(Function& inspect,
const std::string& type_name,
DecompilerTypeSystem& dts,
LinkedObjectFile& file);
} // namespace decompiler

View File

@ -11,9 +11,11 @@
#include "decompiler/Function/Function.h"
#include "decompiler/Function/BasicBlocks.h"
#include "decompiler/Disasm/InstructionMatching.h"
#include "decompiler/ObjectFile/LinkedObjectFile.h"
#include "decompiler/IR/IR.h"
#include "common/symbols.h"
namespace decompiler {
namespace {
///////////////////////////////
@ -135,7 +137,7 @@ std::shared_ptr<IR_Atomic> to_asm_automatic(const std::string& str, Instruction&
}
if (instr.n_src >= 3) {
result->src1 = instr_atom_to_ir(instr.get_src(2), idx);
result->src2 = instr_atom_to_ir(instr.get_src(2), idx);
}
result->set_reg_info();
@ -2520,7 +2522,7 @@ void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjec
// everything failed
if (!result) {
// temp hack for debug:
printf("Instruction -> BasicOp failed on %s\n", i.to_string(*file).c_str());
printf("Instruction -> BasicOp failed on %s\n", i.to_string(file->labels).c_str());
func->add_basic_op(std::make_shared<IR_Failed_Atomic>(), instr, instr + 1);
} else {
if (!func->contains_asm_ops && dynamic_cast<IR_AsmOp*>(result.get())) {
@ -2536,3 +2538,4 @@ void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjec
}
}
}
} // namespace decompiler

View File

@ -6,8 +6,10 @@
#pragma once
namespace decompiler {
class Function;
struct BasicBlock;
class LinkedObjectFile;
void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjectFile* file);
void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjectFile* file);
} // namespace decompiler

View File

@ -7,6 +7,7 @@
#include "decompiler/Disasm/InstructionMatching.h"
#include "decompiler/IR/IR.h"
namespace decompiler {
namespace {
std::shared_ptr<IR> cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx);
@ -1278,3 +1279,4 @@ std::shared_ptr<IR> build_cfg_ir(Function& function,
return nullptr;
}
}
} // namespace decompiler

View File

@ -2,9 +2,11 @@
#include <memory>
namespace decompiler {
class IR;
class Function;
class LinkedObjectFile;
class ControlFlowGraph;
std::shared_ptr<IR> build_cfg_ir(Function& function, ControlFlowGraph& cfg, LinkedObjectFile& file);
std::shared_ptr<IR> build_cfg_ir(Function& function, ControlFlowGraph& cfg, LinkedObjectFile& file);
} // namespace decompiler

View File

@ -3,6 +3,7 @@
#include "common/goos/PrettyPrinter.h"
#include "third-party/fmt/core.h"
namespace decompiler {
// hack to print out reverse deref paths on loads to help with debugging load stuff.
bool enable_hack_load_path_print = false;
// hack to print (begin x) as x to make debug output easier to read.
@ -1273,4 +1274,5 @@ goos::Object IR_Break::to_form(const LinkedObjectFile& file) const {
void IR_Break::get_children(std::vector<std::shared_ptr<IR>>* output) const {
output->push_back(return_code);
output->push_back(dead_code);
}
}
} // namespace decompiler

View File

@ -11,14 +11,15 @@
#include "decompiler/util/DecompilerTypeSystem.h"
#include "decompiler/util/TP_Type.h"
class LinkedObjectFile;
class DecompilerTypeSystem;
class ExpressionStack;
namespace goos {
class Object;
}
namespace decompiler {
class LinkedObjectFile;
class DecompilerTypeSystem;
class ExpressionStack;
class IR {
public:
virtual goos::Object to_form(const LinkedObjectFile& file) const = 0;
@ -765,5 +766,5 @@ class IR_Break : public virtual IR {
goos::Object to_form(const LinkedObjectFile& file) const override;
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
};
} // namespace decompiler
#endif // JAK_IR_H

View File

@ -2,6 +2,7 @@
#include "IR.h"
#include "decompiler/Function/ExpressionStack.h"
namespace decompiler {
bool IR_Set_Atomic::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) {
// first determine the type of the set.
switch (kind) {
@ -448,4 +449,5 @@ bool IR_FloatMath1::update_from_stack(const std::unordered_set<Register, Registe
LinkedObjectFile& file) {
update_from_stack_helper(&arg, consume, stack, file);
return true;
}
}
} // namespace decompiler

View File

@ -5,6 +5,7 @@
#include "decompiler/util/TP_Type.h"
#include "decompiler/ObjectFile/LinkedObjectFile.h"
namespace decompiler {
namespace {
// bool is_plain_type(const TP_Type& type, const TypeSpec& ts) {
// return type.as_typespec() == ts;
@ -945,4 +946,5 @@ TP_Type IR_CMoveF::get_expression_type(const TypeState& input,
(void)file;
(void)dts;
return TP_Type::make_from_typespec(TypeSpec("symbol"));
}
}
} // namespace decompiler

1127
decompiler/IR2/AtomicOp.cpp Normal file

File diff suppressed because it is too large Load Diff

544
decompiler/IR2/AtomicOp.h Normal file
View File

@ -0,0 +1,544 @@
#pragma once
#include <string>
#include <optional>
#include <cassert>
#include "common/goos/Object.h"
#include "decompiler/Disasm/Register.h"
#include "decompiler/Disasm/Instruction.h"
#include "Env.h"
namespace decompiler {
class Expr;
/*!
* A "Variable" represents a register at a given instruction index.
* The register can either be a GOAL local variable or a GOAL register used in inline assembly.
* Because OpenGOAL's registers don't one-to-one map to GOAL registers, GOAL "inline assembly
* registers" will become OpenGOAL variables, and are treated similarly to variables in
* decompilation.
*
* In the earlier parts of decompilation, this just behaves like a register in all cases.
* But in later parts registers can be mapped to real local variables with types. A variable can
* look itself up in an environment to determine what "local variable" it is.
*
* Note: a variable is _not_ allowed to be R0, AT, S7, K0, K1, FP, or RA by default, as these
* can never hold normal GOAL locals. Inline assembly may use these, but you must set the allow_all
* flag to true in the constructor of Variable to indicate this is what you really want.
*
* Note: access to the process pointer (s6) is handled as a variable. As a result, you may always
* use s6 as a variable.
*/
class Variable {
 public:
  // Which side of the instruction this variable refers to.
  enum class Mode : u8 {
    READ,  // represents value of the variable at the beginning of the instruction
    WRITE  // represents value of the variable at the end of the instruction
  };

  // Selects the textual form produced by to_string.
  enum class Print {
    AS_REG,       // print as a PS2 register name
    FULL,         // print as a register name, plus an index, plus read or write
    AS_VARIABLE,  // print local variable name, error if impossible
    AUTOMATIC,    // print as variable, but if that's not possible print as reg.
  };

  Variable() = default;
  Variable(Mode mode, Register reg, int atomic_idx, bool allow_all = false);

  std::string to_string(const Env* env, Print mode = Print::AUTOMATIC) const;

  bool operator==(const Variable& other) const;
  bool operator!=(const Variable& other) const;

  // Plain accessors for the three stored fields.
  Mode mode() const { return m_mode; }
  const Register& reg() const { return m_reg; }
  int idx() const { return m_atomic_idx; }

 private:
  Mode m_mode = Mode::READ;  // do we represent a read or a write?
  Register m_reg;            // the EE register
  int m_atomic_idx = -1;     // the index in the function's list of AtomicOps
};
/*!
* An atomic operation represents a single operation from the point of view of the IR2 system.
* Each IR2 op is one or more instructions.
* Each function can be represented as a list of AtomicOps. These are stored in exactly the same
* order as the instructions appear.
*
* The AtomicOps use SimpleAtom and SimpleExpression. These are extremely limited versions of
* the full IR2 expression system, but are much easier to work with because they are less general
* and can't be nested infinitely. They also have features specific to the AtomicOp system that are
* not required for full expressions. The full expression system will later convert these into the
* more complicated expressions.
*
* The types of AtomicOp are:
* ConditionalMoveFalseOp
* CallOp
* SpecialOp
* BranchOp
* LoadVarOp
* StoreOp
* SetVarConditionOp
* AsmOp
* SetVarOp
*/
class AtomicOp {
 public:
  explicit AtomicOp(int my_idx);

  // AtomicOps are owned and destroyed through base-class pointers
  // (std::unique_ptr<AtomicOp>), so the destructor must be virtual. Without it, destroying a
  // derived op through the base is undefined behavior.
  virtual ~AtomicOp() = default;

  std::string to_string(const std::vector<DecompilerLabel>& labels, const Env* env);
  virtual goos::Object to_form(const std::vector<DecompilerLabel>& labels,
                               const Env* env) const = 0;
  virtual bool operator==(const AtomicOp& other) const = 0;
  bool operator!=(const AtomicOp& other) const;

  // determine if this is a (set! <var> thing) form. These will be handled differently in
  // expression building.
  virtual bool is_variable_set() const = 0;

  // determine if this is a GOAL "sequence point".
  // non-sequence point instructions may be out of order from the point of view of the expression
  // stack.
  virtual bool is_sequence_point() const = 0;

  // get the variable being set by this operation. Only call this if is_variable_set returns true.
  virtual Variable get_set_destination() const = 0;

  // get the value of the variable being set, as an expression. Only call this if is_variable_set
  // returns true.
  virtual std::unique_ptr<Expr> get_set_source_as_expr() const = 0;

  // convert me to an expression. If I'm a set!, this will produce a (set! x y), which may be
  // undesirable when expression stacking.
  virtual std::unique_ptr<Expr> get_as_expr() const = 0;

  // figure out what registers are read and written in this AtomicOp and update read_regs,
  // write_regs, and clobber_regs. It's expected that these have duplicates if a register appears
  // in the original instructions multiple times. Ex: "and v0, v1, v1" would end up putting v1 in
  // read twice.
  virtual void update_register_info() = 0;

  // Read-only views of the register lists filled in by update_register_info().
  // These are const so they can also be called on a const AtomicOp.
  const std::vector<Register>& read_regs() const { return m_read_regs; }
  const std::vector<Register>& write_regs() const { return m_write_regs; }
  const std::vector<Register>& clobber_regs() const { return m_clobber_regs; }

 protected:
  int m_my_idx = -1;
  // the register values that are read (at the start of this op)
  std::vector<Register> m_read_regs;
  // the registers that have actual values written into them (at the end of this op)
  std::vector<Register> m_write_regs;
  // the registers which have junk written into them.
  std::vector<Register> m_clobber_regs;
};
/*!
* A SimpleAtom has a value. In some cases it can be set.
*/
class SimpleAtom {
 public:
  // What this atom holds. A default-constructed atom is INVALID.
  enum class Kind : u8 {
    VARIABLE,
    INTEGER_CONSTANT,
    SYMBOL_PTR,
    SYMBOL_VAL,
    EMPTY_LIST,
    STATIC_ADDRESS,
    INVALID
  };

  SimpleAtom() = default;

  // Named constructors, one per kind of atom that can be built here.
  static SimpleAtom make_var(const Variable& var);
  static SimpleAtom make_sym_ptr(const std::string& name);
  static SimpleAtom make_sym_val(const std::string& name);
  static SimpleAtom make_empty_list();
  static SimpleAtom make_int_constant(s64 value);

  goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const;

  // Kind queries.
  bool is_var() const { return m_kind == Kind::VARIABLE; }
  bool is_int() const { return m_kind == Kind::INTEGER_CONSTANT; }
  bool is_sym_ptr() const { return m_kind == Kind::SYMBOL_PTR; }
  bool is_sym_val() const { return m_kind == Kind::SYMBOL_VAL; }
  bool is_empty_list() const { return m_kind == Kind::EMPTY_LIST; }
  bool is_static_addr() const { return m_kind == Kind::STATIC_ADDRESS; }

  // The held variable. Asserts that this atom actually is a variable.
  const Variable& var() const {
    assert(is_var());
    return m_variable;
  }

  bool operator==(const SimpleAtom& other) const;
  bool operator!=(const SimpleAtom& other) const { return !((*this) == other); }

  void get_regs(std::vector<Register>* out) const;

 private:
  Kind m_kind = Kind::INVALID;
  std::string m_string;  // for symbol ptr and symbol val
  s64 m_int = 0;         // for integer constant and static address label id
  Variable m_variable;
};
/*!
* A "simple expression" can be used within an AtomicOp.
* AtomicOps are often made up of very few instructions, so these expressions are quite simple and
* can't nest. There is an "operation" and some arguments. There are no side effects of a
* SimpleExpression. The side effects will be captured by the AtomicOp.
*
* Note - there is an expression kind called identity which takes one argument and uses that
* argument as an expression.
*/
class SimpleExpression {
 public:
  // The operation applied to the argument(s).
  enum class Kind : u8 {
    INVALID,
    IDENTITY,
    DIV_S,
    MUL_S,
    ADD_S,
    SUB_S,
    MIN_S,
    MAX_S,
    FLOAT_TO_INT,
    INT_TO_FLOAT,
    ABS_S,
    NEG_S,
    SQRT_S,
    ADD,
    SUB,
    MUL_SIGNED,
    DIV_SIGNED,
    MOD_SIGNED,
    DIV_UNSIGNED,
    MOD_UNSIGNED,
    OR,
    AND,
    NOR,
    XOR,
    LEFT_SHIFT,
    RIGHT_SHIFT_ARITH,
    RIGHT_SHIFT_LOGIC,
    MUL_UNSIGNED,
    NOT,
    NEG
  };

  // how many arguments?
  int args() const { return n_args; }

  // Get argument idx. The index must be in [0, args()); both bounds are asserted so that a
  // negative index cannot read before the start of the argument array.
  const SimpleAtom& get_arg(int idx) const {
    assert(idx >= 0 && idx < args());
    return m_args[idx];
  }

  Kind kind() const { return m_kind; }

  SimpleExpression(Kind kind, const SimpleAtom& arg0);
  SimpleExpression(Kind kind, const SimpleAtom& arg0, const SimpleAtom& arg1);

  goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const;
  bool operator==(const SimpleExpression& other) const;
  bool is_identity() const { return m_kind == Kind::IDENTITY; }
  void get_regs(std::vector<Register>* out) const;

 private:
  Kind m_kind = Kind::INVALID;
  SimpleAtom m_args[2];
  s8 n_args = -1;
};
/*!
* Set a variable equal to a Simple Expression
*/
class SetVarOp : public AtomicOp {
 public:
  // The destination variable must be tagged with the same op index as this op.
  SetVarOp(const Variable& dst, const SimpleExpression& src, int my_idx)
      : AtomicOp(my_idx), m_dst(dst), m_src(src) {
    assert(my_idx == dst.idx());
  }

  // AtomicOp interface.
  goos::Object to_form(const std::vector<DecompilerLabel>& labels,
                       const Env* env) const override;
  bool operator==(const AtomicOp& other) const override;
  bool is_variable_set() const override;
  bool is_sequence_point() const override;
  Variable get_set_destination() const override;
  std::unique_ptr<Expr> get_set_source_as_expr() const override;
  std::unique_ptr<Expr> get_as_expr() const override;
  void update_register_info() override;

 private:
  Variable m_dst;          // the variable being set
  SimpleExpression m_src;  // the value it is set to
};
/*!
* An AsmOp represents a single inline assembly instruction. This is used when the BasicOpBuilder
* pass decides that an instruction could not have been generated from high-level GOAL code, and
* instead must be due to inline assembly.
*
* Each AsmOp stores the instruction it uses, as well as "Variable"s for each register used.
*/
class AsmOp : public AtomicOp {
 public:
  AsmOp(Instruction instr, int my_idx);

  // AtomicOp interface.
  goos::Object to_form(const std::vector<DecompilerLabel>& labels,
                       const Env* env) const override;
  bool operator==(const AtomicOp& other) const override;
  bool is_variable_set() const override;
  bool is_sequence_point() const override;
  Variable get_set_destination() const override;
  std::unique_ptr<Expr> get_set_source_as_expr() const override;
  std::unique_ptr<Expr> get_as_expr() const override;
  void update_register_info() override;

 private:
  Instruction m_instr;               // the wrapped instruction
  std::optional<Variable> m_dst;     // destination variable, if there is one
  std::optional<Variable> m_src[3];  // up to three source variables
};
/*!
* A condition represents something that can generate a 0 or 1 based on a check or comparison.
* This can be used as a branch condition in a BranchOp.
* This can be used as a condition in a SetVarConditionOp, which sets a variable to a GOAL boolean.
* Sometimes a SetVarConditionOp gets spread across many many instructions, in which case it is
* not correctly detected here.
*/
class IR2_Condition {
 public:
  // The check being performed. The constructors below take zero, one, or two source variables.
  enum class Kind {
    NOT_EQUAL,
    EQUAL,
    LESS_THAN_SIGNED,
    GREATER_THAN_SIGNED,
    LEQ_SIGNED,
    GEQ_SIGNED,
    GREATER_THAN_ZERO_SIGNED,
    LEQ_ZERO_SIGNED,
    LESS_THAN_ZERO,
    GEQ_ZERO_SIGNED,
    LESS_THAN_UNSIGNED,
    GREATER_THAN_UNSIGNED,
    LEQ_UNSIGNED,
    GEQ_UNSIGNED,
    ZERO,
    NONZERO,
    FALSE,
    TRUTHY,
    ALWAYS,
    NEVER,
    FLOAT_EQUAL,
    FLOAT_NOT_EQUAL,
    FLOAT_LESS_THAN,
    FLOAT_GEQ,
    FLOAT_LEQ,
    FLOAT_GREATER_THAN,
    INVALID
  };

  explicit IR2_Condition(Kind kind);
  IR2_Condition(Kind kind, const Variable& src0);
  IR2_Condition(Kind kind, const Variable& src0, const Variable& src1);

  void invert();

  bool operator==(const IR2_Condition& other) const;
  bool operator!=(const IR2_Condition& other) const { return !((*this) == other); }

  goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const;
  void get_regs(std::vector<Register>* out) const;

 private:
  Kind m_kind = Kind::INVALID;
  Variable m_src[2];  // source variables of the check
};
/*!
* Set a variable to a GOAL boolean, based off of a condition.
*/
class SetVarConditionOp : public AtomicOp {
 public:
  SetVarConditionOp(Variable dst, IR2_Condition condition, int my_idx);

  // AtomicOp interface.
  goos::Object to_form(const std::vector<DecompilerLabel>& labels,
                       const Env* env) const override;
  bool operator==(const AtomicOp& other) const override;
  bool is_variable_set() const override;
  bool is_sequence_point() const override;
  Variable get_set_destination() const override;
  std::unique_ptr<Expr> get_set_source_as_expr() const override;
  std::unique_ptr<Expr> get_as_expr() const override;
  void update_register_info() override;

 private:
  Variable m_dst;             // the variable receiving the boolean
  IR2_Condition m_condition;  // the condition that produces it
};
/*!
* Store an Atom into a memory location.
* Note - this is _not_ considered a set! form because you are not setting the value of a
* register which can be expression-compacted.
*/
class StoreOp : public AtomicOp {
 public:
  StoreOp(SimpleExpression addr, SimpleAtom value, int my_idx);

  // AtomicOp interface.
  goos::Object to_form(const std::vector<DecompilerLabel>& labels,
                       const Env* env) const override;
  bool operator==(const AtomicOp& other) const override;
  bool is_variable_set() const override;
  bool is_sequence_point() const override;
  Variable get_set_destination() const override;
  std::unique_ptr<Expr> get_set_source_as_expr() const override;
  std::unique_ptr<Expr> get_as_expr() const override;
  void update_register_info() override;

 private:
  SimpleExpression m_addr;  // where to store
  SimpleAtom m_value;       // what to store
};
/*!
* Load a value into a variable.
* This is treated as a set! form.
*/
class LoadVarOp : public AtomicOp {
 public:
  LoadVarOp(Variable dst, SimpleExpression src, int my_idx);

  // AtomicOp interface.
  goos::Object to_form(const std::vector<DecompilerLabel>& labels,
                       const Env* env) const override;
  bool operator==(const AtomicOp& other) const override;
  bool is_variable_set() const override;
  bool is_sequence_point() const override;
  Variable get_set_destination() const override;
  std::unique_ptr<Expr> get_set_source_as_expr() const override;
  std::unique_ptr<Expr> get_as_expr() const override;
  void update_register_info() override;

 private:
  Variable m_dst;          // the variable being loaded into
  SimpleExpression m_src;  // the source of the load
};
/*!
* This represents one of the possible instructions that can go in a branch delay slot.
* These will be "absorbed" into higher level structures, but for the purpose of printing AtomicOps,
* it will be nice to have these print like expressions.
*
* These are always part of the branch op.
*/
class IR2_BranchDelay {
 public:
  // The recognized kinds of delay-slot instruction.
  enum class Kind {
    NOP,
    SET_REG_FALSE,
    SET_REG_TRUE,
    SET_REG_REG,
    SET_BINTEGER,
    SET_PAIR,
    DSLLV,
    NEGATE
  };

  // One constructor per number of variables used (zero to three).
  explicit IR2_BranchDelay(Kind kind);
  IR2_BranchDelay(Kind kind, Variable var0);
  IR2_BranchDelay(Kind kind, Variable var0, Variable var1);
  IR2_BranchDelay(Kind kind, Variable var0, Variable var1, Variable var2);

  goos::Object to_form(const std::vector<DecompilerLabel>& labels, const Env* env) const;
  bool operator==(const IR2_BranchDelay& other) const;
  void get_regs(std::vector<Register>* write, std::vector<Register>* read) const;

 private:
  std::optional<Variable> m_var[3];  // unused slots stay empty
  Kind m_kind;
};
/*!
* This represents a combination of a condition + a branch + the branch delay slot.
* This is considered as a single operation.
*/
class BranchOp : public AtomicOp {
 public:
  BranchOp(bool likely,
           IR2_Condition condition,
           int label,
           IR2_BranchDelay branch_delay,
           int my_idx);

  // AtomicOp interface.
  goos::Object to_form(const std::vector<DecompilerLabel>& labels,
                       const Env* env) const override;
  bool operator==(const AtomicOp& other) const override;
  bool is_variable_set() const override;
  bool is_sequence_point() const override;
  Variable get_set_destination() const override;
  std::unique_ptr<Expr> get_set_source_as_expr() const override;
  std::unique_ptr<Expr> get_as_expr() const override;
  void update_register_info() override;

 private:
  bool m_likely = false;           // the "likely" flag given at construction
  IR2_Condition m_condition;       // the branch condition
  int m_label = -1;                // the label given at construction
  IR2_BranchDelay m_branch_delay;  // what happens in the delay slot
};
/*!
* A "special" op has no arguments.
* The kinds are NOP, BREAK, and SUSPEND.
*/
class SpecialOp : public AtomicOp {
 public:
  // Which special operation this is.
  enum class Kind {
    NOP,
    BREAK,
    SUSPEND,
  };

  SpecialOp(Kind kind, int my_idx);

  // AtomicOp interface.
  goos::Object to_form(const std::vector<DecompilerLabel>& labels,
                       const Env* env) const override;
  bool operator==(const AtomicOp& other) const override;
  bool is_variable_set() const override;
  bool is_sequence_point() const override;
  Variable get_set_destination() const override;
  std::unique_ptr<Expr> get_set_source_as_expr() const override;
  std::unique_ptr<Expr> get_as_expr() const override;
  void update_register_info() override;

 private:
  Kind m_kind;
};
/*!
* Represents a function call.
* This has so many special cases and exceptions that it is separate from SpecialOp.
*/
class CallOp : public AtomicOp {
 public:
  // explicit, matching AtomicOp's constructor: a bare int should never silently convert
  // into a CallOp.
  explicit CallOp(int my_idx);

  // AtomicOp interface.
  goos::Object to_form(const std::vector<DecompilerLabel>& labels,
                       const Env* env) const override;
  bool operator==(const AtomicOp& other) const override;
  bool is_variable_set() const override;
  bool is_sequence_point() const override;
  Variable get_set_destination() const override;
  std::unique_ptr<Expr> get_set_source_as_expr() const override;
  std::unique_ptr<Expr> get_as_expr() const override;
  void update_register_info() override;
};
/*!
* Unfortunately the original GOAL compiler does something weird when compiling (zero? x) or (not
* (zero? x)) when the result needs to be stored in a GOAL boolean (not in a branch condition). It
* first does a (set! result #t), then (possibly) a bunch of code to evaluate x, then does a
* conditional move (movn/movz). As a result, we can't recognize this as a Condition in the
* AtomicOp pass. Instead we'll recognize it as a (set! result #t) .... (cmove result flag) where
* flag is checked to be 0 or not. It's weird because all of the other similar cases get this
* right.
*
* Note - this isn't considered a variable set. It's "conditional set" so it needs to be
* handled separately. Unfortunately.
*/
class ConditionalMoveFalseOp : public AtomicOp {
 public:
  ConditionalMoveFalseOp(Variable dst, Variable src, bool on_zero, int my_idx);

  // AtomicOp interface.
  goos::Object to_form(const std::vector<DecompilerLabel>& labels,
                       const Env* env) const override;
  bool operator==(const AtomicOp& other) const override;
  bool is_variable_set() const override;
  bool is_sequence_point() const override;
  Variable get_set_destination() const override;
  std::unique_ptr<Expr> get_set_source_as_expr() const override;
  std::unique_ptr<Expr> get_as_expr() const override;
  void update_register_info() override;

 private:
  Variable m_dst;  // the destination given at construction
  Variable m_src;  // the source given at construction
  bool m_on_zero;  // the on_zero flag given at construction
};
} // namespace decompiler

View File

@ -0,0 +1,141 @@
#include "AtomicOpBuilder.h"
#include "common/log/log.h"
#include "decompiler/Function/BasicBlocks.h"
#include "decompiler/Function/Function.h"
namespace decompiler {
namespace {
/*!
 * Build the Variable for a register written by the op at index idx.
 */
Variable make_dst_var(Register reg, int idx) {
  Variable written(Variable::Mode::WRITE, reg, idx);
  return written;
}
/*!
 * Build the Variable for a register read by the op at index idx.
 */
Variable make_src_var(Register reg, int idx) {
  Variable read(Variable::Mode::READ, reg, idx);
  return read;
}
/*!
 * Build a variable SimpleAtom for a register read by the op at index idx.
 */
SimpleAtom make_src_atom(Register reg, int idx) {
  const Variable source = make_src_var(reg, idx);
  return SimpleAtom::make_var(source);
}
/*!
* Convert a single instruction in the form instr dest_reg, src_reg, src_reg
* to an atomic op of (set! dst_reg (op src_reg src_reg))
* Like daddu a0, a1, a2
*/
void make_3reg_op(const Instruction& instr,
SimpleExpression::Kind kind,
int idx,
std::unique_ptr<AtomicOp>& result) {
auto dst = make_dst_var(instr.get_dst(0).get_reg(), idx);
auto src0 = make_src_atom(instr.get_src(0).get_reg(), idx);
auto src1 = make_src_atom(instr.get_src(1).get_reg(), idx);
result = std::make_unique<SetVarOp>(dst, SimpleExpression(kind, src0, src1), idx);
}
/*!
 * Convert a single "and" instruction into a SetVarOp. Always succeeds.
 */
bool convert_and_1(const Instruction& i0, int idx, std::unique_ptr<AtomicOp>& result) {
  // and reg, reg, reg:
  const auto op_kind = SimpleExpression::Kind::AND;
  make_3reg_op(i0, op_kind, idx, result);
  return true;
}
/*!
 * Try to convert a single instruction into an AtomicOp.
 * Returns true and fills in result on success, false if the instruction kind is not handled.
 */
bool convert_1(const Instruction& i0, int idx, std::unique_ptr<AtomicOp>& result) {
  bool handled = false;
  switch (i0.kind) {
    case InstructionKind::AND:
      handled = convert_and_1(i0, idx, result);
      break;
    default:
      // not a kind we know how to convert as a single instruction.
      break;
  }
  return handled;
}
} // namespace
/*!
 * Convert an entire basic block and add the results to a FunctionAtomicOps.
 * Also records the block's first/end op indices and the instruction <-> atomic op mappings.
 * @param begin_idx : the index of the first instruction of the block
 * @param begin     : the start of the instructions for the block
 * @param end       : the end of the instructions for the block
 * @param labels    : labels, used to print the instruction if conversion fails
 * @param container : the container to add to
 */
void convert_block_to_atomic_ops(int begin_idx,
                                 std::vector<Instruction>::const_iterator begin,
                                 std::vector<Instruction>::const_iterator end,
                                 const std::vector<DecompilerLabel>& labels,
                                 FunctionAtomicOps* container) {
  container->block_id_to_first_atomic_op.push_back(static_cast<int>(container->ops.size()));

  // Index of the instruction that `instr` currently points at. This must advance together with
  // `instr`; otherwise every op in the block would be mapped to the block's first instruction.
  int instr_idx = begin_idx;

  for (auto instr = begin; instr < end;) {
    // how many instructions can we look at, at most?
    const int n_instr = static_cast<int>(end - instr);
    // how many instructions did we use?
    int length = 0;
    // what is the index of the atomic op we would add
    const int op_idx = static_cast<int>(container->ops.size());

    bool converted = false;
    std::unique_ptr<AtomicOp> op;

    if (n_instr >= 4) {
      // try 4 instructions
    }

    if (!converted && n_instr >= 3) {
      // try 3 instructions
    }

    if (!converted && n_instr >= 2) {
      // try 2 instructions
    }

    if (!converted) {
      // try 1 instruction
      if (convert_1(*instr, op_idx, op)) {
        converted = true;
        length = 1;
      }
    }

    if (!converted) {
      // try assembly fallback.
    }

    if (!converted) {
      // failed!
      lg::die("Failed to convert instruction {} to an atomic op", instr->to_string(labels));
    }

    assert(converted && length && op);

    // add mappings: the op starts at the current instruction and covers `length` instructions.
    container->atomic_op_to_instruction[op_idx] = instr_idx;
    for (int i = 0; i < length; i++) {
      container->instruction_to_basic_op[instr_idx + i] = op_idx;
    }

    // add
    op->update_register_info();
    container->ops.emplace_back(std::move(op));

    // move on to the first instruction not consumed by this op.
    instr += length;
    instr_idx += length;
  }

  container->block_id_to_end_atomic_op.push_back(static_cast<int>(container->ops.size()));
}
/*!
 * Convert every basic block of a function to AtomicOps.
 * Blocks without instructions get -1 for both their first and end op index.
 */
FunctionAtomicOps convert_function_to_atomic_ops(const Function& func,
                                                 const std::vector<DecompilerLabel>& labels) {
  FunctionAtomicOps result;

  for (const auto& block : func.basic_blocks) {
    const bool has_instructions = block.end_word > block.start_word;
    if (!has_instructions) {
      // nothing to convert, but keep one entry per block in both per-block tables.
      result.block_id_to_first_atomic_op.push_back(-1);
      result.block_id_to_end_atomic_op.push_back(-1);
      continue;
    }

    // we only convert the blocks which actually have instructions:
    auto block_begin = func.instructions.begin() + block.start_word;
    auto block_end = func.instructions.begin() + block.end_word;
    convert_block_to_atomic_ops(block.start_word, block_begin, block_end, labels, &result);
  }

  // every block must have exactly one entry in each per-block table.
  assert(func.basic_blocks.size() == result.block_id_to_end_atomic_op.size());
  assert(func.basic_blocks.size() == result.block_id_to_first_atomic_op.size());
  return result;
}
} // namespace decompiler

View File

@ -0,0 +1,47 @@
#pragma once
#include <vector>
#include "AtomicOp.h"
namespace decompiler {
class Function;
struct BasicBlock;
class LinkedObjectFile;
/*!
 * All of the AtomicOps of a single function, plus lookup tables relating them to
 * instructions and basic blocks.
 */
struct FunctionAtomicOps {
  // the actual ops, stored in the correct order
  std::vector<std::unique_ptr<AtomicOp>> ops;

  // instruction index -> index of the atomic op it belongs to
  std::unordered_map<int, int> instruction_to_basic_op;

  // atomic op index -> instruction index
  std::unordered_map<int, int> atomic_op_to_instruction;

  // basic block index -> index of the first op of the block
  std::vector<int> block_id_to_first_atomic_op;

  // basic block index -> index of the last op of the block + 1
  std::vector<int> block_id_to_end_atomic_op;
};
/*!
* Convert an entire basic block and add the results to a FunctionAtomicOps.
* Updates the mapping between blocks, instructions, and atomic ops as needed
* @param begin_idx : the index of the first instruction for the block
* @param begin : the start of the instructions for the block
* @param end : the end of the instructions for the block
* @param labels : label names for the function, used for error prints on failed conversions
* @param container : the container to add to
*/
void convert_block_to_atomic_ops(int begin_idx,
std::vector<Instruction>::const_iterator begin,
std::vector<Instruction>::const_iterator end,
const std::vector<DecompilerLabel>& labels,
FunctionAtomicOps* container);
/*!
* Convert an entire function to AtomicOps
*/
FunctionAtomicOps convert_function_to_atomic_ops(const Function& func,
const std::vector<DecompilerLabel>& labels);
} // namespace decompiler

10
decompiler/IR2/Env.cpp Normal file
View File

@ -0,0 +1,10 @@
#include <stdexcept>
#include "Env.h"
namespace decompiler {
/*!
 * Look up the local variable name for a register at a given atomic op index.
 * Not implemented yet: always throws std::runtime_error.
 */
std::string Env::get_variable_name(Register reg, int atomic_idx) const {
  // the arguments are unused until this is implemented; silence unused-parameter warnings.
  static_cast<void>(reg);
  static_cast<void>(atomic_idx);
  throw std::runtime_error("Env::get_variable_name not yet implemented.");
}
} // namespace decompiler

21
decompiler/IR2/Env.h Normal file
View File

@ -0,0 +1,21 @@
#pragma once
#include <string>
#include "decompiler/Disasm/Register.h"
namespace decompiler {
/*!
* An "environment" for a single function.
* This contains data for an entire function, like which registers are live when, the types of
* values in registers, and local variable names. This does not actually store IR itself, just
* shared data that all IR can look at. The concept is somewhat similar to Env in the compiler.
*/
class Env {
 public:
  // Returns the stored flag; it defaults to false and nothing in this class sets it.
  bool has_local_vars() const { return m_has_local_vars; }

  // Name of the variable for the given register at the given atomic op index.
  std::string get_variable_name(Register reg, int atomic_idx) const;

 private:
  bool m_has_local_vars = false;
};
} // namespace decompiler

8
decompiler/IR2/IR2.h Normal file
View File

@ -0,0 +1,8 @@
#pragma once
namespace decompiler {
// Empty for now: declares no members.
class IR2 {};
} // namespace decompiler

View File

@ -16,6 +16,7 @@
#include "common/log/log.h"
#include "common/goos/PrettyPrinter.h"
namespace decompiler {
/*!
* Set the number of segments in this object file.
* This can only be done once, and must be done before adding any words.
@ -45,7 +46,7 @@ int LinkedObjectFile::get_label_id_for(int seg, int offset) {
if (kv == label_per_seg_by_offset.at(seg).end()) {
// create a new label
int id = labels.size();
Label label;
DecompilerLabel label;
label.target_segment = seg;
label.offset = offset;
label.name = "L" + std::to_string(id);
@ -498,7 +499,7 @@ void LinkedObjectFile::process_fp_relative_links() {
} break;
default:
printf("unknown fp using op: %s\n", instr.to_string(*this).c_str());
printf("unknown fp using op: %s\n", instr.to_string(labels).c_str());
assert(false);
}
}
@ -544,7 +545,7 @@ std::string LinkedObjectFile::to_asm_json(const std::string& obj_file_name) {
}
auto& instr = func.instructions.at(i);
op["id"] = i;
op["asm_op"] = instr.to_string(*this);
op["asm_op"] = instr.to_string(labels);
if (func.has_basic_ops() && func.instr_starts_basic_op(i)) {
op["basic_op"] = func.get_basic_op_at_instr(i)->print(*this);
@ -608,7 +609,7 @@ std::string LinkedObjectFile::print_function_disassembly(Function& func,
}
auto& instr = func.instructions.at(i);
std::string line = " " + instr.to_string(*this);
std::string line = " " + instr.to_string(labels);
if (write_hex) {
if (line.length() < 60) {
@ -1053,14 +1054,15 @@ goos::Object LinkedObjectFile::to_form_script_object(int seg,
return result;
}
u32 LinkedObjectFile::read_data_word(const Label& label) {
u32 LinkedObjectFile::read_data_word(const DecompilerLabel& label) {
assert(0 == (label.offset % 4));
auto& word = words_by_seg.at(label.target_segment).at(label.offset / 4);
assert(word.kind == LinkedWord::Kind::PLAIN_DATA);
return word.data;
}
std::string LinkedObjectFile::get_goal_string_by_label(const Label& label) const {
std::string LinkedObjectFile::get_goal_string_by_label(const DecompilerLabel& label) const {
assert(0 == (label.offset % 4));
return get_goal_string(label.target_segment, (label.offset / 4) - 1, false);
}
}
} // namespace decompiler

View File

@ -14,19 +14,11 @@
#include <unordered_map>
#include <unordered_set>
#include "LinkedWord.h"
#include "decompiler/Disasm/DecompilerLabel.h"
#include "decompiler/Function/Function.h"
#include "common/common_types.h"
/*!
* A label to a location in this object file.
* Doesn't have to be word aligned.
*/
struct Label {
std::string name;
int target_segment;
int offset; // in bytes
};
namespace decompiler {
/*!
* An object file's data with linking information included.
*/
@ -69,8 +61,8 @@ class LinkedObjectFile {
const std::string& extra_name);
std::string print_asm_function_disassembly(const std::string& my_name);
u32 read_data_word(const Label& label);
std::string get_goal_string_by_label(const Label& label) const;
u32 read_data_word(const DecompilerLabel& label);
std::string get_goal_string_by_label(const DecompilerLabel& label) const;
struct Stats {
uint32_t total_code_bytes = 0;
@ -131,7 +123,7 @@ class LinkedObjectFile {
std::vector<std::vector<LinkedWord>> words_by_seg;
std::vector<uint32_t> offset_of_data_zone_by_seg;
std::vector<std::vector<Function>> functions_by_seg;
std::vector<Label> labels;
std::vector<DecompilerLabel> labels;
private:
goos::Object to_form_script(int seg, int word_idx, std::vector<bool>& seen);
@ -142,5 +134,6 @@ class LinkedObjectFile {
std::vector<std::unordered_map<int, int>> label_per_seg_by_offset;
};
} // namespace decompiler
#endif // NEXT_LINKEDOBJECTFILE_H

View File

@ -11,6 +11,7 @@
#include "decompiler/util/DecompilerTypeSystem.h"
#include "common/link_types.h"
namespace decompiler {
// There are three link versions:
// V2 - not really in use anymore, but V4 will reuse logic from it (and the game didn't rename the
// functions) V3 - optimized for code and small stuff. Supports segments (main, debug, top-level) V4
@ -819,3 +820,4 @@ LinkedObjectFile to_linked_object_file(const std::vector<uint8_t>& data,
return result;
}
} // namespace decompiler

View File

@ -11,9 +11,11 @@
#include "LinkedObjectFile.h"
namespace decompiler {
class DecompilerTypeSystem;
LinkedObjectFile to_linked_object_file(const std::vector<uint8_t>& data,
const std::string& name,
DecompilerTypeSystem& dts);
} // namespace decompiler
#endif // NEXT_LINKEDOBJECTFILECREATION_H

View File

@ -11,6 +11,7 @@
#include <cstdint>
#include <string>
namespace decompiler {
class LinkedWord {
public:
explicit LinkedWord(uint32_t _data) : data(_data) {}
@ -31,5 +32,6 @@ class LinkedWord {
int label_id = -1;
std::string symbol_name;
};
} // namespace decompiler
#endif // JAK2_DISASSEMBLER_LINKEDWORD_H

View File

@ -27,6 +27,7 @@
#include "common/log/log.h"
#include "third-party/json.hpp"
namespace decompiler {
namespace {
std::string strip_dgo_extension(const std::string& x) {
auto ext = x.substr(x.length() - 4, 4);
@ -715,7 +716,7 @@ void ObjectFileDB::process_tpages() {
100.f * float(success) / float(total), timer.getMs());
}
std::string ObjectFileDB::process_game_text() {
std::string ObjectFileDB::process_game_text_files() {
lg::info("- Finding game text...");
std::string text_string = "COMMON";
Timer timer;
@ -727,7 +728,7 @@ std::string ObjectFileDB::process_game_text() {
for_each_obj([&](ObjectFileData& data) {
if (data.name_in_dgo.substr(1) == text_string) {
file_count++;
auto statistics = ::process_game_text(data);
auto statistics = process_game_text(data);
string_count += statistics.total_text;
char_count += statistics.total_chars;
if (text_by_language_by_id.find(statistics.language) != text_by_language_by_id.end()) {
@ -743,7 +744,7 @@ std::string ObjectFileDB::process_game_text() {
return write_game_text(text_by_language_by_id);
}
std::string ObjectFileDB::process_game_count() {
std::string ObjectFileDB::process_game_count_file() {
lg::info("- Finding game count file...");
bool found = false;
std::string result;
@ -752,7 +753,7 @@ std::string ObjectFileDB::process_game_count() {
if (data.name_in_dgo == "game-cnt") {
assert(!found);
found = true;
result = write_game_count(::process_game_count(data));
result = write_game_count(process_game_count(data));
}
});
@ -1125,4 +1126,5 @@ void ObjectFileDB::dump_raw_objects(const std::string& output_dir) {
auto dest = output_dir + "/" + data.to_unique_name();
file_util::write_binary_file(dest, data.data.data(), data.data.size());
});
}
}
} // namespace decompiler

View File

@ -18,6 +18,7 @@
#include "decompiler/util/DecompilerTypeSystem.h"
#include "common/common_types.h"
namespace decompiler {
/*!
* A "record" which can be used to identify an object file.
*/
@ -67,8 +68,8 @@ class ObjectFileDB {
void analyze_functions();
void process_tpages();
void analyze_expressions();
std::string process_game_count();
std::string process_game_text();
std::string process_game_count_file();
std::string process_game_text_files();
ObjectFileData& lookup_record(const ObjectFileRecord& rec);
DecompilerTypeSystem dts;
@ -148,5 +149,6 @@ class ObjectFileDB {
uint32_t unique_obj_bytes = 0;
} stats;
};
} // namespace decompiler
#endif // JAK2_DISASSEMBLER_OBJECTFILEDB_H

View File

@ -2,6 +2,7 @@
#include "third-party/json.hpp"
#include "common/util/FileUtil.h"
namespace decompiler {
Config gConfig;
Config& get_config() {
@ -101,3 +102,4 @@ void set_config(const std::string& path_to_config_file) {
}
}
}
} // namespace decompiler

View File

@ -9,6 +9,7 @@
#include <unordered_map>
#include "decompiler/Disasm/Register.h"
namespace decompiler {
struct TypeHint {
Register reg;
std::string type_name;
@ -46,5 +47,6 @@ struct Config {
Config& get_config();
void set_config(const std::string& path_to_config_file);
} // namespace decompiler
#endif // JAK2_DISASSEMBLER_CONFIG_H

View File

@ -6,6 +6,7 @@
#include "common/common_types.h"
#include "decompiler/ObjectFile/LinkedWord.h"
namespace decompiler {
class LinkedWordReader {
public:
explicit LinkedWordReader(const std::vector<LinkedWord>* words) : m_words(words) {}
@ -37,4 +38,5 @@ class LinkedWordReader {
private:
const std::vector<LinkedWord>* m_words = nullptr;
u32 m_offset = 0;
};
};
} // namespace decompiler

View File

@ -10,6 +10,7 @@
#include "game/common/str_rpc_types.h"
#include "StrFileReader.h"
namespace decompiler {
StrFileReader::StrFileReader(const std::string& file_path) {
auto data = file_util::read_binary_file(file_path);
assert(data.size() >= SECTOR_SIZE); // must have at least the header sector
@ -178,4 +179,5 @@ std::string StrFileReader::get_full_name(const std::string& short_name) const {
assert(strcmp(iso_name_1, iso_name_2) == 0);
return result;
}
}
} // namespace decompiler

View File

@ -9,6 +9,7 @@
#include <vector>
#include "common/common_types.h"
namespace decompiler {
class StrFileReader {
public:
explicit StrFileReader(const std::string& file_path);
@ -19,3 +20,4 @@ class StrFileReader {
private:
std::vector<std::vector<u8>> m_chunks;
};
} // namespace decompiler

View File

@ -3,6 +3,7 @@
#include "game_count.h"
#include "LinkedWordReader.h"
namespace decompiler {
GameCountResult process_game_count(ObjectFileData& data) {
GameCountResult result;
auto& words = data.linked_data.words_by_seg.at(0);
@ -37,4 +38,5 @@ std::string write_game_count(const GameCountResult& result) {
str += fmt::format("(:unknown-1 {} :unknown-2 {})\n", result.mystery_data[0],
result.mystery_data[1]);
return str;
}
}
} // namespace decompiler

View File

@ -3,6 +3,7 @@
#include <vector>
#include "common/common_types.h"
namespace decompiler {
struct GameCountResult {
struct CountInfo {
s32 money_count;
@ -15,4 +16,5 @@ struct GameCountResult {
struct ObjectFileData;
GameCountResult process_game_count(ObjectFileData& data);
std::string write_game_count(const GameCountResult& result);
std::string write_game_count(const GameCountResult& result);
} // namespace decompiler

View File

@ -7,6 +7,7 @@
#include "decompiler/ObjectFile/ObjectFileDB.h"
#include "common/goos/Reader.h"
namespace decompiler {
namespace {
template <typename T>
T get_word(const LinkedWord& word) {
@ -17,7 +18,7 @@ T get_word(const LinkedWord& word) {
return result;
}
Label get_label(ObjectFileData& data, const LinkedWord& word) {
DecompilerLabel get_label(ObjectFileData& data, const LinkedWord& word) {
assert(word.kind == LinkedWord::PTR);
return data.linked_data.labels.at(word.label_id);
}
@ -159,4 +160,5 @@ std::string write_game_text(
}
return result;
}
}
} // namespace decompiler

View File

@ -2,6 +2,7 @@
#include <string>
#include <unordered_map>
namespace decompiler {
struct ObjectFileData;
struct GameTextResult {
@ -13,4 +14,5 @@ struct GameTextResult {
GameTextResult process_game_text(ObjectFileData& data);
std::string write_game_text(
const std::unordered_map<int, std::unordered_map<int, std::string>>& data);
const std::unordered_map<int, std::unordered_map<int, std::string>>& data);
} // namespace decompiler

View File

@ -20,6 +20,7 @@
#include "decompiler/ObjectFile/ObjectFileDB.h"
#include "third-party/fmt/core.h"
namespace decompiler {
namespace {
/*!
@ -314,7 +315,7 @@ struct Texture {
u32 packed_info_words[9];
};
Label name_label;
DecompilerLabel name_label;
std::string name;
u32 size;
float uv_dist;
@ -342,7 +343,7 @@ struct Texture {
* Unclear what the segments really are, maybe you could split up big tpages if needed?
*/
struct TexturePageSegment {
Label block_data_label;
DecompilerLabel block_data_label;
u32 size = 0xffffffff;
u32 dest = 0xffffffff;
std::string print_debug() const {
@ -379,10 +380,10 @@ struct FileInfo {
* GOAL texture-page type.
*/
struct TexturePage {
Label info_label;
DecompilerLabel info_label;
FileInfo info;
Label name_label;
DecompilerLabel name_label;
std::string name;
u32 id = 0xffffffff;
@ -392,7 +393,7 @@ struct TexturePage {
TexturePageSegment segments[3];
u32 pad[16] = {};
// data...
std::vector<Label> data;
std::vector<DecompilerLabel> data;
std::vector<Texture> textures;
std::string print_debug() const {
@ -423,7 +424,7 @@ struct TexturePage {
* Convert a label to the offset (words) in the object segment.
* If basic is set, gives you a pointer to the beginning of the memory, if the thing is a basic.
*/
int label_to_word_offset(Label l, bool basic) {
int label_to_word_offset(DecompilerLabel l, bool basic) {
assert((l.offset & 3) == 0);
int result = l.offset / 4;
if (basic) {
@ -441,7 +442,7 @@ bool is_type_tag(const LinkedWord& word, const std::string& type) {
return word.kind == LinkedWord::TYPE_PTR && word.symbol_name == type;
}
Label get_label(ObjectFileData& data, const LinkedWord& word) {
DecompilerLabel get_label(ObjectFileData& data, const LinkedWord& word) {
assert(word.kind == LinkedWord::PTR);
return data.linked_data.labels.at(word.label_id);
}
@ -905,3 +906,4 @@ TPageResultStats process_tpage(ObjectFileData& data) {
}
return stats;
}
} // namespace decompiler

View File

@ -1,5 +1,6 @@
#pragma once
namespace decompiler {
struct ObjectFileData;
struct TPageResultStats {
@ -7,4 +8,5 @@ struct TPageResultStats {
int successful_textures = 0;
};
TPageResultStats process_tpage(ObjectFileData& data);
TPageResultStats process_tpage(ObjectFileData& data);
} // namespace decompiler

View File

@ -7,6 +7,7 @@
#include "common/util/FileUtil.h"
int main(int argc, char** argv) {
using namespace decompiler;
lg::set_file(file_util::get_file_path({"log/decompiler.txt"}));
lg::set_file_level(lg::level::info);
lg::set_stdout_level(lg::level::info);
@ -67,7 +68,7 @@ int main(int argc, char** argv) {
}
if (get_config().process_game_text) {
auto result = db.process_game_text();
auto result = db.process_game_text_files();
file_util::write_text_file(file_util::get_file_path({"assets", "game_text.txt"}), result);
}
@ -76,7 +77,7 @@ int main(int argc, char** argv) {
}
if (get_config().process_game_count) {
auto result = db.process_game_count();
auto result = db.process_game_count_file();
file_util::write_text_file(file_util::get_file_path({"assets", "game_count.txt"}), result);
}

View File

@ -5,6 +5,7 @@
#include "common/log/log.h"
#include "TP_Type.h"
namespace decompiler {
/*!
 * Construct the decompiler type system and register the builtin types on the underlying ts.
 */
DecompilerTypeSystem::DecompilerTypeSystem() {
ts.add_builtin_types();
}
@ -322,4 +323,5 @@ int DecompilerTypeSystem::get_format_arg_count(const TP_Type& type) {
} else {
return type.get_format_string_arg_count();
}
}
}
} // namespace decompiler

View File

@ -5,7 +5,8 @@
#include "decompiler/Disasm/Register.h"
#include "common/goos/Reader.h"
struct TP_Type;
namespace decompiler {
class TP_Type;
struct TypeState;
class DecompilerTypeSystem {
@ -54,5 +55,6 @@ class DecompilerTypeSystem {
private:
goos::Reader m_reader;
};
} // namespace decompiler
#endif // JAK_DECOMPILERTYPESYSTEM_H

View File

@ -1,5 +1,6 @@
#pragma once
namespace decompiler {
template <typename T>
struct MatchParam {
MatchParam() { is_wildcard = true; }
@ -15,4 +16,5 @@ struct MatchParam {
bool operator==(const T& other) const { return is_wildcard || (value == other); }
bool operator!=(const T& other) const { return !(*this == other); }
};
};
} // namespace decompiler

View File

@ -1,6 +1,7 @@
#include "TP_Type.h"
#include "third-party/fmt/core.h"
namespace decompiler {
std::string TypeState::print_gpr_masked(u32 mask) const {
std::string result;
for (int i = 0; i < 32; i++) {
@ -117,3 +118,4 @@ TypeSpec TP_Type::typespec() const {
assert(false);
}
}
} // namespace decompiler

View File

@ -5,83 +5,7 @@
#include "common/common_types.h"
#include "decompiler/Disasm/Register.h"
// struct TP_Type {
// enum Kind {
// OBJECT_OF_TYPE,
// TYPE_OBJECT,
// FALSE,
// NONE,
// PRODUCT,
// OBJ_PLUS_PRODUCT,
// PARTIAL_METHOD_TABLE_ACCESS, // type + method_number * 4
// METHOD_NEW_OF_OBJECT,
// STRING
// } kind = NONE;
// // in the case that we are type_object, just store the type name in a single arg ts.
// TypeSpec ts;
// int multiplier;
// std::string str_data;
//
// TP_Type() = default;
// explicit TP_Type(const TypeSpec& _ts) {
// kind = OBJECT_OF_TYPE;
// ts = _ts;
// }
//
// TP_Type simplify() const;
// std::string print() const;
//
// bool is_object_of_type() const { return kind == TYPE_OBJECT || ts == TypeSpec("type"); }
//
// TypeSpec as_typespec() const {
// switch (kind) {
// case OBJECT_OF_TYPE:
// return ts;
// case TYPE_OBJECT:
// return TypeSpec("type");
// case FALSE:
// return TypeSpec("symbol");
// case NONE:
// return TypeSpec("none");
// case PRODUCT:
// case METHOD_NEW_OF_OBJECT:
// return ts;
// default:
// assert(false);
// }
// }
//
// static TP_Type make_partial_method_table_access(TypeSpec ts) {
// TP_Type result;
// result.kind = PARTIAL_METHOD_TABLE_ACCESS;
// result.ts = std::move(ts);
// return result;
// }
//
// static TP_Type make_type_object(const std::string& name) {
// TP_Type result;
// result.kind = TYPE_OBJECT;
// result.ts = TypeSpec(name);
// return result;
// }
//
// static TP_Type make_string_object(const std::string& str) {
// TP_Type result;
// result.kind = STRING;
// result.ts = TypeSpec("string");
// result.str_data = str;
// return result;
// }
//
// static TP_Type make_none() {
// TP_Type result;
// result.kind = NONE;
// return result;
// }
//
// bool operator==(const TP_Type& other) const;
//};
namespace decompiler {
/*!
* A TP_Type is a specialized typespec used in the type propagation algorithm.
* It is basically a normal typespec plus some optional information.
@ -267,4 +191,5 @@ struct TypeState {
assert(false);
}
}
};
};
} // namespace decompiler

View File

@ -18,12 +18,14 @@ add_executable(goalc-test
test_pretty_print.cpp
test_zydis.cpp
goalc/test_goal_kernel.cpp
decompiler/test_AtomicOpBuilder.cpp
decompiler/test_InstructionParser.cpp
${GOALC_TEST_FRAMEWORK_SOURCES}
${GOALC_TEST_CASES})
enable_testing()
target_link_libraries(goalc-test common runtime compiler gtest Zydis)
target_link_libraries(goalc-test common runtime compiler gtest decomp Zydis)
IF (WIN32)
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
@ -36,5 +38,5 @@ if(UNIX AND CMAKE_COMPILER_IS_GNUCXX AND CODE_COVERAGE)
setup_target_for_coverage_lcov(NAME goalc-test_coverage
EXECUTABLE goalc-test --gtest_color=yes
DEPENDENCIES goalc-test
EXCLUDE "third-party/*" "/usr/include/*" "decompiler/*")
EXCLUDE "third-party/*" "/usr/include/*")
endif()

View File

@ -0,0 +1,57 @@
#include "gtest/gtest.h"
#include "decompiler/IR2/AtomicOp.h"
#include "decompiler/IR2/AtomicOpBuilder.h"
#include "decompiler/Disasm/InstructionParser.h"
using namespace decompiler;
/*!
 * Example/demo test: parse a small MIPS program from text, convert it to AtomicOps as a single
 * basic block, then check the printed form and the register read/write/clobber sets.
 */
TEST(DecompilerAtomicOpBuilder, Example) {
  InstructionParser parser;

  // some MIPS instructions. Can be a sequence of instructions, possibly with labels.
  std::string input_program =
      "and v0, v1, a3\n"
      "and a1, a2, a2";

  // convert to Instructions:
  ParsedProgram prg = parser.parse_program(input_program);

  // this verifies we can convert from a string to an instruction, and back to a string again.
  // the instruction printer adds leading indentation and a trailing newline to each instruction.
  EXPECT_EQ(prg.print(), " and v0, v1, a3\n and a1, a2, a2\n");

  // next, set up a test environment for the conversion. The FunctionAtomicOps will hold
  // the result of the conversion
  FunctionAtomicOps container;

  // treat the entire program as a single basic block, and convert!
  convert_block_to_atomic_ops(0, prg.instructions.begin(), prg.instructions.end(), prg.labels,
                              &container);

  // we should get back two operations, one per `and` instruction.
  // This is a fatal ASSERT (not EXPECT) because the checks below index into the container.
  ASSERT_EQ(container.ops.size(), 2u);

  // for now, we create an empty environment. The environment will be used in the future to
  // rename registers to variables, but for now, we just leave it empty and the printing will
  // use register names
  Env env;

  // check that we get the right result:
  EXPECT_EQ(container.ops.at(0)->to_string(prg.labels, &env), "(set! v0 (logand v1 a3))");
  EXPECT_EQ(container.ops.at(1)->to_string(prg.labels, &env), "(set! a1 (logand a2 a2))");

  // check that the registers read/written are identified for the first op (and v0, v1, a3)
  auto& first_op = container.ops.at(0);

  // two registers read (v1 and a3). Fatal, because the registers are indexed below.
  ASSERT_EQ(first_op->read_regs().size(), 2u);
  // one register written (v0). Fatal, because the register is indexed below.
  ASSERT_EQ(first_op->write_regs().size(), 1u);
  // no clobber registers (register which ends up with a garbage value in it)
  EXPECT_EQ(first_op->clobber_regs().size(), 0u);

  // the ordering of the two read registers doesn't matter. It happens to be in the same order
  // as the opcode here, but it may not always be the case.
  EXPECT_EQ(first_op->read_regs().at(0).to_string(), "v1");
  EXPECT_EQ(first_op->read_regs().at(1).to_string(), "a3");
  EXPECT_EQ(first_op->write_regs().at(0).to_string(), "v0");
}

View File

@ -0,0 +1,42 @@
#include "gtest/gtest.h"
#include "decompiler/Disasm/InstructionParser.h"
#include "decompiler/Disasm/DecompilerLabel.h"
using namespace decompiler;
/*!
 * Round-trip a set of single instructions: parsing each one and printing it back should
 * reproduce the input text exactly.
 */
TEST(DecompilerInstructionParser, SimpleTest) {
  InstructionParser parser;

  // one instruction per entry, covering the operand forms used by the parser.
  std::vector<std::string> ops = {
      "daddu a0, a1, a2",  //
      "addu r0, t7, s6",   //
      "daddiu r0, at, #t", //
      "addiu t2, t3, 12",  //
      "slti v1, a3, -23",  //
      "sltiu s3, s4, 3",   //
      "sb v1, 12(a1)",     //
      "sh s7, sym(s6)",    //
      "sd s2, -12(s2)",    //
      "lw s3, 12(s7)",     //
      "lwu t2, sym(s7)",   //
      "add.s f0, f1, f2",  //
      "beq r0, r0, L312",
  };

  // labels available to the parser; the branch above refers to L312.
  std::vector<DecompilerLabel> labels;
  for (const char* label_name : {"L311", "L312", "L313"}) {
    labels.push_back(DecompilerLabel{label_name, 1, 2});
  }

  for (auto& asm_text : ops) {
    auto parsed = parser.parse_single_instruction(asm_text, labels);
    EXPECT_EQ(asm_text, parsed.to_string(labels));
  }
}
/*!
 * Round-trip a multi-instruction program that contains no labels.
 */
TEST(DecompilerInstructionParser, ProgramNoLabels) {
  InstructionParser parser;

  // one instruction per line, written as adjacent literals.
  std::string source_text =
      " daddu a0, a1, a2\n"
      " sh s7, sym(s6)\n"
      " sb v1, 12(a1)\n";

  // parsing then printing must give back the original text.
  auto parsed_program = parser.parse_program(source_text);
  std::string printed = parsed_program.print();
  EXPECT_EQ(printed, source_text);
}
/*!
 * Round-trip a program containing label definitions and a branch that refers to a label.
 */
TEST(DecompilerInstructionParser, ProgramLabels) {
  InstructionParser parser;

  // built up line by line; the resulting text is the same as one concatenated literal.
  std::string source_text;
  source_text += "L100:\n";
  source_text += " daddu v0, v1, v0\n";
  source_text += " beq at, r0, L102\n";
  source_text += "L102:\n";
  source_text += " jr ra\n";

  // labels and instructions must both survive the parse -> print round trip.
  auto parsed_program = parser.parse_program(source_text);
  std::string printed = parsed_program.print();
  EXPECT_EQ(printed, source_text);
}

View File

@ -6,4 +6,4 @@ endif (UNIX)
include_directories(../)
add_library(fmt SHARED format.cc)
target_compile_definitions(fmt PRIVATE FMT_EXPORT INTERFACE FMT_SHARED)
# FMT_EXPORT is only defined while building fmt itself. FMT_SHARED is PUBLIC so it is defined
# both for fmt and for every target that links against it; PUBLIC already implies INTERFACE,
# so a separate INTERFACE entry is not needed.
target_compile_definitions(fmt PRIVATE FMT_EXPORT PUBLIC FMT_SHARED)

View File

@ -1,2 +1,3 @@
# Build minilzo as a shared library from its single source file.
add_library(minilzo
SHARED
minilzo.c)