[Decompiler] Experimental Expression Stack (#157)

* begin framework for expressions

* more

* clean up warnings

* small fixes

* update

* wip type prop improvements

* see if nasm works

* fix format strings
This commit is contained in:
water111 2020-12-17 15:48:07 -05:00 committed by GitHub
parent ba919a069c
commit cae3871730
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
38 changed files with 2064 additions and 631 deletions

View File

@ -51,6 +51,7 @@ class TypeSpec {
size_t arg_count() const { return m_arguments.size(); }
const TypeSpec& get_arg(int idx) const { return m_arguments.at(idx); }
TypeSpec& get_arg(int idx) { return m_arguments.at(idx); }
const TypeSpec& last_arg() const {
assert(!m_arguments.empty());
return m_arguments.back();

View File

@ -1095,6 +1095,11 @@ std::string TypeSystem::lca_base(const std::string& a, const std::string& b) {
*/
TypeSpec TypeSystem::lowest_common_ancestor(const TypeSpec& a, const TypeSpec& b) {
auto result = make_typespec(lca_base(a.base_type(), b.base_type()));
if (result == TypeSpec("function") && a.m_arguments.size() == 2 && b.m_arguments.size() == 2 &&
(a.m_arguments.at(0) == TypeSpec("_varargs_") ||
b.m_arguments.at(0) == TypeSpec("_varargs_"))) {
return TypeSpec("function");
}
if (!a.m_arguments.empty() && !b.m_arguments.empty() &&
a.m_arguments.size() == b.m_arguments.size()) {
// recursively add arguments
@ -1184,10 +1189,21 @@ bool TypeSystem::reverse_deref(const ReverseDerefInputInfo& input,
assert(di.mem_deref);
if (offset_into_elt == 0) {
if (input.mem_deref) {
path->push_back(token);
*addr_of = false;
*result_type = base_type;
return true;
// todo - this is a hack to let quadword loads always succeed because we don't support it
// correctly at this point.
if (input.load_size == 16 ||
(di.load_size == input.load_size && di.sign_extend == input.sign_extend)) {
path->push_back(token);
*addr_of = false;
*result_type = base_type;
return true;
} else {
if (debug_reverse_deref) {
fmt::print("load size {} {}, sext {} {}, input {}\n", di.load_size, input.load_size,
di.sign_extend, input.sign_extend, input.input_type.print().c_str());
}
return false;
}
} else {
path->push_back(token);
*addr_of = true;

View File

@ -46,6 +46,16 @@ struct ReverseDerefInfo {
enum Kind { INDEX, FIELD } kind;
std::string name;
int index;
/*!
 * Convert this entry to a string: the decimal index for INDEX entries, or the stored name
 * for FIELD entries.
 */
std::string print() const {
  switch (kind) {
    case INDEX:
      return std::to_string(index);
    case FIELD:
      return name;
    default:
      // Unknown kind: trap in debug builds. The explicit return keeps control from flowing
      // off the end of a non-void function (undefined behavior) when assert is compiled
      // out with NDEBUG.
      assert(false);
      return "";
  }
}
};
TypeSpec result_type;

View File

@ -23,7 +23,11 @@ add_executable(decompiler
data/game_count.cpp
Function/TypeAnalysis.cpp
IR/IR_TypeAnalysis.cpp
util/TP_Type.cpp)
util/TP_Type.cpp
Function/RegUsage.cpp
Function/ExpressionBuilder.cpp
Function/ExpressionStack.cpp
IR/IR_ExpressionStack.cpp)
target_link_libraries(decompiler
goos

View File

@ -109,6 +109,26 @@ Register::Register(Reg::RegisterKind kind, uint32_t num) {
}
}
/*!
 * Construct a register from its name. The GPR name table is searched first, then the FPR
 * name table. Throws std::runtime_error if neither table contains the name.
 */
Register::Register(const std::string& name) {
  // GPR names take priority.
  int slot = 0;
  while (slot < Reg::MAX_GPR && !(name == gpr_names[slot])) {
    ++slot;
  }
  if (slot < Reg::MAX_GPR) {
    id = (Reg::GPR << 8) | slot;
    return;
  }

  // Not a GPR, so try the 32 FPR names.
  slot = 0;
  while (slot < 32 && !(name == fpr_names[slot])) {
    ++slot;
  }
  if (slot < 32) {
    id = (Reg::FPR << 8) | slot;
    return;
  }

  throw std::runtime_error("Unknown register name: " + name);
}
/*!
* Convert to string. The register must be valid.
*/

View File

@ -127,6 +127,7 @@ class Register {
public:
Register() = default;
Register(Reg::RegisterKind kind, uint32_t num);
Register(const std::string& name);
const char* to_charp() const;
std::string to_string() const;
Reg::RegisterKind get_kind() const;

View File

@ -10,13 +10,17 @@
class LinkedObjectFile;
class Function;
using RegSet = std::unordered_set<Register, Register::hash>;
struct BasicBlock {
int start_word;
int end_word;
TypeState init_types;
// [start, end)
int start_basic_op = -1;
int end_basic_op = -1;
int basic_op_size() const { return end_basic_op - start_basic_op; }
std::string label_name;
@ -24,6 +28,15 @@ struct BasicBlock {
int succ_ft = -1;
int succ_branch = -1;
std::vector<RegSet> live, dead;
RegSet use, defs;
RegSet input, output;
bool op_has_reg_live_out(int basic_op_idx, Register reg) {
auto& lv = live.at(basic_op_idx - start_basic_op);
return lv.find(reg) != lv.end();
}
// Construct a block from its start and end word values. Only start_word and end_word are
// set here; every other member keeps its in-class default.
BasicBlock(int _start_word, int _end_word) : start_word(_start_word), end_word(_end_word) {}
};

View File

@ -1850,7 +1850,7 @@ std::shared_ptr<ControlFlowGraph> build_cfg(const LinkedObjectFile& file, int se
}
if (!cfg->is_fully_resolved()) {
func.warnings += "Failed to fully resolve CFG\n";
func.warnings += ";; Failed to fully resolve CFG\n";
}
return cfg;

View File

@ -0,0 +1,53 @@
#include "Function.h"
#include "decompiler/IR/IR.h"
#include "ExpressionStack.h"
namespace {
/*!
 * Run every form of a begin through a fresh ExpressionStack, then replace the begin's forms
 * with the stack's result. Currently always returns true; the value returned by each form's
 * expression_stack call is not inspected.
 */
bool expressionize_begin(IR_Begin* begin, LinkedObjectFile& file) {
  ExpressionStack expr_stack;
  auto& forms = begin->forms;
  // todo - this might need to run multiple times?
  for (auto it = forms.begin(); it != forms.end(); ++it) {
    (*it)->expression_stack(expr_stack, file);
  }
  forms = expr_stack.get_result();
  return true;
}
} // namespace
/*!
 * Build expressions for every IR_Begin in this function's IR.
 * Returns false if there is no IR, if any begin fails, or if an exception is thrown while
 * processing (the exception is reported with printf and swallowed). Returns true otherwise.
 */
bool Function::build_expression(LinkedObjectFile& file) {
  if (!ir) {
    return false;
  }

  try {
    // Collect the begins, which are the places where expressions can be built up.
    // The child list is walked back-to-front so that innermost begins are handled first;
    // outer begins will probably need extra fixing up or more complicated analysis.
    const auto children = ir->get_all_ir(file);
    std::vector<IR_Begin*> begins;
    for (auto it = children.rbegin(); it != children.rend(); ++it) {
      if (auto* child_begin = dynamic_cast<IR_Begin*>(it->get())) {
        begins.push_back(child_begin);
      }
    }

    // The top level may itself be a begin; it goes last.
    if (auto* top_begin = dynamic_cast<IR_Begin*>(ir.get())) {
      begins.push_back(top_begin);
    }

    // Turn each begin into an expression, stopping at the first failure.
    for (IR_Begin* current : begins) {
      if (!expressionize_begin(current, file)) {
        return false;
      }
    }
  } catch (const std::exception& e) {
    printf("build_expression failed on %s due to %s\n", guessed_name.to_string().c_str(), e.what());
    return false;
  }

  return true;
}

View File

@ -0,0 +1,80 @@
#include <stdexcept>
#include "third-party/fmt/core.h"
#include "ExpressionStack.h"
/*!
 * Format one stack entry as "d: <display> <destination> <- <source>".
 */
std::string ExpressionStack::StackEntry::print(LinkedObjectFile& file) {
  const char* dest_name = destination.to_charp();
  const std::string source_text = source->print(file);
  return fmt::format("d: {} {} <- {}", display, dest_name, source_text);
}
/*!
 * Print every entry on the stack (displayed or not), one per line, in stack order.
 */
std::string ExpressionStack::print(LinkedObjectFile& file) {
  std::string text;
  for (size_t i = 0; i < m_stack.size(); ++i) {
    text.append(m_stack[i].print(file));
    text.push_back('\n');
  }
  return text;
}
/*!
 * Push a new entry recording that reg is set to value. New entries are always displayed.
 */
void ExpressionStack::set(Register reg, std::shared_ptr<IR> value) {
  m_stack.emplace_back();
  StackEntry& added = m_stack.back();
  added.source = std::move(value);
  added.destination = reg;
  added.display = true;  // by default, we should display everything!
}
/*!
 * True when exactly one entry on the stack is marked for display.
 */
bool ExpressionStack::is_single_expression() {
  bool seen_one = false;
  for (const auto& entry : m_stack) {
    if (!entry.display) {
      continue;
    }
    if (seen_one) {
      // a second displayed entry: more than one expression.
      return false;
    }
    seen_one = true;
  }
  return seen_one;
}
/*!
 * Get the value to use for a read of reg.
 * If the most recent displayed entry sets reg, that entry is hidden and its source is
 * returned (compacting it into the reader). Otherwise a plain IR_Register for reg is
 * returned.
 */
std::shared_ptr<IR> ExpressionStack::get(Register reg) {
  if (display_stack_empty()) {
    return std::make_shared<IR_Register>(reg, -1);
  }

  StackEntry& newest = get_display_stack_top();
  if (!(newest.destination == reg)) {
    // the stack top sets some other register, nothing to compact.
    return std::make_shared<IR_Register>(reg, -1);
  }

  // the stack top is this register. We can compact!
  newest.display = false;
  return newest.source;
}
std::vector<std::shared_ptr<IR>> ExpressionStack::get_result() {
std::vector<std::shared_ptr<IR>> result;
for (auto& e : m_stack) {
if (!e.display) {
continue;
}
auto dst_reg = std::make_shared<IR_Register>(e.destination, -1);
auto op = std::make_shared<IR_Set>(IR_Set::EXPR, dst_reg, e.source);
result.push_back(op);
}
return result;
}
/*!
 * True when no entry on the stack is marked for display (including when the stack is empty).
 */
bool ExpressionStack::display_stack_empty() {
  auto it = m_stack.begin();
  while (it != m_stack.end() && !it->display) {
    ++it;
  }
  return it == m_stack.end();
}
/*!
 * Get the most recently pushed entry that is still marked for display.
 * Callers are expected to check display_stack_empty() first. If there is no displayed entry
 * this asserts in debug builds and throws std::runtime_error otherwise, so the function
 * never flows off its end without returning a reference (which would be undefined behavior
 * when assert is compiled out).
 */
ExpressionStack::StackEntry& ExpressionStack::get_display_stack_top() {
  // walk from the newest entry back to the oldest.
  for (size_t i = m_stack.size(); i-- > 0;) {
    auto& entry = m_stack.at(i);
    if (entry.display) {
      return entry;
    }
  }
  assert(false);
  throw std::runtime_error("ExpressionStack::get_display_stack_top: no displayed entries");
}

View File

@ -0,0 +1,29 @@
#pragma once
#include <vector>
#include "decompiler/IR/IR.h"
#include "decompiler/Disasm/Register.h"
#include "decompiler/util/TP_Type.h"
// A stack of "register <- value" entries. Entries are pushed with set(); get() can hide the
// newest displayed entry and hand its value to the reader; get_result() turns the entries
// that are still displayed into IR_Set forms.
class ExpressionStack {
public:
ExpressionStack() = default;
// Push a new, displayed entry that sets reg to value.
void set(Register reg, std::shared_ptr<IR> value);
// If the newest displayed entry sets reg, hide it and return its source; otherwise return
// a new IR_Register for reg.
std::shared_ptr<IR> get(Register reg);
// True when exactly one entry is displayed.
bool is_single_expression();
// Print all entries (displayed or not), one per line.
std::string print(LinkedObjectFile& file);
// Build one IR_Set (kind EXPR) per displayed entry, in stack order.
std::vector<std::shared_ptr<IR>> get_result();
private:
struct StackEntry {
bool display = true; // should this appear in the output?
Register destination; // what register we are setting
std::shared_ptr<IR> source; // the value we are setting the register to.
// TP_Type type;
// Format as "d: <display> <destination> <- <source>".
std::string print(LinkedObjectFile& file);
};
// All entries, oldest first.
std::vector<StackEntry> m_stack;
// True when no entry is displayed.
bool display_stack_empty();
// Newest displayed entry. Must only be called when display_stack_empty() is false.
StackEntry& get_display_stack_top();
};

View File

@ -71,7 +71,7 @@ void Function::analyze_prologue(const LinkedObjectFile& file) {
if (instr.kind == InstructionKind::SW && instr.get_src(0).get_reg() == make_gpr(Reg::SP)) {
printf("[Warning] %s Suspected ASM function based on this instruction in prologue: %s\n",
guessed_name.to_string().c_str(), instr.to_string(file).c_str());
warnings += "Flagged as ASM function because of " + instr.to_string(file) + "\n";
warnings += ";; Flagged as ASM function because of " + instr.to_string(file) + "\n";
suspected_asm = true;
return;
}
@ -94,7 +94,7 @@ void Function::analyze_prologue(const LinkedObjectFile& file) {
if (instr.kind == InstructionKind::SD && instr.get_src(0).get_reg() == make_gpr(Reg::S7)) {
spdlog::warn("{} Suspected ASM function based on this instruction in prologue: {}\n",
guessed_name.to_string(), instr.to_string(file));
warnings += "Flagged as ASM function because of " + instr.to_string(file) + "\n";
warnings += ";; Flagged as ASM function because of " + instr.to_string(file) + "\n";
suspected_asm = true;
return;
}
@ -134,7 +134,7 @@ void Function::analyze_prologue(const LinkedObjectFile& file) {
"[Warning] %s Stack Zeroing Detected in Function::analyze_prologue, prologue may be "
"wrong\n",
guessed_name.to_string().c_str());
warnings += "Stack Zeroing Detected, prologue may be wrong\n";
warnings += ";; Stack Zeroing Detected, prologue may be wrong\n";
expect_nothing_after_gprs = true;
break;
}
@ -146,7 +146,7 @@ void Function::analyze_prologue(const LinkedObjectFile& file) {
printf(
"[Warning] %s Suspected ASM function because register $a0 was stored on the stack!\n",
guessed_name.to_string().c_str());
warnings += "a0 on stack detected, flagging as asm\n";
warnings += ";; a0 on stack detected, flagging as asm\n";
return;
}
@ -165,7 +165,7 @@ void Function::analyze_prologue(const LinkedObjectFile& file) {
printf("[Warning] %s Suspected asm function that isn't flagged due to stack store %s\n",
guessed_name.to_string().c_str(),
instructions.at(idx + i).to_string(file).c_str());
warnings += "Suspected asm function due to stack store: " +
warnings += ";; Suspected asm function due to stack store: " +
instructions.at(idx + i).to_string(file) + "\n";
return;
}
@ -195,7 +195,7 @@ void Function::analyze_prologue(const LinkedObjectFile& file) {
printf("[Warning] %s Suspected asm function that isn't flagged due to stack store %s\n",
guessed_name.to_string().c_str(),
instructions.at(idx + i).to_string(file).c_str());
warnings += "Suspected asm function due to stack store: " +
warnings += ";; Suspected asm function due to stack store: " +
instructions.at(idx + i).to_string(file) + "\n";
return;
}
@ -356,7 +356,7 @@ void Function::check_epilogue(const LinkedObjectFile& file) {
"[Warning] %s Double Return Epilogue Hack! This is probably an ASM function in "
"disguise\n",
guessed_name.to_string().c_str());
warnings += "Double Return Epilogue - this is probably an ASM function\n";
warnings += ";; Double Return Epilogue - this is probably an ASM function\n";
}
// delay slot should be daddiu sp, sp, offset
assert(is_gpr_2_imm_int(instructions.at(idx), InstructionKind::DADDIU, make_gpr(Reg::SP),

View File

@ -7,10 +7,13 @@
#include <vector>
#include <unordered_map>
#include <stdexcept>
#include <unordered_set>
#include "decompiler/Disasm/Instruction.h"
#include "decompiler/Disasm/Register.h"
#include "BasicBlocks.h"
#include "CfgVtx.h"
#include "common/type_system/TypeSpec.h"
#include "decompiler/config.h"
class DecompilerTypeSystem;
class IR_Atomic;
@ -79,7 +82,10 @@ class Function {
int get_reginfo_basic_op_count();
bool run_type_analysis(const TypeSpec& my_type,
DecompilerTypeSystem& dts,
LinkedObjectFile& file);
LinkedObjectFile& file,
const std::unordered_map<int, std::vector<TypeHint>>& hints);
void run_reg_usage();
bool build_expression(LinkedObjectFile& file);
BlockTopologicalSort bb_topo_sort();
TypeSpec type;

View File

@ -0,0 +1,165 @@
#include "Function.h"
#include "decompiler/IR/IR.h"
namespace {
/*!
 * True if obj is a member of set.
 */
bool in_set(RegSet& set, const Register& obj) {
  return set.count(obj) > 0;
}
/*!
 * Phase 1: per-op local sets and per-block use/defs.
 * Walks the block's ops from last to first. For each op, live becomes the registers it
 * reads and dead becomes the registers it writes but does not read. The block's use and
 * defs sets are updated as the walk moves toward the start of the block.
 */
void phase1(Function& f, BasicBlock& block) {
  for (int op_idx = block.end_basic_op - 1; op_idx >= block.start_basic_op; --op_idx) {
    auto& op = f.basic_ops.at(op_idx);
    const int local_idx = op_idx - block.start_basic_op;
    RegSet& op_live = block.live.at(local_idx);
    RegSet& op_dead = block.dead.at(local_idx);

    // live = everything this op reads
    const auto& reads = op->read_regs;
    op_live.clear();
    for (const auto& reg : reads) {
      op_live.insert(reg);
    }

    // dead = everything this op overwrites without reading
    op_dead.clear();
    const auto& writes = op->write_regs;
    for (const auto& reg : writes) {
      if (op_live.count(reg) == 0) {
        op_dead.insert(reg);
      }
    }

    // block.use = op.live | (old block.use & !op.dead)
    const RegSet previous_use = block.use;
    block.use.clear();
    for (const auto& reg : op_live) {
      block.use.insert(reg);
    }
    for (const auto& reg : previous_use) {
      if (op_dead.count(reg) == 0) {
        block.use.insert(reg);
      }
    }

    // block.defs = op.dead | (old block.defs & !op.live)
    const RegSet previous_defs = block.defs;
    block.defs.clear();
    for (const auto& reg : op_dead) {
      block.defs.insert(reg);
    }
    for (const auto& reg : previous_defs) {
      if (op_live.count(reg) == 0) {
        block.defs.insert(reg);
      }
    }
  }
}
bool phase2(std::vector<BasicBlock>& blocks, BasicBlock& block) {
bool changed = false;
auto out = block.defs;
for (auto s : {block.succ_branch, block.succ_ft}) {
if (s == -1) {
continue;
}
for (auto in : blocks.at(s).input) {
out.insert(in);
}
}
RegSet in = block.use;
for (auto x : out) {
if (!in_set(block.defs, x)) {
in.insert(x);
}
}
if (in != block.input || out != block.output) {
changed = true;
block.input = in;
block.output = out;
}
return changed;
}
void phase3(std::vector<BasicBlock>& blocks, BasicBlock& block) {
RegSet live_local;
for (auto s : {block.succ_branch, block.succ_ft}) {
if (s == -1) {
continue;
}
for (auto i : blocks.at(s).input) {
live_local.insert(i);
}
}
for (int i = block.end_basic_op; i-- > block.start_basic_op;) {
auto& lv = block.live.at(i - block.start_basic_op);
auto& dd = block.dead.at(i - block.start_basic_op);
RegSet new_live = lv;
for (auto x : live_local) {
if (!in_set(dd, x)) {
new_live.insert(x);
}
}
lv = live_local;
live_local = new_live;
}
}
} // namespace
/*!
 * Analyze the function's use of registers to determine which are live where, then record
 * on each basic op which of the registers it reads are "consumed" by it.
 */
void Function::run_reg_usage() {
  // phase 1: size the per-op sets and compute local information for each block.
  for (auto& block : basic_blocks) {
    const auto op_count = block.basic_op_size();
    block.live.resize(op_count);
    block.dead.resize(op_count);
    phase1(*this, block);
  }

  // phase 2: keep updating block input/output sets until a full pass changes nothing.
  bool any_change = true;
  while (any_change) {
    any_change = false;
    for (auto& block : basic_blocks) {
      // phase2 must run for every block, so it is evaluated before the ||.
      any_change = phase2(basic_blocks, block) || any_change;
    }
  }

  // phase 3: final per-op live sets.
  for (auto& block : basic_blocks) {
    phase3(basic_blocks, block);
  }

  // An op "consumes" a register when the value coming in to the op is:
  //  A. read by the operation.
  //  B. no longer read after the operation.
  for (auto& block : basic_blocks) {
    for (int op_idx = block.start_basic_op; op_idx < block.end_basic_op; ++op_idx) {
      auto& op = basic_ops.at(op_idx);
      for (auto reg : op->read_regs) {
        // not live out: definitely consumed.
        bool is_consumed = !block.op_has_reg_live_out(op_idx, reg);
        if (!is_consumed) {
          // live out, but if this op also writes it, what's live out is a new value.
          for (auto written : op->write_regs) {
            if (written == reg) {
              is_consumed = true;
              break;
            }
          }
        }
        if (is_consumed) {
          op->consumed.insert(reg);
        }
      }
    }
  }
}

View File

@ -9,20 +9,41 @@ TypeState construct_initial_typestate(const TypeSpec& f_ts) {
int goal_args[] = {Reg::A0, Reg::A1, Reg::A2, Reg::A3, Reg::T0, Reg::T1, Reg::T2, Reg::T3};
assert(f_ts.base_type() == "function");
assert(f_ts.arg_count() >= 1);
assert(f_ts.arg_count() <= 8);
assert(f_ts.arg_count() <= 8 + 1); // 8 args + 1 return.
for (int i = 0; i < int(f_ts.arg_count()) - 1; i++) {
auto reg_id = goal_args[i];
auto reg_type = f_ts.get_arg(i);
result.gpr_types[reg_id].ts = reg_type;
result.gpr_types[reg_id].kind = TP_Type::OBJECT_OF_TYPE;
result.gpr_types[reg_id] = TP_Type::make_from_typespec(reg_type);
}
return result;
}
void apply_hints(const std::vector<TypeHint>& hints, TypeState* state, DecompilerTypeSystem& dts) {
for (auto& hint : hints) {
try {
state->get(hint.reg) = TP_Type::make_from_typespec(dts.parse_type_spec(hint.type_name));
} catch (std::exception& e) {
printf("failed to parse hint: %s\n", e.what());
assert(false);
}
}
}
/*!
 * Apply the hints registered under idx, if there are any. Does nothing when the map has no
 * entry for idx.
 */
void try_apply_hints(int idx,
                     const std::unordered_map<int, std::vector<TypeHint>>& hints,
                     TypeState* state,
                     DecompilerTypeSystem& dts) {
  const auto found = hints.find(idx);
  if (found == hints.end()) {
    return;
  }
  apply_hints(found->second, state, dts);
}
} // namespace
bool Function::run_type_analysis(const TypeSpec& my_type,
DecompilerTypeSystem& dts,
LinkedObjectFile& file) {
LinkedObjectFile& file,
const std::unordered_map<int, std::vector<TypeHint>>& hints) {
// STEP 0 - setup settings
dts.type_prop_settings.reset();
if (get_config().pair_functions_by_name.find(guessed_name.to_string()) !=
@ -48,6 +69,8 @@ bool Function::run_type_analysis(const TypeSpec& my_type,
// STEP 3 - initialize type state.
basic_blocks.at(0).init_types = construct_initial_typestate(my_type);
// and add hints:
try_apply_hints(0, hints, &basic_blocks.at(0).init_types, dts);
// STEP 2 - loop while types are changing
bool run_again = true;
@ -60,13 +83,18 @@ bool Function::run_type_analysis(const TypeSpec& my_type,
for (int op_id = block.start_basic_op; op_id < block.end_basic_op; op_id++) {
auto& op = basic_ops.at(op_id);
// apply type hints only if we are not the first op.
if (op_id != block.start_basic_op) {
try_apply_hints(op_id, hints, init_types, dts);
}
// while the implementation of propagate_types is in progress, it may throw
// for unimplemented cases. Eventually this try/catch should be removed.
try {
op->propagate_types(*init_types, file, dts);
} catch (std::runtime_error& e) {
fmt::print("Type prop fail: {}\n\n\n", e.what());
warnings += "Type prop attempted and failed. ";
fmt::print("Type prop fail on {}: {}\n", guessed_name.to_string(), e.what());
warnings += ";; Type prop attempted and failed.\n";
return false;
}
@ -80,6 +108,9 @@ bool Function::run_type_analysis(const TypeSpec& my_type,
for (auto succ_block_id : {block.succ_ft, block.succ_branch}) {
if (succ_block_id != -1) {
auto& succ_block = basic_blocks.at(succ_block_id);
// apply hint
try_apply_hints(succ_block.start_basic_op, hints, init_types, dts);
// set types to LCA (current, new)
if (dts.tp_lca(&succ_block.init_types, *init_types)) {
// if something changed, run again!
@ -91,9 +122,9 @@ bool Function::run_type_analysis(const TypeSpec& my_type,
}
auto last_op = basic_ops.back();
auto last_type = last_op->end_types.get(Register(Reg::GPR, Reg::V0)).as_typespec();
auto last_type = last_op->end_types.get(Register(Reg::GPR, Reg::V0)).typespec();
if (last_type != my_type.last_arg()) {
warnings += fmt::format("return type mismatch {} vs {}. ", last_type.print(),
warnings += fmt::format(";; return type mismatch {} vs {}. ", last_type.print(),
my_type.last_arg().print());
}

View File

@ -1403,6 +1403,12 @@ std::shared_ptr<IR_Atomic> try_beq(Instruction& instr, Instruction& next_instr,
instr.get_src(2).get_label(), get_branch_delay(next_instr, idx), false);
op->update_reginfo_self(0, 1, 0);
return op;
} else if (instr.kind == InstructionKind::BEQ && instr.get_src(1).is_reg(make_gpr(Reg::R0))) {
auto op = std::make_shared<IR_Branch_Atomic>(
Condition(Condition::ZERO, make_reg(instr.get_src(0).get_reg(), idx), nullptr, nullptr),
instr.get_src(2).get_label(), get_branch_delay(next_instr, idx), false);
op->update_reginfo_self(0, 1, 0);
return op;
} else if (instr.kind == InstructionKind::BEQ) {
auto op = std::make_shared<IR_Branch_Atomic>(
Condition(Condition::EQUAL, make_reg(instr.get_src(0).get_reg(), idx),
@ -1532,7 +1538,7 @@ std::shared_ptr<IR_Atomic> try_slt(Instruction& i0, Instruction& i1, int idx) {
result->clobber_regs.push_back(temp);
result->write_regs.push_back(left);
result->read_regs.push_back(right);
result->read_regs.push_back(right);
result->read_regs.push_back(left);
result->reg_info_set = true;
return result;
}
@ -1547,7 +1553,7 @@ std::shared_ptr<IR_Atomic> try_slt(Instruction& i0, Instruction& i1, int idx) {
result->clobber_regs.push_back(temp);
result->write_regs.push_back(left);
result->read_regs.push_back(right);
result->read_regs.push_back(right);
result->read_regs.push_back(left);
result->reg_info_set = true;
return result;
}
@ -1646,12 +1652,22 @@ std::shared_ptr<IR_Atomic> try_slt(Instruction& i0, Instruction& i1, Instruction
if (i2.get_src(1).get_reg() != clobber_reg) {
return nullptr; // TODO!
}
auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(
Condition(Condition::LESS_THAN_SIGNED, make_reg(src0_reg, idx),
make_reg(src1_reg, idx), make_reg(clobber_reg, idx))));
op->update_reginfo_self<IR_Compare>(1, 2, 1);
return op;
if (src1_reg == make_gpr(Reg::R0)) {
auto op = make_set_atomic(
IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(Condition(Condition::LESS_THAN_ZERO, make_reg(src0_reg, idx),
nullptr, make_reg(clobber_reg, idx))));
op->update_reginfo_self<IR_Compare>(1, 1, 1);
return op;
} else {
auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(Condition(
Condition::LESS_THAN_SIGNED, make_reg(src0_reg, idx),
make_reg(src1_reg, idx), make_reg(clobber_reg, idx))));
op->update_reginfo_self<IR_Compare>(1, 2, 1);
return op;
}
} else if (i0.kind == InstructionKind::SLT && i1.kind == InstructionKind::BEQ) {
auto clobber_reg = i0.get_dst(0).get_reg();
auto src0_reg = i0.get_src(0).get_reg();
@ -2435,7 +2451,7 @@ void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjec
func->add_basic_op(std::make_shared<IR_Failed_Atomic>(), instr, instr + 1);
} else {
if (!func->contains_asm_ops && dynamic_cast<IR_AsmOp*>(result.get())) {
func->warnings += "Function contains asm op";
func->warnings += ";; Function contains asm op\n";
func->contains_asm_ops = true;
}

View File

@ -623,7 +623,8 @@ std::shared_ptr<IR> try_sc_as_abs(Function& f, LinkedObjectFile& file, ShortCirc
auto b0_ptr = cfg_to_ir(f, file, b0);
auto b0_ir = dynamic_cast<IR_Begin*>(b0_ptr.get());
auto branch = dynamic_cast<IR_Branch*>(b0_ir->forms.back().get());
auto branch_sp = b0_ir->forms.back();
auto branch = dynamic_cast<IR_Branch*>(branch_sp.get());
if (!branch) {
return nullptr;
}
@ -647,7 +648,10 @@ std::shared_ptr<IR> try_sc_as_abs(Function& f, LinkedObjectFile& file, ShortCirc
b0_ir->forms.pop_back();
// add the ash
b0_ir->forms.push_back(std::make_shared<IR_Set>(
IR_Set::REG_64, output, std::make_shared<IR_IntMath1>(IR_IntMath1::ABS, input)));
IR_Set::REG_64, output,
std::make_shared<IR_IntMath1>(IR_IntMath1::ABS, input,
std::dynamic_pointer_cast<IR_Atomic>(branch_sp))));
return b0_ptr;
}
@ -682,7 +686,8 @@ std::shared_ptr<IR> try_sc_as_ash(Function& f, LinkedObjectFile& file, ShortCirc
return nullptr;
}
auto branch = dynamic_cast<IR_Branch*>(b0_ir->forms.back().get());
auto branch_sp = b0_ir->forms.back();
auto branch = dynamic_cast<IR_Branch*>(branch_sp.get());
if (!branch || b1_ir->forms.size() != 2) {
return nullptr;
}
@ -752,7 +757,10 @@ std::shared_ptr<IR> try_sc_as_ash(Function& f, LinkedObjectFile& file, ShortCirc
// add the ash
b0_ir->forms.push_back(std::make_shared<IR_Set>(
IR_Set::REG_64, dest_ir,
std::make_shared<IR_Ash>(shift_ir, value_ir, clobber_ir, is_arith)));
std::make_shared<IR_Ash>(shift_ir, value_ir, clobber_ir,
std::dynamic_pointer_cast<IR_Branch_Atomic>(branch_sp),
std::dynamic_pointer_cast<IR_Atomic>(dsubu_candidate),
std::dynamic_pointer_cast<IR_Atomic>(dsrav_candidate), is_arith)));
return b0_ptr;
}
@ -1145,7 +1153,6 @@ std::shared_ptr<IR> build_cfg_ir(Function& function,
auto all_children = ir->get_all_ir(file);
all_children.push_back(ir);
for (auto& child : all_children) {
// printf("child is %s\n", child->print(file).c_str());
auto as_begin = dynamic_cast<IR_Begin*>(child.get());
if (as_begin) {
clean_up_while_loops(as_begin, file);

View File

@ -3,6 +3,9 @@
#include "common/goos/PrettyPrinter.h"
#include "third-party/fmt/core.h"
// hack to print out reverse deref paths on loads to help with debugging load stuff.
bool enable_hack_load_path_print = false;
std::vector<std::shared_ptr<IR>> IR::get_all_ir(LinkedObjectFile& file) const {
(void)file;
std::vector<std::shared_ptr<IR>> result;
@ -94,6 +97,14 @@ std::string IR_Atomic::print_with_types(const TypeState& init_types,
result += fmt::format("[{}] -> [{}]", init_types.print_gpr_masked(read_mask),
end_types.print_gpr_masked(write_mask));
if (!consumed.empty()) {
result += "c:";
for (auto x : consumed) {
result += " ";
result += x.to_charp();
}
}
return result;
}
@ -377,15 +388,6 @@ void IR_EmptyPair::get_children(std::vector<std::shared_ptr<IR>>* output) const
(void)output;
}
TP_Type IR_EmptyPair::get_expression_type(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) {
(void)input;
(void)file;
(void)dts;
return TP_Type(TypeSpec("pair"));
}
goos::Object IR_StaticAddress::to_form(const LinkedObjectFile& file) const {
// return pretty_print::build_list(pretty_print::to_symbol("&"), file.get_label_name(label_id));
return pretty_print::to_symbol(file.get_label_name(label_id));
@ -396,6 +398,19 @@ void IR_StaticAddress::get_children(std::vector<std::shared_ptr<IR>>* output) co
}
goos::Object IR_Load::to_form(const LinkedObjectFile& file) const {
if (load_path_set && enable_hack_load_path_print) {
std::vector<goos::Object> list;
if (load_path_addr_of) {
list.push_back(pretty_print::to_symbol("&->"));
} else {
list.push_back(pretty_print::to_symbol("->"));
}
list.push_back(load_path_base->to_form(file));
for (auto& x : load_path) {
list.push_back(pretty_print::to_symbol(x));
}
return pretty_print::build_list(list);
}
std::string load_operator;
switch (kind) {
case FLOAT:
@ -599,7 +614,18 @@ goos::Object IR_FloatMath1::to_form(const LinkedObjectFile& file) const {
goos::Object IR_Call::to_form(const LinkedObjectFile& file) const {
(void)file;
return pretty_print::build_list("call!");
std::vector<goos::Object> result;
result.push_back(pretty_print::to_symbol("call!"));
if (call_type_set) {
result.push_back(pretty_print::to_symbol(":arg-count"));
result.push_back(pretty_print::to_symbol(std::to_string(call_type.arg_count() - 1)));
}
for (auto& x : args) {
result.push_back(x->to_form(file));
}
return pretty_print::build_list(result);
}
void IR_Call::get_children(std::vector<std::shared_ptr<IR>>* output) const {

View File

@ -5,6 +5,7 @@
#include <utility>
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include "decompiler/Disasm/Register.h"
#include "common/type_system/TypeSpec.h"
#include "decompiler/util/DecompilerTypeSystem.h"
@ -12,6 +13,7 @@
class LinkedObjectFile;
class DecompilerTypeSystem;
class ExpressionStack;
namespace goos {
class Object;
@ -27,12 +29,33 @@ class IR {
virtual TP_Type get_expression_type(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts);
// Update the expression stack with this IR. The base implementation is a placeholder that
// always throws std::runtime_error naming the IR; subclasses override it.
virtual bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) {
  (void)stack;
  std::string message = "expression_stack NYI for ";
  message += print(file);
  throw std::runtime_error(message);
}
// Update myself to use consumed registers from the stack. The base implementation is a
// placeholder that always throws std::runtime_error naming the IR; subclasses override it.
virtual bool update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
                               ExpressionStack& stack,
                               LinkedObjectFile& file) {
  (void)stack;
  (void)consume;
  const std::string ir_text = print(file);
  throw std::runtime_error("update_from_stack NYI for " + ir_text);
}
// Get the set of consumed registers for this IR. The base implementation is a placeholder
// that always throws std::runtime_error naming the IR; subclasses override it.
virtual std::unordered_set<Register, Register::hash> get_consumed(LinkedObjectFile& file) {
  std::string message("get_consumed NYI for ");
  message.append(print(file));
  throw std::runtime_error(message);
}
virtual ~IR() = default;
};
class IR_Atomic : public virtual IR {
public:
std::vector<Register> read_regs, write_regs, clobber_regs;
std::unordered_set<Register, Register::hash> consumed;
bool reg_info_set = false;
TypeState end_types; // types at the end of this instruction
@ -81,12 +104,15 @@ class IR_Set : public virtual IR {
FPR_TO_GPR64,
GPR_TO_FPR,
REG_FLT,
REG_I128
REG_I128,
EXPR
} kind;
IR_Set(Kind _kind, std::shared_ptr<IR> _dst, std::shared_ptr<IR> _src)
: kind(_kind), dst(std::move(_dst)), src(std::move(_src)) {}
goos::Object to_form(const LinkedObjectFile& file) const override;
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override;
std::shared_ptr<IR> dst, src;
std::shared_ptr<IR> clobber = nullptr;
};
@ -103,6 +129,7 @@ class IR_Set_Atomic : public IR_Set, public IR_Atomic {
void propagate_types(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override;
};
class IR_IntMath2;
@ -144,6 +171,14 @@ class IR_Symbol : public virtual IR {
TP_Type get_expression_type(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
// No-op override: ignores all of its arguments and reports success.
bool update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
ExpressionStack& stack,
LinkedObjectFile& file) override {
(void)consume;
(void)stack;
(void)file;
return true;
}
};
class IR_SymbolValue : public virtual IR {
@ -155,6 +190,14 @@ class IR_SymbolValue : public virtual IR {
TP_Type get_expression_type(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
// No-op override: ignores all of its arguments and reports success.
bool update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
ExpressionStack& stack,
LinkedObjectFile& file) override {
(void)consume;
(void)stack;
(void)file;
return true;
}
};
class IR_EmptyPair : public virtual IR {
@ -176,6 +219,9 @@ class IR_StaticAddress : public virtual IR {
TP_Type get_expression_type(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
bool update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
ExpressionStack& stack,
LinkedObjectFile& file) override;
};
class IR_Load : public virtual IR {
@ -191,6 +237,22 @@ class IR_Load : public virtual IR {
TP_Type get_expression_type(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
bool update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
ExpressionStack& stack,
LinkedObjectFile& file) override;
// this load_path stuff is just for debugging and shouldn't be used as part of the real
// decompilation.
// Reset all load_path debugging state: no base, empty path, both flags false.
void clear_load_path() {
  load_path_base = nullptr;
  load_path.clear();
  load_path_addr_of = false;
  load_path_set = false;
}
std::shared_ptr<IR> load_path_base = nullptr;
bool load_path_set = false;
bool load_path_addr_of = false;
std::vector<std::string> load_path;
};
class IR_FloatMath2 : public virtual IR {
@ -204,6 +266,9 @@ class IR_FloatMath2 : public virtual IR {
TP_Type get_expression_type(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
bool update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
ExpressionStack& stack,
LinkedObjectFile& file) override;
};
class IR_FloatMath1 : public virtual IR {
@ -213,9 +278,9 @@ class IR_FloatMath1 : public virtual IR {
std::shared_ptr<IR> arg;
goos::Object to_form(const LinkedObjectFile& file) const override;
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
TP_Type get_expression_type(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
// TP_Type get_expression_type(const TypeState& input,
// const LinkedObjectFile& file,
// DecompilerTypeSystem& dts) override;
};
class IR_IntMath2 : public virtual IR {
@ -247,18 +312,30 @@ class IR_IntMath2 : public virtual IR {
TP_Type get_expression_type(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
bool update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
ExpressionStack& stack,
LinkedObjectFile& file) override;
};
/*!
 * Integer math expression with a single argument. The operation is selected by kind
 * (NOT, ABS, or NEG).
 */
class IR_IntMath1 : public virtual IR {
public:
enum Kind { NOT, ABS, NEG } kind;
// Construct without an associated atomic op: abs_op stays null.
IR_IntMath1(Kind _kind, std::shared_ptr<IR> _arg) : kind(_kind), arg(std::move(_arg)) {}
// Construct with an associated atomic op. The op must be non-null (asserted).
IR_IntMath1(Kind _kind, std::shared_ptr<IR> _arg, std::shared_ptr<IR_Atomic> _abs_op)
: kind(_kind), arg(std::move(_arg)), abs_op(std::move(_abs_op)) {
assert(abs_op);
}
// The operand of the math operation.
std::shared_ptr<IR> arg;
// Atomic op whose `consumed` register set is reported by get_consumed() when kind is ABS.
// NOTE(review): presumably the original atomic op this expression was built from - confirm.
std::shared_ptr<IR_Atomic> abs_op = nullptr;
goos::Object to_form(const LinkedObjectFile& file) const override;
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
TP_Type get_expression_type(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
// Registers consumed by this expression. Only implemented for ABS; other kinds throw.
std::unordered_set<Register, Register::hash> get_consumed(LinkedObjectFile& file) override;
// Substitute/update arg using the expression stack.
bool update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
ExpressionStack& stack,
LinkedObjectFile& file) override;
};
class IR_Call : public virtual IR {
@ -266,6 +343,9 @@ class IR_Call : public virtual IR {
IR_Call() = default;
goos::Object to_form(const LinkedObjectFile& file) const override;
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
std::vector<std::shared_ptr<IR>> args;
TypeSpec call_type;
bool call_type_set = false;
};
// todo
@ -275,6 +355,7 @@ class IR_Call_Atomic : public virtual IR_Call, public IR_Atomic {
void propagate_types(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override;
};
class IR_IntegerConstant : public virtual IR {
@ -436,9 +517,9 @@ class IR_Breakpoint_Atomic : public virtual IR_Atomic {
IR_Breakpoint_Atomic() = default;
goos::Object to_form(const LinkedObjectFile& file) const override;
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
void propagate_types(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
// void propagate_types(const TypeState& input,
// const LinkedObjectFile& file,
// DecompilerTypeSystem& dts) override;
};
class IR_Begin : public virtual IR {
@ -530,17 +611,32 @@ class IR_ShortCircuit : public virtual IR {
class IR_Ash : public virtual IR {
public:
std::shared_ptr<IR> shift_amount, value, clobber;
std::shared_ptr<IR_Atomic> branch_op, sub_op, shift_op;
bool is_signed = true;
IR_Ash(std::shared_ptr<IR> _shift_amount,
std::shared_ptr<IR> _value,
std::shared_ptr<IR> _clobber,
std::shared_ptr<IR_Atomic> _branch_op,
std::shared_ptr<IR_Atomic> _sub_op,
std::shared_ptr<IR_Atomic> _shift_op,
bool _is_signed)
: shift_amount(std::move(_shift_amount)),
value(std::move(_value)),
clobber(std::move(_clobber)),
is_signed(_is_signed) {}
branch_op(std::move(_branch_op)),
sub_op(std::move(_sub_op)),
shift_op(std::move(_shift_op)),
is_signed(_is_signed) {
assert(sub_op);
assert(shift_op);
assert(branch_op);
}
goos::Object to_form(const LinkedObjectFile& file) const override;
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
std::unordered_set<Register, Register::hash> get_consumed(LinkedObjectFile& file) override;
bool update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
ExpressionStack& stack,
LinkedObjectFile& file) override;
};
class IR_AsmOp : public virtual IR {
@ -559,9 +655,9 @@ class IR_AsmOp_Atomic : public virtual IR_AsmOp, public IR_Atomic {
public:
IR_AsmOp_Atomic(std::string _name) : IR_AsmOp(std::move(_name)) {}
void set_reg_info();
void propagate_types(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
// void propagate_types(const TypeState& input,
// const LinkedObjectFile& file,
// DecompilerTypeSystem& dts) override;
};
class IR_CMoveF : public virtual IR {

View File

@ -0,0 +1,224 @@
#include <algorithm>
#include "IR.h"
#include "decompiler/Function/ExpressionStack.h"
/*!
 * Apply this atomic set to the expression stack.
 * The source is first rewritten using values from the stack, then the stack is told that the
 * destination register now holds that source expression.
 * Throws std::runtime_error for set kinds that are not implemented yet. Returns true otherwise.
 */
bool IR_Set_Atomic::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) {
  // reject the kinds we can't handle up front.
  switch (kind) {
    case IR_Set::REG_64:
    case IR_Set::LOAD:
    case IR_Set::GPR_TO_FPR:  // TODO - this should probably not be invisible.
    case IR_Set::FPR_TO_GPR64:
    case IR_Set::REG_FLT:
    case IR_Set::SYM_LOAD:
      break;
    default:
      throw std::runtime_error("IR_Set_Atomic::expression_stack NYI for " + print(file));
  }

  // substitute more complicated expressions into the source.
  if (auto* source_reg = dynamic_cast<IR_Register*>(src.get())) {
    // special case: the source is a bare register, so the whole source is replaced,
    // but only if this op consumes that register.
    if (consumed.count(source_reg->reg) != 0) {
      src = stack.get(source_reg->reg);
    }
  } else {
    src->update_from_stack(consumed, stack, file);
  }

  // record the new value of the destination register on the stack.
  auto* destination = dynamic_cast<IR_Register*>(dst.get());
  assert(destination);
  stack.set(destination->reg, src);
  return true;
}
/*!
 * Apply this (non-atomic) set to the expression stack.
 * The set of consumed registers is taken from the source expression itself. The source is
 * rewritten using values from the stack, then the stack is told that the destination register
 * now holds that source expression.
 * Throws std::runtime_error for set kinds that are not implemented yet. Returns true otherwise.
 */
bool IR_Set::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) {
  // first determine the type of the set.
  switch (kind) {
    case IR_Set::REG_64:
    case IR_Set::LOAD:
    case IR_Set::GPR_TO_FPR:  // TODO - this should probably not be invisible.
    case IR_Set::FPR_TO_GPR64:
    case IR_Set::REG_FLT: {
      // normal 64-bit GPR set!
      // first, we update our source to substitute in more complicated expressions.
      auto consumed = src->get_consumed(file);
      auto src_as_reg = dynamic_cast<IR_Register*>(src.get());
      if (src_as_reg) {
        // an annoying special case: the source is a bare register, so we replace the whole
        // source, but only if it is consumed.
        if (consumed.find(src_as_reg->reg) != consumed.end()) {
          // we consume it.
          src = stack.get(src_as_reg->reg);
        }
      } else {
        src->update_from_stack(consumed, stack, file);
      }

      // next, we tell the stack the value of the register we just set
      auto dest_reg = dynamic_cast<IR_Register*>(dst.get());
      assert(dest_reg);
      stack.set(dest_reg->reg, src);
      return true;
    }
    default:
      // report the function that actually failed (this is IR_Set, not IR_Set_Atomic).
      throw std::runtime_error("IR_Set::expression_stack NYI for " + print(file));
  }
}
/*!
 * Apply this call to the expression stack.
 * Pulls the values of the argument registers and the function register (t9) off the stack and
 * stores them in args as [function, arg0, arg1, ...]. Then sets v0 on the stack to a copy of
 * this call.
 * Throws std::runtime_error if the call type is unknown or has an argument count that doesn't
 * fit in the argument registers. Returns true otherwise.
 */
bool IR_Call_Atomic::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) {
  (void)file;
  if (!call_type_set) {
    throw std::runtime_error("Call type is unknown on an IR_Call_Atomic");
  }

  constexpr int kMaxArgs = 8;
  const Reg::Gpr arg_regs[kMaxArgs] = {Reg::A0, Reg::A1, Reg::A2, Reg::A3,
                                       Reg::T0, Reg::T1, Reg::T2, Reg::T3};

  // the last entry of the call type is the return type, everything before it is an argument.
  const int nargs = int(call_type.arg_count()) - 1;
  if (nargs < 0 || nargs > kMaxArgs) {
    // check before touching args or the stack, otherwise we'd index past the end of arg_regs.
    throw std::runtime_error("IR_Call_Atomic::expression_stack: unsupported argument count " +
                             std::to_string(nargs));
  }

  // get all arguments, last to first, and then the function itself.
  for (int i = nargs; i-- > 0;) {
    args.push_back(stack.get(Register(Reg::GPR, arg_regs[i])));
  }
  args.push_back(stack.get(Register(Reg::GPR, Reg::T9)));
  // flip so the function comes first, followed by the arguments in order.
  std::reverse(args.begin(), args.end());

  // bleh...
  stack.set(Register(Reg::GPR, Reg::V0), std::make_shared<IR_Call_Atomic>(*this));
  return true;
}
namespace {
/*!
 * Update a single child expression from the expression stack.
 * If the child is a bare register, the child itself is replaced with the stack's value for that
 * register, but only when the register is in the consume set. Any other kind of child is asked
 * to update itself.
 */
void update_from_stack_helper(std::shared_ptr<IR>* ir,
                              const std::unordered_set<Register, Register::hash>& consume,
                              ExpressionStack& stack,
                              LinkedObjectFile& file) {
  auto* reg_ir = dynamic_cast<IR_Register*>(ir->get());
  if (!reg_ir) {
    // not a register, let the expression handle its own children.
    (*ir)->update_from_stack(consume, stack, file);
    return;
  }

  if (consume.count(reg_ir->reg) != 0) {
    *ir = stack.get(reg_ir->reg);
  }
}
}  // namespace
/*!
 * Update the location expression of this load from the expression stack.
 * Always returns true.
 */
bool IR_Load::update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
ExpressionStack& stack,
LinkedObjectFile& file) {
// the location is the only child: it is replaced if it's a consumed register, or updated
// recursively if it's something else.
update_from_stack_helper(&location, consume, stack, file);
return true;
}
/*!
 * A static address has nothing to substitute from the expression stack, so this does nothing.
 * Always returns true.
 */
bool IR_StaticAddress::update_from_stack(
const std::unordered_set<Register, Register::hash>& consume,
ExpressionStack& stack,
LinkedObjectFile& file) {
// all arguments are intentionally unused.
(void)consume;
(void)stack;
(void)file;
return true;
}
/*!
 * Update both arguments of this float math expression from the expression stack.
 * For DIV, arg1 is updated before arg0. For every other kind, arg0 is updated before arg1.
 * Always returns true.
 */
bool IR_FloatMath2::update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
                                      ExpressionStack& stack,
                                      LinkedObjectFile& file) {
  // replace a consumed bare register with the stack's value, or recurse into anything else.
  const auto substitute = [&](auto& operand) {
    if (auto* reg_ir = dynamic_cast<IR_Register*>(operand.get())) {
      if (consume.count(reg_ir->reg) != 0) {
        operand = stack.get(reg_ir->reg);
      }
    } else {
      operand->update_from_stack(consume, stack, file);
    }
  };

  // the order of the two updates depends on the operation and must be kept.
  const bool second_arg_first = (kind == DIV);
  substitute(second_arg_first ? arg1 : arg0);
  substitute(second_arg_first ? arg0 : arg1);
  return true;
}
/*!
 * Update both arguments of this integer math expression from the expression stack.
 * arg1 is always updated before arg0.
 * Always returns true.
 */
bool IR_IntMath2::update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
                                    ExpressionStack& stack,
                                    LinkedObjectFile& file) {
  // replace a consumed bare register with the stack's value, or recurse into anything else.
  const auto substitute = [&](auto& operand) {
    if (auto* reg_ir = dynamic_cast<IR_Register*>(operand.get())) {
      if (consume.count(reg_ir->reg) != 0) {
        operand = stack.get(reg_ir->reg);
      }
    } else {
      operand->update_from_stack(consume, stack, file);
    }
  };

  // second argument first, this order must be kept.
  substitute(arg1);
  substitute(arg0);
  return true;
}
/*!
 * Get the registers consumed by this ash expression.
 * This is the subset of {value, shift_amount} registers that appear in the consumed set of any
 * of the branch, sub, or shift ops. If either value or shift_amount is not a bare register,
 * nothing is consumed.
 */
std::unordered_set<Register, Register::hash> IR_Ash::get_consumed(LinkedObjectFile& file) {
  (void)file;

  // both inputs must still be plain registers.
  const auto* value_reg = dynamic_cast<IR_Register*>(value.get());
  const auto* amount_reg = dynamic_cast<IR_Register*>(shift_amount.get());
  if (!(amount_reg && value_reg)) {
    // consume nobody.
    // todo - is this actually right? If not, this is "safe", but might lead to ugly code.
    return {};
  }

  const IR_Atomic* const ops[] = {branch_op.get(), sub_op.get(), shift_op.get()};
  const Register candidates[] = {value_reg->reg, amount_reg->reg};

  std::unordered_set<Register, Register::hash> consumed_regs;
  for (const auto* op : ops) {
    for (const auto& candidate : candidates) {
      if (op->consumed.count(candidate) != 0) {
        consumed_regs.insert(candidate);
      }
    }
  }
  return consumed_regs;
}
/*!
 * Update the inputs of this ash expression from the expression stack.
 * value is updated first, then shift_amount.
 * Always returns true.
 */
bool IR_Ash::update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
                               ExpressionStack& stack,
                               LinkedObjectFile& file) {
  update_from_stack_helper(&value, consume, stack, file);
  update_from_stack_helper(&shift_amount, consume, stack, file);
  return true;
}
/*!
 * Get the registers consumed by this expression.
 * Only implemented for ABS, where it is the consumed set of abs_op. All other kinds throw
 * std::runtime_error.
 */
std::unordered_set<Register, Register::hash> IR_IntMath1::get_consumed(LinkedObjectFile& file) {
  if (kind != ABS) {
    throw std::runtime_error("IR_IntMath1::get_consumed NYI for " + print(file));
  }

  // an ABS must have been built with its op.
  assert(abs_op);
  return abs_op->consumed;
}
/*!
 * Update the single argument of this integer math expression from the expression stack.
 * Always returns true.
 */
bool IR_IntMath1::update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
ExpressionStack& stack,
LinkedObjectFile& file) {
// arg is replaced if it's a consumed register, or updated recursively if it's something else.
update_from_stack_helper(&arg, consume, stack, file);
return true;
}

File diff suppressed because it is too large Load Diff

View File

@ -586,7 +586,7 @@ std::string LinkedObjectFile::print_function_disassembly(Function& func,
result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n";
result += func.prologue.to_string(2) + "\n";
if (!func.warnings.empty()) {
result += "Warnings: " + func.warnings + "\n";
result += ";;Warnings:\n" + func.warnings + "\n";
}
// print each instruction in the function.
@ -784,7 +784,7 @@ std::string LinkedObjectFile::print_type_analysis_debug() {
result += "; .function " + func.guessed_name.to_string() + "\n";
result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n";
if (!func.warnings.empty()) {
result += ";; WARNING: " + func.warnings + "\n";
result += ";; WARNING:\n" + func.warnings + "\n";
}
for (auto& block : func.basic_blocks) {
@ -799,10 +799,12 @@ std::string LinkedObjectFile::print_type_analysis_debug() {
// result += func.basic_ops.at(i)->print_with_reguse(*this);
// result += func.basic_ops.at(i)->print(*this);
if (func.attempted_type_analysis) {
result += fmt::format("[{:3d}] ", i);
result += func.basic_ops.at(i)->print_with_types(*init_types, *this);
result += "\n";
init_types = &func.basic_ops.at(i)->end_types;
} else {
result += fmt::format("[{:3d}] ", i);
result += func.basic_ops.at(i)->print(*this);
result += "\n";
}
@ -817,7 +819,7 @@ std::string LinkedObjectFile::print_type_analysis_debug() {
/*!
* Hacky way to get a GOAL string object
*/
std::string LinkedObjectFile::get_goal_string(int seg, int word_idx, bool with_quotes) {
std::string LinkedObjectFile::get_goal_string(int seg, int word_idx, bool with_quotes) const {
std::string result;
if (with_quotes) {
result += "\"";
@ -826,7 +828,7 @@ std::string LinkedObjectFile::get_goal_string(int seg, int word_idx, bool with_q
if (word_idx + 1 >= int(words_by_seg[seg].size())) {
return "invalid string!\n";
}
LinkedWord& size_word = words_by_seg[seg].at(word_idx + 1);
const LinkedWord& size_word = words_by_seg[seg].at(word_idx + 1);
if (size_word.kind != LinkedWord::PLAIN_DATA) {
// sometimes an array of string pointer triggers this!
return "invalid string!\n";
@ -1036,7 +1038,7 @@ u32 LinkedObjectFile::read_data_word(const Label& label) {
return word.data;
}
std::string LinkedObjectFile::get_goal_string_by_label(const Label& label) {
std::string LinkedObjectFile::get_goal_string_by_label(const Label& label) const {
assert(0 == (label.offset % 4));
return get_goal_string(label.target_segment, (label.offset / 4) - 1, false);
}

View File

@ -70,7 +70,7 @@ class LinkedObjectFile {
std::string print_asm_function_disassembly(const std::string& my_name);
u32 read_data_word(const Label& label);
std::string get_goal_string_by_label(const Label& label);
std::string get_goal_string_by_label(const Label& label) const;
struct Stats {
uint32_t total_code_bytes = 0;
@ -138,7 +138,7 @@ class LinkedObjectFile {
goos::Object to_form_script_object(int seg, int byte_idx, std::vector<bool>& seen);
bool is_empty_list(int seg, int byte_idx);
bool is_string(int seg, int byte_idx);
std::string get_goal_string(int seg, int word_idx, bool with_quotes = true);
std::string get_goal_string(int seg, int word_idx, bool with_quotes = true) const;
std::vector<std::unordered_map<int, int>> label_per_seg_by_offset;
};

View File

@ -560,7 +560,8 @@ void ObjectFileDB::write_object_file_words(const std::string& output_dir, bool d
// printf("\n");
}
void ObjectFileDB::write_debug_type_analysis(const std::string& output_dir) {
void ObjectFileDB::write_debug_type_analysis(const std::string& output_dir,
const std::string& suffix) {
spdlog::info("- Writing debug type analysis...");
Timer timer;
uint32_t total_bytes = 0, total_files = 0;
@ -568,7 +569,8 @@ void ObjectFileDB::write_debug_type_analysis(const std::string& output_dir) {
for_each_obj([&](ObjectFileData& obj) {
if (obj.linked_data.has_any_functions()) {
auto file_text = obj.linked_data.print_type_analysis_debug();
auto file_name = file_util::combine_path(output_dir, obj.to_unique_name() + "_db.asm");
auto file_name =
file_util::combine_path(output_dir, obj.to_unique_name() + suffix + "_db.asm");
total_bytes += file_text.size();
file_util::write_text_file(file_name, file_text);
@ -588,7 +590,8 @@ void ObjectFileDB::write_debug_type_analysis(const std::string& output_dir) {
*/
void ObjectFileDB::write_disassembly(const std::string& output_dir,
bool disassemble_objects_without_functions,
bool write_json) {
bool write_json,
const std::string& file_suffix) {
spdlog::info("- Writing functions...");
Timer timer;
uint32_t total_bytes = 0, total_files = 0;
@ -599,7 +602,8 @@ void ObjectFileDB::write_disassembly(const std::string& output_dir,
if (obj.linked_data.has_any_functions() || disassemble_objects_without_functions) {
auto file_text = obj.linked_data.print_disassembly();
asm_functions += obj.linked_data.print_asm_function_disassembly(obj.to_unique_name());
auto file_name = file_util::combine_path(output_dir, obj.to_unique_name() + ".asm");
auto file_name =
file_util::combine_path(output_dir, obj.to_unique_name() + file_suffix + ".asm");
if (get_config().analyze_functions && write_json) {
auto json_asm_text = obj.linked_data.to_asm_json(obj.to_unique_name());
@ -811,7 +815,7 @@ void ObjectFileDB::analyze_functions() {
unique_names.insert(name);
if (config.asm_functions_by_name.find(name) != config.asm_functions_by_name.end()) {
func.warnings += "flagged as asm by config\n";
func.warnings += ";; flagged as asm by config\n";
func.suspected_asm = true;
}
}
@ -824,7 +828,7 @@ void ObjectFileDB::analyze_functions() {
if (duplicated_functions.find(name) != duplicated_functions.end()) {
duplicated_functions[name].insert(data.to_unique_name());
func.warnings += "this function exists in multiple non-identical object files";
func.warnings += ";; this function exists in multiple non-identical object files";
}
});
/*
@ -917,54 +921,66 @@ void ObjectFileDB::analyze_functions() {
// type analysis
if (get_config().function_type_prop) {
if (func.guessed_name.kind == FunctionName::FunctionKind::GLOBAL) {
// we're a global named function. This means we're stored in a symbol
auto kv = dts.symbol_types.find(func.guessed_name.function_name);
if (kv != dts.symbol_types.end() && kv->second.arg_count() >= 1) {
if (kv->second.base_type() != "function") {
spdlog::error("Found a function named {} but the symbol has type {}",
func.guessed_name.to_string(), kv->second.print());
assert(false);
}
// GOOD!
func.type = kv->second;
func.attempted_type_analysis = true;
attempted_type_analysis++;
spdlog::info("Type Analysis on {} {}", func.guessed_name.to_string(),
kv->second.print());
if (func.run_type_analysis(kv->second, dts, data.linked_data)) {
successful_type_analysis++;
}
}
} else if (func.guessed_name.kind == FunctionName::FunctionKind::METHOD) {
// it's a method.
try {
auto info =
dts.ts.lookup_method(func.guessed_name.type_name, func.guessed_name.method_id);
if (info.type.arg_count() >= 1) {
if (info.type.base_type() != "function") {
spdlog::error("Found a method named {} but the symbol has type {}",
func.guessed_name.to_string(), info.type.print());
auto hints = get_config().type_hints_by_function_by_idx[func.guessed_name.to_string()];
if (get_config().no_type_analysis_functions_by_name.find(func.guessed_name.to_string()) ==
get_config().no_type_analysis_functions_by_name.end()) {
if (func.guessed_name.kind == FunctionName::FunctionKind::GLOBAL) {
// we're a global named function. This means we're stored in a symbol
auto kv = dts.symbol_types.find(func.guessed_name.function_name);
if (kv != dts.symbol_types.end() && kv->second.arg_count() >= 1) {
if (kv->second.base_type() != "function") {
spdlog::error("Found a function named {} but the symbol has type {}",
func.guessed_name.to_string(), kv->second.print());
assert(false);
}
// GOOD!
func.type = info.type.substitute_for_method_call(func.guessed_name.type_name);
func.type = kv->second;
func.attempted_type_analysis = true;
attempted_type_analysis++;
spdlog::info("Type Analysis on {} {}", func.guessed_name.to_string(),
func.type.print());
if (func.run_type_analysis(func.type, dts, data.linked_data)) {
// spdlog::info("Type Analysis on {} {}", func.guessed_name.to_string(),
// kv->second.print());
if (func.run_type_analysis(kv->second, dts, data.linked_data, hints)) {
successful_type_analysis++;
}
}
} else if (func.guessed_name.kind == FunctionName::FunctionKind::METHOD) {
// it's a method.
try {
auto info =
dts.ts.lookup_method(func.guessed_name.type_name, func.guessed_name.method_id);
if (info.type.arg_count() >= 1) {
if (info.type.base_type() != "function") {
spdlog::error("Found a method named {} but the symbol has type {}",
func.guessed_name.to_string(), info.type.print());
assert(false);
}
// GOOD!
func.type = info.type.substitute_for_method_call(func.guessed_name.type_name);
func.attempted_type_analysis = true;
attempted_type_analysis++;
// spdlog::info("Type Analysis on {} {}",
// func.guessed_name.to_string(),
// func.type.print());
if (func.run_type_analysis(func.type, dts, data.linked_data, hints)) {
successful_type_analysis++;
}
}
} catch (std::runtime_error& e) {
// failed to lookup method info
} catch (std::runtime_error& e) {
// failed to lookup method info
}
}
if (!func.attempted_type_analysis) {
func.warnings.append(";; Failed to try type analysis\n");
}
} else {
func.warnings.append(";; Marked as no type analysis in config\n");
}
}
} else {
asm_funcs++;
func.warnings.append(";; Assembly Function. Analysis passes were not attempted.\n");
}
if (func.basic_blocks.size() > 1 && !func.suspected_asm) {
@ -982,6 +998,10 @@ void ObjectFileDB::analyze_functions() {
if (!func.guessed_name.empty()) {
total_named_functions++;
}
// if (func.guessed_name.to_string() == "reset-and-call") {
// assert(false);
// }
});
spdlog::info("Found {} functions ({} with no control flow)", total_functions,
@ -1015,6 +1035,27 @@ void ObjectFileDB::analyze_functions() {
// }
}
/*!
 * Run expression building on every function, in definition order.
 * Register usage is computed for each function first. Functions that fail expression building
 * get a warning appended. A summary of how many functions passed is logged at the end.
 */
void ObjectFileDB::analyze_expressions() {
  spdlog::info("- Analyzing Expressions...");
  Timer timer;
  int attempts = 0;
  int success = 0;
  for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) {
    (void)segment_id;
    // register usage
    func.run_reg_usage();
    attempts++;
    if (func.build_expression(data.linked_data)) {
      success++;
    } else {
      func.warnings.append(";; Expression analysis failed.\n");
    }
  });

  // don't compute 0/0 if there were no functions to analyze.
  const float pass_percent = attempts > 0 ? 100.f * float(success) / float(attempts) : 0.f;
  spdlog::info(" {}/{} functions passed expression building ({:.2f}%)\n", success, attempts,
               pass_percent);
}
void ObjectFileDB::dump_raw_objects(const std::string& output_dir) {
for_each_obj([&](ObjectFileData& data) {
auto dest = output_dir + "/" + data.to_unique_name();

View File

@ -60,11 +60,13 @@ class ObjectFileDB {
void write_object_file_words(const std::string& output_dir, bool dump_v3_only);
void write_disassembly(const std::string& output_dir,
bool disassemble_objects_without_functions,
bool write_json);
bool write_json,
const std::string& file_suffix = "");
void write_debug_type_analysis(const std::string& output_dir);
void write_debug_type_analysis(const std::string& output_dir, const std::string& suffix = "");
void analyze_functions();
void process_tpages();
void analyze_expressions();
std::string process_game_count();
std::string process_game_text();

View File

@ -34,6 +34,7 @@ void set_config(const std::string& path_to_config_file) {
gConfig.dump_objs = cfg.at("dump_objs").get<bool>();
gConfig.write_func_json = cfg.at("write_func_json").get<bool>();
gConfig.function_type_prop = cfg.at("function_type_prop").get<bool>();
gConfig.analyze_expressions = cfg.at("analyze_expressions").get<bool>();
std::vector<std::string> asm_functions_by_name =
cfg.at("asm_functions_by_name").get<std::vector<std::string>>();
@ -47,8 +48,33 @@ void set_config(const std::string& path_to_config_file) {
gConfig.pair_functions_by_name.insert(x);
}
std::vector<std::string> no_type_analysis_functions_by_name =
cfg.at("no_type_analysis_functions_by_name").get<std::vector<std::string>>();
for (const auto& x : no_type_analysis_functions_by_name) {
gConfig.no_type_analysis_functions_by_name.insert(x);
}
auto bad_inspect = cfg.at("types_with_bad_inspect_methods").get<std::vector<std::string>>();
for (const auto& x : bad_inspect) {
gConfig.bad_inspect_types.insert(x);
}
auto type_hints_file_name = cfg.at("type_hints_file").get<std::string>();
auto type_hints_txt = file_util::read_text_file(file_util::get_file_path({type_hints_file_name}));
auto type_hints_json = nlohmann::json::parse(type_hints_txt, nullptr, true, true);
for (auto& kv : type_hints_json.items()) {
auto& function_name = kv.key();
auto& hints = kv.value();
for (auto& hint : hints) {
auto idx = hint.at(0).get<int>();
for (size_t i = 1; i < hint.size(); i++) {
auto& assignment = hint.at(i);
TypeHint type_hint;
type_hint.reg = Register(assignment.at(0).get<std::string>());
type_hint.type_name = assignment.at(1).get<std::string>();
gConfig.type_hints_by_function_by_idx[function_name][idx].push_back(type_hint);
}
}
}
}

View File

@ -6,6 +6,13 @@
#include <string>
#include <vector>
#include <unordered_set>
#include <unordered_map>
#include "decompiler/Disasm/Register.h"
/*!
 * A single type hint: the given register should be treated as having the named type.
 */
struct TypeHint {
// the register the hint applies to.
Register reg;
// the type for the register, as a string.
std::string type_name;
};
struct Config {
int game_version = -1;
@ -27,8 +34,12 @@ struct Config {
bool dump_objs = false;
bool write_func_json = false;
bool function_type_prop = false;
bool analyze_expressions = false;
std::unordered_set<std::string> asm_functions_by_name;
std::unordered_set<std::string> pair_functions_by_name;
std::unordered_set<std::string> no_type_analysis_functions_by_name;
std::unordered_map<std::string, std::unordered_map<int, std::vector<TypeHint>>>
type_hints_by_function_by_idx;
// ...
};

View File

@ -398,14 +398,13 @@
)
;; gkernel-h
;; todo
; (deftype handle (uint64)
; ()
; :method-count-assert 9
; :size-assert #x8
; :flag-assert #x900000008
; ;; likely a bitfield type
; )
(deftype handle (uint64)
((process (pointer process) :offset 0)
(pid int32 :offset 32)
(u64 uint64 :offset 0)
)
:flag-assert #x900000008
)
;; gkernel-h
(deftype state (protect-frame)

View File

@ -4,17 +4,17 @@
"game_version":1,
// the order here matters (not sure that this is true any more...). KERNEL and GAME should go first
"dgo_names":["CGO/KERNEL.CGO","CGO/GAME.CGO",
"CGO/ENGINE.CGO"
, "CGO/ART.CGO", "DGO/BEA.DGO", "DGO/CIT.DGO", "CGO/COMMON.CGO", "DGO/DAR.DGO", "DGO/DEM.DGO",
"DGO/FIN.DGO", "DGO/INT.DGO", "DGO/JUB.DGO", "DGO/JUN.DGO", "CGO/JUNGLE.CGO", "CGO/L1.CGO", "DGO/FIC.DGO",
"DGO/LAV.DGO", "DGO/MAI.DGO", "CGO/MAINCAVE.CGO", "DGO/MIS.DGO", "DGO/OGR.DGO", "CGO/RACERP.CGO", "DGO/ROB.DGO", "DGO/ROL.DGO",
"DGO/SNO.DGO", "DGO/SUB.DGO", "DGO/SUN.DGO", "CGO/SUNKEN.CGO", "DGO/SWA.DGO", "DGO/TIT.DGO", "DGO/TRA.DGO", "DGO/VI1.DGO",
"DGO/VI2.DGO", "DGO/VI3.DGO", "CGO/VILLAGEP.CGO", "CGO/WATER-AN.CGO"
],
//"dgo_names":["CGO/KERNEL.CGO"],
"CGO/ENGINE.CGO"
, "CGO/ART.CGO", "DGO/BEA.DGO", "DGO/CIT.DGO", "CGO/COMMON.CGO", "DGO/DAR.DGO", "DGO/DEM.DGO",
"DGO/FIN.DGO", "DGO/INT.DGO", "DGO/JUB.DGO", "DGO/JUN.DGO", "CGO/JUNGLE.CGO", "CGO/L1.CGO", "DGO/FIC.DGO",
"DGO/LAV.DGO", "DGO/MAI.DGO", "CGO/MAINCAVE.CGO", "DGO/MIS.DGO", "DGO/OGR.DGO", "CGO/RACERP.CGO", "DGO/ROB.DGO", "DGO/ROL.DGO",
"DGO/SNO.DGO", "DGO/SUB.DGO", "DGO/SUN.DGO", "CGO/SUNKEN.CGO", "DGO/SWA.DGO", "DGO/TIT.DGO", "DGO/TRA.DGO", "DGO/VI1.DGO",
"DGO/VI2.DGO", "DGO/VI3.DGO", "CGO/VILLAGEP.CGO", "CGO/WATER-AN.CGO"
],
"dgo_names_":["CGO/KERNEL.CGO"],
"object_file_names":["TEXT/0COMMON.TXT", "TEXT/1COMMON.TXT", "TEXT/2COMMON.TXT", "TEXT/3COMMON.TXT", "TEXT/4COMMON.TXT",
"TEXT/5COMMON.TXT", "TEXT/6COMMON.TXT"],
"TEXT/5COMMON.TXT", "TEXT/6COMMON.TXT"],
"str_file_names":["STR/BAFCELL.STR", "STR/SWTE4.STR", "STR/SWTE3.STR", "STR/SWTE2.STR", "STR/SWTE1.STR",
"STR/SNRBSBFC.STR", "STR/SNRBIPFC.STR", "STR/SNRBICFC.STR", "STR/ORR3.STR", "STR/ORR2.STR", "STR/MICANNON.STR",
@ -50,10 +50,12 @@
"STR/SAISA.STR","STR/SIHISC.STR","STR/MIIORBS.STR","STR/WAINTROD.STR","STR/SAISD2.STR","STR/GRSOPREB.STR",
"STR/GRSOBBB.STR","STR/SA3INTRO.STR"
],
//"str_file_names":[],
"str_file_names_":[],
"type_hints_file":"decompiler/config/jak1_ntsc_black_label/type_hints.jsonc",
"analyze_functions":true,
"analyze_expressions":false,
"function_type_prop":false,
"write_disassembly":true,
"write_hex_near_instructions":false,
@ -83,10 +85,17 @@
"engine",
"bsp-header",
"joint-anim-matrix",
"part-tracker"
"part-tracker"],
"no_type_analysis_functions_by_name":[
"(method 2 vec4s)", // 128-bit bitfield.
"(method 3 vec4s)", // 128-bit bitfield
"reset-and-call", // stack manipulation
"(method 10 cpu-thread)" // loading saved regs off of the stack.
],
"asm_functions_by_name":[
"asm_functions_by_name":[
// gcommon
"quad-copy!",
@ -482,6 +491,6 @@
"(anon-function 2 ogreboss)"
],
"pair_functions_by_name":["ref", "last", "member", "nmember", "assoc", "assoce", "append!", "delete!", "delete-car!",
"pair_functions_by_name":["ref", "last", "member", "nmember", "assoc", "assoce", "append!", "delete!", "delete-car!",
"insert-cons!", "sort", "unload-package", "(method 4 pair)", "nassoc", "nassoce"]
}

View File

@ -0,0 +1,39 @@
{
"(method 2 handle)":[
[10, ["a3", "process"]],
[11, ["v1", "int"]],
[15, ["gp", "int"]]
],
"(method 3 handle)":[
[10, ["gp", "int"]]
],
"(method 0 cpu-thread)":[
[13, ["v0", "cpu-thread"]]
],
"remove-exit":[
[0, ["s6", "process"]]
],
"(method 0 process)":[
[12, ["a0", "int"]],
[13, ["v0", "process"]]
],
"inspect-process-heap":[
[4, ["s5", "basic"]],
[17, ["s5", "int"]]
],
"return-from-thread-dead":[
[0, ["s6", "process"]]
],
"(method 14 dead-pool)":[
[23, ["v1", "process"]], // bad visit order with #f?
[28, ["s4", "(pointer process-tree)"]] // bug in real game, see gkernel.gc
]
}

View File

@ -88,6 +88,12 @@ int main(int argc, char** argv) {
db.write_debug_type_analysis(out_folder);
}
if (get_config().analyze_expressions) {
db.analyze_expressions();
db.write_disassembly(out_folder, false, false, "_expr");
db.write_debug_type_analysis(out_folder, "_expr");
}
// todo print type summary
// printf("%s\n", get_type_info().get_summary().c_str());

View File

@ -43,8 +43,7 @@ void for_each_in_list(goos::Object& list, T f) {
} // namespace
void DecompilerTypeSystem::parse_type_defs(const std::vector<std::string>& file_path) {
goos::Reader reader;
auto read = reader.read_from_file(file_path);
auto read = m_reader.read_from_file(file_path);
auto data = cdr(read);
for_each_in_list(data, [&](goos::Object& o) {
@ -81,6 +80,12 @@ void DecompilerTypeSystem::parse_type_defs(const std::vector<std::string>& file_
});
}
/*!
 * Parse a TypeSpec from a string using the type system's reader.
 * NOTE(review): the first element of the read result is skipped - presumably a wrapper added by
 * the reader. Confirm against goos::Reader.
 */
TypeSpec DecompilerTypeSystem::parse_type_spec(const std::string& str) {
  auto parsed = m_reader.read_from_string(str);
  // drop the head of the read result, the typespec is the first thing after it.
  auto body = cdr(parsed);
  return parse_typespec(&ts, car(body));
}
std::string DecompilerTypeSystem::dump_symbol_types() {
assert(symbol_add_order.size() == symbols.size());
std::string result;
@ -152,128 +157,126 @@ void DecompilerTypeSystem::add_symbol(const std::string& name, const TypeSpec& t
}
}
TP_Type DecompilerTypeSystem::tp_lca_no_simplify(const TP_Type& existing,
const TP_Type& add,
bool* changed) {
switch (existing.kind) {
case TP_Type::OBJECT_OF_TYPE:
switch (add.kind) {
case TP_Type::OBJECT_OF_TYPE: {
// two normal types, do LCA as normal.
TP_Type result;
result.kind = TP_Type::OBJECT_OF_TYPE;
result.ts = ts.lowest_common_ancestor_reg(existing.ts, add.ts);
*changed = (result.ts != existing.ts);
return result;
}
case TP_Type::TYPE_OBJECT: {
// normal, [type object]. Change type object to less specific "type".
TP_Type result;
result.kind = TP_Type::OBJECT_OF_TYPE;
result.ts = ts.lowest_common_ancestor_reg(existing.ts, ts.make_typespec("type"));
*changed = (result.ts != existing.ts);
return result;
}
case TP_Type::FALSE:
// allow #f anywhere
*changed = false;
return existing;
case TP_Type::NONE:
// allow possibly undefined.
*changed = false;
return existing;
default:
assert(false);
/*!
* Compute the least common ancestor of two TP Types.
*/
TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add, bool* changed) {
// starting from most vague to most specific
// simplest case, no difference.
if (existing == add) {
*changed = false;
return existing;
}
// being sometimes uninitialized should not modify types.
if (add.kind == TP_Type::Kind::UNINITIALIZED) {
*changed = false;
return existing;
}
// replace anything that's uninitialized sometimes.
if (existing.kind == TP_Type::Kind::UNINITIALIZED) {
*changed = true; // existing != none because of previous check.
return add;
}
// similar to before, false as null shouldn't modify types.
if (add.kind == TP_Type::Kind::FALSE_AS_NULL) {
*changed = false;
return existing;
}
// replace any false as nulls.
if (existing.kind == TP_Type::Kind::FALSE_AS_NULL) {
*changed = true; // existing != false because of previous check.
return add;
}
// different values, but the same kind.
if (existing.kind == add.kind) {
switch (existing.kind) {
case TP_Type::Kind::TYPESPEC: {
auto new_result = TP_Type::make_from_typespec(coerce_to_reg_type(ts.lowest_common_ancestor(
existing.get_objects_typespec(), add.get_objects_typespec())));
*changed = (new_result != existing);
return new_result;
}
break;
case TP_Type::TYPE_OBJECT:
switch (add.kind) {
case TP_Type::OBJECT_OF_TYPE: {
TP_Type result;
result.kind = TP_Type::OBJECT_OF_TYPE;
result.ts = ts.lowest_common_ancestor_reg(ts.make_typespec("type"), add.ts);
*changed = true; // changed type
return result;
}
case TP_Type::TYPE_OBJECT: {
// two type objects.
TP_Type result;
result.kind = TP_Type::TYPE_OBJECT;
result.ts = ts.lowest_common_ancestor_reg(existing.ts, add.ts);
*changed = (result.ts != existing.ts);
return result;
}
case TP_Type::FALSE:
// allow #f anywhere
*changed = false;
return existing;
case TP_Type::NONE:
// allow possibly undefined.
*changed = false;
return existing;
default:
assert(false);
}
break;
case TP_Type::FALSE:
switch (add.kind) {
case TP_Type::OBJECT_OF_TYPE:
*changed = true;
return add;
case TP_Type::TYPE_OBJECT:
*changed = true;
return add;
case TP_Type::FALSE:
*changed = false;
return existing;
case TP_Type::NONE:
*changed = false;
return existing;
default:
assert(false);
}
break;
case TP_Type::NONE:
switch (add.kind) {
case TP_Type::OBJECT_OF_TYPE:
case TP_Type::TYPE_OBJECT:
case TP_Type::FALSE:
case TP_Type::METHOD_NEW_OF_OBJECT:
*changed = true;
return add;
case TP_Type::NONE:
*changed = false;
return existing;
default:
assert(false);
}
break;
case TP_Type::METHOD_NEW_OF_OBJECT:
switch (add.kind) {
case TP_Type::METHOD_NEW_OF_OBJECT: {
if (existing.ts == add.ts) {
*changed = false;
return existing;
} else {
assert(false);
}
}
case TP_Type::NONE:
*changed = false;
return existing;
default:
assert(false);
case TP_Type::Kind::TYPE_OF_TYPE_OR_CHILD: {
auto new_result = TP_Type::make_type_object(ts.lowest_common_ancestor(
existing.get_type_objects_typespec(), add.get_type_objects_typespec()));
*changed = (new_result != existing);
return new_result;
}
default:
assert(false);
case TP_Type::Kind::PRODUCT_WITH_CONSTANT:
// we know they are different.
*changed = true;
return TP_Type::make_from_typespec(TypeSpec("int"));
case TP_Type::Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT:
*changed = true;
// todo - there might be cases where we need to LCA the base types??
return TP_Type::make_from_typespec(TypeSpec("object"));
case TP_Type::Kind::OBJECT_NEW_METHOD:
*changed = true;
// this case should never happen I think.
return TP_Type::make_from_typespec(TypeSpec("function"));
case TP_Type::Kind::STRING_CONSTANT: {
auto existing_count = get_format_arg_count(existing.get_string());
auto added_count = get_format_arg_count(add.get_string());
*changed = true;
if (added_count == existing_count) {
return TP_Type::make_from_format_string(existing_count);
} else {
return TP_Type::make_from_typespec(TypeSpec("string"));
}
}
case TP_Type::Kind::INTEGER_CONSTANT:
*changed = true;
return TP_Type::make_from_typespec(TypeSpec("int"));
case TP_Type::Kind::FORMAT_STRING:
if (existing.get_format_string_arg_count() == add.get_format_string_arg_count()) {
*changed = false;
return existing;
} else {
*changed = true;
return TP_Type::make_from_typespec(TypeSpec("string"));
}
case TP_Type::Kind::FALSE_AS_NULL:
case TP_Type::Kind::UNINITIALIZED:
case TP_Type::Kind::DYNAMIC_METHOD_ACCESS:
case TP_Type::Kind::INVALID:
default:
assert(false);
}
} else {
// trying to combine two of different types.
if (existing.can_be_format_string() && add.can_be_format_string()) {
  // One side is a string constant and the other a format string (same-kind pairs are handled
  // above). Merge them by comparing how many format arguments each one takes.
  int existing_count = get_format_arg_count(existing);
  int add_count = get_format_arg_count(add);
  TP_Type result_type;
  if (existing_count == add_count) {
    // same argument count: keep that information as a format string.
    result_type = TP_Type::make_from_format_string(existing_count);
  } else {
    // argument counts disagree: all we know is that it is a string.
    result_type = TP_Type::make_from_typespec(TypeSpec("string"));
  }
  // Report a change only when the merged type differs from what was already there, like every
  // other branch of this function. (This was previously inverted with ==, which flagged a
  // change when nothing changed and hid real changes from the caller.)
  *changed = (result_type != existing);
  return result_type;
}
// otherwise, as an absolute fallback, convert both to TypeSpecs and do TypeSpec LCA
auto new_result =
TP_Type::make_from_typespec(ts.lowest_common_ancestor(existing.typespec(), add.typespec()));
*changed = (new_result != existing);
return new_result;
}
}
/*!
 * Simplify both inputs, then merge them with tp_lca_no_simplify.
 * The changed flag is filled in by tp_lca_no_simplify.
 */
TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add, bool* changed) {
  const TP_Type simple_existing = existing.simplify();
  const TP_Type simple_add = add.simplify();
  return tp_lca_no_simplify(simple_existing, simple_add, changed);
}
/*!
* Find the least common ancestor of an entire typestate.
*/
bool DecompilerTypeSystem::tp_lca(TypeState* combined, const TypeState& add) {
bool result = false;
for (int i = 0; i < 32; i++) {
@ -296,3 +299,26 @@ bool DecompilerTypeSystem::tp_lca(TypeState* combined, const TypeState& add) {
return result;
}
/*!
 * Count how many arguments a format string consumes.
 * Each '~' starts a two-character directive. "~%" (newline) and "~T" (tab) take no argument;
 * every other directive, including a '~' at the very end of the string, counts as one.
 */
int DecompilerTypeSystem::get_format_arg_count(const std::string& str) {
  const size_t len = str.length();
  int count = 0;
  size_t pos = 0;
  while (pos < len) {
    if (str[pos] != '~') {
      ++pos;
      continue;
    }
    // the character after the tilde selects the directive and is always consumed with it.
    const size_t directive = pos + 1;
    const bool takes_no_arg =
        directive < len && (str[directive] == '%' || str[directive] == 'T');
    if (!takes_no_arg) {
      ++count;
    }
    pos = directive + 1;
  }
  return count;
}
/*!
 * Get the number of format arguments for a TP_Type.
 * A string constant is scanned for directives; anything else is read through
 * get_format_string_arg_count().
 */
int DecompilerTypeSystem::get_format_arg_count(const TP_Type& type) {
  if (!type.is_constant_string()) {
    return type.get_format_string_arg_count();
  }
  return get_format_arg_count(type.get_string());
}

View File

@ -3,6 +3,7 @@
#include "common/type_system/TypeSystem.h"
#include "decompiler/Disasm/Register.h"
#include "common/goos/Reader.h"
struct TP_Type;
struct TypeState;
@ -30,6 +31,7 @@ class DecompilerTypeSystem {
void add_symbol(const std::string& name, const TypeSpec& type_spec);
void parse_type_defs(const std::vector<std::string>& file_path);
TypeSpec parse_type_spec(const std::string& str);
void add_type_flags(const std::string& name, u64 flags);
void add_type_parent(const std::string& child, const std::string& parent);
std::string dump_symbol_types();
@ -38,6 +40,8 @@ class DecompilerTypeSystem {
TP_Type tp_lca(const TP_Type& existing, const TP_Type& add, bool* changed);
TP_Type tp_lca_no_simplify(const TP_Type& existing, const TP_Type& add, bool* changed);
bool tp_lca(TypeState* combined, const TypeState& add);
int get_format_arg_count(const std::string& str);
int get_format_arg_count(const TP_Type& type);
struct {
bool allow_pair;
std::string current_method_type;
@ -46,6 +50,9 @@ class DecompilerTypeSystem {
current_method_type.clear();
}
} type_prop_settings;
private:
goos::Reader m_reader;
};
#endif // JAK_DECOMPILERTYPESYSTEM_H

View File

@ -1,46 +1,6 @@
#include "TP_Type.h"
#include "third-party/fmt/core.h"
/*!
 * Reduce one of the special TP_Type kinds to a plain object-of-type TP_Type.
 * Meant as the fallback when the fancier type analysis was attempted but did not work out.
 * PRODUCT and METHOD_NEW_OF_OBJECT keep their typespec, OBJ_PLUS_PRODUCT becomes "none",
 * and every other kind is returned unchanged.
 */
TP_Type TP_Type::simplify() const {
  if (kind == PRODUCT || kind == METHOD_NEW_OF_OBJECT) {
    return TP_Type(ts);
  }
  if (kind == OBJ_PLUS_PRODUCT) {
    return TP_Type(TypeSpec("none"));
  }
  return *this;
}
/*!
 * Render this TP_Type as text. Special kinds are wrapped in square brackets.
 * Asserts on a kind it does not know how to print.
 */
std::string TP_Type::print() const {
  switch (kind) {
    // kinds that show their typespec
    case OBJECT_OF_TYPE:
      return ts.print();
    case TYPE_OBJECT:
      return fmt::format("[{}]", ts.print());
    case PRODUCT:
      return fmt::format("[{} x {}]", ts.print(), multiplier);
    case OBJ_PLUS_PRODUCT:
      return fmt::format("[{} + int x {}]", ts.print(), multiplier);
    // kinds with fixed text
    case FALSE:
      return "[#f]";
    case NONE:
      return "[none]";
    case PARTIAL_METHOD_TABLE_ACCESS:
      return "[[vtable-access]]";
    case METHOD_NEW_OF_OBJECT:
      return "[(method object new)]";
    default:
      assert(false);
  }
}
std::string TypeState::print_gpr_masked(u32 mask) const {
std::string result;
for (int i = 0; i < 32; i++) {
@ -52,4 +12,108 @@ std::string TypeState::print_gpr_masked(u32 mask) const {
}
}
return result;
}
}
/*!
 * Render this TP_Type as text. A plain typespec prints as itself; the other kinds print a
 * short description (most of them inside angle brackets).
 * Asserts on INVALID or an unknown kind.
 */
std::string TP_Type::print() const {
  switch (kind) {
    // kinds that show their typespec
    case Kind::TYPESPEC:
      return m_ts.print();
    case Kind::TYPE_OF_TYPE_OR_CHILD:
      return fmt::format("<the type {}>", m_ts.print());
    case Kind::OBJECT_NEW_METHOD:
      return fmt::format("<(object-new) for {}>", m_ts.print());
    case Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT:
      return fmt::format("<{} + (value x {})>", m_ts.print(), m_int);

    // kinds that show their integer or string payload
    case Kind::PRODUCT_WITH_CONSTANT:
      return fmt::format("<value x {}>", m_int);
    case Kind::INTEGER_CONSTANT:
      return fmt::format("<integer {}>", m_int);
    case Kind::FORMAT_STRING:
      return fmt::format("<string with {} args>", m_int);
    case Kind::STRING_CONSTANT:
      return fmt::format("<string \"{}\">", m_str);

    // kinds with fixed text
    case Kind::FALSE_AS_NULL:
      return "'#f";
    case Kind::UNINITIALIZED:
      return "<uninitialized>";
    case Kind::DYNAMIC_METHOD_ACCESS:
      return "<dynamic-method-access>";

    case Kind::INVALID:
    default:
      assert(false);
  }
}
/*!
 * Equality: the kinds must match, and then only the payload that is meaningful for that kind
 * (typespec, integer, and/or string) is compared.
 * Asserts on INVALID or an unknown kind.
 */
bool TP_Type::operator==(const TP_Type& other) const {
  if (other.kind != kind) {
    return false;
  }

  switch (kind) {
    // no payload: same kind means equal.
    case Kind::FALSE_AS_NULL:
    case Kind::UNINITIALIZED:
    case Kind::DYNAMIC_METHOD_ACCESS:
      return true;

    // typespec payload only.
    case Kind::TYPESPEC:
    case Kind::TYPE_OF_TYPE_OR_CHILD:
    case Kind::OBJECT_NEW_METHOD:
      return m_ts == other.m_ts;

    // integer payload only.
    case Kind::PRODUCT_WITH_CONSTANT:
    case Kind::INTEGER_CONSTANT:
    case Kind::FORMAT_STRING:
      return m_int == other.m_int;

    // typespec and integer.
    case Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT:
      return m_ts == other.m_ts && m_int == other.m_int;

    // string payload only.
    case Kind::STRING_CONSTANT:
      return m_str == other.m_str;

    case Kind::INVALID:
    default:
      assert(false);
  }
}
/*!
 * Inequality, defined as the negation of operator==.
 */
bool TP_Type::operator!=(const TP_Type& other) const {
  const bool same = operator==(other);
  return !same;
}
/*!
 * Convert to a plain TypeSpec, dropping any extra information the kind carried.
 * Asserts on INVALID or an unknown kind.
 */
TypeSpec TP_Type::typespec() const {
  switch (kind) {
    case Kind::TYPESPEC:
      return m_ts;

    case Kind::TYPE_OF_TYPE_OR_CHILD:
      return TypeSpec("type");

    case Kind::FALSE_AS_NULL:
      return TypeSpec("symbol");

    case Kind::UNINITIALIZED:
      return TypeSpec("none");

    case Kind::PRODUCT_WITH_CONSTANT:
    case Kind::INTEGER_CONSTANT:
      return TypeSpec("int");

    case Kind::STRING_CONSTANT:
    case Kind::FORMAT_STRING:
      return TypeSpec("string");

    case Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT:
      // this may be part of an array access, so the real type is unknown. Using this as a
      // typespec is probably not a good idea, so stay deliberately vague.
    case Kind::DYNAMIC_METHOD_ACCESS:
      return TypeSpec("object");

    case Kind::OBJECT_NEW_METHOD:
      // also vaguer than strictly needed: we do not want to guess the return type wrongly,
      // and this should not really be used as a typespec.
      return TypeSpec("function");

    case Kind::INVALID:
    default:
      assert(false);
  }
}

View File

@ -5,62 +5,234 @@
#include "common/common_types.h"
#include "decompiler/Disasm/Register.h"
struct TP_Type {
enum Kind {
OBJECT_OF_TYPE,
TYPE_OBJECT,
FALSE,
NONE,
PRODUCT,
OBJ_PLUS_PRODUCT,
PARTIAL_METHOD_TABLE_ACCESS, // type + method_number * 4
METHOD_NEW_OF_OBJECT
} kind = NONE;
// in the case that we are type_object, just store the type name in a single arg ts.
TypeSpec ts;
int multiplier;
// struct TP_Type {
// enum Kind {
// OBJECT_OF_TYPE,
// TYPE_OBJECT,
// FALSE,
// NONE,
// PRODUCT,
// OBJ_PLUS_PRODUCT,
// PARTIAL_METHOD_TABLE_ACCESS, // type + method_number * 4
// METHOD_NEW_OF_OBJECT,
// STRING
// } kind = NONE;
// // in the case that we are type_object, just store the type name in a single arg ts.
// TypeSpec ts;
// int multiplier;
// std::string str_data;
//
// TP_Type() = default;
// explicit TP_Type(const TypeSpec& _ts) {
// kind = OBJECT_OF_TYPE;
// ts = _ts;
// }
//
// TP_Type simplify() const;
// std::string print() const;
//
// bool is_object_of_type() const { return kind == TYPE_OBJECT || ts == TypeSpec("type"); }
//
// TypeSpec as_typespec() const {
// switch (kind) {
// case OBJECT_OF_TYPE:
// return ts;
// case TYPE_OBJECT:
// return TypeSpec("type");
// case FALSE:
// return TypeSpec("symbol");
// case NONE:
// return TypeSpec("none");
// case PRODUCT:
// case METHOD_NEW_OF_OBJECT:
// return ts;
// default:
// assert(false);
// }
// }
//
// static TP_Type make_partial_method_table_access(TypeSpec ts) {
// TP_Type result;
// result.kind = PARTIAL_METHOD_TABLE_ACCESS;
// result.ts = std::move(ts);
// return result;
// }
//
// static TP_Type make_type_object(const std::string& name) {
// TP_Type result;
// result.kind = TYPE_OBJECT;
// result.ts = TypeSpec(name);
// return result;
// }
//
// static TP_Type make_string_object(const std::string& str) {
// TP_Type result;
// result.kind = STRING;
// result.ts = TypeSpec("string");
// result.str_data = str;
// return result;
// }
//
// static TP_Type make_none() {
// TP_Type result;
// result.kind = NONE;
// return result;
// }
//
// bool operator==(const TP_Type& other) const;
//};
/*!
* A TP_Type is a specialized typespec used in the type propagation algorithm.
* It is basically a normal typespec plus some optional information.
* It should always use register types.
*/
class TP_Type {
public:
// The kind selects which extra information, if any, accompanies the typespec.
// A default-constructed TP_Type has kind UNINITIALIZED.
enum class Kind {
TYPESPEC, // just a normal typespec
TYPE_OF_TYPE_OR_CHILD, // a type object, of the given type or a child type.
FALSE_AS_NULL, // the GOAL "false" object, possibly used as a null.
UNINITIALIZED, // representing data which is uninitialized.
PRODUCT_WITH_CONSTANT, // representing: (val * multiplier)
OBJECT_PLUS_PRODUCT_WITH_CONSTANT, // address: obj + (val * multiplier)
OBJECT_NEW_METHOD, // the method new of object, as used in an (object-new) or similar.
STRING_CONSTANT, // a string that's part of the string pool
FORMAT_STRING, // a string with a given number of format arguments
INTEGER_CONSTANT, // a constant integer.
DYNAMIC_METHOD_ACCESS, // partial access into a method table (original comment was cut off here; presumably a dynamic vtable lookup - TODO confirm)
INVALID
} kind = Kind::UNINITIALIZED;
TP_Type() = default;
explicit TP_Type(const TypeSpec& _ts) {
kind = OBJECT_OF_TYPE;
ts = _ts;
}
TP_Type simplify() const;
std::string print() const;
bool operator==(const TP_Type& other) const;
bool operator!=(const TP_Type& other) const;
TypeSpec typespec() const;
bool is_object_of_type() const { return kind == TYPE_OBJECT || ts == TypeSpec("type"); }
/*! Is this a string constant? */
bool is_constant_string() const {
  return Kind::STRING_CONSTANT == kind;
}

/*! Is this a constant integer (of any value)? */
bool is_integer_constant() const {
  return Kind::INTEGER_CONSTANT == kind;
}

/*! Is this a constant integer with exactly the given value? */
bool is_integer_constant(int64_t value) const {
  return kind == Kind::INTEGER_CONSTANT && value == m_int;
}

/*! Is this a (val * multiplier) product? */
bool is_product() const {
  return Kind::PRODUCT_WITH_CONSTANT == kind;
}

/*! Is this a (val * multiplier) product with exactly the given multiplier? */
bool is_product_with(int64_t value) const {
  return is_product() && value == m_int;
}

/*! Is this a format string with a known argument count? */
bool is_format_string() const {
  return Kind::FORMAT_STRING == kind;
}

/*! Can this be treated as a format string (a format string or a string constant)? */
bool can_be_format_string() const {
  return kind == Kind::FORMAT_STRING || kind == Kind::STRING_CONSTANT;
}
TypeSpec as_typespec() const {
switch (kind) {
case OBJECT_OF_TYPE:
return ts;
case TYPE_OBJECT:
return TypeSpec("type");
case FALSE:
return TypeSpec("symbol");
case NONE:
return TypeSpec("none");
case PRODUCT:
case METHOD_NEW_OF_OBJECT:
return ts;
default:
assert(false);
}
int get_format_string_arg_count() const {
assert(is_format_string());
return m_int;
}
static TP_Type make_partial_method_table_access() {
const std::string& get_string() const {
assert(is_constant_string());
return m_str;
}
static TP_Type make_from_format_string(int n_args) {
TP_Type result;
result.kind = PARTIAL_METHOD_TABLE_ACCESS;
result.kind = Kind::FORMAT_STRING;
result.m_int = n_args;
return result;
}
static TP_Type make_type_object(const std::string& name) {
static TP_Type make_from_typespec(const TypeSpec& ts) {
TP_Type result;
result.kind = TYPE_OBJECT;
result.ts = TypeSpec(name);
result.kind = Kind::TYPESPEC;
result.m_ts = ts;
return result;
}
/*! Make a STRING_CONSTANT holding a copy of the given string. */
static TP_Type make_from_string(const std::string& str) {
  TP_Type string_constant;
  string_constant.m_str = str;
  string_constant.kind = Kind::STRING_CONSTANT;
  return string_constant;
}
/*! Make a TYPE_OF_TYPE_OR_CHILD: a type object for the given type. */
static TP_Type make_type_object(const TypeSpec& type) {
  TP_Type type_object;
  type_object.m_ts = type;
  type_object.kind = Kind::TYPE_OF_TYPE_OR_CHILD;
  return type_object;
}
/*! Make a FALSE_AS_NULL, representing the GOAL "false" object. */
static TP_Type make_false() {
  TP_Type false_type;
  false_type.kind = Kind::FALSE_AS_NULL;
  return false_type;
}
/*! Make an UNINITIALIZED, representing data that has not been set. */
static TP_Type make_uninitialized() {
  TP_Type uninitialized;
  uninitialized.kind = Kind::UNINITIALIZED;
  return uninitialized;
}
/*! Make an INTEGER_CONSTANT with the given value. */
static TP_Type make_from_integer(int64_t value) {
  TP_Type integer_constant;
  integer_constant.m_int = value;
  integer_constant.kind = Kind::INTEGER_CONSTANT;
  return integer_constant;
}
/*! Make a PRODUCT_WITH_CONSTANT: some value times the given constant multiplier. */
static TP_Type make_from_product(int64_t multiplier) {
  TP_Type product;
  product.m_int = multiplier;
  product.kind = Kind::PRODUCT_WITH_CONSTANT;
  return product;
}
/*! Make a DYNAMIC_METHOD_ACCESS. It carries no payload. */
static TP_Type make_partial_dyanmic_vtable_access() {
  TP_Type method_access;
  method_access.kind = Kind::DYNAMIC_METHOD_ACCESS;
  return method_access;
}
/*! Make an OBJECT_NEW_METHOD (the method new of object) tagged with the given typespec. */
static TP_Type make_object_new(const TypeSpec& ts) {
  TP_Type object_new;
  object_new.m_ts = ts;
  object_new.kind = Kind::OBJECT_NEW_METHOD;
  return object_new;
}
/*! Make an OBJECT_PLUS_PRODUCT_WITH_CONSTANT: an address of the form obj + (val * multiplier). */
static TP_Type make_object_plus_product(const TypeSpec& ts, int64_t multiplier) {
  TP_Type sum;
  sum.m_int = multiplier;
  sum.m_ts = ts;
  sum.kind = Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT;
  return sum;
}
/*! The stored typespec. Only valid for (and asserts on) the TYPESPEC kind. */
const TypeSpec& get_objects_typespec() const {
  assert(Kind::TYPESPEC == kind);
  return m_ts;
}
/*! The stored typespec. Only valid for (and asserts on) the TYPE_OF_TYPE_OR_CHILD kind. */
const TypeSpec& get_type_objects_typespec() const {
  assert(Kind::TYPE_OF_TYPE_OR_CHILD == kind);
  return m_ts;
}
/*! The stored typespec. Only valid for (and asserts on) the OBJECT_NEW_METHOD kind. */
const TypeSpec& get_method_new_object_typespec() const {
  assert(Kind::OBJECT_NEW_METHOD == kind);
  return m_ts;
}
/*! The stored typespec. Only valid for (and asserts on) the OBJECT_PLUS_PRODUCT_WITH_CONSTANT kind. */
const TypeSpec& get_obj_plus_const_mult_typespec() const {
  assert(Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT == kind);
  return m_ts;
}
/*!
 * The constant multiplier. Only valid for (and asserts on) the PRODUCT_WITH_CONSTANT and
 * OBJECT_PLUS_PRODUCT_WITH_CONSTANT kinds.
 */
uint64_t get_multiplier() const {
  assert(Kind::PRODUCT_WITH_CONSTANT == kind ||
         Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT == kind);
  return m_int;
}
private:
TypeSpec m_ts;
std::string m_str;
int64_t m_int = 0;
};
struct TypeState {

2
doc/expressions_todo.txt Normal file
View File

@ -0,0 +1,2 @@
order of floating point argument evaluation is different
GPR -> FPR conversions should not happen silently

View File

@ -373,7 +373,7 @@
)
(deftype handle (uint64)
((process (pointer process) :offset 0) ;; todo, more specific type
((process (pointer process) :offset 0)
(pid int32 :offset 32)
(u64 uint64 :offset 0)
)