diff --git a/common/type_system/TypeSpec.h b/common/type_system/TypeSpec.h index cba693ca67..885ebe5771 100644 --- a/common/type_system/TypeSpec.h +++ b/common/type_system/TypeSpec.h @@ -51,6 +51,7 @@ class TypeSpec { size_t arg_count() const { return m_arguments.size(); } const TypeSpec& get_arg(int idx) const { return m_arguments.at(idx); } + TypeSpec& get_arg(int idx) { return m_arguments.at(idx); } const TypeSpec& last_arg() const { assert(!m_arguments.empty()); return m_arguments.back(); diff --git a/common/type_system/TypeSystem.cpp b/common/type_system/TypeSystem.cpp index e5c1ad5830..74fde36fd4 100644 --- a/common/type_system/TypeSystem.cpp +++ b/common/type_system/TypeSystem.cpp @@ -1095,6 +1095,11 @@ std::string TypeSystem::lca_base(const std::string& a, const std::string& b) { */ TypeSpec TypeSystem::lowest_common_ancestor(const TypeSpec& a, const TypeSpec& b) { auto result = make_typespec(lca_base(a.base_type(), b.base_type())); + if (result == TypeSpec("function") && a.m_arguments.size() == 2 && b.m_arguments.size() == 2 && + (a.m_arguments.at(0) == TypeSpec("_varargs_") || + b.m_arguments.at(0) == TypeSpec("_varargs_"))) { + return TypeSpec("function"); + } if (!a.m_arguments.empty() && !b.m_arguments.empty() && a.m_arguments.size() == b.m_arguments.size()) { // recursively add arguments @@ -1184,10 +1189,21 @@ bool TypeSystem::reverse_deref(const ReverseDerefInputInfo& input, assert(di.mem_deref); if (offset_into_elt == 0) { if (input.mem_deref) { - path->push_back(token); - *addr_of = false; - *result_type = base_type; - return true; + // todo - this is a hack to let quadword loads always succeed because we don't support it + // correctly at this point. 
+ if (input.load_size == 16 || + (di.load_size == input.load_size && di.sign_extend == input.sign_extend)) { + path->push_back(token); + *addr_of = false; + *result_type = base_type; + return true; + } else { + if (debug_reverse_deref) { + fmt::print("load size {} {}, sext {} {}, input {}\n", di.load_size, input.load_size, + di.sign_extend, input.sign_extend, input.input_type.print().c_str()); + } + return false; + } } else { path->push_back(token); *addr_of = true; diff --git a/common/type_system/TypeSystem.h b/common/type_system/TypeSystem.h index fd0f1adbfb..e4efe9a57a 100644 --- a/common/type_system/TypeSystem.h +++ b/common/type_system/TypeSystem.h @@ -46,6 +46,16 @@ struct ReverseDerefInfo { enum Kind { INDEX, FIELD } kind; std::string name; int index; + std::string print() const { + switch (kind) { + case INDEX: + return std::to_string(index); + case FIELD: + return name; + default: + assert(false); + } + } }; TypeSpec result_type; diff --git a/decompiler/CMakeLists.txt b/decompiler/CMakeLists.txt index 4573cba883..8dd17f2878 100644 --- a/decompiler/CMakeLists.txt +++ b/decompiler/CMakeLists.txt @@ -23,7 +23,11 @@ add_executable(decompiler data/game_count.cpp Function/TypeAnalysis.cpp IR/IR_TypeAnalysis.cpp - util/TP_Type.cpp) + util/TP_Type.cpp + Function/RegUsage.cpp + Function/ExpressionBuilder.cpp + Function/ExpressionStack.cpp + IR/IR_ExpressionStack.cpp) target_link_libraries(decompiler goos diff --git a/decompiler/Disasm/Register.cpp b/decompiler/Disasm/Register.cpp index 93ba6f1681..17ed886671 100644 --- a/decompiler/Disasm/Register.cpp +++ b/decompiler/Disasm/Register.cpp @@ -109,6 +109,26 @@ Register::Register(Reg::RegisterKind kind, uint32_t num) { } } +Register::Register(const std::string& name) { + // first try gprs, + for (int i = 0; i < Reg::MAX_GPR; i++) { + if (name == gpr_names[i]) { + id = (Reg::GPR << 8) | i; + return; + } + } + + // next fprs + for (int i = 0; i < 32; i++) { + if (name == fpr_names[i]) { + id = (Reg::FPR << 8) | i; + 
return; + } + } + + throw std::runtime_error("Unknown register name: " + name); +} + /*! * Convert to string. The register must be valid. */ diff --git a/decompiler/Disasm/Register.h b/decompiler/Disasm/Register.h index 2578397176..4f1ba1dc36 100644 --- a/decompiler/Disasm/Register.h +++ b/decompiler/Disasm/Register.h @@ -127,6 +127,7 @@ class Register { public: Register() = default; Register(Reg::RegisterKind kind, uint32_t num); + Register(const std::string& name); const char* to_charp() const; std::string to_string() const; Reg::RegisterKind get_kind() const; diff --git a/decompiler/Function/BasicBlocks.h b/decompiler/Function/BasicBlocks.h index df2c7bf04e..452e1833c8 100644 --- a/decompiler/Function/BasicBlocks.h +++ b/decompiler/Function/BasicBlocks.h @@ -10,13 +10,17 @@ class LinkedObjectFile; class Function; +using RegSet = std::unordered_set; + struct BasicBlock { int start_word; int end_word; TypeState init_types; + // [start, end) int start_basic_op = -1; int end_basic_op = -1; + int basic_op_size() const { return end_basic_op - start_basic_op; } std::string label_name; @@ -24,6 +28,15 @@ struct BasicBlock { int succ_ft = -1; int succ_branch = -1; + std::vector live, dead; + RegSet use, defs; + RegSet input, output; + + bool op_has_reg_live_out(int basic_op_idx, Register reg) { + auto& lv = live.at(basic_op_idx - start_basic_op); + return lv.find(reg) != lv.end(); + } + BasicBlock(int _start_word, int _end_word) : start_word(_start_word), end_word(_end_word) {} }; diff --git a/decompiler/Function/CfgVtx.cpp b/decompiler/Function/CfgVtx.cpp index 67ece7540b..b9fa854f55 100644 --- a/decompiler/Function/CfgVtx.cpp +++ b/decompiler/Function/CfgVtx.cpp @@ -1850,7 +1850,7 @@ std::shared_ptr build_cfg(const LinkedObjectFile& file, int se } if (!cfg->is_fully_resolved()) { - func.warnings += "Failed to fully resolve CFG\n"; + func.warnings += ";; Failed to fully resolve CFG\n"; } return cfg; diff --git a/decompiler/Function/ExpressionBuilder.cpp 
b/decompiler/Function/ExpressionBuilder.cpp new file mode 100644 index 0000000000..aa1f111309 --- /dev/null +++ b/decompiler/Function/ExpressionBuilder.cpp @@ -0,0 +1,53 @@ +#include "Function.h" +#include "decompiler/IR/IR.h" +#include "ExpressionStack.h" + +namespace { +bool expressionize_begin(IR_Begin* begin, LinkedObjectFile& file) { + ExpressionStack stack; + // todo - this might need to run multiple times? + for (auto& op : begin->forms) { + op->expression_stack(stack, file); + } + begin->forms = stack.get_result(); + return true; +} +} // namespace + +bool Function::build_expression(LinkedObjectFile& file) { + if (!ir) { + return false; + } + + try { + // first we get a list of begins, which are where we can build up expressions. + // we want to start with innermost begins because we'll probably need to do some fixing up + // or more complicated analysis to do as good as possible on outer begins. + auto all_children = ir->get_all_ir(file); + std::vector all_begins; + for (auto i = all_children.size(); i-- > 0;) { + auto as_begin = dynamic_cast(all_children.at(i).get()); + if (as_begin) { + all_begins.push_back(as_begin); + } + } + + // the top level may also be a begin + auto as_begin = dynamic_cast(ir.get()); + if (as_begin) { + all_begins.push_back(as_begin); + } + + // turn each begin into an expression + for (auto b : all_begins) { + if (!expressionize_begin(b, file)) { + return false; + } + } + } catch (std::exception& e) { + printf("build_expression failed on %s due to %s\n", guessed_name.to_string().c_str(), e.what()); + return false; + } + + return true; +} \ No newline at end of file diff --git a/decompiler/Function/ExpressionStack.cpp b/decompiler/Function/ExpressionStack.cpp new file mode 100644 index 0000000000..7e2bbca020 --- /dev/null +++ b/decompiler/Function/ExpressionStack.cpp @@ -0,0 +1,80 @@ +#include "third-party/fmt/core.h" +#include "ExpressionStack.h" + +std::string ExpressionStack::StackEntry::print(LinkedObjectFile& file) { + return 
fmt::format("d: {} {} <- {}", display, destination.to_charp(), source->print(file)); +} + +std::string ExpressionStack::print(LinkedObjectFile& file) { + std::string result; + for (auto& x : m_stack) { + result += x.print(file); + result += '\n'; + } + return result; +} + +void ExpressionStack::set(Register reg, std::shared_ptr value) { + StackEntry entry; + entry.display = true; // by default, we should display everything! + entry.destination = reg; + entry.source = std::move(value); + m_stack.push_back(entry); +} + +bool ExpressionStack::is_single_expression() { + int count = 0; + for (auto& e : m_stack) { + if (e.display) { + count++; + } + } + return count == 1; +} + +std::shared_ptr ExpressionStack::get(Register reg) { + // see if the stack top is this register... + if (!display_stack_empty()) { + auto& top = get_display_stack_top(); + if (top.destination == reg) { + // yep. We can compact! + top.display = false; + return top.source; + } + } + return std::make_shared(reg, -1); +} + +std::vector> ExpressionStack::get_result() { + std::vector> result; + + for (auto& e : m_stack) { + if (!e.display) { + continue; + } + auto dst_reg = std::make_shared(e.destination, -1); + auto op = std::make_shared(IR_Set::EXPR, dst_reg, e.source); + result.push_back(op); + } + + return result; +} + +bool ExpressionStack::display_stack_empty() { + for (auto& e : m_stack) { + if (e.display) { + return false; + } + } + return true; +} + +ExpressionStack::StackEntry& ExpressionStack::get_display_stack_top() { + for (size_t i = m_stack.size(); i-- > 0;) { + auto& entry = m_stack.at(i); + if (entry.display) { + return entry; + } + } + assert(false); +} \ No newline at end of file diff --git a/decompiler/Function/ExpressionStack.h b/decompiler/Function/ExpressionStack.h new file mode 100644 index 0000000000..d5deeb39a6 --- /dev/null +++ b/decompiler/Function/ExpressionStack.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include "decompiler/IR/IR.h" +#include 
"decompiler/Disasm/Register.h" +#include "decompiler/util/TP_Type.h" + +class ExpressionStack { + public: + ExpressionStack() = default; + void set(Register reg, std::shared_ptr value); + std::shared_ptr get(Register reg); + bool is_single_expression(); + std::string print(LinkedObjectFile& file); + std::vector> get_result(); + + private: + struct StackEntry { + bool display = true; // should this appear in the output? + Register destination; // what register we are setting + std::shared_ptr source; // the value we are setting the register to. + // TP_Type type; + std::string print(LinkedObjectFile& file); + }; + std::vector m_stack; + + bool display_stack_empty(); + StackEntry& get_display_stack_top(); +}; \ No newline at end of file diff --git a/decompiler/Function/Function.cpp b/decompiler/Function/Function.cpp index fdec77d5ef..cf38418282 100644 --- a/decompiler/Function/Function.cpp +++ b/decompiler/Function/Function.cpp @@ -71,7 +71,7 @@ void Function::analyze_prologue(const LinkedObjectFile& file) { if (instr.kind == InstructionKind::SW && instr.get_src(0).get_reg() == make_gpr(Reg::SP)) { printf("[Warning] %s Suspected ASM function based on this instruction in prologue: %s\n", guessed_name.to_string().c_str(), instr.to_string(file).c_str()); - warnings += "Flagged as ASM function because of " + instr.to_string(file) + "\n"; + warnings += ";; Flagged as ASM function because of " + instr.to_string(file) + "\n"; suspected_asm = true; return; } @@ -94,7 +94,7 @@ void Function::analyze_prologue(const LinkedObjectFile& file) { if (instr.kind == InstructionKind::SD && instr.get_src(0).get_reg() == make_gpr(Reg::S7)) { spdlog::warn("{} Suspected ASM function based on this instruction in prologue: {}\n", guessed_name.to_string(), instr.to_string(file)); - warnings += "Flagged as ASM function because of " + instr.to_string(file) + "\n"; + warnings += ";; Flagged as ASM function because of " + instr.to_string(file) + "\n"; suspected_asm = true; return; } @@ -134,7 
+134,7 @@ void Function::analyze_prologue(const LinkedObjectFile& file) { "[Warning] %s Stack Zeroing Detected in Function::analyze_prologue, prologue may be " "wrong\n", guessed_name.to_string().c_str()); - warnings += "Stack Zeroing Detected, prologue may be wrong\n"; + warnings += ";; Stack Zeroing Detected, prologue may be wrong\n"; expect_nothing_after_gprs = true; break; } @@ -146,7 +146,7 @@ void Function::analyze_prologue(const LinkedObjectFile& file) { printf( "[Warning] %s Suspected ASM function because register $a0 was stored on the stack!\n", guessed_name.to_string().c_str()); - warnings += "a0 on stack detected, flagging as asm\n"; + warnings += ";; a0 on stack detected, flagging as asm\n"; return; } @@ -165,7 +165,7 @@ void Function::analyze_prologue(const LinkedObjectFile& file) { printf("[Warning] %s Suspected asm function that isn't flagged due to stack store %s\n", guessed_name.to_string().c_str(), instructions.at(idx + i).to_string(file).c_str()); - warnings += "Suspected asm function due to stack store: " + + warnings += ";; Suspected asm function due to stack store: " + instructions.at(idx + i).to_string(file) + "\n"; return; } @@ -195,7 +195,7 @@ void Function::analyze_prologue(const LinkedObjectFile& file) { printf("[Warning] %s Suspected asm function that isn't flagged due to stack store %s\n", guessed_name.to_string().c_str(), instructions.at(idx + i).to_string(file).c_str()); - warnings += "Suspected asm function due to stack store: " + + warnings += ";; Suspected asm function due to stack store: " + instructions.at(idx + i).to_string(file) + "\n"; return; } @@ -356,7 +356,7 @@ void Function::check_epilogue(const LinkedObjectFile& file) { "[Warning] %s Double Return Epilogue Hack! 
This is probably an ASM function in " "disguise\n", guessed_name.to_string().c_str()); - warnings += "Double Return Epilogue - this is probably an ASM function\n"; + warnings += ";; Double Return Epilogue - this is probably an ASM function\n"; } // delay slot should be daddiu sp, sp, offset assert(is_gpr_2_imm_int(instructions.at(idx), InstructionKind::DADDIU, make_gpr(Reg::SP), diff --git a/decompiler/Function/Function.h b/decompiler/Function/Function.h index 34e1f47bcb..b1fd56ed58 100644 --- a/decompiler/Function/Function.h +++ b/decompiler/Function/Function.h @@ -7,10 +7,13 @@ #include #include #include +#include #include "decompiler/Disasm/Instruction.h" +#include "decompiler/Disasm/Register.h" #include "BasicBlocks.h" #include "CfgVtx.h" #include "common/type_system/TypeSpec.h" +#include "decompiler/config.h" class DecompilerTypeSystem; class IR_Atomic; @@ -79,7 +82,10 @@ class Function { int get_reginfo_basic_op_count(); bool run_type_analysis(const TypeSpec& my_type, DecompilerTypeSystem& dts, - LinkedObjectFile& file); + LinkedObjectFile& file, + const std::unordered_map>& hints); + void run_reg_usage(); + bool build_expression(LinkedObjectFile& file); BlockTopologicalSort bb_topo_sort(); TypeSpec type; diff --git a/decompiler/Function/RegUsage.cpp b/decompiler/Function/RegUsage.cpp new file mode 100644 index 0000000000..156590d92a --- /dev/null +++ b/decompiler/Function/RegUsage.cpp @@ -0,0 +1,165 @@ +#include "Function.h" +#include "decompiler/IR/IR.h" + +namespace { +bool in_set(RegSet& set, const Register& obj) { + return set.find(obj) != set.end(); +} + +void phase1(Function& f, BasicBlock& block) { + for (int i = block.end_basic_op; i-- > block.start_basic_op;) { + auto& instr = f.basic_ops.at(i); + auto& lv = block.live.at(i - block.start_basic_op); + auto& dd = block.dead.at(i - block.start_basic_op); + + // make all read live out + auto read = instr->read_regs; + lv.clear(); + for (auto& x : read) { + lv.insert(x); + } + + // kill things which are 
overwritten + dd.clear(); + auto write = instr->write_regs; + for (auto& x : write) { + if (!in_set(lv, x)) { + dd.insert(x); + } + } + + // b.use = i.liveout + RegSet use_old = block.use; + block.use.clear(); + for (auto& x : lv) { + block.use.insert(x); + } + // | (bu.use & !i.dead) + for (auto& x : use_old) { + if (!in_set(dd, x)) { + block.use.insert(x); + } + } + + // b.defs = i.dead + RegSet defs_old = block.defs; + block.defs.clear(); + for (auto& x : dd) { + block.defs.insert(x); + } + // | b.defs & !i.lv + for (auto& x : defs_old) { + if (!in_set(lv, x)) { + block.defs.insert(x); + } + } + } +} + +bool phase2(std::vector& blocks, BasicBlock& block) { + bool changed = false; + auto out = block.defs; + + for (auto s : {block.succ_branch, block.succ_ft}) { + if (s == -1) { + continue; + } + for (auto in : blocks.at(s).input) { + out.insert(in); + } + } + + RegSet in = block.use; + for (auto x : out) { + if (!in_set(block.defs, x)) { + in.insert(x); + } + } + + if (in != block.input || out != block.output) { + changed = true; + block.input = in; + block.output = out; + } + + return changed; +} + +void phase3(std::vector& blocks, BasicBlock& block) { + RegSet live_local; + for (auto s : {block.succ_branch, block.succ_ft}) { + if (s == -1) { + continue; + } + for (auto i : blocks.at(s).input) { + live_local.insert(i); + } + } + + for (int i = block.end_basic_op; i-- > block.start_basic_op;) { + auto& lv = block.live.at(i - block.start_basic_op); + auto& dd = block.dead.at(i - block.start_basic_op); + + RegSet new_live = lv; + for (auto x : live_local) { + if (!in_set(dd, x)) { + new_live.insert(x); + } + } + lv = live_local; + live_local = new_live; + } +} + +} // namespace +/*! + * Analyze the function use of registers to determine which are live where. 
+ */ +void Function::run_reg_usage() { + // phase 1 + for (auto& block : basic_blocks) { + block.live.resize(block.basic_op_size()); + block.dead.resize(block.basic_op_size()); + phase1(*this, block); + } + + // phase 2 + bool changed = false; + do { + changed = false; + for (auto& block : basic_blocks) { + if (phase2(basic_blocks, block)) { + changed = true; + } + } + } while (changed); + + // phase 3 + for (auto& block : basic_blocks) { + phase3(basic_blocks, block); + } + + // we want to know if an op "consumes" a register. + // this means that the value of the register coming in to the operation is: + // A. read by the operation. + // B. no longer read after the operation. + for (auto& block : basic_blocks) { + for (int i = block.start_basic_op; i < block.end_basic_op; i++) { + auto& op = basic_ops.at(i); + // look at each register that we read + for (auto reg : op->read_regs) { + if (!block.op_has_reg_live_out(i, reg)) { + // if the register is not live out, we definitely consume it. + op->consumed.insert(reg); + } else { + // it's live out... but it could be a new value. + for (auto wr : op->write_regs) { + if (wr == reg) { + op->consumed.insert(reg); + } + } + } + } + } + } +} \ No newline at end of file diff --git a/decompiler/Function/TypeAnalysis.cpp b/decompiler/Function/TypeAnalysis.cpp index 5f49a2aac0..585cc81ebf 100644 --- a/decompiler/Function/TypeAnalysis.cpp +++ b/decompiler/Function/TypeAnalysis.cpp @@ -9,20 +9,41 @@ TypeState construct_initial_typestate(const TypeSpec& f_ts) { int goal_args[] = {Reg::A0, Reg::A1, Reg::A2, Reg::A3, Reg::T0, Reg::T1, Reg::T2, Reg::T3}; assert(f_ts.base_type() == "function"); assert(f_ts.arg_count() >= 1); - assert(f_ts.arg_count() <= 8); + assert(f_ts.arg_count() <= 8 + 1); // 8 args + 1 return. 
for (int i = 0; i < int(f_ts.arg_count()) - 1; i++) { auto reg_id = goal_args[i]; auto reg_type = f_ts.get_arg(i); - result.gpr_types[reg_id].ts = reg_type; - result.gpr_types[reg_id].kind = TP_Type::OBJECT_OF_TYPE; + result.gpr_types[reg_id] = TP_Type::make_from_typespec(reg_type); } return result; } + +void apply_hints(const std::vector& hints, TypeState* state, DecompilerTypeSystem& dts) { + for (auto& hint : hints) { + try { + state->get(hint.reg) = TP_Type::make_from_typespec(dts.parse_type_spec(hint.type_name)); + } catch (std::exception& e) { + printf("failed to parse hint: %s\n", e.what()); + assert(false); + } + } +} + +void try_apply_hints(int idx, + const std::unordered_map>& hints, + TypeState* state, + DecompilerTypeSystem& dts) { + auto kv = hints.find(idx); + if (kv != hints.end()) { + apply_hints(kv->second, state, dts); + } +} } // namespace bool Function::run_type_analysis(const TypeSpec& my_type, DecompilerTypeSystem& dts, - LinkedObjectFile& file) { + LinkedObjectFile& file, + const std::unordered_map>& hints) { // STEP 0 - setup settings dts.type_prop_settings.reset(); if (get_config().pair_functions_by_name.find(guessed_name.to_string()) != @@ -48,6 +69,8 @@ bool Function::run_type_analysis(const TypeSpec& my_type, // STEP 3 - initialize type state. basic_blocks.at(0).init_types = construct_initial_typestate(my_type); + // and add hints: + try_apply_hints(0, hints, &basic_blocks.at(0).init_types, dts); // STEP 2 - loop while types are changing bool run_again = true; @@ -60,13 +83,18 @@ bool Function::run_type_analysis(const TypeSpec& my_type, for (int op_id = block.start_basic_op; op_id < block.end_basic_op; op_id++) { auto& op = basic_ops.at(op_id); + // apply type hints only if we are not the first op. + if (op_id != block.start_basic_op) { + try_apply_hints(op_id, hints, init_types, dts); + } + // while the implementation of propagate_types is in progress, it may throw // for unimplemented cases. Eventually this try/catch should be removed. 
try { op->propagate_types(*init_types, file, dts); } catch (std::runtime_error& e) { - fmt::print("Type prop fail: {}\n\n\n", e.what()); - warnings += "Type prop attempted and failed. "; + fmt::print("Type prop fail on {}: {}\n", guessed_name.to_string(), e.what()); + warnings += ";; Type prop attempted and failed.\n"; return false; } @@ -80,6 +108,9 @@ bool Function::run_type_analysis(const TypeSpec& my_type, for (auto succ_block_id : {block.succ_ft, block.succ_branch}) { if (succ_block_id != -1) { auto& succ_block = basic_blocks.at(succ_block_id); + // apply hint + try_apply_hints(succ_block.start_basic_op, hints, init_types, dts); + // set types to LCA (current, new) if (dts.tp_lca(&succ_block.init_types, *init_types)) { // if something changed, run again! @@ -91,9 +122,9 @@ bool Function::run_type_analysis(const TypeSpec& my_type, } auto last_op = basic_ops.back(); - auto last_type = last_op->end_types.get(Register(Reg::GPR, Reg::V0)).as_typespec(); + auto last_type = last_op->end_types.get(Register(Reg::GPR, Reg::V0)).typespec(); if (last_type != my_type.last_arg()) { - warnings += fmt::format("return type mismatch {} vs {}. ", last_type.print(), + warnings += fmt::format(";; return type mismatch {} vs {}. 
", last_type.print(), my_type.last_arg().print()); } diff --git a/decompiler/IR/BasicOpBuilder.cpp b/decompiler/IR/BasicOpBuilder.cpp index f0115af005..d315bbe9c0 100644 --- a/decompiler/IR/BasicOpBuilder.cpp +++ b/decompiler/IR/BasicOpBuilder.cpp @@ -1403,6 +1403,12 @@ std::shared_ptr try_beq(Instruction& instr, Instruction& next_instr, instr.get_src(2).get_label(), get_branch_delay(next_instr, idx), false); op->update_reginfo_self(0, 1, 0); return op; + } else if (instr.kind == InstructionKind::BEQ && instr.get_src(1).is_reg(make_gpr(Reg::R0))) { + auto op = std::make_shared( + Condition(Condition::ZERO, make_reg(instr.get_src(0).get_reg(), idx), nullptr, nullptr), + instr.get_src(2).get_label(), get_branch_delay(next_instr, idx), false); + op->update_reginfo_self(0, 1, 0); + return op; } else if (instr.kind == InstructionKind::BEQ) { auto op = std::make_shared( Condition(Condition::EQUAL, make_reg(instr.get_src(0).get_reg(), idx), @@ -1532,7 +1538,7 @@ std::shared_ptr try_slt(Instruction& i0, Instruction& i1, int idx) { result->clobber_regs.push_back(temp); result->write_regs.push_back(left); result->read_regs.push_back(right); - result->read_regs.push_back(right); + result->read_regs.push_back(left); result->reg_info_set = true; return result; } @@ -1547,7 +1553,7 @@ std::shared_ptr try_slt(Instruction& i0, Instruction& i1, int idx) { result->clobber_regs.push_back(temp); result->write_regs.push_back(left); result->read_regs.push_back(right); - result->read_regs.push_back(right); + result->read_regs.push_back(left); result->reg_info_set = true; return result; } @@ -1646,12 +1652,22 @@ std::shared_ptr try_slt(Instruction& i0, Instruction& i1, Instruction if (i2.get_src(1).get_reg() != clobber_reg) { return nullptr; // TODO! 
} - auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(dst_reg, idx), - std::make_shared( - Condition(Condition::LESS_THAN_SIGNED, make_reg(src0_reg, idx), - make_reg(src1_reg, idx), make_reg(clobber_reg, idx)))); - op->update_reginfo_self(1, 2, 1); - return op; + if (src1_reg == make_gpr(Reg::R0)) { + auto op = make_set_atomic( + IR_Set_Atomic::REG_64, make_reg(dst_reg, idx), + std::make_shared(Condition(Condition::LESS_THAN_ZERO, make_reg(src0_reg, idx), + nullptr, make_reg(clobber_reg, idx)))); + op->update_reginfo_self(1, 1, 1); + return op; + } else { + auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(dst_reg, idx), + std::make_shared(Condition( + Condition::LESS_THAN_SIGNED, make_reg(src0_reg, idx), + make_reg(src1_reg, idx), make_reg(clobber_reg, idx)))); + op->update_reginfo_self(1, 2, 1); + return op; + } + } else if (i0.kind == InstructionKind::SLT && i1.kind == InstructionKind::BEQ) { auto clobber_reg = i0.get_dst(0).get_reg(); auto src0_reg = i0.get_src(0).get_reg(); @@ -2435,7 +2451,7 @@ void add_basic_ops_to_block(Function* func, const BasicBlock& block, LinkedObjec func->add_basic_op(std::make_shared(), instr, instr + 1); } else { if (!func->contains_asm_ops && dynamic_cast(result.get())) { - func->warnings += "Function contains asm op"; + func->warnings += ";; Function contains asm op\n"; func->contains_asm_ops = true; } diff --git a/decompiler/IR/CfgBuilder.cpp b/decompiler/IR/CfgBuilder.cpp index b00abefeb2..59ecdaf6e4 100644 --- a/decompiler/IR/CfgBuilder.cpp +++ b/decompiler/IR/CfgBuilder.cpp @@ -623,7 +623,8 @@ std::shared_ptr try_sc_as_abs(Function& f, LinkedObjectFile& file, ShortCirc auto b0_ptr = cfg_to_ir(f, file, b0); auto b0_ir = dynamic_cast(b0_ptr.get()); - auto branch = dynamic_cast(b0_ir->forms.back().get()); + auto branch_sp = b0_ir->forms.back(); + auto branch = dynamic_cast(branch_sp.get()); if (!branch) { return nullptr; } @@ -647,7 +648,10 @@ std::shared_ptr try_sc_as_abs(Function& f, LinkedObjectFile& file, 
ShortCirc b0_ir->forms.pop_back(); // add the ash b0_ir->forms.push_back(std::make_shared( - IR_Set::REG_64, output, std::make_shared(IR_IntMath1::ABS, input))); + IR_Set::REG_64, output, + std::make_shared(IR_IntMath1::ABS, input, + std::dynamic_pointer_cast(branch_sp)))); + return b0_ptr; } @@ -682,7 +686,8 @@ std::shared_ptr try_sc_as_ash(Function& f, LinkedObjectFile& file, ShortCirc return nullptr; } - auto branch = dynamic_cast(b0_ir->forms.back().get()); + auto branch_sp = b0_ir->forms.back(); + auto branch = dynamic_cast(branch_sp.get()); if (!branch || b1_ir->forms.size() != 2) { return nullptr; } @@ -752,7 +757,10 @@ std::shared_ptr try_sc_as_ash(Function& f, LinkedObjectFile& file, ShortCirc // add the ash b0_ir->forms.push_back(std::make_shared( IR_Set::REG_64, dest_ir, - std::make_shared(shift_ir, value_ir, clobber_ir, is_arith))); + std::make_shared(shift_ir, value_ir, clobber_ir, + std::dynamic_pointer_cast(branch_sp), + std::dynamic_pointer_cast(dsubu_candidate), + std::dynamic_pointer_cast(dsrav_candidate), is_arith))); return b0_ptr; } @@ -1145,7 +1153,6 @@ std::shared_ptr build_cfg_ir(Function& function, auto all_children = ir->get_all_ir(file); all_children.push_back(ir); for (auto& child : all_children) { - // printf("child is %s\n", child->print(file).c_str()); auto as_begin = dynamic_cast(child.get()); if (as_begin) { clean_up_while_loops(as_begin, file); diff --git a/decompiler/IR/IR.cpp b/decompiler/IR/IR.cpp index bbb1df8831..a8b22b8da4 100644 --- a/decompiler/IR/IR.cpp +++ b/decompiler/IR/IR.cpp @@ -3,6 +3,9 @@ #include "common/goos/PrettyPrinter.h" #include "third-party/fmt/core.h" +// hack to print out reverse deref paths on loads to help with debugging load stuff. 
+bool enable_hack_load_path_print = false; + std::vector> IR::get_all_ir(LinkedObjectFile& file) const { (void)file; std::vector> result; @@ -94,6 +97,14 @@ std::string IR_Atomic::print_with_types(const TypeState& init_types, result += fmt::format("[{}] -> [{}]", init_types.print_gpr_masked(read_mask), end_types.print_gpr_masked(write_mask)); + + if (!consumed.empty()) { + result += "c:"; + for (auto x : consumed) { + result += " "; + result += x.to_charp(); + } + } return result; } @@ -377,15 +388,6 @@ void IR_EmptyPair::get_children(std::vector>* output) const (void)output; } -TP_Type IR_EmptyPair::get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)input; - (void)file; - (void)dts; - return TP_Type(TypeSpec("pair")); -} - goos::Object IR_StaticAddress::to_form(const LinkedObjectFile& file) const { // return pretty_print::build_list(pretty_print::to_symbol("&"), file.get_label_name(label_id)); return pretty_print::to_symbol(file.get_label_name(label_id)); @@ -396,6 +398,19 @@ void IR_StaticAddress::get_children(std::vector>* output) co } goos::Object IR_Load::to_form(const LinkedObjectFile& file) const { + if (load_path_set && enable_hack_load_path_print) { + std::vector list; + if (load_path_addr_of) { + list.push_back(pretty_print::to_symbol("&->")); + } else { + list.push_back(pretty_print::to_symbol("->")); + } + list.push_back(load_path_base->to_form(file)); + for (auto& x : load_path) { + list.push_back(pretty_print::to_symbol(x)); + } + return pretty_print::build_list(list); + } std::string load_operator; switch (kind) { case FLOAT: @@ -599,7 +614,18 @@ goos::Object IR_FloatMath1::to_form(const LinkedObjectFile& file) const { goos::Object IR_Call::to_form(const LinkedObjectFile& file) const { (void)file; - return pretty_print::build_list("call!"); + std::vector result; + result.push_back(pretty_print::to_symbol("call!")); + + if (call_type_set) { + 
result.push_back(pretty_print::to_symbol(":arg-count")); + result.push_back(pretty_print::to_symbol(std::to_string(call_type.arg_count() - 1))); + } + + for (auto& x : args) { + result.push_back(x->to_form(file)); + } + return pretty_print::build_list(result); } void IR_Call::get_children(std::vector>* output) const { diff --git a/decompiler/IR/IR.h b/decompiler/IR/IR.h index d10a2a2b9b..e81a6b13c3 100644 --- a/decompiler/IR/IR.h +++ b/decompiler/IR/IR.h @@ -5,6 +5,7 @@ #include #include #include +#include #include "decompiler/Disasm/Register.h" #include "common/type_system/TypeSpec.h" #include "decompiler/util/DecompilerTypeSystem.h" @@ -12,6 +13,7 @@ class LinkedObjectFile; class DecompilerTypeSystem; +class ExpressionStack; namespace goos { class Object; @@ -27,12 +29,33 @@ class IR { virtual TP_Type get_expression_type(const TypeState& input, const LinkedObjectFile& file, DecompilerTypeSystem& dts); + + // update the expression stack + virtual bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { + (void)stack; + (void)file; + throw std::runtime_error("expression_stack NYI for " + print(file)); + } + + // update myself to use consumed registers from the stack. 
+ virtual bool update_from_stack(const std::unordered_set& consume, + ExpressionStack& stack, + LinkedObjectFile& file) { + (void)consume; + (void)stack; + throw std::runtime_error("update_from_stack NYI for " + print(file)); + } + + virtual std::unordered_set get_consumed(LinkedObjectFile& file) { + throw std::runtime_error("get_consumed NYI for " + print(file)); + } virtual ~IR() = default; }; class IR_Atomic : public virtual IR { public: std::vector read_regs, write_regs, clobber_regs; + std::unordered_set consumed; bool reg_info_set = false; TypeState end_types; // types at the end of this instruction @@ -81,12 +104,15 @@ class IR_Set : public virtual IR { FPR_TO_GPR64, GPR_TO_FPR, REG_FLT, - REG_I128 + REG_I128, + EXPR } kind; IR_Set(Kind _kind, std::shared_ptr _dst, std::shared_ptr _src) : kind(_kind), dst(std::move(_dst)), src(std::move(_src)) {} goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override; + std::shared_ptr dst, src; std::shared_ptr clobber = nullptr; }; @@ -103,6 +129,7 @@ class IR_Set_Atomic : public IR_Set, public IR_Atomic { void propagate_types(const TypeState& input, const LinkedObjectFile& file, DecompilerTypeSystem& dts) override; + bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override; }; class IR_IntMath2; @@ -144,6 +171,14 @@ class IR_Symbol : public virtual IR { TP_Type get_expression_type(const TypeState& input, const LinkedObjectFile& file, DecompilerTypeSystem& dts) override; + bool update_from_stack(const std::unordered_set& consume, + ExpressionStack& stack, + LinkedObjectFile& file) override { + (void)consume; + (void)stack; + (void)file; + return true; + } }; class IR_SymbolValue : public virtual IR { @@ -155,6 +190,14 @@ class IR_SymbolValue : public virtual IR { TP_Type get_expression_type(const TypeState& input, const LinkedObjectFile& file, 
DecompilerTypeSystem& dts) override; + bool update_from_stack(const std::unordered_set& consume, + ExpressionStack& stack, + LinkedObjectFile& file) override { + (void)consume; + (void)stack; + (void)file; + return true; + } }; class IR_EmptyPair : public virtual IR { @@ -176,6 +219,9 @@ class IR_StaticAddress : public virtual IR { TP_Type get_expression_type(const TypeState& input, const LinkedObjectFile& file, DecompilerTypeSystem& dts) override; + bool update_from_stack(const std::unordered_set& consume, + ExpressionStack& stack, + LinkedObjectFile& file) override; }; class IR_Load : public virtual IR { @@ -191,6 +237,22 @@ class IR_Load : public virtual IR { TP_Type get_expression_type(const TypeState& input, const LinkedObjectFile& file, DecompilerTypeSystem& dts) override; + bool update_from_stack(const std::unordered_set& consume, + ExpressionStack& stack, + LinkedObjectFile& file) override; + + // this load_path stuff is just for debugging and shouldn't be used as part of the real + // decompilation. 
+ void clear_load_path() { + load_path_set = false; + load_path_addr_of = false; + load_path.clear(); + load_path_base = nullptr; + } + std::shared_ptr load_path_base = nullptr; + bool load_path_set = false; + bool load_path_addr_of = false; + std::vector load_path; }; class IR_FloatMath2 : public virtual IR { @@ -204,6 +266,9 @@ class IR_FloatMath2 : public virtual IR { TP_Type get_expression_type(const TypeState& input, const LinkedObjectFile& file, DecompilerTypeSystem& dts) override; + bool update_from_stack(const std::unordered_set& consume, + ExpressionStack& stack, + LinkedObjectFile& file) override; }; class IR_FloatMath1 : public virtual IR { @@ -213,9 +278,9 @@ class IR_FloatMath1 : public virtual IR { std::shared_ptr arg; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - TP_Type get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; + // TP_Type get_expression_type(const TypeState& input, + // const LinkedObjectFile& file, + // DecompilerTypeSystem& dts) override; }; class IR_IntMath2 : public virtual IR { @@ -247,18 +312,30 @@ class IR_IntMath2 : public virtual IR { TP_Type get_expression_type(const TypeState& input, const LinkedObjectFile& file, DecompilerTypeSystem& dts) override; + bool update_from_stack(const std::unordered_set& consume, + ExpressionStack& stack, + LinkedObjectFile& file) override; }; class IR_IntMath1 : public virtual IR { public: enum Kind { NOT, ABS, NEG } kind; IR_IntMath1(Kind _kind, std::shared_ptr _arg) : kind(_kind), arg(std::move(_arg)) {} + IR_IntMath1(Kind _kind, std::shared_ptr _arg, std::shared_ptr _abs_op) + : kind(_kind), arg(std::move(_arg)), abs_op(std::move(_abs_op)) { + assert(abs_op); + } std::shared_ptr arg; + std::shared_ptr abs_op = nullptr; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; TP_Type 
get_expression_type(const TypeState& input, const LinkedObjectFile& file, DecompilerTypeSystem& dts) override; + std::unordered_set get_consumed(LinkedObjectFile& file) override; + bool update_from_stack(const std::unordered_set& consume, + ExpressionStack& stack, + LinkedObjectFile& file) override; }; class IR_Call : public virtual IR { @@ -266,6 +343,9 @@ class IR_Call : public virtual IR { IR_Call() = default; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + std::vector> args; + TypeSpec call_type; + bool call_type_set = false; }; // todo @@ -275,6 +355,7 @@ class IR_Call_Atomic : public virtual IR_Call, public IR_Atomic { void propagate_types(const TypeState& input, const LinkedObjectFile& file, DecompilerTypeSystem& dts) override; + bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override; }; class IR_IntegerConstant : public virtual IR { @@ -436,9 +517,9 @@ class IR_Breakpoint_Atomic : public virtual IR_Atomic { IR_Breakpoint_Atomic() = default; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; - void propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; + // void propagate_types(const TypeState& input, + // const LinkedObjectFile& file, + // DecompilerTypeSystem& dts) override; }; class IR_Begin : public virtual IR { @@ -530,17 +611,32 @@ class IR_ShortCircuit : public virtual IR { class IR_Ash : public virtual IR { public: std::shared_ptr shift_amount, value, clobber; + std::shared_ptr branch_op, sub_op, shift_op; bool is_signed = true; IR_Ash(std::shared_ptr _shift_amount, std::shared_ptr _value, std::shared_ptr _clobber, + std::shared_ptr _branch_op, + std::shared_ptr _sub_op, + std::shared_ptr _shift_op, bool _is_signed) : shift_amount(std::move(_shift_amount)), value(std::move(_value)), clobber(std::move(_clobber)), - 
is_signed(_is_signed) {} + branch_op(std::move(_branch_op)), + sub_op(std::move(_sub_op)), + shift_op(std::move(_shift_op)), + is_signed(_is_signed) { + assert(sub_op); + assert(shift_op); + assert(branch_op); + } goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + std::unordered_set get_consumed(LinkedObjectFile& file) override; + bool update_from_stack(const std::unordered_set& consume, + ExpressionStack& stack, + LinkedObjectFile& file) override; }; class IR_AsmOp : public virtual IR { @@ -559,9 +655,9 @@ class IR_AsmOp_Atomic : public virtual IR_AsmOp, public IR_Atomic { public: IR_AsmOp_Atomic(std::string _name) : IR_AsmOp(std::move(_name)) {} void set_reg_info(); - void propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) override; + // void propagate_types(const TypeState& input, + // const LinkedObjectFile& file, + // DecompilerTypeSystem& dts) override; }; class IR_CMoveF : public virtual IR { diff --git a/decompiler/IR/IR_ExpressionStack.cpp b/decompiler/IR/IR_ExpressionStack.cpp new file mode 100644 index 0000000000..52883a02be --- /dev/null +++ b/decompiler/IR/IR_ExpressionStack.cpp @@ -0,0 +1,224 @@ +#include +#include "IR.h" +#include "decompiler/Function/ExpressionStack.h" + +bool IR_Set_Atomic::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { + // first determine the type of the set. + switch (kind) { + case IR_Set::REG_64: + case IR_Set::LOAD: + case IR_Set::GPR_TO_FPR: // TODO - this should probably not be invisible. + case IR_Set::FPR_TO_GPR64: + case IR_Set::REG_FLT: + case IR_Set::SYM_LOAD: { + // normal 64-bit GPR set! + // first, we update our source to substitute in more complicated expressions. + auto src_as_reg = dynamic_cast(src.get()); + if (src_as_reg) { + // an annoying special case. + if (consumed.find(src_as_reg->reg) != consumed.end()) { + // we consume it. 
+ src = stack.get(src_as_reg->reg); + } + } else { + src->update_from_stack(consumed, stack, file); + } + + // next, we tell the stack the value of the register we just set + auto dest_reg = dynamic_cast(dst.get()); + assert(dest_reg); + stack.set(dest_reg->reg, src); + return true; + } + + break; + default: + throw std::runtime_error("IR_Set_Atomic::expression_stack NYI for " + print(file)); + } +} + +bool IR_Set::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { + // first determine the type of the set. + switch (kind) { + case IR_Set::REG_64: + case IR_Set::LOAD: + case IR_Set::GPR_TO_FPR: // TODO - this should probably not be invisible. + case IR_Set::FPR_TO_GPR64: + case IR_Set::REG_FLT: { + // normal 64-bit GPR set! + // first, we update our source to substitute in more complicated expressions. + auto consumed = src->get_consumed(file); + auto src_as_reg = dynamic_cast(src.get()); + if (src_as_reg) { + // an annoying special case. + if (consumed.find(src_as_reg->reg) != consumed.end()) { + // we consume it. + src = stack.get(src_as_reg->reg); + } + } else { + src->update_from_stack(consumed, stack, file); + } + + // next, we tell the stack the value of the register we just set + auto dest_reg = dynamic_cast(dst.get()); + assert(dest_reg); + stack.set(dest_reg->reg, src); + return true; + } + + break; + default: + throw std::runtime_error("IR_Set_Atomic::expression_stack NYI for " + print(file)); + } +} + +bool IR_Call_Atomic::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) { + (void)file; + if (!call_type_set) { + throw std::runtime_error("Call type is unknown on an IR_Call_Atomic"); + } + + const Reg::Gpr arg_regs[8] = {Reg::A0, Reg::A1, Reg::A2, Reg::A3, + Reg::T0, Reg::T1, Reg::T2, Reg::T3}; + int nargs = int(call_type.arg_count()) - 1; + // get all arguments. 
+ for (int i = nargs; i-- > 0;) { + args.push_back(stack.get(Register(Reg::GPR, arg_regs[i]))); + } + args.push_back(stack.get(Register(Reg::GPR, Reg::T9))); + std::reverse(args.begin(), args.end()); + + auto return_type = call_type.get_arg(call_type.arg_count() - 1); + // bleh... + stack.set(Register(Reg::GPR, Reg::V0), std::make_shared(*this)); + + return true; +} + +namespace { +void update_from_stack_helper(std::shared_ptr* ir, + const std::unordered_set& consume, + ExpressionStack& stack, + LinkedObjectFile& file) { + auto as_reg = dynamic_cast(ir->get()); + if (as_reg) { + if (consume.find(as_reg->reg) != consume.end()) { + *ir = stack.get(as_reg->reg); + } + } else { + (*ir)->update_from_stack(consume, stack, file); + } +} +} // namespace + +bool IR_Load::update_from_stack(const std::unordered_set& consume, + ExpressionStack& stack, + LinkedObjectFile& file) { + update_from_stack_helper(&location, consume, stack, file); + return true; +} + +bool IR_StaticAddress::update_from_stack( + const std::unordered_set& consume, + ExpressionStack& stack, + LinkedObjectFile& file) { + (void)consume; + (void)stack; + (void)file; + return true; +} + +bool IR_FloatMath2::update_from_stack(const std::unordered_set& consume, + ExpressionStack& stack, + LinkedObjectFile& file) { + if (kind == DIV) { + for (auto reg : {&arg1, &arg0}) { + auto as_reg = dynamic_cast(reg->get()); + if (as_reg) { + if (consume.find(as_reg->reg) != consume.end()) { + *reg = stack.get(as_reg->reg); + } + } else { + (*reg)->update_from_stack(consume, stack, file); + } + } + } else { + for (auto reg : {&arg0, &arg1}) { + auto as_reg = dynamic_cast(reg->get()); + if (as_reg) { + if (consume.find(as_reg->reg) != consume.end()) { + *reg = stack.get(as_reg->reg); + } + } else { + (*reg)->update_from_stack(consume, stack, file); + } + } + } + + return true; +} + +bool IR_IntMath2::update_from_stack(const std::unordered_set& consume, + ExpressionStack& stack, + LinkedObjectFile& file) { + for (auto reg : 
{&arg1, &arg0}) { + auto as_reg = dynamic_cast(reg->get()); + if (as_reg) { + if (consume.find(as_reg->reg) != consume.end()) { + *reg = stack.get(as_reg->reg); + } + } else { + (*reg)->update_from_stack(consume, stack, file); + } + } + return true; +} + +std::unordered_set IR_Ash::get_consumed(LinkedObjectFile& file) { + (void)file; + // first get the set of read registers... + auto value_as_reg = dynamic_cast(value.get()); + auto sa_as_reg = dynamic_cast(shift_amount.get()); + if (!sa_as_reg || !value_as_reg) { + // consume nobody. + // todo - is this actually right? If not, this is "safe", but might lead to ugly code. + return {}; + } + + std::unordered_set result; + + for (auto& op : {branch_op, sub_op, shift_op}) { + for (auto& reg : {value_as_reg->reg, sa_as_reg->reg}) { + if (op->consumed.find(reg) != op->consumed.end()) { + result.insert(reg); + } + } + } + + return result; +} + +bool IR_Ash::update_from_stack(const std::unordered_set& consume, + ExpressionStack& stack, + LinkedObjectFile& file) { + for (auto x : {&value, &shift_amount}) { + update_from_stack_helper(x, consume, stack, file); + } + return true; +} + +std::unordered_set IR_IntMath1::get_consumed(LinkedObjectFile& file) { + if (kind == ABS) { + assert(abs_op); + return abs_op->consumed; + } else { + throw std::runtime_error("IR_IntMath1::get_consumed NYI for " + print(file)); + } +} + +bool IR_IntMath1::update_from_stack(const std::unordered_set& consume, + ExpressionStack& stack, + LinkedObjectFile& file) { + update_from_stack_helper(&arg, consume, stack, file); + return true; +} \ No newline at end of file diff --git a/decompiler/IR/IR_TypeAnalysis.cpp b/decompiler/IR/IR_TypeAnalysis.cpp index 2819fa880b..d085e683b3 100644 --- a/decompiler/IR/IR_TypeAnalysis.cpp +++ b/decompiler/IR/IR_TypeAnalysis.cpp @@ -6,28 +6,39 @@ #include "decompiler/ObjectFile/LinkedObjectFile.h" namespace { -bool is_plain_type(const TP_Type& type, const TypeSpec& ts) { - return type.as_typespec() == ts; +// bool 
is_plain_type(const TP_Type& type, const TypeSpec& ts) { +// return type.as_typespec() == ts; +//} +// +// bool is_integer_type(const TP_Type& type) { +// return is_plain_type(type, TypeSpec("int")) || is_plain_type(type, TypeSpec("uint")); +//} +// +///*! +// * If first arg is unsigned, make the result unsigned. +// * Otherwise signed. This is the default GOAL behavior I guess. +// * This strips away any fancy stuff like [uint x 4] +// */ +// TP_Type get_int_type(const TP_Type& one) { +// if (is_plain_type(one, TypeSpec("uint"))) { +// return TP_Type(one.as_typespec()); +// } else { +// return TP_Type(TypeSpec("int")); +// } +//} +// + +bool tc(DecompilerTypeSystem& dts, const TypeSpec& expected, const TP_Type& actual) { + return dts.ts.typecheck(expected, actual.typespec(), "", false, false); } -bool is_integer_type(const TP_Type& type) { - return is_plain_type(type, TypeSpec("int")) || is_plain_type(type, TypeSpec("uint")); -} - -/*! - * If first arg is unsigned, make the result unsigned. - * Otherwise signed. This is the default GOAL behavior I guess. 
- */ -TP_Type get_int_type(const TP_Type& one) { - if (is_plain_type(one, TypeSpec("uint"))) { - return one; - } else { - return TP_Type(TypeSpec("int")); - } +bool is_int_or_uint(DecompilerTypeSystem& dts, const TP_Type& type) { + return tc(dts, TypeSpec("int"), type) || tc(dts, TypeSpec("uint"), type); } struct RegOffset { Register reg; + std::shared_ptr reg_ir; int offset; }; @@ -35,6 +46,7 @@ bool get_as_reg_offset(const IR* ir, RegOffset* out) { auto as_reg = dynamic_cast(ir); if (as_reg) { out->reg = as_reg->reg; + out->reg_ir = std::make_shared(*as_reg); out->offset = 0; return true; } @@ -46,6 +58,7 @@ bool get_as_reg_offset(const IR* ir, RegOffset* out) { if (first_as_reg && second_as_const) { out->reg = first_as_reg->reg; out->offset = second_as_const->value; + out->reg_ir = std::dynamic_pointer_cast(as_math->arg0); return true; } } @@ -64,6 +77,9 @@ RegKind get_reg_kind(const Register& r) { } } // namespace +/*! + * Default implementation of propagate types, throw an NYI error. + */ void IR_Atomic::propagate_types(const TypeState& input, const LinkedObjectFile& file, DecompilerTypeSystem& dts) { @@ -73,6 +89,9 @@ void IR_Atomic::propagate_types(const TypeState& input, fmt::format("Could not propagate types for {}, not yet implemented", print(file))); } +/*! + * Default implementation of get_expression_type. + */ TP_Type IR::get_expression_type(const TypeState& input, const LinkedObjectFile& file, DecompilerTypeSystem& dts) { @@ -82,6 +101,9 @@ TP_Type IR::get_expression_type(const TypeState& input, fmt::format("Could not get expression types for {}, not yet implemented", print(file))); } +/*! + * Propagate types through a set! operation. + */ void IR_Set_Atomic::propagate_types(const TypeState& input, const LinkedObjectFile& file, DecompilerTypeSystem& dts) { @@ -95,9 +117,12 @@ void IR_Set_Atomic::propagate_types(const TypeState& input, case IR_Set::FPR_TO_GPR64: case IR_Set::REG_FLT: case IR_Set::SYM_LOAD: { + // all these should set a register. 
auto as_reg = dynamic_cast(dst.get()); assert(as_reg); + // get the type of the source, auto t = src->get_expression_type(input, file, dts); + // set the type of the register. end_types.get(as_reg->reg) = t; } break; @@ -112,6 +137,9 @@ void IR_Set_Atomic::propagate_types(const TypeState& input, } } +/*! + * Get the type of a register. + */ TP_Type IR_Register::get_expression_type(const TypeState& input, const LinkedObjectFile& file, DecompilerTypeSystem& dts) { @@ -123,105 +151,113 @@ TP_Type IR_Register::get_expression_type(const TypeState& input, TP_Type IR_Load::get_expression_type(const TypeState& input, const LinkedObjectFile& file, DecompilerTypeSystem& dts) { - (void)input; + clear_load_path(); + + //////////////////// + // STATIC + //////////////////// auto as_static = dynamic_cast(location.get()); if (as_static) { + // todo - we should map out static data and use an actual type system lookup to figure this out. + // but for now, this is probably good enough. if (kind == FLOAT) { // loading static data with a FLOAT kind load (lwc1), assume result is a float. - return TP_Type(dts.ts.make_typespec("float")); + return TP_Type::make_from_typespec(dts.ts.make_typespec("float")); } if (size == 8) { - // kinda hacky - if (kind == SIGNED) { - return TP_Type(dts.ts.make_typespec("int")); - } else if (kind == UNSIGNED) { - return TP_Type(dts.ts.make_typespec("uint")); - } + // 8 byte integer constants are always loaded from a static pool + // this could technically hide loading a different type from inside of a static basic. 
+ return TP_Type::make_from_typespec(dts.ts.make_typespec("uint")); } } + /////////////////////////////////////// + // REGISTER + OFFSET (possibly 0) + /////////////////////////////////////// RegOffset ro; if (get_as_reg_offset(location.get(), &ro)) { auto& input_type = input.get(ro.reg); - if (input_type.kind == TP_Type::TYPE_OBJECT && ro.offset >= 16 && (ro.offset & 3) == 0 && - size == 4 && kind == UNSIGNED) { - // method get + if (input_type.kind == TP_Type::Kind::TYPE_OF_TYPE_OR_CHILD && ro.offset >= 16 && + (ro.offset & 3) == 0 && size == 4 && kind == UNSIGNED) { + // method get of fixed type + auto type_name = input_type.get_type_objects_typespec().base_type(); auto method_id = (ro.offset - 16) / 4; - if (input_type.ts.base_type() == "object" && method_id == GOAL_NEW_METHOD) { + auto method_info = dts.ts.lookup_method(type_name, method_id); + auto method_type = method_info.type.substitute_for_method_call(type_name); + if (type_name == "object" && method_id == GOAL_NEW_METHOD) { // remember that we're an object new. 
- auto method_info = dts.ts.lookup_method(input_type.ts.print(), method_id); - auto result = TP_Type(method_info.type.substitute_for_method_call(input_type.ts.print())); - result.kind = TP_Type::METHOD_NEW_OF_OBJECT; - return result; + return TP_Type::make_object_new(method_type); } - auto method_info = dts.ts.lookup_method(input_type.ts.print(), method_id); - return TP_Type(method_info.type.substitute_for_method_call(input_type.ts.print())); + return TP_Type::make_from_typespec(method_type); } - if (input_type.kind == TP_Type::OBJECT_OF_TYPE && - input_type.as_typespec() == TypeSpec("type") && ro.offset >= 16 && (ro.offset & 3) == 0 && - size == 4 && kind == UNSIGNED) { - // method get - auto method_id = (ro.offset - 16) / 4; - auto method_info = dts.ts.lookup_method("object", method_id); - return TP_Type(method_info.type.substitute_for_method_call("object")); - } + // if (input_type.kind == TP_Type::OBJECT_OF_TYPE && + // input_type.as_typespec() == TypeSpec("type") && ro.offset >= 16 && (ro.offset & 3) == + // 0 + // && size == 4 && kind == UNSIGNED) { + // // method get of dynamic type. + // auto method_id = (ro.offset - 16) / 4; + // auto method_info = dts.ts.lookup_method("object", method_id); + // return TP_Type(method_info.type.substitute_for_method_call("object")); + // } + // + // if (input_type.kind == TP_Type::OBJECT_OF_TYPE && + // input_type.as_typespec() == TypeSpec("pointer")) { + // // we got a plain pointer. let's just assume we're loading an integer. + // // perhaps we should disable this feature by default on 4-byte loads if we're getting + // // lots of false positives for loading pointers from plain pointers. 
+ // + // // todo, load_path + // switch (kind) { + // case UNSIGNED: + // switch (size) { + // case 1: + // return TP_Type(TypeSpec("uint")); + // case 2: + // return TP_Type(TypeSpec("uint")); + // case 4: + // return TP_Type(TypeSpec("uint")); + // case 8: + // return TP_Type(TypeSpec("uint")); + // case 16: + // return TP_Type(TypeSpec("uint")); + // default: + // assert(false); + // } + // break; + // case SIGNED: + // switch (size) { + // case 1: + // return TP_Type(TypeSpec("int")); + // case 2: + // return TP_Type(TypeSpec("int")); + // case 4: + // return TP_Type(TypeSpec("int")); + // case 8: + // return TP_Type(TypeSpec("int")); + // case 16: + // return TP_Type(TypeSpec("int")); + // default: + // assert(false); + // } + // break; + // case FLOAT: + // return TP_Type(TypeSpec("float")); + // default: + // assert(false); + // } + // } + // - if (input_type.kind == TP_Type::OBJECT_OF_TYPE && - input_type.as_typespec() == TypeSpec("pointer")) { - // we got a plain pointer. let's just assume we're loading an integer. - // perhaps we should disable this feature by default on 4-byte loads if we're getting - // lots of false positives for loading pointers from plain pointers. 
- switch (kind) { - case UNSIGNED: - switch (size) { - case 1: - return TP_Type(TypeSpec("uint")); - case 2: - return TP_Type(TypeSpec("uint")); - case 4: - return TP_Type(TypeSpec("uint")); - case 8: - return TP_Type(TypeSpec("uint")); - case 16: - return TP_Type(TypeSpec("uint")); - default: - assert(false); - } - break; - case SIGNED: - switch (size) { - case 1: - return TP_Type(TypeSpec("int")); - case 2: - return TP_Type(TypeSpec("int")); - case 4: - return TP_Type(TypeSpec("int")); - case 8: - return TP_Type(TypeSpec("int")); - case 16: - return TP_Type(TypeSpec("int")); - default: - assert(false); - } - break; - case FLOAT: - return TP_Type(TypeSpec("float")); - default: - assert(false); - } - } - - if (input_type.kind == TP_Type::PARTIAL_METHOD_TABLE_ACCESS && ro.offset == 16) { - // access method vtable - return TP_Type(TypeSpec("function")); - } else if (input_type.kind == TP_Type::OBJ_PLUS_PRODUCT) { + // } else + // + if (input_type.kind == TP_Type::Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT) { // note, we discard and completely ignore the stride here. ReverseDerefInputInfo rd_in; rd_in.mem_deref = true; - rd_in.input_type = input_type.ts; + rd_in.input_type = input_type.get_obj_plus_const_mult_typespec(); rd_in.reg = get_reg_kind(ro.reg); // bleh rd_in.offset = ro.offset; rd_in.sign_extend = kind == SIGNED; @@ -229,61 +265,88 @@ TP_Type IR_Load::get_expression_type(const TypeState& input, auto rd = dts.ts.get_reverse_deref_info(rd_in); if (rd.success) { - return TP_Type(coerce_to_reg_type(rd.result_type)); + return TP_Type::make_from_typespec(coerce_to_reg_type(rd.result_type)); } - } else { - if (input_type.as_typespec() == TypeSpec("object") && ro.offset == -4 && kind == UNSIGNED && - size == 4 && ro.reg.get_kind() == Reg::GPR) { - // get type of basic likely, but misrecognized as an object. - // occurs often in typecase-like structures because other possible types are "stripped". 
- return TP_Type(TypeSpec("type")); + } + // } else { + // if (input_type.kind == TP_Type::OBJECT_OF_TYPE && ro.offset == -4 && kind == UNSIGNED + // && + // size == 4 && ro.reg.get_kind() == Reg::GPR) { + // // get type of basic likely, but misrecognized as an object. + // // occurs often in typecase-like structures because other possible types are + // "stripped". load_path_base = ro.reg_ir; load_path_addr_of = false; + // load_path.push_back("type"); + // load_path_set = true; + // + // return TP_Type::make_type_object(input_type.as_typespec().base_type()); + // } + // + // if (input_type.as_typespec() == TypeSpec("object") && ro.offset == -4 && kind == + // UNSIGNED + // && + // size == 4 && ro.reg.get_kind() == Reg::GPR) { + // // get type of basic likely, but misrecognized as an object. + // // occurs often in typecase-like structures because other possible types are + // "stripped". return TP_Type(TypeSpec("type")); + // } + // + + if (input_type.kind == TP_Type::Kind::DYNAMIC_METHOD_ACCESS && ro.offset == 16) { + // access method vtable. The input is type + (4 * method), and the 16 is the offset + // of method 0. + return TP_Type::make_from_typespec(TypeSpec("function")); + } + // Assume we're accessing a field of an object. + ReverseDerefInputInfo rd_in; + rd_in.mem_deref = true; + rd_in.input_type = input_type.typespec(); + rd_in.reg = get_reg_kind(ro.reg); + rd_in.offset = ro.offset; + rd_in.sign_extend = kind == SIGNED; + rd_in.load_size = size; + + auto rd = dts.ts.get_reverse_deref_info(rd_in); + + // only error on failure if "pair" is disabled. otherwise it might be a pair. + if (!rd.success && !dts.type_prop_settings.allow_pair) { + printf("input type is %s, offset is %d, sign %d size %d\n", rd_in.input_type.print().c_str(), + rd_in.offset, rd_in.sign_extend, rd_in.load_size); + throw std::runtime_error( + fmt::format("Could not get type of load: {}. 
Reverse Deref Failed.", print(file))); + } + + if (rd.success) { + load_path_set = true; + load_path_addr_of = rd.addr_of; + load_path_base = ro.reg_ir; + for (auto& x : rd.deref_path) { + load_path.push_back(x.print()); } + return TP_Type::make_from_typespec(coerce_to_reg_type(rd.result_type)); + } - if (input_type.kind == TP_Type::OBJECT_OF_TYPE && ro.offset == -4 && kind == UNSIGNED && - size == 4 && ro.reg.get_kind() == Reg::GPR) { - // get type of basic likely, but misrecognized as an object. - // occurs often in typecase-like structures because other possible types are "stripped". - return TP_Type::make_type_object(input_type.as_typespec().base_type()); - } - - // nice - ReverseDerefInputInfo rd_in; - rd_in.mem_deref = true; - rd_in.input_type = input_type.as_typespec(); - rd_in.reg = get_reg_kind(ro.reg); // bleh - rd_in.offset = ro.offset; - rd_in.sign_extend = kind == SIGNED; - rd_in.load_size = size; - - auto rd = dts.ts.get_reverse_deref_info(rd_in); - if (!rd.success && !dts.type_prop_settings.allow_pair) { - printf("input type is %s, offset is %d, sign %d size %d\n", - rd_in.input_type.print().c_str(), rd_in.offset, rd_in.sign_extend, rd_in.load_size); - throw std::runtime_error( - fmt::format("Could not get type of load: {}. Reverse Deref Failed.", print(file))); - } - - if (rd.success) { - return TP_Type(coerce_to_reg_type(rd.result_type)); - } - - if (dts.type_prop_settings.allow_pair) { - if (kind == SIGNED && size == 4 && - (input_type.as_typespec() == TypeSpec("object") || - input_type.as_typespec() == TypeSpec("pair"))) { - // pair access! - if (ro.offset == 2) { - return TP_Type(TypeSpec("pair")); - } else if (ro.offset == -2) { - return TP_Type(TypeSpec("object")); - } + // rd failed, try as pair. + if (dts.type_prop_settings.allow_pair) { + // we are strict here - only permit pair-type loads from object or pair. + // object is permitted for stuff like association lists where the car is also a pair. 
+ if (kind == SIGNED && size == 4 && + (input_type.typespec() == TypeSpec("object") || + input_type.typespec() == TypeSpec("pair"))) { + // these rules are of course not always correct or the most specific, but it's the best + // we can do. + if (ro.offset == 2) { + // cdr = another pair. + return TP_Type::make_from_typespec(TypeSpec("pair")); + } else if (ro.offset == -2) { + // car = some object. + return TP_Type::make_from_typespec(TypeSpec("object")); } } } } - throw std::runtime_error( - fmt::format("Could not get type of load: {}. Not handled.", print(file))); + throw std::runtime_error(fmt::format("Could not get type of load: {}. Not handled: {}", + print(file), location->print(file))); } TP_Type IR_FloatMath2::get_expression_type(const TypeState& input, @@ -293,6 +356,7 @@ TP_Type IR_FloatMath2::get_expression_type(const TypeState& input, (void)file; // regardless of input types, the output is going to be a float. + // todo - if we ever support meters we should do something better here. 
switch (kind) { case DIV: case MUL: @@ -300,133 +364,196 @@ TP_Type IR_FloatMath2::get_expression_type(const TypeState& input, case SUB: case MIN: case MAX: - return TP_Type(dts.ts.make_typespec("float")); - default: - assert(false); - } -} - -TP_Type IR_FloatMath1::get_expression_type(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)input; - (void)file; - (void)dts; - // FLOAT_TO_INT, INT_TO_FLOAT, ABS, NEG, SQRT - switch (kind) { - case FLOAT_TO_INT: - return TP_Type(TypeSpec("int")); - case INT_TO_FLOAT: - case ABS: - case NEG: - case SQRT: - return TP_Type(TypeSpec("float")); + return TP_Type::make_from_typespec(dts.ts.make_typespec("float")); default: assert(false); } } +// TP_Type IR_FloatMath1::get_expression_type(const TypeState& input, +// const LinkedObjectFile& file, +// DecompilerTypeSystem& dts) { +// (void)input; +// (void)file; +// (void)dts; +// // FLOAT_TO_INT, INT_TO_FLOAT, ABS, NEG, SQRT +// switch (kind) { +// case FLOAT_TO_INT: +// return TP_Type(TypeSpec("int")); +// case INT_TO_FLOAT: +// case ABS: +// case NEG: +// case SQRT: +// return TP_Type(TypeSpec("float")); +// default: +// assert(false); +// } +//} +// TP_Type IR_IntMath2::get_expression_type(const TypeState& input, const LinkedObjectFile& file, DecompilerTypeSystem& dts) { auto arg0_type = arg0->get_expression_type(input, file, dts); auto arg1_type = arg1->get_expression_type(input, file, dts); - if (is_integer_type(arg0_type) && is_integer_type(arg1_type)) { - // case where both arguments are integers. - // in this case we assume we're actually doing math. - switch (kind) { - case ADD: - case SUB: - case AND: - case OR: - case NOR: - case XOR: - // we don't know if we're signed or unsigned. so let's just go with the first type. - return get_int_type(arg0_type); - case MUL_SIGNED: - case DIV_SIGNED: - case RIGHT_SHIFT_ARITH: - case MOD_SIGNED: - case MIN_SIGNED: - case MAX_SIGNED: - // result is going to be signed, regardless of inputs. 
- return TP_Type(TypeSpec("int")); - - case MUL_UNSIGNED: - case RIGHT_SHIFT_LOGIC: - // result is going to be unsigned, regardless of inputs. - return TP_Type(TypeSpec("uint")); - - case LEFT_SHIFT: { - // multiply! + // special cases for integers + switch (kind) { + case LEFT_SHIFT: + // multiply! + { auto as_const = dynamic_cast(arg1.get()); - if (as_const) { - // shift by constant integer. could be accessing the method array. - TP_Type result; - result.kind = TP_Type::PRODUCT; - result.ts = get_int_type(arg0_type).ts; - result.multiplier = (1 << as_const->value); - return result; - } else { - // normal variable shift. - return get_int_type(arg0_type); + if (as_const && is_int_or_uint(dts, arg0_type)) { + assert(as_const->value >= 0); + assert(as_const->value < 64); + return TP_Type::make_from_product((1ull << as_const->value)); } - } - default: break; + } + + case ADD: + if (arg0_type.is_product_with(4) && tc(dts, TypeSpec("type"), arg1_type)) { + // dynamic access into the method array with shift, add, offset-load + // no need to track the type because we don't know the method index anyway. + return TP_Type::make_partial_dyanmic_vtable_access(); + } + break; + + default: + break; + } + + if (arg0_type == arg1_type && is_int_or_uint(dts, arg0_type)) { + // both are the same type and both are int/uint, so we assume that we're doing integer math. + // we strip off any weird things like multiplication or integer constant. + return TP_Type::make_from_typespec(arg0_type.typespec()); + } + + if (is_int_or_uint(dts, arg0_type) && is_int_or_uint(dts, arg1_type)) { + // usually we would want to use arg0's type as the "winning" type. + // but we use arg1's if arg0 is an integer constant + // in either case, strip off weird stuff. 
+ if (arg0_type.is_integer_constant() && !arg1_type.is_integer_constant()) { + return TP_Type::make_from_typespec(arg1_type.typespec()); } + return TP_Type::make_from_typespec(arg0_type.typespec()); } - if (kind == ADD && arg0_type.kind == TP_Type::PRODUCT && arg1_type.is_object_of_type()) { - // access the methods! - return TP_Type::make_partial_method_table_access(); + // special cases for non-integers + if ((arg0_type.typespec() == TypeSpec("object") || arg0_type.typespec() == TypeSpec("pair")) && + arg1_type.is_integer_constant(62)) { + // boxed object tag trick. + return TP_Type::make_from_typespec(TypeSpec("int")); } - auto a1_const = dynamic_cast(arg1.get()); - if (a1_const && kind == ADD && arg0_type.kind == TP_Type::OBJECT_OF_TYPE) { - // access a field. - ReverseDerefInputInfo rd_in; - rd_in.mem_deref = false; - rd_in.input_type = arg0_type.as_typespec(); - rd_in.offset = a1_const->value; - rd_in.load_size = 0; - auto rd = dts.ts.get_reverse_deref_info(rd_in); - - if (rd.success) { - return TP_Type(coerce_to_reg_type(rd.result_type)); - } + // + // if (is_integer_type(arg0_type) && is_integer_type(arg1_type)) { + // // case where both arguments are integers. + // // in this case we assume we're actually doing math. + // switch (kind) { + // case ADD: + // case SUB: + // case AND: + // case OR: + // case NOR: + // case XOR: + // // we don't know if we're signed or unsigned. so let's just go with the first type. + // return get_int_type(arg0_type); + // case MUL_SIGNED: + // case DIV_SIGNED: + // case RIGHT_SHIFT_ARITH: + // case MOD_SIGNED: + // case MIN_SIGNED: + // case MAX_SIGNED: + // // result is going to be signed, regardless of inputs. + // return TP_Type(TypeSpec("int")); + // + // case MUL_UNSIGNED: + // case RIGHT_SHIFT_LOGIC: + // // result is going to be unsigned, regardless of inputs. + // return TP_Type(TypeSpec("uint")); + // + // case LEFT_SHIFT: { + // // multiply! 
+ // auto as_const = dynamic_cast(arg1.get()); + // if (as_const) { + // // shift by constant integer. could be accessing the method array. + // TP_Type result; + // result.kind = TP_Type::PRODUCT; + // result.ts = get_int_type(arg0_type).ts; + // result.multiplier = (1 << as_const->value); + // return result; + // } else { + // // normal variable shift. + // return get_int_type(arg0_type); + // } + // } + // default: + // break; + // } + // } + // + // + // auto a1_const = dynamic_cast(arg1.get()); + // if (a1_const && kind == ADD && arg0_type.kind == TP_Type::OBJECT_OF_TYPE) { + // // access a field. + // ReverseDerefInputInfo rd_in; + // rd_in.mem_deref = false; + // rd_in.input_type = arg0_type.as_typespec(); + // rd_in.offset = a1_const->value; + // rd_in.load_size = 0; + // auto rd = dts.ts.get_reverse_deref_info(rd_in); + // + // if (rd.success) { + // return TP_Type(coerce_to_reg_type(rd.result_type)); + // } + // } + // + // if (kind == ADD && is_integer_type(arg0_type) && arg1_type.kind == TP_Type::OBJECT_OF_TYPE) + // { + // // product + object with multiplier 1 (access array of bytes for example) + // TP_Type result; + // result.kind = TP_Type::OBJ_PLUS_PRODUCT; + // result.ts = arg1_type.as_typespec(); + // result.multiplier = 1; + // return result; + // } + // + if (kind == ADD && arg0_type.is_product() && arg1_type.kind == TP_Type::Kind::TYPESPEC) { + return TP_Type::make_object_plus_product(arg1_type.typespec(), arg0_type.get_multiplier()); } - if (kind == ADD && is_integer_type(arg0_type) && arg1_type.kind == TP_Type::OBJECT_OF_TYPE) { - // product + object with multiplier 1 (access array of bytes for example) - TP_Type result; - result.kind = TP_Type::OBJ_PLUS_PRODUCT; - result.ts = arg1_type.as_typespec(); - result.multiplier = 1; - return result; + // byte access of offset array field trick. + // arg1 holds a structure. + // arg0 is an integer in a register. 
+ if (tc(dts, TypeSpec("structure"), arg1_type) && !dynamic_cast(arg0.get()) && + is_int_or_uint(dts, arg0_type)) { + return TP_Type::make_object_plus_product(arg1_type.typespec(), 1); } - if (kind == ADD && arg0_type.kind == TP_Type::PRODUCT && - arg1_type.kind == TP_Type::OBJECT_OF_TYPE) { - TP_Type result; - result.kind = TP_Type::OBJ_PLUS_PRODUCT; - result.ts = arg1_type.as_typespec(); - result.multiplier = arg0_type.multiplier; - return result; - } - - if ((arg0_type.as_typespec() == TypeSpec("object") || - arg0_type.as_typespec() == TypeSpec("pair")) && - is_integer_type(arg1_type)) { - // boxed object tag trick - return TP_Type(TypeSpec("int")); - } - - if (dts.ts.typecheck(TypeSpec("pointer"), arg0_type.as_typespec(), "", false, false) && - is_integer_type(arg1_type)) { - return arg0_type; - } + // + // if (kind == ADD && + // dts.ts.typecheck(TypeSpec("pointer"), arg0_type.as_typespec(), "", false, false) && + // is_integer_type(arg1_type)) { + // return arg0_type; + // } + // + // if ((kind == ADD || kind == AND) && + // dts.ts.typecheck(TypeSpec("pointer"), arg1_type.as_typespec(), "", false, false) && + // is_integer_type(arg0_type)) { + // return arg1_type; + // } + // + // if (kind == ADD && + // dts.ts.typecheck(TypeSpec("binteger"), arg0_type.as_typespec(), "", false, false) && + // is_integer_type(arg1_type)) { + // return arg0_type; + // } + // + // if (kind == SUB && + // dts.ts.typecheck(TypeSpec("pointer"), arg0_type.as_typespec(), "", false, false) && + // dts.ts.typecheck(TypeSpec("pointer"), arg1_type.as_typespec(), "", false, false)) { + // return TP_Type(TypeSpec("int")); + // } throw std::runtime_error( fmt::format("Can't get_expression_type on this IR_IntMath2: {}, args {} and {}", print(file), @@ -436,30 +563,36 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input, void BranchDelay::type_prop(TypeState& output, const LinkedObjectFile& file, DecompilerTypeSystem& dts) { - (void)dts; + // (void)dts; switch (kind) { case 
DSLLV: { - // I think this is only used in ash, in which case the output should be an int/uint - // welll + // I believe this is only used in ash. We ignore the shift amount's type and just look + // at the input value. If it's a uint/int based type, we just return uint/int (not the type) + // this will kill any weird stuff like product, etc. + // if it's not an integer type, it's currently an error. auto dst = dynamic_cast(destination.get()); assert(dst); auto src = dynamic_cast(source.get()); assert(src); - if (is_plain_type(output.get(src->reg), TypeSpec("uint"))) { - // todo, this won't catch child uint types. I think this doesn't matter though. - output.get(dst->reg) = TP_Type(TypeSpec("uint")); + if (tc(dts, TypeSpec("uint"), output.get(src->reg))) { + output.get(dst->reg) = TP_Type::make_from_typespec(TypeSpec("uint")); + } else if (tc(dts, TypeSpec("int"), output.get(src->reg))) { + output.get(dst->reg) = TP_Type::make_from_typespec(TypeSpec("int")); + } else { + throw std::runtime_error("BranchDelay::type_prop DSLLV for src " + + output.get(src->reg).print()); } - output.get(dst->reg) = TP_Type(TypeSpec("int")); } break; case NEGATE: { auto dst = dynamic_cast(destination.get()); assert(dst); - output.get(dst->reg) = TP_Type(TypeSpec("int")); + // to match the behavior in IntMath1, assume signed when negating. 
+ output.get(dst->reg) = TP_Type::make_from_typespec(TypeSpec("int")); } break; case SET_REG_FALSE: { auto dst = dynamic_cast(destination.get()); assert(dst); - output.get(dst->reg).kind = TP_Type::FALSE; + output.get(dst->reg) = TP_Type::make_false(); } break; case SET_REG_REG: { auto dst = dynamic_cast(destination.get()); @@ -472,19 +605,19 @@ void BranchDelay::type_prop(TypeState& output, case SET_REG_TRUE: { auto dst = dynamic_cast(destination.get()); assert(dst); - output.get(dst->reg) = TP_Type(TypeSpec("symbol")); + output.get(dst->reg) = TP_Type::make_from_typespec(TypeSpec("symbol")); } break; case SET_BINTEGER: { auto dst = dynamic_cast(destination.get()); assert(dst); - output.get(dst->reg) = TP_Type::make_type_object("binteger"); + output.get(dst->reg) = TP_Type::make_type_object(TypeSpec("binteger")); } break; case SET_PAIR: { auto dst = dynamic_cast(destination.get()); assert(dst); - output.get(dst->reg) = TP_Type::make_type_object("pair"); + output.get(dst->reg) = TP_Type::make_type_object(TypeSpec("pair")); } break; case NOP: @@ -510,16 +643,23 @@ TP_Type IR_IntMath1::get_expression_type(const TypeState& input, (void)input; (void)dts; auto arg_type = arg->get_expression_type(input, file, dts); - switch (kind) { - case NEG: - // if we negate a thing, let's just make it a signed integer. - return TP_Type(TypeSpec("int")); - case NOT: - return get_int_type(arg_type); - default: - throw std::runtime_error("IR_IntMath1::get_expression_type case not handled: " + - to_form(file).print()); + if (is_int_or_uint(dts, arg_type)) { + switch (kind) { + case NEG: + // if we negate a thing, let's just make it a signed integer. + return TP_Type::make_from_typespec(TypeSpec("int")); + case ABS: + // if we take the absolute value of a thing, just make it signed. 
+ return TP_Type::make_from_typespec(TypeSpec("int")); + case NOT: + // otherwise, make it int/uint as needed (this works because we check is_int_or_uint + // above) + return TP_Type::make_from_typespec(arg_type.typespec()); + } } + + throw std::runtime_error("IR_IntMath1::get_expression_type case not handled: " + + to_form(file).print()); } TP_Type IR_SymbolValue::get_expression_type(const TypeState& input, @@ -528,24 +668,28 @@ TP_Type IR_SymbolValue::get_expression_type(const TypeState& input, (void)input; (void)file; if (name == "#f") { - TP_Type result; - result.kind = TP_Type::FALSE; - return result; + // if we ever read the false symbol, it should contain the false symbol as its value. + return TP_Type::make_false(); } else if (name == "__START-OF-TABLE__") { - return TP_Type(TypeSpec("uint")); + // another annoying special case. We have a fake symbol called __START-OF-TABLE__ + // which actually means that you get the first address in the symbol table. + // it's not really a linked symbol, but the basic op builder represents it as one. + return TP_Type::make_from_typespec(TypeSpec("pointer")); } + // look up the type of the symbol auto type = dts.symbol_types.find(name); if (type == dts.symbol_types.end()) { throw std::runtime_error("Don't have the type of symbol " + name); } if (type->second == TypeSpec("type")) { - // let's remember what we got this from. - return TP_Type::make_type_object(name); + // if we get a type by symbol, we should remember which type we got it from. 
+ return TP_Type::make_type_object(TypeSpec(name)); } - return TP_Type(type->second); + // otherwise, just return a normal typespec + return TP_Type::make_from_typespec(type->second); } TP_Type IR_Symbol::get_expression_type(const TypeState& input, @@ -555,12 +699,10 @@ TP_Type IR_Symbol::get_expression_type(const TypeState& input, (void)file; (void)dts; if (name == "#f") { - TP_Type result; - result.kind = TP_Type::FALSE; - return result; + return TP_Type::make_false(); } - return TP_Type(TypeSpec("symbol")); + return TP_Type::make_from_typespec(TypeSpec("symbol")); } TP_Type IR_IntegerConstant::get_expression_type(const TypeState& input, @@ -569,7 +711,7 @@ TP_Type IR_IntegerConstant::get_expression_type(const TypeState& input, (void)input; (void)file; (void)dts; - return TP_Type(TypeSpec("int")); + return TP_Type::make_from_integer(value); } TP_Type IR_Compare::get_expression_type(const TypeState& input, @@ -578,7 +720,8 @@ TP_Type IR_Compare::get_expression_type(const TypeState& input, (void)input; (void)file; (void)dts; - return TP_Type(TypeSpec("symbol")); + // really a boolean. + return TP_Type::make_from_typespec(TypeSpec("symbol")); } void IR_Nop_Atomic::propagate_types(const TypeState& input, @@ -594,17 +737,29 @@ void IR_Call_Atomic::propagate_types(const TypeState& input, DecompilerTypeSystem& dts) { (void)file; (void)dts; - // todo clobber + const Reg::Gpr arg_regs[8] = {Reg::A0, Reg::A1, Reg::A2, Reg::A3, + Reg::T0, Reg::T1, Reg::T2, Reg::T3}; + const Reg::Gpr goal_function_clobber_regs[] = {Reg::A0, Reg::A1, Reg::A2, Reg::A3, + Reg::T0, Reg::T1, Reg::T2, Reg::T3, + Reg::T4, Reg::V1, Reg::T9}; end_types = input; auto in_tp = input.get(Register(Reg::GPR, Reg::T9)); - if (in_tp.kind == TP_Type::METHOD_NEW_OF_OBJECT && + if (in_tp.kind == TP_Type::Kind::OBJECT_NEW_METHOD && !dts.type_prop_settings.current_method_type.empty()) { + // calling object new method. 
Set the result to a new object of our type end_types.get(Register(Reg::GPR, Reg::V0)) = - TP_Type(dts.type_prop_settings.current_method_type); + TP_Type::make_from_typespec(dts.type_prop_settings.current_method_type); + // update the call type + call_type = in_tp.get_method_new_object_typespec(); + call_type.get_arg(call_type.arg_count() - 1) = + TypeSpec(dts.type_prop_settings.current_method_type); + call_type_set = true; return; } - auto in_type = in_tp.as_typespec(); + + auto in_type = in_tp.typespec(); + if (in_type.base_type() != "function") { throw std::runtime_error("Called something that wasn't a function: " + in_type.print()); } @@ -613,7 +768,65 @@ void IR_Call_Atomic::propagate_types(const TypeState& input, throw std::runtime_error("Called a function, but we don't know its type"); } - end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type(in_type.last_arg()); + if (in_type.arg_count() == 2 && in_type.get_arg(0) == TypeSpec("_varargs_")) { + // we're calling a varags function, which is format. We can determine the argument count + // by looking at the format string, if we can get it. + auto arg_type = input.get(Register(Reg::GPR, Reg::A1)); + if (arg_type.is_constant_string() || arg_type.is_format_string()) { + int arg_count = -1; + + if (arg_type.is_constant_string()) { + auto& str = arg_type.get_string(); + arg_count = dts.get_format_arg_count(str); + } else { + // is format string. + arg_count = arg_type.get_format_string_arg_count(); + } + + TypeSpec format_call_type("function"); + format_call_type.add_arg(TypeSpec("object")); // destination + format_call_type.add_arg(TypeSpec("string")); // format string + for (int i = 0; i < arg_count; i++) { + format_call_type.add_arg(TypeSpec("object")); + } + format_call_type.add_arg(TypeSpec("object")); + arg_count += 2; // for destination and format string. 
+ call_type = format_call_type; + call_type_set = true; + + end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type::make_from_typespec(in_type.last_arg()); + + // we can also update register usage here. + read_regs.clear(); + read_regs.emplace_back(Reg::GPR, Reg::T9); + for (int i = 0; i < arg_count; i++) { + read_regs.emplace_back(Reg::GPR, arg_regs[i]); + } + + for (auto reg : goal_function_clobber_regs) { + end_types.get(Register(Reg::GPR, reg)) = TP_Type::make_uninitialized(); + } + return; + } else { + throw std::runtime_error("Failed to get string for _varags_ call, got " + arg_type.print()); + } + } + // set the call type! + call_type = in_type; + call_type_set = true; + + end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type::make_from_typespec(in_type.last_arg()); + + // we can also update register usage here. + read_regs.clear(); + read_regs.emplace_back(Reg::GPR, Reg::T9); + + for (uint32_t i = 0; i < in_type.arg_count() - 1; i++) { + read_regs.emplace_back(Reg::GPR, arg_regs[i]); + } + for (auto reg : goal_function_clobber_regs) { + end_types.get(Register(Reg::GPR, reg)) = TP_Type::make_uninitialized(); + } } void IR_Store_Atomic::propagate_types(const TypeState& input, @@ -629,36 +842,52 @@ TP_Type IR_StaticAddress::get_expression_type(const TypeState& input, DecompilerTypeSystem& dts) { (void)input; (void)dts; + // todo - we should map out static data and use a real type system lookup here. auto label = file.labels.at(label_id); if ((label.offset & 0xf) == 4) { // it's a basic! probably. const auto& word = file.words_by_seg.at(label.target_segment).at((label.offset - 4) / 4); if (word.kind == LinkedWord::TYPE_PTR) { - return TP_Type(TypeSpec(word.symbol_name)); + if (word.symbol_name == "string") { + return TP_Type::make_from_string(file.get_goal_string_by_label(label)); + } else { + // otherwise, some other static basic. 
+ return TP_Type::make_from_typespec(TypeSpec(word.symbol_name)); + } } } throw std::runtime_error("IR_StaticAddress couldn't figure out the type: " + label.name); } - -void IR_AsmOp_Atomic::propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { +// +// void IR_AsmOp_Atomic::propagate_types(const TypeState& input, +// const LinkedObjectFile& file, +// DecompilerTypeSystem& dts) { +// (void)file; +// (void)dts; +// auto dst_reg = dynamic_cast(dst.get()); +// end_types = input; +// if (dst_reg) { +// if (name == "daddu") { +// end_types.get(dst_reg->reg) = TP_Type(TypeSpec("uint")); +// } +// } +//} +// +// void IR_Breakpoint_Atomic::propagate_types(const TypeState& input, +// const LinkedObjectFile& file, +// DecompilerTypeSystem& dts) { +// (void)file; +// (void)dts; +// end_types = input; +//} +// +TP_Type IR_EmptyPair::get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)input; (void)file; (void)dts; - auto dst_reg = dynamic_cast(dst.get()); - end_types = input; - if (dst_reg) { - if (name == "daddu") { - end_types.get(dst_reg->reg) = TP_Type(TypeSpec("uint")); - } - } -} - -void IR_Breakpoint_Atomic::propagate_types(const TypeState& input, - const LinkedObjectFile& file, - DecompilerTypeSystem& dts) { - (void)file; - (void)dts; - end_types = input; + // GOAL's empty pair is actually a pair type, containing the empty pair as the car and cdr + return TP_Type::make_from_typespec(TypeSpec("pair")); } \ No newline at end of file diff --git a/decompiler/ObjectFile/LinkedObjectFile.cpp b/decompiler/ObjectFile/LinkedObjectFile.cpp index 8bbcbc7238..d0b25986b8 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.cpp +++ b/decompiler/ObjectFile/LinkedObjectFile.cpp @@ -586,7 +586,7 @@ std::string LinkedObjectFile::print_function_disassembly(Function& func, result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"; result += 
func.prologue.to_string(2) + "\n"; if (!func.warnings.empty()) { - result += "Warnings: " + func.warnings + "\n"; + result += ";;Warnings:\n" + func.warnings + "\n"; } // print each instruction in the function. @@ -784,7 +784,7 @@ std::string LinkedObjectFile::print_type_analysis_debug() { result += "; .function " + func.guessed_name.to_string() + "\n"; result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"; if (!func.warnings.empty()) { - result += ";; WARNING: " + func.warnings + "\n"; + result += ";; WARNING:\n" + func.warnings + "\n"; } for (auto& block : func.basic_blocks) { @@ -799,10 +799,12 @@ std::string LinkedObjectFile::print_type_analysis_debug() { // result += func.basic_ops.at(i)->print_with_reguse(*this); // result += func.basic_ops.at(i)->print(*this); if (func.attempted_type_analysis) { + result += fmt::format("[{:3d}] ", i); result += func.basic_ops.at(i)->print_with_types(*init_types, *this); result += "\n"; init_types = &func.basic_ops.at(i)->end_types; } else { + result += fmt::format("[{:3d}] ", i); result += func.basic_ops.at(i)->print(*this); result += "\n"; } @@ -817,7 +819,7 @@ std::string LinkedObjectFile::print_type_analysis_debug() { /*! * Hacky way to get a GOAL string object */ -std::string LinkedObjectFile::get_goal_string(int seg, int word_idx, bool with_quotes) { +std::string LinkedObjectFile::get_goal_string(int seg, int word_idx, bool with_quotes) const { std::string result; if (with_quotes) { result += "\""; @@ -826,7 +828,7 @@ std::string LinkedObjectFile::get_goal_string(int seg, int word_idx, bool with_q if (word_idx + 1 >= int(words_by_seg[seg].size())) { return "invalid string!\n"; } - LinkedWord& size_word = words_by_seg[seg].at(word_idx + 1); + const LinkedWord& size_word = words_by_seg[seg].at(word_idx + 1); if (size_word.kind != LinkedWord::PLAIN_DATA) { // sometimes an array of string pointer triggers this! 
return "invalid string!\n"; @@ -1036,7 +1038,7 @@ u32 LinkedObjectFile::read_data_word(const Label& label) { return word.data; } -std::string LinkedObjectFile::get_goal_string_by_label(const Label& label) { +std::string LinkedObjectFile::get_goal_string_by_label(const Label& label) const { assert(0 == (label.offset % 4)); return get_goal_string(label.target_segment, (label.offset / 4) - 1, false); } \ No newline at end of file diff --git a/decompiler/ObjectFile/LinkedObjectFile.h b/decompiler/ObjectFile/LinkedObjectFile.h index 7b84bbdf65..d15809a89f 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.h +++ b/decompiler/ObjectFile/LinkedObjectFile.h @@ -70,7 +70,7 @@ class LinkedObjectFile { std::string print_asm_function_disassembly(const std::string& my_name); u32 read_data_word(const Label& label); - std::string get_goal_string_by_label(const Label& label); + std::string get_goal_string_by_label(const Label& label) const; struct Stats { uint32_t total_code_bytes = 0; @@ -138,7 +138,7 @@ class LinkedObjectFile { goos::Object to_form_script_object(int seg, int byte_idx, std::vector& seen); bool is_empty_list(int seg, int byte_idx); bool is_string(int seg, int byte_idx); - std::string get_goal_string(int seg, int word_idx, bool with_quotes = true); + std::string get_goal_string(int seg, int word_idx, bool with_quotes = true) const; std::vector> label_per_seg_by_offset; }; diff --git a/decompiler/ObjectFile/ObjectFileDB.cpp b/decompiler/ObjectFile/ObjectFileDB.cpp index f163ccf289..312405f35c 100644 --- a/decompiler/ObjectFile/ObjectFileDB.cpp +++ b/decompiler/ObjectFile/ObjectFileDB.cpp @@ -560,7 +560,8 @@ void ObjectFileDB::write_object_file_words(const std::string& output_dir, bool d // printf("\n"); } -void ObjectFileDB::write_debug_type_analysis(const std::string& output_dir) { +void ObjectFileDB::write_debug_type_analysis(const std::string& output_dir, + const std::string& suffix) { spdlog::info("- Writing debug type analysis..."); Timer timer; uint32_t 
total_bytes = 0, total_files = 0; @@ -568,7 +569,8 @@ void ObjectFileDB::write_debug_type_analysis(const std::string& output_dir) { for_each_obj([&](ObjectFileData& obj) { if (obj.linked_data.has_any_functions()) { auto file_text = obj.linked_data.print_type_analysis_debug(); - auto file_name = file_util::combine_path(output_dir, obj.to_unique_name() + "_db.asm"); + auto file_name = + file_util::combine_path(output_dir, obj.to_unique_name() + suffix + "_db.asm"); total_bytes += file_text.size(); file_util::write_text_file(file_name, file_text); @@ -588,7 +590,8 @@ void ObjectFileDB::write_debug_type_analysis(const std::string& output_dir) { */ void ObjectFileDB::write_disassembly(const std::string& output_dir, bool disassemble_objects_without_functions, - bool write_json) { + bool write_json, + const std::string& file_suffix) { spdlog::info("- Writing functions..."); Timer timer; uint32_t total_bytes = 0, total_files = 0; @@ -599,7 +602,8 @@ void ObjectFileDB::write_disassembly(const std::string& output_dir, if (obj.linked_data.has_any_functions() || disassemble_objects_without_functions) { auto file_text = obj.linked_data.print_disassembly(); asm_functions += obj.linked_data.print_asm_function_disassembly(obj.to_unique_name()); - auto file_name = file_util::combine_path(output_dir, obj.to_unique_name() + ".asm"); + auto file_name = + file_util::combine_path(output_dir, obj.to_unique_name() + file_suffix + ".asm"); if (get_config().analyze_functions && write_json) { auto json_asm_text = obj.linked_data.to_asm_json(obj.to_unique_name()); @@ -811,7 +815,7 @@ void ObjectFileDB::analyze_functions() { unique_names.insert(name); if (config.asm_functions_by_name.find(name) != config.asm_functions_by_name.end()) { - func.warnings += "flagged as asm by config\n"; + func.warnings += ";; flagged as asm by config\n"; func.suspected_asm = true; } } @@ -824,7 +828,7 @@ void ObjectFileDB::analyze_functions() { if (duplicated_functions.find(name) != duplicated_functions.end()) { 
duplicated_functions[name].insert(data.to_unique_name()); - func.warnings += "this function exists in multiple non-identical object files"; + func.warnings += ";; this function exists in multiple non-identical object files"; } }); /* @@ -917,54 +921,66 @@ void ObjectFileDB::analyze_functions() { // type analysis if (get_config().function_type_prop) { - if (func.guessed_name.kind == FunctionName::FunctionKind::GLOBAL) { - // we're a global named function. This means we're stored in a symbol - auto kv = dts.symbol_types.find(func.guessed_name.function_name); - if (kv != dts.symbol_types.end() && kv->second.arg_count() >= 1) { - if (kv->second.base_type() != "function") { - spdlog::error("Found a function named {} but the symbol has type {}", - func.guessed_name.to_string(), kv->second.print()); - assert(false); - } - // GOOD! - func.type = kv->second; - func.attempted_type_analysis = true; - attempted_type_analysis++; - spdlog::info("Type Analysis on {} {}", func.guessed_name.to_string(), - kv->second.print()); - if (func.run_type_analysis(kv->second, dts, data.linked_data)) { - successful_type_analysis++; - } - } - } else if (func.guessed_name.kind == FunctionName::FunctionKind::METHOD) { - // it's a method. - try { - auto info = - dts.ts.lookup_method(func.guessed_name.type_name, func.guessed_name.method_id); - if (info.type.arg_count() >= 1) { - if (info.type.base_type() != "function") { - spdlog::error("Found a method named {} but the symbol has type {}", - func.guessed_name.to_string(), info.type.print()); + auto hints = get_config().type_hints_by_function_by_idx[func.guessed_name.to_string()]; + if (get_config().no_type_analysis_functions_by_name.find(func.guessed_name.to_string()) == + get_config().no_type_analysis_functions_by_name.end()) { + if (func.guessed_name.kind == FunctionName::FunctionKind::GLOBAL) { + // we're a global named function. 
This means we're stored in a symbol + auto kv = dts.symbol_types.find(func.guessed_name.function_name); + if (kv != dts.symbol_types.end() && kv->second.arg_count() >= 1) { + if (kv->second.base_type() != "function") { + spdlog::error("Found a function named {} but the symbol has type {}", + func.guessed_name.to_string(), kv->second.print()); assert(false); } // GOOD! - func.type = info.type.substitute_for_method_call(func.guessed_name.type_name); + func.type = kv->second; func.attempted_type_analysis = true; attempted_type_analysis++; - spdlog::info("Type Analysis on {} {}", func.guessed_name.to_string(), - func.type.print()); - if (func.run_type_analysis(func.type, dts, data.linked_data)) { + // spdlog::info("Type Analysis on {} {}", func.guessed_name.to_string(), + // kv->second.print()); + if (func.run_type_analysis(kv->second, dts, data.linked_data, hints)) { successful_type_analysis++; } } + } else if (func.guessed_name.kind == FunctionName::FunctionKind::METHOD) { + // it's a method. + try { + auto info = + dts.ts.lookup_method(func.guessed_name.type_name, func.guessed_name.method_id); + if (info.type.arg_count() >= 1) { + if (info.type.base_type() != "function") { + spdlog::error("Found a method named {} but the symbol has type {}", + func.guessed_name.to_string(), info.type.print()); + assert(false); + } + // GOOD! 
+ func.type = info.type.substitute_for_method_call(func.guessed_name.type_name); + func.attempted_type_analysis = true; + attempted_type_analysis++; + // spdlog::info("Type Analysis on {} {}", + // func.guessed_name.to_string(), + // func.type.print()); + if (func.run_type_analysis(func.type, dts, data.linked_data, hints)) { + successful_type_analysis++; + } + } - } catch (std::runtime_error& e) { - // failed to lookup method info + } catch (std::runtime_error& e) { + // failed to lookup method info + } } + + if (!func.attempted_type_analysis) { + func.warnings.append(";; Failed to try type analysis\n"); + } + } else { + func.warnings.append(";; Marked as no type analysis in config\n"); } } } else { asm_funcs++; + func.warnings.append(";; Assembly Function. Analysis passes were not attempted.\n"); } if (func.basic_blocks.size() > 1 && !func.suspected_asm) { @@ -982,6 +998,10 @@ void ObjectFileDB::analyze_functions() { if (!func.guessed_name.empty()) { total_named_functions++; } + + // if (func.guessed_name.to_string() == "reset-and-call") { + // assert(false); + // } }); spdlog::info("Found {} functions ({} with no control flow)", total_functions, @@ -1015,6 +1035,27 @@ void ObjectFileDB::analyze_functions() { // } } +void ObjectFileDB::analyze_expressions() { + spdlog::info("- Analyzing Expressions..."); + Timer timer; + int attempts = 0; + int success = 0; + for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) { + (void)segment_id; + // register usage + func.run_reg_usage(); + attempts++; + if (func.build_expression(data.linked_data)) { + success++; + } else { + func.warnings.append(";; Expression analysis failed.\n"); + } + }); + + spdlog::info(" {}/{} functions passed expression building ({:.2f}%)\n", success, attempts, + 100.f * float(success) / float(attempts)); +} + void ObjectFileDB::dump_raw_objects(const std::string& output_dir) { for_each_obj([&](ObjectFileData& data) { auto dest = output_dir + "/" + 
data.to_unique_name(); diff --git a/decompiler/ObjectFile/ObjectFileDB.h b/decompiler/ObjectFile/ObjectFileDB.h index 31d64e4064..876bcb0029 100644 --- a/decompiler/ObjectFile/ObjectFileDB.h +++ b/decompiler/ObjectFile/ObjectFileDB.h @@ -60,11 +60,13 @@ class ObjectFileDB { void write_object_file_words(const std::string& output_dir, bool dump_v3_only); void write_disassembly(const std::string& output_dir, bool disassemble_objects_without_functions, - bool write_json); + bool write_json, + const std::string& file_suffix = ""); - void write_debug_type_analysis(const std::string& output_dir); + void write_debug_type_analysis(const std::string& output_dir, const std::string& suffix = ""); void analyze_functions(); void process_tpages(); + void analyze_expressions(); std::string process_game_count(); std::string process_game_text(); diff --git a/decompiler/config.cpp b/decompiler/config.cpp index b9c302de5f..d451b80174 100644 --- a/decompiler/config.cpp +++ b/decompiler/config.cpp @@ -34,6 +34,7 @@ void set_config(const std::string& path_to_config_file) { gConfig.dump_objs = cfg.at("dump_objs").get(); gConfig.write_func_json = cfg.at("write_func_json").get(); gConfig.function_type_prop = cfg.at("function_type_prop").get(); + gConfig.analyze_expressions = cfg.at("analyze_expressions").get(); std::vector asm_functions_by_name = cfg.at("asm_functions_by_name").get>(); @@ -47,8 +48,33 @@ void set_config(const std::string& path_to_config_file) { gConfig.pair_functions_by_name.insert(x); } + std::vector no_type_analysis_functions_by_name = + cfg.at("no_type_analysis_functions_by_name").get>(); + for (const auto& x : no_type_analysis_functions_by_name) { + gConfig.no_type_analysis_functions_by_name.insert(x); + } + auto bad_inspect = cfg.at("types_with_bad_inspect_methods").get>(); for (const auto& x : bad_inspect) { gConfig.bad_inspect_types.insert(x); } + + auto type_hints_file_name = cfg.at("type_hints_file").get(); + auto type_hints_txt = 
file_util::read_text_file(file_util::get_file_path({type_hints_file_name})); + auto type_hints_json = nlohmann::json::parse(type_hints_txt, nullptr, true, true); + + for (auto& kv : type_hints_json.items()) { + auto& function_name = kv.key(); + auto& hints = kv.value(); + for (auto& hint : hints) { + auto idx = hint.at(0).get(); + for (size_t i = 1; i < hint.size(); i++) { + auto& assignment = hint.at(i); + TypeHint type_hint; + type_hint.reg = Register(assignment.at(0).get()); + type_hint.type_name = assignment.at(1).get(); + gConfig.type_hints_by_function_by_idx[function_name][idx].push_back(type_hint); + } + } + } } diff --git a/decompiler/config.h b/decompiler/config.h index 209df20fc6..9d7aeab4bd 100644 --- a/decompiler/config.h +++ b/decompiler/config.h @@ -6,6 +6,13 @@ #include #include #include +#include +#include "decompiler/Disasm/Register.h" + +struct TypeHint { + Register reg; + std::string type_name; +}; struct Config { int game_version = -1; @@ -27,8 +34,12 @@ struct Config { bool dump_objs = false; bool write_func_json = false; bool function_type_prop = false; + bool analyze_expressions = false; std::unordered_set asm_functions_by_name; std::unordered_set pair_functions_by_name; + std::unordered_set no_type_analysis_functions_by_name; + std::unordered_map>> + type_hints_by_function_by_idx; // ... 
}; diff --git a/decompiler/config/all-types.gc b/decompiler/config/all-types.gc index 50d2da8928..a83a986d97 100644 --- a/decompiler/config/all-types.gc +++ b/decompiler/config/all-types.gc @@ -398,14 +398,13 @@ ) ;; gkernel-h -;; todo -; (deftype handle (uint64) -; () -; :method-count-assert 9 -; :size-assert #x8 -; :flag-assert #x900000008 -; ;; likely a bitfield type -; ) +(deftype handle (uint64) + ((process (pointer process) :offset 0) + (pid int32 :offset 32) + (u64 uint64 :offset 0) + ) + :flag-assert #x900000008 + ) ;; gkernel-h (deftype state (protect-frame) diff --git a/decompiler/config/decompiler-only-types.gc b/decompiler/config/decompiler-only-types.gc new file mode 100644 index 0000000000..e69de29bb2 diff --git a/decompiler/config/jak1_ntsc_black_label.jsonc b/decompiler/config/jak1_ntsc_black_label.jsonc index c2b46e094f..f40d301b30 100644 --- a/decompiler/config/jak1_ntsc_black_label.jsonc +++ b/decompiler/config/jak1_ntsc_black_label.jsonc @@ -4,17 +4,17 @@ "game_version":1, // the order here matters (not sure that this is true any more...). 
KERNEL and GAME should go first "dgo_names":["CGO/KERNEL.CGO","CGO/GAME.CGO", - "CGO/ENGINE.CGO" - , "CGO/ART.CGO", "DGO/BEA.DGO", "DGO/CIT.DGO", "CGO/COMMON.CGO", "DGO/DAR.DGO", "DGO/DEM.DGO", - "DGO/FIN.DGO", "DGO/INT.DGO", "DGO/JUB.DGO", "DGO/JUN.DGO", "CGO/JUNGLE.CGO", "CGO/L1.CGO", "DGO/FIC.DGO", - "DGO/LAV.DGO", "DGO/MAI.DGO", "CGO/MAINCAVE.CGO", "DGO/MIS.DGO", "DGO/OGR.DGO", "CGO/RACERP.CGO", "DGO/ROB.DGO", "DGO/ROL.DGO", - "DGO/SNO.DGO", "DGO/SUB.DGO", "DGO/SUN.DGO", "CGO/SUNKEN.CGO", "DGO/SWA.DGO", "DGO/TIT.DGO", "DGO/TRA.DGO", "DGO/VI1.DGO", - "DGO/VI2.DGO", "DGO/VI3.DGO", "CGO/VILLAGEP.CGO", "CGO/WATER-AN.CGO" - ], - //"dgo_names":["CGO/KERNEL.CGO"], + "CGO/ENGINE.CGO" + , "CGO/ART.CGO", "DGO/BEA.DGO", "DGO/CIT.DGO", "CGO/COMMON.CGO", "DGO/DAR.DGO", "DGO/DEM.DGO", + "DGO/FIN.DGO", "DGO/INT.DGO", "DGO/JUB.DGO", "DGO/JUN.DGO", "CGO/JUNGLE.CGO", "CGO/L1.CGO", "DGO/FIC.DGO", + "DGO/LAV.DGO", "DGO/MAI.DGO", "CGO/MAINCAVE.CGO", "DGO/MIS.DGO", "DGO/OGR.DGO", "CGO/RACERP.CGO", "DGO/ROB.DGO", "DGO/ROL.DGO", + "DGO/SNO.DGO", "DGO/SUB.DGO", "DGO/SUN.DGO", "CGO/SUNKEN.CGO", "DGO/SWA.DGO", "DGO/TIT.DGO", "DGO/TRA.DGO", "DGO/VI1.DGO", + "DGO/VI2.DGO", "DGO/VI3.DGO", "CGO/VILLAGEP.CGO", "CGO/WATER-AN.CGO" + ], + "dgo_names_":["CGO/KERNEL.CGO"], "object_file_names":["TEXT/0COMMON.TXT", "TEXT/1COMMON.TXT", "TEXT/2COMMON.TXT", "TEXT/3COMMON.TXT", "TEXT/4COMMON.TXT", - "TEXT/5COMMON.TXT", "TEXT/6COMMON.TXT"], + "TEXT/5COMMON.TXT", "TEXT/6COMMON.TXT"], "str_file_names":["STR/BAFCELL.STR", "STR/SWTE4.STR", "STR/SWTE3.STR", "STR/SWTE2.STR", "STR/SWTE1.STR", "STR/SNRBSBFC.STR", "STR/SNRBIPFC.STR", "STR/SNRBICFC.STR", "STR/ORR3.STR", "STR/ORR2.STR", "STR/MICANNON.STR", @@ -50,10 +50,12 @@ "STR/SAISA.STR","STR/SIHISC.STR","STR/MIIORBS.STR","STR/WAINTROD.STR","STR/SAISD2.STR","STR/GRSOPREB.STR", "STR/GRSOBBB.STR","STR/SA3INTRO.STR" ], - //"str_file_names":[], + "str_file_names_":[], + "type_hints_file":"decompiler/config/jak1_ntsc_black_label/type_hints.jsonc", 
"analyze_functions":true, + "analyze_expressions":false, "function_type_prop":false, "write_disassembly":true, "write_hex_near_instructions":false, @@ -83,10 +85,17 @@ "engine", "bsp-header", "joint-anim-matrix", - "part-tracker" + "part-tracker"], + + "no_type_analysis_functions_by_name":[ + "(method 2 vec4s)", // 128-bit bitfield. + "(method 3 vec4s)", // 128-bit bitfield + "reset-and-call", // stack manipulation + "(method 10 cpu-thread)" // loading saved regs off of the stack. ], - "asm_functions_by_name":[ + + "asm_functions_by_name":[ // gcommon "quad-copy!", @@ -482,6 +491,6 @@ "(anon-function 2 ogreboss)" ], - "pair_functions_by_name":["ref", "last", "member", "nmember", "assoc", "assoce", "append!", "delete!", "delete-car!", +"pair_functions_by_name":["ref", "last", "member", "nmember", "assoc", "assoce", "append!", "delete!", "delete-car!", "insert-cons!", "sort", "unload-package", "(method 4 pair)", "nassoc", "nassoce"] } \ No newline at end of file diff --git a/decompiler/config/jak1_ntsc_black_label/type_hints.jsonc b/decompiler/config/jak1_ntsc_black_label/type_hints.jsonc new file mode 100644 index 0000000000..2a00faf862 --- /dev/null +++ b/decompiler/config/jak1_ntsc_black_label/type_hints.jsonc @@ -0,0 +1,39 @@ +{ + "(method 2 handle)":[ + [10, ["a3", "process"]], + [11, ["v1", "int"]], + [15, ["gp", "int"]] + ], + + "(method 3 handle)":[ + [10, ["gp", "int"]] + ], + + "(method 0 cpu-thread)":[ + [13, ["v0", "cpu-thread"]] + ], + + "remove-exit":[ + [0, ["s6", "process"]] + ], + + "(method 0 process)":[ + [12, ["a0", "int"]], + [13, ["v0", "process"]] + ], + + "inspect-process-heap":[ + [4, ["s5", "basic"]], + [17, ["s5", "int"]] + ], + + "return-from-thread-dead":[ + [0, ["s6", "process"]] + ], + + "(method 14 dead-pool)":[ + [23, ["v1", "process"]], // bad visit order with #f? 
+ [28, ["s4", "(pointer process-tree)"]] // bug in real game, see gkernel.gc + ] + +} \ No newline at end of file diff --git a/decompiler/main.cpp b/decompiler/main.cpp index a1376357e8..c332d16140 100644 --- a/decompiler/main.cpp +++ b/decompiler/main.cpp @@ -88,6 +88,12 @@ int main(int argc, char** argv) { db.write_debug_type_analysis(out_folder); } + if (get_config().analyze_expressions) { + db.analyze_expressions(); + db.write_disassembly(out_folder, false, false, "_expr"); + db.write_debug_type_analysis(out_folder, "_expr"); + } + // todo print type summary // printf("%s\n", get_type_info().get_summary().c_str()); diff --git a/decompiler/util/DecompilerTypeSystem.cpp b/decompiler/util/DecompilerTypeSystem.cpp index 35e6a3c534..849fd73769 100644 --- a/decompiler/util/DecompilerTypeSystem.cpp +++ b/decompiler/util/DecompilerTypeSystem.cpp @@ -43,8 +43,7 @@ void for_each_in_list(goos::Object& list, T f) { } // namespace void DecompilerTypeSystem::parse_type_defs(const std::vector& file_path) { - goos::Reader reader; - auto read = reader.read_from_file(file_path); + auto read = m_reader.read_from_file(file_path); auto data = cdr(read); for_each_in_list(data, [&](goos::Object& o) { @@ -81,6 +80,12 @@ void DecompilerTypeSystem::parse_type_defs(const std::vector& file_ }); } +TypeSpec DecompilerTypeSystem::parse_type_spec(const std::string& str) { + auto read = m_reader.read_from_string(str); + auto data = cdr(read); + return parse_typespec(&ts, car(data)); +} + std::string DecompilerTypeSystem::dump_symbol_types() { assert(symbol_add_order.size() == symbols.size()); std::string result; @@ -152,128 +157,126 @@ void DecompilerTypeSystem::add_symbol(const std::string& name, const TypeSpec& t } } -TP_Type DecompilerTypeSystem::tp_lca_no_simplify(const TP_Type& existing, - const TP_Type& add, - bool* changed) { - switch (existing.kind) { - case TP_Type::OBJECT_OF_TYPE: - switch (add.kind) { - case TP_Type::OBJECT_OF_TYPE: { - // two normal types, do LCA as normal. 
- TP_Type result; - result.kind = TP_Type::OBJECT_OF_TYPE; - result.ts = ts.lowest_common_ancestor_reg(existing.ts, add.ts); - *changed = (result.ts != existing.ts); - return result; - } - case TP_Type::TYPE_OBJECT: { - // normal, [type object]. Change type object to less specific "type". - TP_Type result; - result.kind = TP_Type::OBJECT_OF_TYPE; - result.ts = ts.lowest_common_ancestor_reg(existing.ts, ts.make_typespec("type")); - *changed = (result.ts != existing.ts); - return result; - } - case TP_Type::FALSE: - // allow #f anywhere - *changed = false; - return existing; - case TP_Type::NONE: - // allow possibly undefined. - *changed = false; - return existing; - default: - assert(false); +/*! + * Compute the least common ancestor of two TP Types. + * changed is an output flag: each guard clause below stores whether the type it returns differs from existing. + */ +TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add, bool* changed) { + // starting from most vague to most specific + + // simplest case, no difference. + if (existing == add) { + *changed = false; + return existing; + } + + // being sometimes uninitialized should not modify types. + if (add.kind == TP_Type::Kind::UNINITIALIZED) { + *changed = false; + return existing; + } + + // replace anything that's uninitialized sometimes. + if (existing.kind == TP_Type::Kind::UNINITIALIZED) { + *changed = true; // existing != none because of previous check. + return add; + } + + // similar to before, false as null shouldn't modify types. + if (add.kind == TP_Type::Kind::FALSE_AS_NULL) { + *changed = false; + return existing; + } + + // replace any false as nulls. + if (existing.kind == TP_Type::Kind::FALSE_AS_NULL) { + *changed = true; // existing != false because of previous check. + return add; + } + + // different values, but the same kind. 
+ if (existing.kind == add.kind) { + switch (existing.kind) { + case TP_Type::Kind::TYPESPEC: { + auto new_result = TP_Type::make_from_typespec(coerce_to_reg_type(ts.lowest_common_ancestor( + existing.get_objects_typespec(), add.get_objects_typespec()))); + *changed = (new_result != existing); + return new_result; } - break; - case TP_Type::TYPE_OBJECT: - switch (add.kind) { - case TP_Type::OBJECT_OF_TYPE: { - TP_Type result; - result.kind = TP_Type::OBJECT_OF_TYPE; - result.ts = ts.lowest_common_ancestor_reg(ts.make_typespec("type"), add.ts); - *changed = true; // changed type - return result; - } - case TP_Type::TYPE_OBJECT: { - // two type objects. - TP_Type result; - result.kind = TP_Type::TYPE_OBJECT; - result.ts = ts.lowest_common_ancestor_reg(existing.ts, add.ts); - *changed = (result.ts != existing.ts); - return result; - } - case TP_Type::FALSE: - // allow #f anywhere - *changed = false; - return existing; - case TP_Type::NONE: - // allow possibly undefined. - *changed = false; - return existing; - default: - assert(false); - } - break; - case TP_Type::FALSE: - switch (add.kind) { - case TP_Type::OBJECT_OF_TYPE: - *changed = true; - return add; - case TP_Type::TYPE_OBJECT: - *changed = true; - return add; - case TP_Type::FALSE: - *changed = false; - return existing; - case TP_Type::NONE: - *changed = false; - return existing; - default: - assert(false); - } - break; - case TP_Type::NONE: - switch (add.kind) { - case TP_Type::OBJECT_OF_TYPE: - case TP_Type::TYPE_OBJECT: - case TP_Type::FALSE: - case TP_Type::METHOD_NEW_OF_OBJECT: - *changed = true; - return add; - case TP_Type::NONE: - *changed = false; - return existing; - default: - assert(false); - } - break; - case TP_Type::METHOD_NEW_OF_OBJECT: - switch (add.kind) { - case TP_Type::METHOD_NEW_OF_OBJECT: { - if (existing.ts == add.ts) { - *changed = false; - return existing; - } else { - assert(false); - } - } - case TP_Type::NONE: - *changed = false; - return existing; - default: - assert(false); + 
case TP_Type::Kind::TYPE_OF_TYPE_OR_CHILD: { + auto new_result = TP_Type::make_type_object(ts.lowest_common_ancestor( + existing.get_type_objects_typespec(), add.get_type_objects_typespec())); + *changed = (new_result != existing); + return new_result; } - default: - assert(false); + case TP_Type::Kind::PRODUCT_WITH_CONSTANT: + // we know they are different. + *changed = true; + return TP_Type::make_from_typespec(TypeSpec("int")); + case TP_Type::Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT: + *changed = true; + // todo - there might be cases where we need to LCA the base types?? + return TP_Type::make_from_typespec(TypeSpec("object")); + case TP_Type::Kind::OBJECT_NEW_METHOD: + *changed = true; + // this case should never happen I think. + return TP_Type::make_from_typespec(TypeSpec("function")); + case TP_Type::Kind::STRING_CONSTANT: { + auto existing_count = get_format_arg_count(existing.get_string()); + auto added_count = get_format_arg_count(add.get_string()); + *changed = true; + if (added_count == existing_count) { + return TP_Type::make_from_format_string(existing_count); + } else { + return TP_Type::make_from_typespec(TypeSpec("string")); + } + } + case TP_Type::Kind::INTEGER_CONSTANT: + *changed = true; + return TP_Type::make_from_typespec(TypeSpec("int")); + case TP_Type::Kind::FORMAT_STRING: + if (existing.get_format_string_arg_count() == add.get_format_string_arg_count()) { + *changed = false; + return existing; + } else { + *changed = true; + return TP_Type::make_from_typespec(TypeSpec("string")); + } + + case TP_Type::Kind::FALSE_AS_NULL: + case TP_Type::Kind::UNINITIALIZED: + case TP_Type::Kind::DYNAMIC_METHOD_ACCESS: + case TP_Type::Kind::INVALID: + default: + assert(false); + } + } else { + // trying to combine two of different types. 
+ if (existing.can_be_format_string() && add.can_be_format_string()) { + int existing_count = get_format_arg_count(existing); + int add_count = get_format_arg_count(add); + TP_Type result_type; + if (existing_count == add_count) { + result_type = TP_Type::make_from_format_string(existing_count); + } else { + result_type = TP_Type::make_from_typespec(TypeSpec("string")); + } + + // like every other branch: report a change only when the merged type differs from existing + *changed = (result_type != existing); + return result_type; + } + + // otherwise, as an absolute fallback, convert both to TypeSpecs and do TypeSpec LCA + auto new_result = + TP_Type::make_from_typespec(ts.lowest_common_ancestor(existing.typespec(), add.typespec())); + *changed = (new_result != existing); + return new_result; } } -TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add, bool* changed) { - return tp_lca_no_simplify(existing.simplify(), add.simplify(), changed); -} - +/*! + * Find the least common ancestor of an entire typestate. + */ bool DecompilerTypeSystem::tp_lca(TypeState* combined, const TypeState& add) { bool result = false; for (int i = 0; i < 32; i++) { @@ -296,3 +299,26 @@ bool DecompilerTypeSystem::tp_lca(TypeState* combined, const TypeState& add) { return result; } + +int DecompilerTypeSystem::get_format_arg_count(const std::string& str) { + int arg_count = 0; + for (size_t i = 0; i < str.length(); i++) { + if (str.at(i) == '~') { + i++; // also eat the next character. + if (i < str.length() && (str.at(i) == '%' || str.at(i) == 'T')) { + // newline (~%) or tab (~T) don't take an argument. 
+ continue; + } + arg_count++; + } + } + return arg_count; +} + +int DecompilerTypeSystem::get_format_arg_count(const TP_Type& type) { + if (type.is_constant_string()) { + return get_format_arg_count(type.get_string()); + } else { + return type.get_format_string_arg_count(); + } +} \ No newline at end of file diff --git a/decompiler/util/DecompilerTypeSystem.h b/decompiler/util/DecompilerTypeSystem.h index 5a7e784ab0..4428768e8c 100644 --- a/decompiler/util/DecompilerTypeSystem.h +++ b/decompiler/util/DecompilerTypeSystem.h @@ -3,6 +3,7 @@ #include "common/type_system/TypeSystem.h" #include "decompiler/Disasm/Register.h" +#include "common/goos/Reader.h" struct TP_Type; struct TypeState; @@ -30,6 +31,7 @@ class DecompilerTypeSystem { void add_symbol(const std::string& name, const TypeSpec& type_spec); void parse_type_defs(const std::vector& file_path); + TypeSpec parse_type_spec(const std::string& str); void add_type_flags(const std::string& name, u64 flags); void add_type_parent(const std::string& child, const std::string& parent); std::string dump_symbol_types(); @@ -38,6 +40,8 @@ class DecompilerTypeSystem { TP_Type tp_lca(const TP_Type& existing, const TP_Type& add, bool* changed); TP_Type tp_lca_no_simplify(const TP_Type& existing, const TP_Type& add, bool* changed); bool tp_lca(TypeState* combined, const TypeState& add); + int get_format_arg_count(const std::string& str); + int get_format_arg_count(const TP_Type& type); struct { bool allow_pair; std::string current_method_type; @@ -46,6 +50,9 @@ class DecompilerTypeSystem { current_method_type.clear(); } } type_prop_settings; + + private: + goos::Reader m_reader; }; #endif // JAK_DECOMPILERTYPESYSTEM_H diff --git a/decompiler/util/TP_Type.cpp b/decompiler/util/TP_Type.cpp index b4bfc70f39..ae23b102b5 100644 --- a/decompiler/util/TP_Type.cpp +++ b/decompiler/util/TP_Type.cpp @@ -1,46 +1,6 @@ #include "TP_Type.h" #include "third-party/fmt/core.h" -/*! 
- * Takes the weird TP_Types and converts them to one of the main 4. - * This is supposed to be used if the fancy type analysis steps are attempted but fail. - */ -TP_Type TP_Type::simplify() const { - switch (kind) { - case PRODUCT: - return TP_Type(ts); - case METHOD_NEW_OF_OBJECT: - return TP_Type(ts); - case OBJ_PLUS_PRODUCT: - return TP_Type(TypeSpec("none")); - default: - return *this; - } -} - -std::string TP_Type::print() const { - switch (kind) { - case OBJECT_OF_TYPE: - return ts.print(); - case TYPE_OBJECT: - return fmt::format("[{}]", ts.print()); - case FALSE: - return fmt::format("[#f]"); - case NONE: - return fmt::format("[none]"); - case PRODUCT: - return fmt::format("[{} x {}]", ts.print(), multiplier); - case PARTIAL_METHOD_TABLE_ACCESS: - return fmt::format("[[vtable-access]]"); - case METHOD_NEW_OF_OBJECT: - return fmt::format("[(method object new)]"); - case OBJ_PLUS_PRODUCT: - return fmt::format("[{} + int x {}]", ts.print(), multiplier); - default: - assert(false); - } -} - std::string TypeState::print_gpr_masked(u32 mask) const { std::string result; for (int i = 0; i < 32; i++) { @@ -52,4 +12,108 @@ std::string TypeState::print_gpr_masked(u32 mask) const { } } return result; -} \ No newline at end of file +} + +std::string TP_Type::print() const { + switch (kind) { + case Kind::TYPESPEC: + return m_ts.print(); + case Kind::TYPE_OF_TYPE_OR_CHILD: + return fmt::format("", m_ts.print()); + case Kind::FALSE_AS_NULL: + return fmt::format("'#f"); + case Kind::UNINITIALIZED: + return fmt::format(""); + case Kind::PRODUCT_WITH_CONSTANT: + return fmt::format("", m_int); + case Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT: + return fmt::format("<{} + (value x {})>", m_ts.print(), m_int); + case Kind::OBJECT_NEW_METHOD: + return fmt::format("<(object-new) for {}>", m_ts.print()); + case Kind::STRING_CONSTANT: + return fmt::format("", m_str); + case Kind::FORMAT_STRING: + return fmt::format("", m_int); + case Kind::INTEGER_CONSTANT: + return fmt::format("", 
m_int); + case Kind::DYNAMIC_METHOD_ACCESS: + return fmt::format(""); + case Kind::INVALID: + default: + assert(false); + } +} + +bool TP_Type::operator==(const TP_Type& other) const { + if (kind != other.kind) { + return false; + } + + switch (kind) { + case Kind::TYPESPEC: + return m_ts == other.m_ts; + case Kind::TYPE_OF_TYPE_OR_CHILD: + return m_ts == other.m_ts; + case Kind::FALSE_AS_NULL: + return true; + case Kind::UNINITIALIZED: + return true; + case Kind::PRODUCT_WITH_CONSTANT: + return m_int == other.m_int; + case Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT: + return m_ts == other.m_ts && m_int == other.m_int; + case Kind::OBJECT_NEW_METHOD: + return m_ts == other.m_ts; + case Kind::STRING_CONSTANT: + return m_str == other.m_str; + case Kind::INTEGER_CONSTANT: + return m_int == other.m_int; + case Kind::FORMAT_STRING: + return m_int == other.m_int; + case Kind::DYNAMIC_METHOD_ACCESS: + return true; + case Kind::INVALID: + default: + assert(false); + } +} + +bool TP_Type::operator!=(const TP_Type& other) const { + return !((*this) == other); +} + +TypeSpec TP_Type::typespec() const { + switch (kind) { + case Kind::TYPESPEC: + return m_ts; + case Kind::TYPE_OF_TYPE_OR_CHILD: + return TypeSpec("type"); + case Kind::FALSE_AS_NULL: + return TypeSpec("symbol"); + case Kind::UNINITIALIZED: + return TypeSpec("none"); + case Kind::PRODUCT_WITH_CONSTANT: + return TypeSpec("int"); + case Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT: + // this can be part of an array access, so we don't really know the type. + // probably not a good idea to try to do anything with this as a typespec + // so let's be very vague + return TypeSpec("object"); + case Kind::OBJECT_NEW_METHOD: + // similar to previous case, being more vague than we need to be because we don't + // want to assume the return type incorrectly and you shouldn't try to do anything with + // this as a typespec. 
+ return TypeSpec("function"); + case Kind::STRING_CONSTANT: + return TypeSpec("string"); + case Kind::INTEGER_CONSTANT: + return TypeSpec("int"); + case Kind::DYNAMIC_METHOD_ACCESS: + return TypeSpec("object"); + case Kind::FORMAT_STRING: + return TypeSpec("string"); + case Kind::INVALID: + default: + assert(false); + } +} diff --git a/decompiler/util/TP_Type.h b/decompiler/util/TP_Type.h index 05aefe15e0..2a3169f9ef 100644 --- a/decompiler/util/TP_Type.h +++ b/decompiler/util/TP_Type.h @@ -5,62 +5,234 @@ #include "common/common_types.h" #include "decompiler/Disasm/Register.h" -struct TP_Type { - enum Kind { - OBJECT_OF_TYPE, - TYPE_OBJECT, - FALSE, - NONE, - PRODUCT, - OBJ_PLUS_PRODUCT, - PARTIAL_METHOD_TABLE_ACCESS, // type + method_number * 4 - METHOD_NEW_OF_OBJECT - } kind = NONE; - // in the case that we are type_object, just store the type name in a single arg ts. - TypeSpec ts; - int multiplier; +// struct TP_Type { +// enum Kind { +// OBJECT_OF_TYPE, +// TYPE_OBJECT, +// FALSE, +// NONE, +// PRODUCT, +// OBJ_PLUS_PRODUCT, +// PARTIAL_METHOD_TABLE_ACCESS, // type + method_number * 4 +// METHOD_NEW_OF_OBJECT, +// STRING +// } kind = NONE; +// // in the case that we are type_object, just store the type name in a single arg ts. 
+// TypeSpec ts; +// int multiplier; +// std::string str_data; +// +// TP_Type() = default; +// explicit TP_Type(const TypeSpec& _ts) { +// kind = OBJECT_OF_TYPE; +// ts = _ts; +// } +// +// TP_Type simplify() const; +// std::string print() const; +// +// bool is_object_of_type() const { return kind == TYPE_OBJECT || ts == TypeSpec("type"); } +// +// TypeSpec as_typespec() const { +// switch (kind) { +// case OBJECT_OF_TYPE: +// return ts; +// case TYPE_OBJECT: +// return TypeSpec("type"); +// case FALSE: +// return TypeSpec("symbol"); +// case NONE: +// return TypeSpec("none"); +// case PRODUCT: +// case METHOD_NEW_OF_OBJECT: +// return ts; +// default: +// assert(false); +// } +// } +// +// static TP_Type make_partial_method_table_access(TypeSpec ts) { +// TP_Type result; +// result.kind = PARTIAL_METHOD_TABLE_ACCESS; +// result.ts = std::move(ts); +// return result; +// } +// +// static TP_Type make_type_object(const std::string& name) { +// TP_Type result; +// result.kind = TYPE_OBJECT; +// result.ts = TypeSpec(name); +// return result; +// } +// +// static TP_Type make_string_object(const std::string& str) { +// TP_Type result; +// result.kind = STRING; +// result.ts = TypeSpec("string"); +// result.str_data = str; +// return result; +// } +// +// static TP_Type make_none() { +// TP_Type result; +// result.kind = NONE; +// return result; +// } +// +// bool operator==(const TP_Type& other) const; +//}; +/*! + * A TP_Type is a specialized typespec used in the type propagation algorithm. + * It is basically a normal typespec plus some optional information. + * It should always use register types. + */ +class TP_Type { + public: + enum class Kind { + TYPESPEC, // just a normal typespec + TYPE_OF_TYPE_OR_CHILD, // a type object, of the given type or a child type. + FALSE_AS_NULL, // the GOAL "false" object, possibly used as a null. + UNINITIALIZED, // representing data which is uninitialized. 
+ PRODUCT_WITH_CONSTANT, // representing: (val * multiplier) + OBJECT_PLUS_PRODUCT_WITH_CONSTANT, // address: obj + (val * multiplier) + OBJECT_NEW_METHOD, // the method new of object, as used in an (object-new) or similar. + STRING_CONSTANT, // a string that's part of the string pool + FORMAT_STRING, // a string with a given number of format arguments + INTEGER_CONSTANT, // a constant integer. + DYNAMIC_METHOD_ACCESS, // partial access into a method table (presumably type + method_number * 4; TODO confirm) + INVALID + } kind = Kind::UNINITIALIZED; TP_Type() = default; - explicit TP_Type(const TypeSpec& _ts) { - kind = OBJECT_OF_TYPE; - ts = _ts; - } - - TP_Type simplify() const; std::string print() const; + bool operator==(const TP_Type& other) const; + bool operator!=(const TP_Type& other) const; + TypeSpec typespec() const; - bool is_object_of_type() const { return kind == TYPE_OBJECT || ts == TypeSpec("type"); } + bool is_constant_string() const { return kind == Kind::STRING_CONSTANT; } + bool is_integer_constant() const { return kind == Kind::INTEGER_CONSTANT; } + bool is_integer_constant(int64_t value) const { return is_integer_constant() && m_int == value; } + bool is_product() const { return kind == Kind::PRODUCT_WITH_CONSTANT; } + bool is_product_with(int64_t value) const { + return kind == Kind::PRODUCT_WITH_CONSTANT && m_int == value; + } + bool is_format_string() const { return kind == Kind::FORMAT_STRING; } + bool can_be_format_string() const { return is_format_string() || is_constant_string(); } - TypeSpec as_typespec() const { - switch (kind) { - case OBJECT_OF_TYPE: - return ts; - case TYPE_OBJECT: - return TypeSpec("type"); - case FALSE: - return TypeSpec("symbol"); - case NONE: - return TypeSpec("none"); - case PRODUCT: - case METHOD_NEW_OF_OBJECT: - return ts; - default: - assert(false); - } + int get_format_string_arg_count() const { + assert(is_format_string()); + return m_int; } - static TP_Type make_partial_method_table_access() { + const 
std::string& get_string() const { + assert(is_constant_string()); + 
return m_str; + } + + static TP_Type make_from_format_string(int n_args) { TP_Type result; - result.kind = PARTIAL_METHOD_TABLE_ACCESS; + result.kind = Kind::FORMAT_STRING; + result.m_int = n_args; return result; } - static TP_Type make_type_object(const std::string& name) { + static TP_Type make_from_typespec(const TypeSpec& ts) { TP_Type result; - result.kind = TYPE_OBJECT; - result.ts = TypeSpec(name); + result.kind = Kind::TYPESPEC; + result.m_ts = ts; return result; } + + static TP_Type make_from_string(const std::string& str) { + TP_Type result; + result.kind = Kind::STRING_CONSTANT; + result.m_str = str; + return result; + } + + static TP_Type make_type_object(const TypeSpec& type) { + TP_Type result; + result.kind = Kind::TYPE_OF_TYPE_OR_CHILD; + result.m_ts = type; + return result; + } + + static TP_Type make_false() { + TP_Type result; + result.kind = Kind::FALSE_AS_NULL; + return result; + } + + static TP_Type make_uninitialized() { + TP_Type result; + result.kind = Kind::UNINITIALIZED; + return result; + } + + static TP_Type make_from_integer(int64_t value) { + TP_Type result; + result.kind = Kind::INTEGER_CONSTANT; + result.m_int = value; + return result; + } + + static TP_Type make_from_product(int64_t multiplier) { + TP_Type result; + result.kind = Kind::PRODUCT_WITH_CONSTANT; + result.m_int = multiplier; + return result; + } + + static TP_Type make_partial_dyanmic_vtable_access() { + TP_Type result; + result.kind = Kind::DYNAMIC_METHOD_ACCESS; + return result; + } + + static TP_Type make_object_new(const TypeSpec& ts) { + TP_Type result; + result.kind = Kind::OBJECT_NEW_METHOD; + result.m_ts = ts; + return result; + } + + static TP_Type make_object_plus_product(const TypeSpec& ts, int64_t multiplier) { + TP_Type result; + result.kind = Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT; + result.m_ts = ts; + result.m_int = multiplier; + return result; + } + + const TypeSpec& get_objects_typespec() const { + assert(kind == Kind::TYPESPEC); + return m_ts; + } + 
+ const TypeSpec& get_type_objects_typespec() const { + assert(kind == Kind::TYPE_OF_TYPE_OR_CHILD); + return m_ts; + } + + const TypeSpec& get_method_new_object_typespec() const { + assert(kind == Kind::OBJECT_NEW_METHOD); + return m_ts; + } + + const TypeSpec& get_obj_plus_const_mult_typespec() const { + assert(kind == Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT); + return m_ts; + } + + uint64_t get_multiplier() const { + assert(kind == Kind::PRODUCT_WITH_CONSTANT || kind == Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT); + return m_int; + } + + private: + TypeSpec m_ts; + std::string m_str; + int64_t m_int = 0; }; struct TypeState { diff --git a/doc/expressions_todo.txt b/doc/expressions_todo.txt new file mode 100644 index 0000000000..29ba68c461 --- /dev/null +++ b/doc/expressions_todo.txt @@ -0,0 +1,2 @@ +order of floating point argument evaluation is different +GPR -> FPR conversions should not happen silently \ No newline at end of file diff --git a/goal_src/kernel/gkernel-h.gc b/goal_src/kernel/gkernel-h.gc index 8699b31d5d..104838560f 100644 --- a/goal_src/kernel/gkernel-h.gc +++ b/goal_src/kernel/gkernel-h.gc @@ -373,7 +373,7 @@ ) (deftype handle (uint64) - ((process (pointer process) :offset 0) ;; todo, more specific type + ((process (pointer process) :offset 0) (pid int32 :offset 32) (u64 uint64 :offset 0) )