[Decompiler] WIP Expression Stacking (#178)

* wip

* fix the stupid if thing

* update

* fix

* fix some ordering issues
This commit is contained in:
water111 2021-01-02 18:24:45 -05:00 committed by GitHub
parent feead303aa
commit 7af6dce1b2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 693 additions and 140 deletions

View File

@ -259,18 +259,6 @@ goos::Object ShortCircuit::to_form() {
return pretty_print::build_list(forms);
}
/*
goos::Object IfElseVtx::to_form() {
std::vector<goos::Object> forms = {pretty_print::to_symbol("if"), condition->to_form(),
true_case->to_form(), false_case->to_form()};
return pretty_print::build_list(forms);
}
std::string IfElseVtx::to_string() {
return "if_else"; // todo - something nicer
}
*/
std::string GotoEnd::to_string() {
return "goto_end" + std::to_string(uid);
}
@ -1248,12 +1236,13 @@ bool ControlFlowGraph::find_cond_w_else() {
// printf("cwe try %s %s\n", c0->to_string().c_str(), b0->to_string().c_str());
// first condition should have the _option_ to fall through to first body
if (c0->succ_ft != b0) {
if (c0->succ_ft != b0 || c0->end_branch.kind != CfgVtx::DelaySlotKind::NOP) {
return true;
}
// first body MUST unconditionally jump to else
if (b0->succ_ft || b0->end_branch.branch_likely) {
if (b0->succ_ft || b0->end_branch.branch_likely ||
b0->end_branch.kind != CfgVtx::DelaySlotKind::NOP) {
return true;
}
@ -1307,12 +1296,14 @@ bool ControlFlowGraph::find_cond_w_else() {
// we're done!
// check the prev_condition, prev_body blocks properly go to the else/end_block
// prev_condition should jump to else:
if (prev_condition->succ_branch != else_block || prev_condition->end_branch.branch_likely) {
if (prev_condition->succ_branch != else_block || prev_condition->end_branch.branch_likely ||
prev_condition->end_branch.kind != CfgVtx::DelaySlotKind::NOP) {
return true;
}
// prev_body should jump to end
if (prev_body->succ_branch != end_block) {
if (prev_body->succ_branch != end_block ||
prev_body->end_branch.kind != CfgVtx::DelaySlotKind::NOP) {
return true;
}
@ -1335,7 +1326,12 @@ bool ControlFlowGraph::find_cond_w_else() {
}
// how to get to cond
if (prev_condition->succ_branch != c || prev_condition->end_branch.branch_likely) {
if (prev_condition->succ_branch != c || prev_condition->end_branch.branch_likely ||
prev_condition->end_branch.kind != CfgVtx::DelaySlotKind::NOP) {
return true;
}
if (prev_body->end_branch.kind != CfgVtx::DelaySlotKind::NOP) {
return true;
}
@ -1402,14 +1398,17 @@ bool ControlFlowGraph::find_cond_w_else() {
return found;
}
#define printf(format, ...) ;
bool ControlFlowGraph::find_cond_n_else() {
bool found = false;
for_each_top_level_vtx([&](CfgVtx* vtx) {
printf("Try CNE on %s\n", vtx->to_string().c_str());
auto* c0 = vtx; // first condition
auto* b0 = c0->next; // first body
if (!b0) {
// printf("reject 0\n");
printf("reject 0\n");
return true;
}
@ -1417,15 +1416,16 @@ bool ControlFlowGraph::find_cond_n_else() {
// first condition should have the _option_ to fall through to first body
if (c0->succ_ft != b0) {
// printf("reject 1\n");
printf("reject 1\n");
return true;
}
// first body MUST unconditionally jump to end
bool single_case = false;
if (b0->end_branch.has_branch) {
if (b0->succ_ft || b0->end_branch.branch_likely) {
// printf("reject 2A\n");
if (b0->succ_ft || b0->end_branch.branch_likely ||
b0->end_branch.kind != CfgVtx::DelaySlotKind::NOP) {
printf("reject 2A\n");
return true;
}
assert(b0->end_branch.has_branch);
@ -1436,25 +1436,27 @@ bool ControlFlowGraph::find_cond_n_else() {
}
if (b0->pred.size() != 1) {
// printf("reject 3\n");
printf("reject 3\n");
return true;
}
// TODO - check what's in the delay slot!
auto* end_block = single_case ? b0->succ_ft : b0->succ_branch;
if (!end_block) {
// printf("reject 4");
printf("reject 4");
return true;
}
if (!is_found_after(end_block, b0)) {
// printf("reject 5");
printf("reject 5");
return true;
}
std::vector<CondNoElse::Entry> entries = {{c0, b0}};
auto* prev_condition = c0;
auto* prev_body = b0;
printf("add default entry %s %s\n", c0->to_string().c_str(), b0->to_string().c_str());
printf("end_block = %s\n", end_block->to_string().c_str());
// loop to try to grab all the cases up to the else, or reject if the inside is not sufficiently
// compact or if this is not actually a cond with else Note, we are responsible for checking the
@ -1466,62 +1468,81 @@ bool ControlFlowGraph::find_cond_n_else() {
// we're done!
// check the prev_condition, prev_body blocks properly go to the else/end_block
// prev_condition should jump to else:
if (prev_condition->succ_branch != end_block || prev_condition->end_branch.branch_likely) {
// printf("reject 6\n");
// note - a GOAL branching NOT will be recognized as a single case COND with no else.
// but the branch will be a register set true
if (prev_condition->succ_branch != end_block || prev_condition->end_branch.branch_likely ||
(prev_condition->end_branch.kind != CfgVtx::DelaySlotKind::SET_REG_FALSE &&
prev_condition->end_branch.kind != CfgVtx::DelaySlotKind::SET_REG_TRUE)) {
printf("reject 6\n");
return true;
}
// prev_body should jump to end
if (!single_case && prev_body->succ_branch != end_block) {
// printf("reject 7\n");
// if we are a not, we can have only one case. (I think).
if (prev_condition->end_branch.kind == CfgVtx::DelaySlotKind::SET_REG_TRUE &&
entries.size() > 1) {
return true;
}
// prev_body should fall through to end todo - this was wrong?
if (prev_body->succ_ft != end_block) {
printf("reject 7\n");
return true;
}
break;
} else {
// need to check pc->c
// need to check pb->e
// need to check c->b
auto* c = next;
auto* b = c->next;
printf("add next entry %s %s\n", c->to_string().c_str(), b->to_string().c_str());
if (!c || !b) {
// printf("reject 8\n");
printf("reject 8\n");
return true;
};
// attempt to add another
// printf(" e %s %s\n", c->to_string().c_str(), b->to_string().c_str());
if (c->pred.size() != 1) {
// printf("reject 9\n");
printf("reject 9\n");
return true;
}
if (b->pred.size() != 1) {
// printf("reject 10\n");
printf("reject 10 body %s\n", b->to_string().c_str());
return true;
}
// how to get to cond
if (prev_condition->succ_branch != c || prev_condition->end_branch.branch_likely) {
// printf("reject 11\n");
// how to get to cond (pc->c)
if (prev_condition->succ_branch != c || prev_condition->end_branch.branch_likely ||
prev_condition->end_branch.kind != CfgVtx::DelaySlotKind::SET_REG_FALSE) {
printf("reject 11\n");
return true;
}
// (c->b)
if (c->succ_ft != b) {
// printf("reject 12\n");
printf("reject 12\n");
return true; // condition should have the option to fall through if matched
}
// TODO - check what's in the delay slot!
if (c->end_branch.branch_likely) {
// printf("reject 13\n");
if (c->end_branch.branch_likely ||
c->end_branch.kind != CfgVtx::DelaySlotKind::SET_REG_FALSE) {
printf("reject 13\n");
return true; // otherwise should go to next with a non-likely branch
}
if (b->succ_ft || b->end_branch.branch_likely) {
// printf("reject 14\n");
if (prev_body->succ_ft || prev_body->end_branch.branch_likely ||
prev_body->end_branch.kind != CfgVtx::DelaySlotKind::NOP) {
printf("reject 14 on b %s %d %d %d\n", prev_body->to_string().c_str(),
!!prev_body->succ_ft, prev_body->end_branch.branch_likely,
prev_body->end_branch.kind != CfgVtx::DelaySlotKind::NOP);
return true; // body should go straight to else
}
if (b->succ_branch != end_block) {
// printf("reject 14\n");
if (prev_body->succ_branch != end_block) {
printf("reject 15\n");
return true;
}
@ -1531,6 +1552,24 @@ bool ControlFlowGraph::find_cond_n_else() {
}
}
// let's try to detect if this is an incomplete one.
if (c0->prev) {
if (c0->prev->succ_ft == nullptr && c0->prev->succ_branch == end_block &&
c0->prev->end_branch.kind == CfgVtx::DelaySlotKind::NOP &&
!c0->prev->end_branch.branch_likely) {
// the previous body looks suspicious.
for (auto pred : c0->pred) {
// also check that we have the body skip to avoid false positives when the entire body of
// a while loop is wrapped in a CNE with a single case.
if (pred->succ_branch == c0 &&
pred->end_branch.kind == CfgVtx::DelaySlotKind::SET_REG_FALSE) {
printf("Suspisious reject\n");
return true;
}
}
}
}
// now we need to add it
// printf("got cne\n");
auto new_cwe = alloc<CondNoElse>();
@ -1576,6 +1615,7 @@ bool ControlFlowGraph::find_cond_n_else() {
return found;
}
#undef printf
bool ControlFlowGraph::find_short_circuits() {
bool found = false;
@ -1625,7 +1665,7 @@ bool ControlFlowGraph::find_short_circuits() {
// check fallthrough to next
if (!next->succ_ft) {
// printf("reject 5\n");
return false;
return true;
}
assert(next->succ_ft == next->next); // bonus check
@ -1741,6 +1781,19 @@ void ControlFlowGraph::flag_early_exit(const std::vector<BasicBlock>& blocks) {
}
}
/*!
 * Classify an instruction by what it does when it sits in a branch delay slot.
 *  - a nop                                   -> NOP
 *  - an OR with sources s7 and r0            -> SET_REG_FALSE
 *  - a DADDIU with source s7 and immediate 8 -> SET_REG_TRUE
 *  - anything else                           -> OTHER
 * The destination register is passed as {} to the matchers (presumably "match any" - confirm
 * against the matcher helpers).
 */
CfgVtx::DelaySlotKind get_delay_slot(const Instruction& i) {
  using Kind = CfgVtx::DelaySlotKind;

  if (is_nop(i)) {
    return Kind::NOP;
  }

  if (is_gpr_3(i, InstructionKind::OR, {}, Register(Reg::GPR, Reg::S7),
               Register(Reg::GPR, Reg::R0))) {
    return Kind::SET_REG_FALSE;
  }

  if (is_gpr_2_imm_int(i, InstructionKind::DADDIU, {}, Register(Reg::GPR, Reg::S7), 8)) {
    return Kind::SET_REG_TRUE;
  }

  return Kind::OTHER;
}
/*!
* Build and resolve a Control Flow Graph as much as possible.
*/
@ -1774,10 +1827,12 @@ std::shared_ptr<ControlFlowGraph> build_cfg(const LinkedObjectFile& file, int se
int idx = b.end_word - 2;
assert(idx >= b.start_word);
auto& branch_candidate = func.instructions.at(idx);
auto& delay_slot_candidate = func.instructions.at(idx + 1);
if (is_branch(branch_candidate, {})) {
blocks.at(i)->end_branch.has_branch = true;
blocks.at(i)->end_branch.branch_likely = is_branch(branch_candidate, true);
blocks.at(i)->end_branch.kind = get_delay_slot(delay_slot_candidate);
bool branch_always = is_always_branch(branch_candidate);
// need to find block target
@ -1827,15 +1882,17 @@ std::shared_ptr<ControlFlowGraph> build_cfg(const LinkedObjectFile& file, int se
while (changed) {
changed = false;
// note - we should prioritize finding short-circuiting expressions.
// printf("%s\n", cfg->to_dot().c_str());
// printf("%s\n", cfg->to_form()->toStringPretty().c_str());
// printf("%s\n", cfg->to_dot().c_str());
// printf("%s\n", cfg->to_form().print().c_str());
// todo - should we lower the priority of the conds?
changed = changed || cfg->find_cond_n_else();
changed = changed || cfg->find_cond_w_else();
changed = changed || cfg->find_while_loop_top_level();
changed = changed || cfg->find_seq_top_level();
changed = changed || cfg->find_short_circuits();
changed = changed || cfg->find_cond_n_else();
if (!changed) {
changed = changed || cfg->find_goto_end();

View File

@ -76,10 +76,13 @@ class CfgVtx {
std::vector<CfgVtx*> pred; // all vertices which have us as succ_branch or succ_ft
int uid = -1;
// Classification of the instruction found in the delay slot of the branch ending a block.
// NO_BRANCH is the default value of end_branch.kind; OTHER covers any delay slot instruction
// that is not recognized as one of the named kinds.
enum class DelaySlotKind { NO_BRANCH, SET_REG_FALSE, SET_REG_TRUE, NOP, OTHER };
// Summary of the branch (if any) at the end of this block.
struct {
bool has_branch = false; // does the block end in a branch (any kind)?
bool branch_likely = false; // does the block end in a likely branch?
bool branch_always = false; // does the branch always get taken?
// what the branch's delay slot instruction does; stays NO_BRANCH unless a branch is recorded.
DelaySlotKind kind = DelaySlotKind::NO_BRANCH;
} end_branch;
// each child class of CfgVtx will define its own children.

View File

@ -9,6 +9,7 @@ bool expressionize_begin(IR_Begin* begin, LinkedObjectFile& file) {
for (auto& op : begin->forms) {
op->expression_stack(stack, file);
}
// printf("%s\n", stack.print(file).c_str());
begin->forms = stack.get_result();
return true;
}
@ -16,6 +17,7 @@ bool expressionize_begin(IR_Begin* begin, LinkedObjectFile& file) {
bool Function::build_expression(LinkedObjectFile& file) {
if (!ir) {
printf("build_expression on %s failed due to no IR.\n", guessed_name.to_string().c_str());
return false;
}
@ -41,9 +43,11 @@ bool Function::build_expression(LinkedObjectFile& file) {
// turn each begin into an expression
for (auto b : all_begins) {
// printf("BEFORE:\n%s\n", b->print(file).c_str());
if (!expressionize_begin(b, file)) {
return false;
}
// printf("AFTER:\n%s\n", b->print(file).c_str());
}
} catch (std::exception& e) {
printf("build_expression failed on %s due to %s\n", guessed_name.to_string().c_str(), e.what());

View File

@ -2,7 +2,9 @@
#include "ExpressionStack.h"
std::string ExpressionStack::StackEntry::print(LinkedObjectFile& file) {
return fmt::format("d: {} {} <- {}", display, destination.to_charp(), source->print(file));
return fmt::format("d: {} s: {} | {} <- {}", display, sequence_point,
destination.has_value() ? destination.value().to_charp() : "N/A",
source->print(file));
}
std::string ExpressionStack::print(LinkedObjectFile& file) {
@ -14,9 +16,10 @@ std::string ExpressionStack::print(LinkedObjectFile& file) {
return result;
}
void ExpressionStack::set(Register reg, std::shared_ptr<IR> value) {
void ExpressionStack::set(Register reg, std::shared_ptr<IR> value, bool sequence_point) {
StackEntry entry;
entry.display = true; // by default, we should display everything!
entry.sequence_point = sequence_point;
entry.destination = reg;
entry.source = std::move(value);
m_stack.push_back(entry);
@ -32,19 +35,42 @@ bool ExpressionStack::is_single_expression() {
return count == 1;
}
/*!
 * Push an expression that does not set a register onto the stack.
 * The new entry is displayed, has no destination (std::nullopt), and records whether it is a
 * sequence point.
 * @param value          the expression to store as the entry's source.
 * @param sequence_point whether this entry is a sequence point.
 */
void ExpressionStack::add_no_set(std::shared_ptr<IR> value, bool sequence_point) {
  StackEntry entry;
  entry.display = true;
  entry.destination = std::nullopt;
  // value is a by-value sink parameter: move it instead of copying the shared_ptr, as set() does.
  entry.source = std::move(value);
  entry.sequence_point = sequence_point;
  m_stack.push_back(std::move(entry));
}
/*!
* "Remove" an entry from the stack. Cannot cross a sequence point.
* Internally, the entry is still stored. It is just flagged with display=false.
*/
std::shared_ptr<IR> ExpressionStack::get(Register reg) {
// see if the stack top is this register...
if (!display_stack_empty()) {
auto& top = get_display_stack_top();
if (top.destination == reg) {
// yep. We can compact!
top.display = false;
return top.source;
for (size_t i = m_stack.size(); i-- > 0;) {
auto& entry = m_stack.at(i);
if (entry.display) {
if (entry.destination == reg) {
entry.display = false;
return entry.source;
} else {
// we didn't match
if (entry.sequence_point) {
// and it's a sequence point! can't look any more back than this.
return std::make_shared<IR_Register>(reg, -1);
}
}
}
}
return std::make_shared<IR_Register>(reg, -1);
}
/*!
* Convert the stack into a sequence of compacted expressions.
* This is final result of the expression compaction algorithm.
*/
std::vector<std::shared_ptr<IR>> ExpressionStack::get_result() {
std::vector<std::shared_ptr<IR>> result;
@ -52,9 +78,13 @@ std::vector<std::shared_ptr<IR>> ExpressionStack::get_result() {
if (!e.display) {
continue;
}
auto dst_reg = std::make_shared<IR_Register>(e.destination, -1);
auto op = std::make_shared<IR_Set>(IR_Set::EXPR, dst_reg, e.source);
result.push_back(op);
if (e.destination.has_value()) {
auto dst_reg = std::make_shared<IR_Register>(e.destination.value(), -1);
auto op = std::make_shared<IR_Set>(IR_Set::EXPR, dst_reg, e.source);
result.push_back(op);
} else {
result.push_back(e.source);
}
}
return result;

View File

@ -1,14 +1,20 @@
#pragma once
#include <vector>
#include <optional>
#include "decompiler/IR/IR.h"
#include "decompiler/Disasm/Register.h"
#include "decompiler/util/TP_Type.h"
/*!
* An ExpressionStack is used to track partial expressions when rebuilding the tree structure of
* GOAL code. Linear sequences of operations are added onto the expression stack.
*/
class ExpressionStack {
public:
ExpressionStack() = default;
void set(Register reg, std::shared_ptr<IR> value);
void set(Register reg, std::shared_ptr<IR> value, bool sequence_point);
void add_no_set(std::shared_ptr<IR> value, bool sequence_point);
std::shared_ptr<IR> get(Register reg);
bool is_single_expression();
std::string print(LinkedObjectFile& file);
@ -16,9 +22,10 @@ class ExpressionStack {
private:
struct StackEntry {
bool display = true; // should this appear in the output?
Register destination; // what register we are setting
std::shared_ptr<IR> source; // the value we are setting the register to.
bool display = true; // should this appear in the output?
std::optional<Register> destination; // what register we are setting (or nullopt if no dest.)
std::shared_ptr<IR> source; // the value we are setting the register to.
bool sequence_point = false;
// TP_Type type;
std::string print(LinkedObjectFile& file);
};

View File

@ -160,6 +160,14 @@ void Function::run_reg_usage() {
}
}
}
for (auto reg : op->write_regs) {
if (!block.op_has_reg_live_out(i, reg)) {
// we wrote it, but it is immediately dead. this is nice to know for things like
// "is this if/and/or expression used as a value?"
op->written_and_unused.insert(reg);
}
}
}
}
}

View File

@ -1515,9 +1515,10 @@ std::shared_ptr<IR_Atomic> try_daddiu(Instruction& i0, Instruction& i1, int idx)
assert(i0.get_src(1).get_imm() == 8);
assert(i1.get_dst(0).get_reg() == dst_reg);
assert(i1.get_src(0).get_reg() == make_gpr(Reg::S7));
auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(Condition(
Condition::ZERO, make_reg(src_reg, idx), nullptr, nullptr)));
auto op = make_set_atomic(
IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(
Condition(Condition::ZERO, make_reg(src_reg, idx), nullptr, nullptr), nullptr));
op->write_regs.push_back(dst_reg);
op->read_regs.push_back(src_reg);
op->reg_info_set = true;
@ -1530,9 +1531,10 @@ std::shared_ptr<IR_Atomic> try_daddiu(Instruction& i0, Instruction& i1, int idx)
assert(i0.get_src(1).get_imm() == 8);
assert(i1.get_dst(0).get_reg() == dst_reg);
assert(i1.get_src(0).get_reg() == make_gpr(Reg::S7));
auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(Condition(
Condition::NONZERO, make_reg(src_reg, idx), nullptr, nullptr)));
auto op = make_set_atomic(
IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(
Condition(Condition::NONZERO, make_reg(src_reg, idx), nullptr, nullptr), nullptr));
op->write_regs.push_back(dst_reg);
op->read_regs.push_back(src_reg);
op->reg_info_set = true;
@ -1646,10 +1648,11 @@ std::shared_ptr<IR_Atomic> try_dsubu(Instruction& i0, Instruction& i1, Instructi
assert(i2.get_dst(0).get_reg() == dst_reg);
assert(i2.get_src(0).get_reg() == make_gpr(Reg::S7));
assert(i2.get_src(1).get_reg() == clobber_reg);
auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(
Condition(Condition::EQUAL, make_reg(src0_reg, idx),
make_reg(src1_reg, idx), make_reg(clobber_reg, idx))));
auto op = make_set_atomic(
IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(Condition(Condition::EQUAL, make_reg(src0_reg, idx),
make_reg(src1_reg, idx), make_reg(clobber_reg, idx)),
nullptr));
op->update_reginfo_self<IR_Compare>(1, 2, 1);
return op;
} else if (i0.kind == InstructionKind::DSUBU && i1.kind == InstructionKind::DADDIU &&
@ -1666,10 +1669,11 @@ std::shared_ptr<IR_Atomic> try_dsubu(Instruction& i0, Instruction& i1, Instructi
if (i2.get_src(1).get_reg() != clobber_reg) {
return nullptr; // TODO!
}
auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(
Condition(Condition::NOT_EQUAL, make_reg(src0_reg, idx),
make_reg(src1_reg, idx), make_reg(clobber_reg, idx))));
auto op = make_set_atomic(
IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(Condition(Condition::NOT_EQUAL, make_reg(src0_reg, idx),
make_reg(src1_reg, idx), make_reg(clobber_reg, idx)),
nullptr));
op->update_reginfo_self<IR_Compare>(1, 2, 1);
return op;
}
@ -1706,14 +1710,16 @@ std::shared_ptr<IR_Atomic> try_slt(Instruction& i0, Instruction& i1, Instruction
auto op = make_set_atomic(
IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(Condition(Condition::LESS_THAN_ZERO, make_reg(src0_reg, idx),
nullptr, make_reg(clobber_reg, idx))));
nullptr, make_reg(clobber_reg, idx)),
nullptr));
op->update_reginfo_self<IR_Compare>(1, 1, 1);
return op;
} else {
auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(Condition(
Condition::LESS_THAN_SIGNED, make_reg(src0_reg, idx),
make_reg(src1_reg, idx), make_reg(clobber_reg, idx))));
std::make_shared<IR_Compare>(
Condition(Condition::LESS_THAN_SIGNED, make_reg(src0_reg, idx),
make_reg(src1_reg, idx), make_reg(clobber_reg, idx)),
nullptr));
op->update_reginfo_self<IR_Compare>(1, 2, 1);
return op;
}
@ -1743,12 +1749,23 @@ std::shared_ptr<IR_Atomic> try_slt(Instruction& i0, Instruction& i1, Instruction
if (i2.get_src(1).get_reg() != clobber_reg) {
return nullptr; // TODO!
}
auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(
Condition(Condition::GEQ_SIGNED, make_reg(src0_reg, idx),
make_reg(src1_reg, idx), make_reg(clobber_reg, idx))));
op->update_reginfo_self<IR_Compare>(1, 2, 1);
return op;
if (src1_reg == make_gpr(Reg::R0)) {
auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(
Condition(Condition::GEQ_ZERO_SIGNED, make_reg(src0_reg, idx),
nullptr, make_reg(clobber_reg, idx)),
nullptr));
op->update_reginfo_self<IR_Compare>(1, 1, 1);
return op;
} else {
auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(
Condition(Condition::GEQ_SIGNED, make_reg(src0_reg, idx),
make_reg(src1_reg, idx), make_reg(clobber_reg, idx)),
nullptr));
op->update_reginfo_self<IR_Compare>(1, 2, 1);
return op;
}
}
return nullptr;
}
@ -1781,7 +1798,8 @@ std::shared_ptr<IR_Atomic> try_slti(Instruction& i0, Instruction& i1, Instructio
auto op = make_set_atomic(
IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(Condition(Condition::LESS_THAN_SIGNED, make_reg(src0_reg, idx),
src1, make_reg(clobber_reg, idx))));
src1, make_reg(clobber_reg, idx)),
nullptr));
op->update_reginfo_self<IR_Compare>(1, 1, 1);
return op;
} else if (i0.kind == InstructionKind::SLTI && i1.kind == InstructionKind::BEQ) {
@ -1809,7 +1827,8 @@ std::shared_ptr<IR_Atomic> try_slti(Instruction& i0, Instruction& i1, Instructio
auto op = make_set_atomic(
IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(Condition(Condition::GEQ_SIGNED, make_reg(src0_reg, idx), src1,
make_reg(clobber_reg, idx))));
make_reg(clobber_reg, idx)),
nullptr));
op->update_reginfo_self<IR_Compare>(1, 1, 1);
return op;
}
@ -1842,9 +1861,10 @@ std::shared_ptr<IR_Atomic> try_sltiu(Instruction& i0, Instruction& i1, Instructi
return nullptr; // TODO!
}
auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(Condition(Condition::LESS_THAN_UNSIGNED,
make_reg(src0_reg, idx), src1,
make_reg(clobber_reg, idx))));
std::make_shared<IR_Compare>(
Condition(Condition::LESS_THAN_UNSIGNED, make_reg(src0_reg, idx),
src1, make_reg(clobber_reg, idx)),
nullptr));
op->update_reginfo_self<IR_Compare>(1, 1, 1);
return op;
} else if (i0.kind == InstructionKind::SLTIU && i1.kind == InstructionKind::BEQ) {
@ -1873,7 +1893,8 @@ std::shared_ptr<IR_Atomic> try_sltiu(Instruction& i0, Instruction& i1, Instructi
auto op = make_set_atomic(
IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(Condition(Condition::GEQ_UNSIGNED, make_reg(src0_reg, idx),
src1, make_reg(clobber_reg, idx))));
src1, make_reg(clobber_reg, idx)),
nullptr));
op->update_reginfo_self<IR_Compare>(1, 1, 1);
return op;
}
@ -1966,7 +1987,8 @@ std::shared_ptr<IR_Atomic> try_sltu(Instruction& i0, Instruction& i1, Instructio
auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(
Condition(Condition::LESS_THAN_UNSIGNED, make_reg(src0_reg, idx),
make_reg(src1_reg, idx), make_reg(clobber_reg, idx))));
make_reg(src1_reg, idx), make_reg(clobber_reg, idx)),
nullptr));
op->update_reginfo_self<IR_Compare>(1, 2, 1);
return op;
} else if (i0.kind == InstructionKind::SLTU && i1.kind == InstructionKind::BEQ) {
@ -1994,10 +2016,11 @@ std::shared_ptr<IR_Atomic> try_sltu(Instruction& i0, Instruction& i1, Instructio
if (i2.get_src(1).get_reg() != clobber_reg) {
return nullptr; // TODO!
}
auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(
Condition(Condition::GEQ_UNSIGNED, make_reg(src0_reg, idx),
make_reg(src1_reg, idx), make_reg(clobber_reg, idx))));
auto op = make_set_atomic(
IR_Set_Atomic::REG_64, make_reg(dst_reg, idx),
std::make_shared<IR_Compare>(Condition(Condition::GEQ_UNSIGNED, make_reg(src0_reg, idx),
make_reg(src1_reg, idx), make_reg(clobber_reg, idx)),
nullptr));
op->update_reginfo_self<IR_Compare>(1, 2, 1);
return op;
}

View File

@ -76,14 +76,14 @@ std::pair<IR_Branch*, std::vector<std::shared_ptr<IR>>*> get_condition_branch_as
* Given an IR, find a branch IR at the end, and also the location of it so it can be patched.
* Returns nullptr as the first item in the pair if it didn't work.
*/
std::pair<IR_Branch*, std::shared_ptr<IR>*> get_condition_branch(std::shared_ptr<IR>* in) {
IR_Branch* condition_branch = dynamic_cast<IR_Branch*>(in->get());
std::pair<IR_Branch_Atomic*, std::shared_ptr<IR>*> get_condition_branch(std::shared_ptr<IR>* in) {
IR_Branch_Atomic* condition_branch = dynamic_cast<IR_Branch_Atomic*>(in->get());
std::shared_ptr<IR>* condition_branch_location = in;
if (!condition_branch) {
// not 100% sure this will always work
auto as_seq = dynamic_cast<IR_Begin*>(in->get());
if (as_seq) {
condition_branch = dynamic_cast<IR_Branch*>(as_seq->forms.back().get());
condition_branch = dynamic_cast<IR_Branch_Atomic*>(as_seq->forms.back().get());
condition_branch_location = &as_seq->forms.back();
}
}
@ -121,7 +121,8 @@ void clean_up_cond_with_else(std::shared_ptr<IR>* ir, LinkedObjectFile& file) {
assert(jump_to_next.first);
assert(jump_to_next.first->branch_delay.kind == BranchDelay::NOP);
// patch the jump to next with a condition.
auto replacement = std::make_shared<IR_Compare>(jump_to_next.first->condition);
auto replacement =
std::make_shared<IR_Compare>(jump_to_next.first->condition, jump_to_next.first);
replacement->condition.invert();
*(jump_to_next.second) = replacement;
@ -153,7 +154,9 @@ void clean_up_until_loop(IR_UntilLoop* ir) {
auto condition_branch = get_condition_branch(&ir->condition);
assert(condition_branch.first);
assert(condition_branch.first->branch_delay.kind == BranchDelay::NOP);
auto replacement = std::make_shared<IR_Compare>(condition_branch.first->condition);
auto replacement =
std::make_shared<IR_Compare>(condition_branch.first->condition, condition_branch.first);
replacement->condition.invert();
*(condition_branch.second) = replacement;
}
@ -277,16 +280,31 @@ bool try_clean_up_sc_as_and(std::shared_ptr<IR_ShortCircuit>& ir, LinkedObjectFi
ir->kind = IR_ShortCircuit::AND;
ir->final_result = ir_dest;
auto* dest_reg = dynamic_cast<IR_Register*>(ir_dest.get());
assert(dest_reg);
bool live_out_result = false;
// now get rid of the branches
for (int i = 0; i < int(ir->entries.size()) - 1; i++) {
auto branch = get_condition_branch(&ir->entries.at(i).condition);
assert(branch.first);
auto replacement = std::make_shared<IR_Compare>(branch.first->condition);
if (i == 0) {
live_out_result = (branch.first->written_and_unused.find(dest_reg->reg) ==
branch.first->written_and_unused.end());
} else {
bool this_live_out = (branch.first->written_and_unused.find(dest_reg->reg) ==
branch.first->written_and_unused.end());
assert(live_out_result == this_live_out);
}
auto replacement = std::make_shared<IR_Compare>(branch.first->condition, branch.first);
replacement->condition.invert();
*(branch.second) = replacement;
}
ir->used_as_value = live_out_result;
return true;
}
@ -319,14 +337,27 @@ bool try_clean_up_sc_as_or(std::shared_ptr<IR_ShortCircuit>& ir, LinkedObjectFil
ir->kind = IR_ShortCircuit::OR;
ir->final_result = ir_dest;
auto* dest_reg = dynamic_cast<IR_Register*>(ir_dest.get());
assert(dest_reg);
bool live_out_result = false;
for (int i = 0; i < int(ir->entries.size()) - 1; i++) {
auto branch = get_condition_branch(&ir->entries.at(i).condition);
assert(branch.first);
auto replacement = std::make_shared<IR_Compare>(branch.first->condition);
if (i == 0) {
live_out_result = (branch.first->written_and_unused.find(dest_reg->reg) ==
branch.first->written_and_unused.end());
} else {
bool this_live_out = (branch.first->written_and_unused.find(dest_reg->reg) ==
branch.first->written_and_unused.end());
assert(live_out_result == this_live_out);
}
auto replacement = std::make_shared<IR_Compare>(branch.first->condition, branch.first);
*(branch.second) = replacement;
}
ir->used_as_value = live_out_result;
return true;
}
@ -429,7 +460,8 @@ void convert_cond_no_else_to_compare(std::shared_ptr<IR>* ir) {
auto condition_as_single = dynamic_cast<IR_Branch*>(cne->entries.front().condition.get());
if (condition_as_single) {
auto replacement = std::make_shared<IR_Set>(
IR_Set::REG_64, dst, std::make_shared<IR_Compare>(condition.first->condition));
IR_Set::REG_64, dst,
std::make_shared<IR_Compare>(condition.first->condition, condition.first));
*ir = replacement;
} else {
auto condition_as_seq = dynamic_cast<IR_Begin*>(cne->entries.front().condition.get());
@ -440,12 +472,44 @@ void convert_cond_no_else_to_compare(std::shared_ptr<IR>* ir) {
assert(condition.second == &condition_as_seq->forms.back());
replacement->forms.pop_back();
replacement->forms.push_back(std::make_shared<IR_Set>(
IR_Set::REG_64, dst, std::make_shared<IR_Compare>(condition.first->condition)));
IR_Set::REG_64, dst,
std::make_shared<IR_Compare>(condition.first->condition, condition.first)));
*ir = replacement;
}
}
}
/*!
 * Final clean-up pass for a cond with no else.
 *  - Sets cne->final_destination to the register named by the entries' false_destination
 *    (every entry must have one, and it must be an IR_Register; the last entry's value is kept).
 *  - Sets cne->used_as_value to true when final_destination is NOT in the written_and_unused set
 *    of the last entry's original condition branch.
 *  - Asserts that, for every entry except the last, the false_destination register IS in the
 *    written_and_unused set of that entry's original condition branch.
 * Requires cne->entries to be non-empty.
 */
void clean_up_cond_no_else_final(IR_Cond* cne, LinkedObjectFile& file) {
  (void)file;  // not used by this pass

  // entries.back() and the "all but the last" loop below both need at least one entry.
  assert(!cne->entries.empty());

  for (auto& entry : cne->entries) {
    if (entry.false_destination != nullptr) {
      auto* fr = dynamic_cast<IR_Register*>(entry.false_destination.get());
      assert(fr);
      cne->final_destination = fr->reg;
    } else {
      assert(false);
    }
  }

  auto last_branch =
      dynamic_cast<IR_Branch_Atomic*>(cne->entries.back().original_condition_branch.get());
  assert(last_branch);

  cne->used_as_value = last_branch->written_and_unused.find(cne->final_destination) ==
                       last_branch->written_and_unused.end();

  // check that all other delay slot writes are unused.
  // "i + 1 < size" rather than "i < size - 1" so the bound cannot wrap around on size_t.
  for (size_t i = 0; i + 1 < cne->entries.size(); i++) {
    auto branch =
        dynamic_cast<IR_Branch_Atomic*>(cne->entries.at(i).original_condition_branch.get());
    auto reg = dynamic_cast<IR_Register*>(cne->entries.at(i).false_destination.get());
    assert(reg);
    assert(branch);
    assert(branch->written_and_unused.find(reg->reg) != branch->written_and_unused.end());
  }
}
/*!
* Replace internal branches inside a CondNoElse IR.
* If possible, simplifies the entire expression into a single comparison operation.
@ -483,7 +547,10 @@ void clean_up_cond_no_else(std::shared_ptr<IR>* ir, LinkedObjectFile& file) {
assert(e.false_destination);
}
auto replacement = std::make_shared<IR_Compare>(jump_to_next.first->condition);
e.original_condition_branch = *jump_to_next.second;
auto replacement =
std::make_shared<IR_Compare>(jump_to_next.first->condition, jump_to_next.first);
replacement->condition.invert();
*(jump_to_next.second) = replacement;
e.cleaned = true;
@ -503,6 +570,36 @@ void clean_up_cond_no_else(std::shared_ptr<IR>* ir, LinkedObjectFile& file) {
}
}
}
// bool has_any_falses = false;
// Register false_reg;
// for (size_t idx = 0; idx < cne->entries.size(); idx++) {
// auto& entry = cne->entries.at(idx);
// if (idx == 0) {
// has_any_falses = entry.false_destination != nullptr;
// if (has_any_falses) {
// auto* as_reg = dynamic_cast<IR_Register*>(entry.false_destination.get());
// assert(as_reg);
// false_reg = as_reg->reg;
// }
// } else {
// if (has_any_falses) {
// if (idx == cne->entries.size() - 1) {
// assert(entry.false_destination == nullptr);
// } else {
// auto* as_reg = dynamic_cast<IR_Register*>(entry.false_destination.get());
// assert(as_reg);
// assert(as_reg->reg == false_reg);
// }
// } else {
// if (entry.false_destination != nullptr) {
// printf("BAD set of %s\n", entry.false_destination->print(file).c_str());
// printf("%s\n", entry.condition->print(file).c_str());
// }
// assert(entry.false_destination == nullptr);
// }
// }
// }
}
/*!
@ -619,12 +716,18 @@ std::shared_ptr<IR> try_sc_as_abs(Function& f, LinkedObjectFile& file, ShortCirc
return nullptr;
}
// todo, seems possible to be a single op instead of a begin here.
auto b0_ptr = cfg_to_ir(f, file, b0);
auto b0_ir = dynamic_cast<IR_Begin*>(b0_ptr.get());
auto branch_sp = b0_ir->forms.back();
auto branch = dynamic_cast<IR_Branch*>(branch_sp.get());
IR_Branch* branch = nullptr;
std::shared_ptr<IR> branch_sp = nullptr;
if (b0_ir) {
branch_sp = b0_ir->forms.back();
} else {
branch_sp = b0_ptr;
}
branch = dynamic_cast<IR_Branch*>(branch_sp.get());
if (!branch) {
return nullptr;
}
@ -984,7 +1087,8 @@ std::shared_ptr<IR> cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx)
} else if (dynamic_cast<InfiniteLoopBlock*>(vtx)) {
auto wvtx = dynamic_cast<InfiniteLoopBlock*>(vtx);
auto result = std::make_shared<IR_WhileLoop>(
std::make_shared<IR_Compare>(Condition(Condition::ALWAYS, nullptr, nullptr, nullptr)),
std::make_shared<IR_Compare>(Condition(Condition::ALWAYS, nullptr, nullptr, nullptr),
nullptr),
cfg_to_ir(f, file, wvtx->block));
clean_up_infinite_while_loop(result.get());
return result;
@ -1003,7 +1107,8 @@ std::shared_ptr<IR> cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx)
return fancy_compact_result;
}
if (dynamic_cast<IR_Cond*>(else_ir.get())) {
// this case is disabled because I _think_ it is now properly handled elsewhere.
if (false && dynamic_cast<IR_Cond*>(else_ir.get())) {
auto extra_cond = dynamic_cast<IR_Cond*>(else_ir.get());
std::vector<IR_Cond::Entry> entries;
for (auto& x : cvtx->entries) {
@ -1047,6 +1152,10 @@ std::shared_ptr<IR> cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx)
if (as_abs) {
return as_abs;
}
if (svtx->entries.size() == 1) {
throw std::runtime_error("Weird short circuit form.");
}
// now try as a normal and/or
std::vector<IR_ShortCircuit::Entry> entries;
for (auto& x : svtx->entries) {
@ -1115,7 +1224,8 @@ void clean_up_while_loops(IR_Begin* sequence, LinkedObjectFile& file) {
assert(condition_branch.first);
assert(condition_branch.first->branch_delay.kind == BranchDelay::NOP);
// printf("got while condition branch %s\n", condition_branch.first->print(file).c_str());
auto replacement = std::make_shared<IR_Compare>(condition_branch.first->condition);
auto replacement =
std::make_shared<IR_Compare>(condition_branch.first->condition, condition_branch.first);
*(condition_branch.second) = replacement;
}
}
@ -1157,6 +1267,11 @@ std::shared_ptr<IR> build_cfg_ir(Function& function,
if (as_begin) {
clean_up_while_loops(as_begin, file);
}
auto as_cond_no_else = dynamic_cast<IR_Cond*>(child.get());
if (as_cond_no_else) {
clean_up_cond_no_else_final(as_cond_no_else, file);
}
}
return ir;
} catch (std::runtime_error& e) {

View File

@ -5,6 +5,8 @@
// hack to print out reverse deref paths on loads to help with debugging load stuff.
bool enable_hack_load_path_print = false;
// hack to print (begin x) as x to make debug output easier to read.
bool inline_single_begins = true;
std::vector<std::shared_ptr<IR>> IR::get_all_ir(LinkedObjectFile& file) const {
(void)file;
@ -32,7 +34,8 @@ std::string IR::print(const LinkedObjectFile& file) const {
}
namespace {
void add_regs_to_str(const std::vector<Register>& regs, std::string& str) {
template <typename T>
void add_regs_to_str(const T& regs, std::string& str) {
bool first = true;
for (auto& reg : regs) {
if (first) {
@ -76,6 +79,11 @@ std::string IR_Atomic::print_with_reguse(const LinkedObjectFile& file) const {
add_regs_to_str(clobber_regs, result);
result += "] ";
}
if (!consumed.empty()) {
result += "consumed: [";
add_regs_to_str(consumed, result);
result += "] ";
}
return result;
}
@ -998,6 +1006,9 @@ void IR_Breakpoint_Atomic::get_children(std::vector<std::shared_ptr<IR>>* output
}
goos::Object IR_Begin::to_form(const LinkedObjectFile& file) const {
if (forms.size() == 1 && inline_single_begins) {
return forms.front()->to_form(file);
}
std::vector<goos::Object> list;
list.push_back(pretty_print::to_symbol("begin"));
for (auto& x : forms) {

View File

@ -55,7 +55,7 @@ class IR {
class IR_Atomic : public virtual IR {
public:
std::vector<Register> read_regs, write_regs, clobber_regs;
std::unordered_set<Register, Register::hash> consumed;
std::unordered_set<Register, Register::hash> consumed, written_and_unused;
bool reg_info_set = false;
TypeState end_types; // types at the end of this instruction
@ -208,6 +208,14 @@ class IR_EmptyPair : public virtual IR {
TP_Type get_expression_type(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
// Nothing to substitute from the expression stack here: all arguments are
// ignored and the update is reported as successful.
bool update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
                       ExpressionStack& stack,
                       LinkedObjectFile& file) override {
  static_cast<void>(consume);
  static_cast<void>(stack);
  static_cast<void>(file);
  return true;
}
};
class IR_StaticAddress : public virtual IR {
@ -281,6 +289,9 @@ class IR_FloatMath1 : public virtual IR {
TP_Type get_expression_type(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
bool update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
ExpressionStack& stack,
LinkedObjectFile& file) override;
};
class IR_IntMath2 : public virtual IR {
@ -367,6 +378,14 @@ class IR_IntegerConstant : public virtual IR {
TP_Type get_expression_type(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
// This node reads nothing from the expression stack, so the update is a no-op
// that always succeeds.
bool update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
                       ExpressionStack& stack,
                       LinkedObjectFile& file) override {
  static_cast<void>(consume);
  static_cast<void>(stack);
  static_cast<void>(file);
  return true;
}
};
struct BranchDelay {
@ -479,15 +498,27 @@ class IR_Branch_Atomic : public virtual IR_Branch, public IR_Atomic {
class IR_Compare : public virtual IR {
public:
explicit IR_Compare(Condition _condition) : condition(std::move(_condition)) {}
explicit IR_Compare(Condition _condition, IR_Atomic* _root_op)
: condition(std::move(_condition)), root_op(_root_op) {}
Condition condition;
// the basic op that the comparison comes from. If the condition is "ALWAYS", this may be null.
// if this is the source of an IR_Set_Atomic, this may also be null. This should only be used
// from IR_Compare's expression_stack, when the IR_Compare is being used as a branch condition,
// and not as a literal #f/#t that's being assigned.
IR_Atomic* root_op = nullptr;
goos::Object to_form(const LinkedObjectFile& file) const override;
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
TP_Type get_expression_type(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override;
bool update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
ExpressionStack& stack,
LinkedObjectFile& file) override;
std::unordered_set<Register, Register::hash> get_consumed(LinkedObjectFile& file) override;
};
class IR_Nop : public virtual IR {
@ -495,6 +526,7 @@ class IR_Nop : public virtual IR {
IR_Nop() = default;
goos::Object to_form(const LinkedObjectFile& file) const override;
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override;
};
class IR_Nop_Atomic : public IR_Nop, public IR_Atomic {
@ -513,6 +545,11 @@ class IR_Suspend_Atomic : public virtual IR, public IR_Atomic {
void propagate_types(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
// Leaves the expression stack untouched and reports success.
bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override {
  static_cast<void>(stack);
  static_cast<void>(file);
  return true;
}
};
class IR_Breakpoint_Atomic : public virtual IR_Atomic {
@ -523,6 +560,11 @@ class IR_Breakpoint_Atomic : public virtual IR_Atomic {
void propagate_types(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
// Leaves the expression stack untouched and reports success.
bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override {
  static_cast<void>(stack);
  static_cast<void>(file);
  return true;
}
};
class IR_Begin : public virtual IR {
@ -542,6 +584,7 @@ class IR_WhileLoop : public virtual IR {
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
std::shared_ptr<IR> condition, body;
bool cleaned = false;
bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override;
};
class IR_UntilLoop : public virtual IR {
@ -550,6 +593,7 @@ class IR_UntilLoop : public virtual IR {
: condition(std::move(_condition)), body(std::move(_body)) {}
goos::Object to_form(const LinkedObjectFile& file) const override;
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override;
std::shared_ptr<IR> condition, body;
};
@ -566,6 +610,7 @@ class IR_CondWithElse : public virtual IR {
: entries(std::move(_entries)), else_ir(std::move(_else_ir)) {}
goos::Object to_form(const LinkedObjectFile& file) const override;
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override;
};
// this one doesn't have an else statement. Will return false if none of the cases are taken.
@ -575,12 +620,16 @@ class IR_Cond : public virtual IR {
std::shared_ptr<IR> condition = nullptr;
std::shared_ptr<IR> body = nullptr;
std::shared_ptr<IR> false_destination = nullptr;
std::shared_ptr<IR> original_condition_branch = nullptr;
bool cleaned = false;
};
Register final_destination;
bool used_as_value = false;
std::vector<Entry> entries;
explicit IR_Cond(std::vector<Entry> _entries) : entries(std::move(_entries)) {}
goos::Object to_form(const LinkedObjectFile& file) const override;
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override;
};
// this will work on pairs, bintegers, or basics
@ -591,24 +640,33 @@ class IR_GetRuntimeType : public virtual IR {
: object(std::move(_object)), clobber(std::move(_clobber)) {}
goos::Object to_form(const LinkedObjectFile& file) const override;
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
std::unordered_set<Register, Register::hash> get_consumed(LinkedObjectFile& file) override;
bool update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
ExpressionStack& stack,
LinkedObjectFile& file) override;
};
class IR_ShortCircuit : public virtual IR {
public:
struct Entry {
std::shared_ptr<IR> condition = nullptr;
std::shared_ptr<IR> output = nullptr; // where the delay slot writes to.
// in the case where there's no else, each delay slot will write #f to the "output" register.
// this can be with an or <output>, s7, r0
std::shared_ptr<IR> output = nullptr;
bool is_output_trick = false;
bool cleaned = false;
};
enum Kind { UNKNOWN, AND, OR } kind = UNKNOWN;
std::shared_ptr<IR> final_result = nullptr; // the register that the final result goes in.
std::vector<Entry> entries;
std::optional<bool> used_as_value = std::nullopt;
explicit IR_ShortCircuit(std::vector<Entry> _entries) : entries(std::move(_entries)) {}
goos::Object to_form(const LinkedObjectFile& file) const override;
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override;
};
class IR_Ash : public virtual IR {
@ -652,6 +710,7 @@ class IR_AsmOp : public virtual IR {
IR_AsmOp(std::string _name) : name(std::move(_name)) {}
goos::Object to_form(const LinkedObjectFile& file) const override;
void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
bool expression_stack(ExpressionStack& stack, LinkedObjectFile& file) override;
};
class IR_AsmOp_Atomic : public virtual IR_AsmOp, public IR_Atomic {
@ -674,6 +733,9 @@ class IR_CMoveF : public virtual IR {
TP_Type get_expression_type(const TypeState& input,
const LinkedObjectFile& file,
DecompilerTypeSystem& dts) override;
bool update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
ExpressionStack& stack,
LinkedObjectFile& file) override;
};
class IR_AsmReg : public virtual IR {

View File

@ -15,19 +15,38 @@ bool IR_Set_Atomic::expression_stack(ExpressionStack& stack, LinkedObjectFile& f
// first, we update our source to substitute in more complicated expressions.
auto src_as_reg = dynamic_cast<IR_Register*>(src.get());
if (src_as_reg) {
// an annoying special case.
// we're reading a register. Let's find out if it's safe to directly copy its value.
if (consumed.find(src_as_reg->reg) != consumed.end()) {
// we consume it.
// yep. Let's read it off of the stack.
src = stack.get(src_as_reg->reg);
}
} else {
// our source is some expression. we need to make sure the expression is up-to-date.
src->update_from_stack(consumed, stack, file);
}
// next, we tell the stack the value of the register we just set
auto dest_reg = dynamic_cast<IR_Register*>(dst.get());
assert(dest_reg);
stack.set(dest_reg->reg, src);
// sequence point if not a register -> register set.
stack.set(dest_reg->reg, src, !src_as_reg);
return true;
}
case IR_Set::STORE:
case IR_Set::SYM_STORE: {
auto src_as_reg = dynamic_cast<IR_Register*>(src.get());
if (src_as_reg) {
// we're reading a register. Let's find out if it's safe to directly copy its value.
if (consumed.find(src_as_reg->reg) != consumed.end()) {
// yep. Let's read it off of the stack.
src = stack.get(src_as_reg->reg);
}
} else {
// our source is some expression. we need to make sure the expression is up-to-date.
src->update_from_stack(consumed, stack, file);
}
stack.add_no_set(std::make_shared<IR_Set_Atomic>(*this), true);
return true;
}
@ -62,7 +81,7 @@ bool IR_Set::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) {
// next, we tell the stack the value of the register we just set
auto dest_reg = dynamic_cast<IR_Register*>(dst.get());
assert(dest_reg);
stack.set(dest_reg->reg, src);
stack.set(dest_reg->reg, src, !src_as_reg);
return true;
}
@ -81,6 +100,7 @@ bool IR_Call_Atomic::expression_stack(ExpressionStack& stack, LinkedObjectFile&
const Reg::Gpr arg_regs[8] = {Reg::A0, Reg::A1, Reg::A2, Reg::A3,
Reg::T0, Reg::T1, Reg::T2, Reg::T3};
int nargs = int(call_type.arg_count()) - 1;
// printf("%s\n", stack.print(file).c_str());
// get all arguments.
for (int i = nargs; i-- > 0;) {
args.push_back(stack.get(Register(Reg::GPR, arg_regs[i])));
@ -90,11 +110,18 @@ bool IR_Call_Atomic::expression_stack(ExpressionStack& stack, LinkedObjectFile&
auto return_type = call_type.get_arg(call_type.arg_count() - 1);
// bleh...
stack.set(Register(Reg::GPR, Reg::V0), std::make_shared<IR_Call_Atomic>(*this));
stack.set(Register(Reg::GPR, Reg::V0), std::make_shared<IR_Call_Atomic>(*this), true);
return true;
}
/*!
 * Push a copy of this until loop onto the expression stack as a form that sets no register.
 * The `true` argument presumably marks a sequence point (as the other add_no_set callers in
 * this file describe it) - confirm against ExpressionStack.
 */
bool IR_UntilLoop::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) {
  (void)file;  // the loop is pushed as-is, nothing is looked up in the object file.
  std::shared_ptr<IR> loop_copy = std::make_shared<IR_UntilLoop>(*this);
  stack.add_no_set(loop_copy, true);
  return true;
}
namespace {
void update_from_stack_helper(std::shared_ptr<IR>* ir,
const std::unordered_set<Register, Register::hash>& consume,
@ -111,6 +138,164 @@ void update_from_stack_helper(std::shared_ptr<IR>* ir,
}
} // namespace
/*!
 * Substitute stack expressions into the operands of this comparison (using the registers
 * consumed by the originating basic op), then push a copy of the comparison onto the stack.
 * An ALWAYS condition has no operands and is pushed unchanged.
 */
bool IR_Compare::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) {
  const bool has_operands = condition.kind != Condition::ALWAYS;
  if (has_operands) {
    // a real comparison must remember the op it came from, that's where reg usage lives.
    assert(root_op);
    auto& root_consumed = root_op->consumed;
    const auto arg_count = condition.num_args();
    if (arg_count == 2) {
      // second operand first, then the first operand.
      update_from_stack_helper(&condition.src1, root_consumed, stack, file);
      update_from_stack_helper(&condition.src0, root_consumed, stack, file);
    } else if (arg_count == 1) {
      update_from_stack_helper(&condition.src0, root_consumed, stack, file);
    } else {
      // only 0, 1 or 2 operands are expected.
      assert(arg_count == 0);
    }
  }

  std::shared_ptr<IR> compare_copy = std::make_shared<IR_Compare>(*this);
  stack.add_no_set(compare_copy, true);
  return true;
}
/*!
 * Substitute stack expressions into the operands of this comparison, using the caller's
 * set of consumed registers. Unlike expression_stack, nothing is pushed onto the stack.
 */
bool IR_Compare::update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
                                   ExpressionStack& stack,
                                   LinkedObjectFile& file) {
  if (condition.kind == Condition::ALWAYS) {
    // no operands to update.
    return true;
  }

  const auto arg_count = condition.num_args();
  if (arg_count == 2) {
    // second operand first, then the first operand.
    update_from_stack_helper(&condition.src1, consume, stack, file);
    update_from_stack_helper(&condition.src0, consume, stack, file);
  } else if (arg_count == 1) {
    update_from_stack_helper(&condition.src0, consume, stack, file);
  } else {
    // only 0, 1 or 2 operands are expected.
    assert(arg_count == 0);
  }
  return true;
}
/*!
 * Put this short circuit (and/or) on the expression stack.
 *
 * This one is weird. All forms but the last implicitly set final_result.
 * The last form should somewhere set final_result, but due to tricky coloring we
 * can't identify this 100% of the time, so we settle for something like:
 *   (set! result (or <clause-a> ... (begin (blah) (set! result x) (blah))))
 * In the future, we may want to handle this a little bit better, at least in the obvious cases.
 *
 * If the result is not used as a value, the short circuit is pushed as a form that sets nothing.
 * Otherwise the last entry is rewritten so it evaluates to the result, and the whole short
 * circuit becomes the value of the result register.
 */
bool IR_ShortCircuit::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) {
  (void)file;
  assert(final_result);
  assert(used_as_value.has_value());

  if (!used_as_value.value()) {
    stack.add_no_set(std::make_shared<IR_ShortCircuit>(*this), true);
    return true;
  }

  auto dest_reg = dynamic_cast<IR_Register*>(final_result.get());
  assert(dest_reg);           // the final result must be a plain register.
  assert(!entries.empty());   // a short circuit always has at least one entry.
  auto& last_condition = entries.back().condition;

  // returns the IR as a set! of the result register, or nullptr if it isn't one.
  auto as_set_of_result = [dest_reg](IR* ir) -> IR_Set* {
    auto as_set = dynamic_cast<IR_Set*>(ir);
    if (!as_set) {
      return nullptr;
    }
    auto dst_as_reg = dynamic_cast<IR_Register*>(as_set->dst.get());
    if (dst_as_reg && dst_as_reg->reg == dest_reg->reg) {
      return as_set;
    }
    return nullptr;
  };

  bool rewritten = false;

  // try as a set: (set! result x) just becomes x.
  if (auto direct_set = as_set_of_result(last_condition.get())) {
    // copy the source first, the set itself is owned by the condition we're about to replace.
    std::shared_ptr<IR> value = direct_set->src;
    last_condition = value;
    rewritten = true;
  }

  // try as the last thing in a begin: (begin (blah) (set! result x)) becomes (begin (blah) x).
  // Only the final form is replaced so the earlier forms in the begin are kept.
  if (!rewritten) {
    auto last_entry_as_begin = dynamic_cast<IR_Begin*>(last_condition.get());
    if (last_entry_as_begin && !last_entry_as_begin->forms.empty()) {
      auto trailing_set = as_set_of_result(last_entry_as_begin->forms.back().get());
      if (trailing_set) {
        std::shared_ptr<IR> value = trailing_set->src;
        last_entry_as_begin->forms.back() = value;
        rewritten = true;
      }
    }
  }

  // nope. if we have something like (and x (if a b c)), we may need to explicitly add an
  // evaluation of the if's result.
  if (!rewritten) {
    auto new_last_entry = std::make_shared<IR_Begin>();
    new_last_entry->forms.push_back(last_condition);
    new_last_entry->forms.push_back(std::make_shared<IR_Register>(dest_reg->reg, -1));
    last_condition = new_last_entry;
  }

  stack.set(dest_reg->reg, std::make_shared<IR_ShortCircuit>(*this), true);
  return true;
}
/*!
 * Expression stack handling for a cond without an else.
 * If the cond is not used as a value, a copy is pushed as a form that sets nothing.
 * If it is used as a value, every body must end (possibly nested inside begins) in a set! of
 * final_destination; otherwise a std::runtime_error is thrown.
 */
bool IR_Cond::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) {
  if (!used_as_value) {
    stack.add_no_set(std::make_shared<IR_Cond>(*this), true);
    return true;
  }

  for (auto& entry : entries) {
    // walk down through nested begins to the very last form of the body.
    IR* tail = entry.body.get();
    for (auto* as_begin = dynamic_cast<IR_Begin*>(tail); as_begin;
         as_begin = dynamic_cast<IR_Begin*>(tail)) {
      tail = as_begin->forms.back().get();
    }

    // set!'s evaluate to the thing they are setting, so a trailing set! of the final
    // destination means this body evaluates to the right value.
    bool sets_destination = false;
    if (auto* tail_set = dynamic_cast<IR_Set*>(tail)) {
      auto* set_target = dynamic_cast<IR_Register*>(tail_set->dst.get());
      sets_destination = set_target && set_target->reg == final_destination;
    }

    if (!sets_destination) {
      throw std::runtime_error("IR_Cond used as value didn't work for reg " +
                               final_destination.to_string() + "\n" + entry.body->print(file));
    }
  }

  // NOTE(review): on this path nothing is pushed onto the stack - confirm this is intended.
  return true;
}
/*!
 * Push a copy of this while loop onto the expression stack as a form that sets no register.
 * While loops are never "used by value" yet, but this is okay because they don't
 * do any tricks in delay slots like IR_Cond's do.
 */
bool IR_WhileLoop::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) {
  (void)file;
  std::shared_ptr<IR> loop_copy = std::make_shared<IR_WhileLoop>(*this);
  stack.add_no_set(loop_copy, true);
  return true;
}
/*!
 * Push a copy of this asm op onto the expression stack as a form that sets no register.
 * We only fall back to asm ops if we don't understand the GOAL code, or if the original code
 * used inline assembly. In these cases, we create a sequence point here.
 */
bool IR_AsmOp::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) {
  (void)file;
  std::shared_ptr<IR> op_copy = std::make_shared<IR_AsmOp>(*this);
  stack.add_no_set(op_copy, true);
  return true;
}
/*!
 * Push a copy of this cond-with-else onto the expression stack as a form that sets no register.
 * Cond with else are never "used by value" yet, but this is okay because they don't
 * do any tricks in delay slots like IR_Cond's do.
 */
bool IR_CondWithElse::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) {
  (void)file;
  std::shared_ptr<IR> cond_copy = std::make_shared<IR_CondWithElse>(*this);
  stack.add_no_set(cond_copy, true);
  return true;
}
bool IR_Load::update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
ExpressionStack& stack,
LinkedObjectFile& file) {
@ -143,7 +328,7 @@ bool IR_FloatMath2::update_from_stack(const std::unordered_set<Register, Registe
}
}
} else {
for (auto reg : {&arg0, &arg1}) {
for (auto reg : {&arg1, &arg0}) {
auto as_reg = dynamic_cast<IR_Register*>(reg->get());
if (as_reg) {
if (consume.find(as_reg->reg) != consume.end()) {
@ -221,4 +406,45 @@ bool IR_IntMath1::update_from_stack(const std::unordered_set<Register, Register:
LinkedObjectFile& file) {
update_from_stack_helper(&arg, consume, stack, file);
return true;
}
/*!
 * Update the object operand of this runtime type lookup from the expression stack.
 */
bool IR_GetRuntimeType::update_from_stack(
    const std::unordered_set<Register, Register::hash>& consume,
    ExpressionStack& stack,
    LinkedObjectFile& file) {
  auto* operand = &object;
  update_from_stack_helper(operand, consume, stack, file);
  return true;
}
/*!
 * Registers consumed by this form. Currently always reports an empty set.
 */
std::unordered_set<Register, Register::hash> IR_GetRuntimeType::get_consumed(
    LinkedObjectFile& file) {
  // todo, this can actually consume stuff.
  static_cast<void>(file);
  return std::unordered_set<Register, Register::hash>{};
}
/*!
 * Registers consumed by this comparison. Currently always reports an empty set.
 */
std::unordered_set<Register, Register::hash> IR_Compare::get_consumed(LinkedObjectFile& file) {
  // todo, this can actually consume stuff.
  static_cast<void>(file);
  return std::unordered_set<Register, Register::hash>{};
}
/*!
 * A nop adds nothing to the expression stack; just report success.
 */
bool IR_Nop::expression_stack(ExpressionStack& stack, LinkedObjectFile& file) {
  static_cast<void>(stack);
  static_cast<void>(file);
  return true;
}
/*!
 * Update the source operand of this conditional move from the expression stack.
 */
bool IR_CMoveF::update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
                                  ExpressionStack& stack,
                                  LinkedObjectFile& file) {
  auto* operand = &src;
  update_from_stack_helper(operand, consume, stack, file);
  return true;
}
/*!
 * Update the single argument of this float math operation from the expression stack.
 */
bool IR_FloatMath1::update_from_stack(const std::unordered_set<Register, Register::hash>& consume,
                                      ExpressionStack& stack,
                                      LinkedObjectFile& file) {
  auto* operand = &arg;
  update_from_stack_helper(operand, consume, stack, file);
  return true;
}

View File

@ -570,7 +570,7 @@ void ObjectFileDB::write_debug_type_analysis(const std::string& output_dir,
if (obj.linked_data.has_any_functions()) {
auto file_text = obj.linked_data.print_type_analysis_debug();
auto file_name =
file_util::combine_path(output_dir, obj.to_unique_name() + suffix + "_db.asm");
file_util::combine_path(output_dir, obj.to_unique_name() + suffix + "_dbt.asm");
total_bytes += file_text.size();
file_util::write_text_file(file_name, file_text);
@ -861,8 +861,11 @@ void ObjectFileDB::analyze_functions() {
// Main Pass over each function...
for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) {
total_functions++;
// printf("in %s from %s\n", func.guessed_name.to_string().c_str(),
// data.to_unique_name().c_str());
// if (func.guessed_name.to_string() != "sort") {
// return;
// }
// printf("in %s from %s\n", func.guessed_name.to_string().c_str(),
// data.to_unique_name().c_str());
// first, find basic blocks.
auto blocks = find_blocks_in_function(data.linked_data, segment_id, func);
@ -907,6 +910,8 @@ void ObjectFileDB::analyze_functions() {
}
// Combine basic ops + CFG to build a nested IR
// register usage first, so we can tell if the SC's if's are used by value.
func.run_reg_usage();
func.ir = build_cfg_ir(func, *func.cfg, data.linked_data);
non_asm_funcs++;
if (func.ir) {
@ -921,6 +926,7 @@ void ObjectFileDB::analyze_functions() {
}
// type analysis
if (get_config().function_type_prop) {
auto hints = get_config().type_hints_by_function_by_idx[func.guessed_name.to_string()];
if (get_config().no_type_analysis_functions_by_name.find(func.guessed_name.to_string()) ==
@ -1095,17 +1101,18 @@ void ObjectFileDB::analyze_expressions() {
Timer timer;
int attempts = 0;
int success = 0;
bool had_failure = false;
for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) {
(void)segment_id;
// register usage
func.run_reg_usage();
if (func.attempted_type_analysis) {
if (/*!had_failure &&*/ func.attempted_type_analysis) {
attempts++;
spdlog::info("Analyze {}", func.guessed_name.to_string());
if (func.build_expression(data.linked_data)) {
success++;
} else {
func.warnings.append(";; Expression analysis failed.\n");
had_failure = true;
}
}
});

View File

@ -91,7 +91,6 @@ int main(int argc, char** argv) {
if (get_config().analyze_expressions) {
db.analyze_expressions();
db.write_disassembly(out_folder, false, false, "_expr");
db.write_debug_type_analysis(out_folder, "_expr");
}
// todo print type summary

View File

@ -253,14 +253,15 @@
(defun type-type? ((a type) (b type))
"is a a type (or child type) of type b?"
(until (eq? a object)
;; it's not clear why a might be zero?
;; perhaps if the type system is not yet initialized fully for the type?
(if (or (eq? a b) (zero? a))
(return-from #f #t)
)
(set! a (-> a parent))
)
(let ((object-type object))
(until (or (eq? (set! a (-> a parent)) object-type)
(zero? a)
)
(if (eq? a b)
(return-from #f #t)
)
)
)
#f
)