mirror of
https://github.com/open-goal/jak-project.git
synced 2025-02-13 01:49:01 +00:00
[Decompiler] WIP Conversion to SSA and variable naming (#195)
* begin ssa algorithm * ssa based splitting appears to work * add merge pass * finish basic implementation * better output * bug fix
This commit is contained in:
parent
8f86f0f00e
commit
1071ff6003
@ -1083,7 +1083,7 @@ bool TypeSystem::typecheck_base_types(const std::string& expected,
|
||||
/*!
|
||||
* Get a path from type to object.
|
||||
*/
|
||||
std::vector<std::string> TypeSystem::get_path_up_tree(const std::string& type) {
|
||||
std::vector<std::string> TypeSystem::get_path_up_tree(const std::string& type) const {
|
||||
auto parent = lookup_type(type)->get_parent();
|
||||
std::vector<std::string> path = {type};
|
||||
path.push_back(parent);
|
||||
@ -1101,7 +1101,7 @@ std::vector<std::string> TypeSystem::get_path_up_tree(const std::string& type) {
|
||||
/*!
|
||||
* Lowest common ancestor of two base types.
|
||||
*/
|
||||
std::string TypeSystem::lca_base(const std::string& a, const std::string& b) {
|
||||
std::string TypeSystem::lca_base(const std::string& a, const std::string& b) const {
|
||||
if (a == b) {
|
||||
return a;
|
||||
}
|
||||
@ -1137,7 +1137,7 @@ std::string TypeSystem::lca_base(const std::string& a, const std::string& b) {
|
||||
* In a situation like lca("(a b)", "(c d)"), the result will be
|
||||
* (lca(a, b) lca(b, d)).
|
||||
*/
|
||||
TypeSpec TypeSystem::lowest_common_ancestor(const TypeSpec& a, const TypeSpec& b) {
|
||||
TypeSpec TypeSystem::lowest_common_ancestor(const TypeSpec& a, const TypeSpec& b) const {
|
||||
auto result = make_typespec(lca_base(a.base_type(), b.base_type()));
|
||||
if (result == TypeSpec("function") && a.m_arguments.size() == 2 && b.m_arguments.size() == 2 &&
|
||||
(a.m_arguments.at(0) == TypeSpec("_varargs_") ||
|
||||
@ -1154,14 +1154,14 @@ TypeSpec TypeSystem::lowest_common_ancestor(const TypeSpec& a, const TypeSpec& b
|
||||
return result;
|
||||
}
|
||||
|
||||
TypeSpec TypeSystem::lowest_common_ancestor_reg(const TypeSpec& a, const TypeSpec& b) {
|
||||
TypeSpec TypeSystem::lowest_common_ancestor_reg(const TypeSpec& a, const TypeSpec& b) const {
|
||||
return coerce_to_reg_type(lowest_common_ancestor(a, b));
|
||||
}
|
||||
|
||||
/*!
|
||||
* Lowest common ancestor of multiple (or at least one) type.
|
||||
*/
|
||||
TypeSpec TypeSystem::lowest_common_ancestor(const std::vector<TypeSpec>& types) {
|
||||
TypeSpec TypeSystem::lowest_common_ancestor(const std::vector<TypeSpec>& types) const {
|
||||
assert(!types.empty());
|
||||
if (types.size() == 1) {
|
||||
return types.front();
|
||||
|
@ -174,7 +174,7 @@ class TypeSystem {
|
||||
const std::string& error_source_name = "",
|
||||
bool print_on_error = true,
|
||||
bool throw_on_error = true) const;
|
||||
std::vector<std::string> get_path_up_tree(const std::string& type);
|
||||
std::vector<std::string> get_path_up_tree(const std::string& type) const;
|
||||
int get_next_method_id(Type* type);
|
||||
|
||||
bool is_bitfield_type(const std::string& type_name) const;
|
||||
@ -197,9 +197,9 @@ class TypeSystem {
|
||||
return result;
|
||||
}
|
||||
|
||||
TypeSpec lowest_common_ancestor(const TypeSpec& a, const TypeSpec& b);
|
||||
TypeSpec lowest_common_ancestor_reg(const TypeSpec& a, const TypeSpec& b);
|
||||
TypeSpec lowest_common_ancestor(const std::vector<TypeSpec>& types);
|
||||
TypeSpec lowest_common_ancestor(const TypeSpec& a, const TypeSpec& b) const;
|
||||
TypeSpec lowest_common_ancestor_reg(const TypeSpec& a, const TypeSpec& b) const;
|
||||
TypeSpec lowest_common_ancestor(const std::vector<TypeSpec>& types) const;
|
||||
|
||||
private:
|
||||
bool reverse_deref(const ReverseDerefInputInfo& input,
|
||||
@ -226,7 +226,7 @@ class TypeSystem {
|
||||
std::vector<FieldReverseLookupOutput::Token>* path,
|
||||
bool* addr_of,
|
||||
TypeSpec* result_type) const;
|
||||
std::string lca_base(const std::string& a, const std::string& b);
|
||||
std::string lca_base(const std::string& a, const std::string& b) const;
|
||||
bool typecheck_base_types(const std::string& expected, const std::string& actual) const;
|
||||
int get_size_in_type(const Field& field) const;
|
||||
int get_alignment_in_type(const Field& field);
|
||||
|
@ -33,6 +33,7 @@ add_library(
|
||||
IR2/AtomicOpTypeAnalysis.cpp
|
||||
IR2/Env.cpp
|
||||
IR2/reg_usage.cpp
|
||||
IR2/variable_naming.cpp
|
||||
|
||||
ObjectFile/LinkedObjectFile.cpp
|
||||
ObjectFile/LinkedObjectFileCreation.cpp
|
||||
|
@ -11,7 +11,7 @@ namespace decompiler {
|
||||
// VARIABLE
|
||||
/////////////////////////////
|
||||
|
||||
Variable::Variable(Mode mode, Register reg, int atomic_idx, bool allow_all)
|
||||
Variable::Variable(VariableMode mode, Register reg, int atomic_idx, bool allow_all)
|
||||
: m_mode(mode), m_reg(reg), m_atomic_idx(atomic_idx) {
|
||||
// make sure we're using a valid GPR.
|
||||
if (reg.get_kind() == Reg::GPR && !allow_all) {
|
||||
@ -28,12 +28,12 @@ std::string Variable::to_string(const Env* env, Print mode) const {
|
||||
return m_reg.to_string();
|
||||
case Print::FULL:
|
||||
return fmt::format("{}-{:03d}-{}", m_reg.to_charp(), m_atomic_idx,
|
||||
m_mode == Mode::READ ? 'r' : 'w');
|
||||
m_mode == VariableMode::READ ? 'r' : 'w');
|
||||
case Print::AS_VARIABLE:
|
||||
return env->get_variable_name(m_reg, m_atomic_idx);
|
||||
return env->get_variable_name(m_reg, m_atomic_idx, m_mode);
|
||||
case Print::AUTOMATIC:
|
||||
if (env->has_local_vars()) {
|
||||
return env->get_variable_name(m_reg, m_atomic_idx);
|
||||
return env->get_variable_name(m_reg, m_atomic_idx, m_mode);
|
||||
} else {
|
||||
return m_reg.to_string();
|
||||
}
|
||||
@ -58,6 +58,11 @@ AtomicOp::AtomicOp(int my_idx) : m_my_idx(my_idx) {}
|
||||
std::string AtomicOp::to_string(const std::vector<DecompilerLabel>& labels, const Env* env) const {
|
||||
return pretty_print::to_string(to_form(labels, env));
|
||||
}
|
||||
|
||||
std::string AtomicOp::to_string(const Env& env) const {
|
||||
return to_string(env.file->labels, &env);
|
||||
}
|
||||
|
||||
bool AtomicOp::operator!=(const AtomicOp& other) const {
|
||||
return !((*this) == other);
|
||||
}
|
||||
@ -411,7 +416,10 @@ AsmOp::AsmOp(Instruction instr, int my_idx) : AtomicOp(my_idx), m_instr(std::mov
|
||||
if (m_instr.n_dst == 1) {
|
||||
auto& dst = m_instr.get_dst(0);
|
||||
if (dst.is_reg()) {
|
||||
m_dst = Variable(Variable::Mode::WRITE, dst.get_reg(), my_idx, true);
|
||||
auto reg = dst.get_reg();
|
||||
if (reg.get_kind() == Reg::FPR || reg.get_kind() == Reg::GPR) {
|
||||
m_dst = Variable(VariableMode::WRITE, reg, my_idx, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -419,7 +427,10 @@ AsmOp::AsmOp(Instruction instr, int my_idx) : AtomicOp(my_idx), m_instr(std::mov
|
||||
for (int i = 0; i < m_instr.n_src; i++) {
|
||||
auto& src = m_instr.get_src(i);
|
||||
if (src.is_reg()) {
|
||||
m_src[i] = Variable(Variable::Mode::READ, src.get_reg(), my_idx, true);
|
||||
auto reg = src.get_reg();
|
||||
if (reg.get_kind() == Reg::FPR || reg.get_kind() == Reg::GPR) {
|
||||
m_src[i] = Variable(VariableMode::READ, reg, my_idx, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -964,14 +975,14 @@ IR2_BranchDelay::IR2_BranchDelay(Kind kind) : m_kind(kind) {
|
||||
IR2_BranchDelay::IR2_BranchDelay(Kind kind, Variable var0) : m_kind(kind) {
|
||||
assert(m_kind == Kind::SET_REG_FALSE || m_kind == Kind::SET_REG_TRUE ||
|
||||
m_kind == Kind::SET_BINTEGER || m_kind == Kind::SET_PAIR);
|
||||
assert(var0.mode() == Variable::Mode::WRITE);
|
||||
assert(var0.mode() == VariableMode::WRITE);
|
||||
m_var[0] = var0;
|
||||
}
|
||||
|
||||
IR2_BranchDelay::IR2_BranchDelay(Kind kind, Variable var0, Variable var1) : m_kind(kind) {
|
||||
assert(m_kind == Kind::NEGATE || m_kind == Kind::SET_REG_REG);
|
||||
assert(var0.mode() == Variable::Mode::WRITE);
|
||||
assert(var1.mode() == Variable::Mode::READ);
|
||||
assert(var0.mode() == VariableMode::WRITE);
|
||||
assert(var1.mode() == VariableMode::READ);
|
||||
m_var[0] = var0;
|
||||
m_var[1] = var1;
|
||||
}
|
||||
@ -979,9 +990,9 @@ IR2_BranchDelay::IR2_BranchDelay(Kind kind, Variable var0, Variable var1) : m_ki
|
||||
IR2_BranchDelay::IR2_BranchDelay(Kind kind, Variable var0, Variable var1, Variable var2)
|
||||
: m_kind(kind) {
|
||||
assert(m_kind == Kind::DSLLV);
|
||||
assert(var0.mode() == Variable::Mode::WRITE);
|
||||
assert(var1.mode() == Variable::Mode::READ);
|
||||
assert(var2.mode() == Variable::Mode::READ);
|
||||
assert(var0.mode() == VariableMode::WRITE);
|
||||
assert(var1.mode() == VariableMode::READ);
|
||||
assert(var2.mode() == VariableMode::READ);
|
||||
m_var[0] = var0;
|
||||
m_var[1] = var1;
|
||||
m_var[2] = var2;
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "common/goos/Object.h"
|
||||
#include "decompiler/Disasm/Register.h"
|
||||
#include "decompiler/Disasm/Instruction.h"
|
||||
#include "decompiler/IR2/IR2_common.h"
|
||||
#include "Env.h"
|
||||
|
||||
namespace decompiler {
|
||||
@ -33,13 +34,8 @@ class DecompilerTypeSystem;
|
||||
*/
|
||||
class Variable {
|
||||
public:
|
||||
enum class Mode : u8 {
|
||||
READ, // represents value of the variable at the beginning of the instruction
|
||||
WRITE // represents value of the variable at the end of the instruction
|
||||
};
|
||||
|
||||
Variable() = default;
|
||||
Variable(Mode mode, Register reg, int atomic_idx, bool allow_all = false);
|
||||
Variable(VariableMode mode, Register reg, int atomic_idx, bool allow_all = false);
|
||||
|
||||
enum class Print {
|
||||
AS_REG, // print as a PS2 register name
|
||||
@ -54,13 +50,13 @@ class Variable {
|
||||
bool operator!=(const Variable& other) const;
|
||||
|
||||
const Register& reg() const { return m_reg; }
|
||||
Mode mode() const { return m_mode; }
|
||||
VariableMode mode() const { return m_mode; }
|
||||
int idx() const { return m_atomic_idx; }
|
||||
|
||||
private:
|
||||
Mode m_mode = Mode::READ; // do we represent a read or a write?
|
||||
Register m_reg; // the EE register
|
||||
int m_atomic_idx = -1; // the index in the function's list of AtomicOps
|
||||
VariableMode m_mode = VariableMode::READ; // do we represent a read or a write?
|
||||
Register m_reg; // the EE register
|
||||
int m_atomic_idx = -1; // the index in the function's list of AtomicOps
|
||||
};
|
||||
|
||||
/*!
|
||||
@ -90,6 +86,7 @@ class AtomicOp {
|
||||
public:
|
||||
explicit AtomicOp(int my_idx);
|
||||
std::string to_string(const std::vector<DecompilerLabel>& labels, const Env* env) const;
|
||||
std::string to_string(const Env& env) const;
|
||||
std::string reg_type_info_as_string(const TypeState& init_types,
|
||||
const TypeState& end_types) const;
|
||||
virtual goos::Object to_form(const std::vector<DecompilerLabel>& labels,
|
||||
|
@ -44,11 +44,11 @@ Register rv0() {
|
||||
/////////////////////////
|
||||
|
||||
Variable make_dst_var(Register reg, int idx) {
|
||||
return Variable(Variable::Mode::WRITE, reg, idx);
|
||||
return Variable(VariableMode::WRITE, reg, idx);
|
||||
}
|
||||
|
||||
Variable make_src_var(Register reg, int idx) {
|
||||
return Variable(Variable::Mode::READ, reg, idx);
|
||||
return Variable(VariableMode::READ, reg, idx);
|
||||
}
|
||||
|
||||
Variable make_dst_var(const Instruction& i, int idx) {
|
||||
@ -1309,11 +1309,11 @@ std::unique_ptr<AtomicOp> convert_5(const Instruction& i0,
|
||||
* @param end : the end of the instructions for the block
|
||||
* @param container : the container to add to
|
||||
*/
|
||||
void convert_block_to_atomic_ops(int begin_idx,
|
||||
std::vector<Instruction>::const_iterator begin,
|
||||
std::vector<Instruction>::const_iterator end,
|
||||
const std::vector<DecompilerLabel>& labels,
|
||||
FunctionAtomicOps* container) {
|
||||
int convert_block_to_atomic_ops(int begin_idx,
|
||||
std::vector<Instruction>::const_iterator begin,
|
||||
std::vector<Instruction>::const_iterator end,
|
||||
const std::vector<DecompilerLabel>& labels,
|
||||
FunctionAtomicOps* container) {
|
||||
container->block_id_to_first_atomic_op.push_back(container->ops.size());
|
||||
for (auto& instr = begin; instr < end;) {
|
||||
// how many instructions can we look at, at most?
|
||||
@ -1400,21 +1400,23 @@ void convert_block_to_atomic_ops(int begin_idx,
|
||||
begin_idx += length;
|
||||
}
|
||||
container->block_id_to_end_atomic_op.push_back(container->ops.size());
|
||||
return int(container->ops.size());
|
||||
}
|
||||
|
||||
FunctionAtomicOps convert_function_to_atomic_ops(const Function& func,
|
||||
const std::vector<DecompilerLabel>& labels) {
|
||||
FunctionAtomicOps result;
|
||||
|
||||
int last_op = 0;
|
||||
for (const auto& block : func.basic_blocks) {
|
||||
// we should only consider the blocks which actually have instructions:
|
||||
if (block.end_word > block.start_word) {
|
||||
auto begin = func.instructions.begin() + block.start_word;
|
||||
auto end = func.instructions.begin() + block.end_word;
|
||||
convert_block_to_atomic_ops(block.start_word, begin, end, labels, &result);
|
||||
last_op = convert_block_to_atomic_ops(block.start_word, begin, end, labels, &result);
|
||||
} else {
|
||||
result.block_id_to_first_atomic_op.push_back(-1);
|
||||
result.block_id_to_end_atomic_op.push_back(-1);
|
||||
result.block_id_to_first_atomic_op.push_back(last_op);
|
||||
result.block_id_to_end_atomic_op.push_back(last_op);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -32,12 +32,13 @@ struct FunctionAtomicOps {
|
||||
* @param end : the end of the instructions for the block
|
||||
* @param labels : label names for the function, used for error prints on failed conversions
|
||||
* @param container : the container to add to
|
||||
* @return the last op (non-inclusive)
|
||||
*/
|
||||
void convert_block_to_atomic_ops(int begin_idx,
|
||||
std::vector<Instruction>::const_iterator begin,
|
||||
std::vector<Instruction>::const_iterator end,
|
||||
const std::vector<DecompilerLabel>& labels,
|
||||
FunctionAtomicOps* container);
|
||||
int convert_block_to_atomic_ops(int begin_idx,
|
||||
std::vector<Instruction>::const_iterator begin,
|
||||
std::vector<Instruction>::const_iterator end,
|
||||
const std::vector<DecompilerLabel>& labels,
|
||||
FunctionAtomicOps* container);
|
||||
|
||||
/*!
|
||||
* Convert an entire function to AtomicOps
|
||||
|
@ -1,11 +1,10 @@
|
||||
#include <stdexcept>
|
||||
#include <unordered_set>
|
||||
#include "Env.h"
|
||||
|
||||
namespace decompiler {
|
||||
std::string Env::get_variable_name(Register reg, int atomic_idx) const {
|
||||
(void)reg;
|
||||
(void)atomic_idx;
|
||||
throw std::runtime_error("Env::get_variable_name not yet implemented.");
|
||||
std::string Env::get_variable_name(Register reg, int atomic_idx, VariableMode mode) const {
|
||||
return m_var_names.lookup(reg, atomic_idx, mode).name();
|
||||
}
|
||||
|
||||
/*!
|
||||
@ -17,4 +16,63 @@ void Env::set_types(const std::vector<TypeState>& block_init_types,
|
||||
m_op_end_types = op_end_types;
|
||||
m_has_types = true;
|
||||
}
|
||||
|
||||
std::string Env::print_local_var_types() const {
|
||||
assert(has_local_vars());
|
||||
std::vector<std::string> entries;
|
||||
std::unordered_map<Register, std::unordered_set<int>, Register::hash> printed;
|
||||
|
||||
for (auto& reg_info : m_var_names.read_vars) {
|
||||
auto& reg_printed = printed[reg_info.first];
|
||||
for (int var_id = 0; var_id < int(reg_info.second.size()); var_id++) {
|
||||
auto& info = reg_info.second.at(var_id);
|
||||
if (info.initialized) {
|
||||
reg_printed.insert(var_id);
|
||||
entries.push_back(fmt::format("{}: {}", info.name(), info.type.typespec().print()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& reg_info : m_var_names.write_vars) {
|
||||
auto& reg_printed = printed[reg_info.first];
|
||||
for (int var_id = 0; var_id < int(reg_info.second.size()); var_id++) {
|
||||
auto& info = reg_info.second.at(var_id);
|
||||
if (info.initialized) {
|
||||
if (reg_printed.find(var_id) == reg_printed.end()) {
|
||||
entries.push_back(fmt::format("{}: {}", info.name(), info.type.typespec().print()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int max_len = 0;
|
||||
for (auto& entry : entries) {
|
||||
if (int(entry.length()) > max_len) {
|
||||
max_len = entry.length();
|
||||
}
|
||||
}
|
||||
|
||||
constexpr int row_len = 100;
|
||||
int per_row = std::max(1, row_len / max_len);
|
||||
int entry_len = 100 / per_row;
|
||||
|
||||
std::string result;
|
||||
|
||||
for (int entry_id = 0; entry_id < int(entries.size()); entry_id++) {
|
||||
if ((entry_id % per_row) == 0) {
|
||||
// onto a new line!
|
||||
if (entry_id != 0) {
|
||||
result += '\n';
|
||||
}
|
||||
result += ";; ";
|
||||
}
|
||||
result += ' ';
|
||||
result += entries.at(entry_id);
|
||||
result += std::string(std::max(0, entry_len - int(entries.at(entry_id).length())), ' ');
|
||||
}
|
||||
|
||||
result += '\n';
|
||||
|
||||
return result;
|
||||
}
|
||||
} // namespace decompiler
|
@ -5,10 +5,35 @@
|
||||
#include <cassert>
|
||||
#include "decompiler/util/TP_Type.h"
|
||||
#include "decompiler/Disasm/Register.h"
|
||||
#include "decompiler/IR2/IR2_common.h"
|
||||
|
||||
namespace decompiler {
|
||||
class LinkedObjectFile;
|
||||
|
||||
struct VariableNames {
|
||||
struct VarInfo {
|
||||
VarInfo() = default;
|
||||
std::string name() const { return fmt::format("{}-{}", reg.to_charp(), id); }
|
||||
TP_Type type;
|
||||
Register reg;
|
||||
int id = -1;
|
||||
bool initialized = false;
|
||||
};
|
||||
|
||||
std::unordered_map<Register, std::vector<VariableNames::VarInfo>, Register::hash> read_vars,
|
||||
write_vars;
|
||||
std::unordered_map<Register, std::vector<int>, Register::hash> read_opid_to_varid,
|
||||
write_opid_to_varid;
|
||||
|
||||
const VarInfo& lookup(Register reg, int op_id, VariableMode mode) const {
|
||||
if (mode == VariableMode::READ) {
|
||||
return read_vars.at(reg).at(read_opid_to_varid.at(reg).at(op_id));
|
||||
} else {
|
||||
return write_vars.at(reg).at(write_opid_to_varid.at(reg).at(op_id));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/*!
|
||||
* An "environment" for a single function.
|
||||
* This contains data for an entire function, like which registers are live when, the types of
|
||||
@ -19,7 +44,7 @@ class Env {
|
||||
public:
|
||||
bool has_local_vars() const { return m_has_local_vars; }
|
||||
bool has_type_analysis() const { return m_has_types; }
|
||||
std::string get_variable_name(Register reg, int atomic_idx) const;
|
||||
std::string get_variable_name(Register reg, int atomic_idx, VariableMode mode) const;
|
||||
|
||||
/*!
|
||||
* Get the types in registers _after_ the given operation has completed.
|
||||
@ -40,6 +65,14 @@ class Env {
|
||||
|
||||
void set_types(const std::vector<TypeState>& block_init_types,
|
||||
const std::vector<TypeState>& op_end_types);
|
||||
|
||||
void set_local_vars(const VariableNames& names) {
|
||||
m_var_names = names;
|
||||
m_has_local_vars = true;
|
||||
}
|
||||
|
||||
std::string print_local_var_types() const;
|
||||
|
||||
LinkedObjectFile* file = nullptr;
|
||||
|
||||
private:
|
||||
@ -47,5 +80,6 @@ class Env {
|
||||
bool m_has_types = false;
|
||||
std::vector<TypeState> m_block_init_types;
|
||||
std::vector<TypeState> m_op_end_types;
|
||||
VariableNames m_var_names;
|
||||
};
|
||||
} // namespace decompiler
|
9
decompiler/IR2/IR2_common.h
Normal file
9
decompiler/IR2/IR2_common.h
Normal file
@ -0,0 +1,9 @@
|
||||
#pragma once
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace decompiler {
|
||||
// Selects which value of a register a variable reference stands for, relative to the
// instruction that uses it.
enum class VariableMode : u8 {
|
||||
READ, // represents value of the variable at the beginning of the instruction
|
||||
WRITE // represents value of the variable at the end of the instruction
|
||||
};
|
||||
}
|
@ -19,6 +19,8 @@ struct RegUsageInfo {
|
||||
RegSet live, dead, consumes, written_and_unused;
|
||||
};
|
||||
|
||||
int block_count() const { return int(block.size()); }
|
||||
|
||||
std::vector<PerBlock> block;
|
||||
std::vector<PerOp> op;
|
||||
|
||||
|
571
decompiler/IR2/variable_naming.cpp
Normal file
571
decompiler/IR2/variable_naming.cpp
Normal file
@ -0,0 +1,571 @@
|
||||
#include <set>
|
||||
#include "variable_naming.h"
|
||||
#include "reg_usage.h"
|
||||
#include "decompiler/Function/Function.h"
|
||||
#include "third-party/fmt/core.h"
|
||||
|
||||
namespace decompiler {
|
||||
|
||||
namespace {
/*!
 * Join the names of all registers in a container into one string.
 * Each element must provide to_charp(). Every name, including the last one, is followed by a
 * single space.
 */
template <typename T>
std::string reg_to_string(const T& regs) {
  std::string names;
  for (auto current : regs) {
    names.append(current.to_charp());
    names.push_back(' ');
  }
  return names;
}
}  // namespace
|
||||
|
||||
/*!
|
||||
* Allocate a new SSA variable for the given register.
|
||||
* This should only be used to allocate the result of a non-phi instruction.
|
||||
*/
|
||||
VarSSA VarMapSSA::allocate(Register reg) {
|
||||
Entry new_entry;
|
||||
new_entry.reg = reg;
|
||||
new_entry.entry_id = int(m_entries.size());
|
||||
new_entry.var_id = get_next_var_id(reg);
|
||||
VarSSA result(reg, new_entry.entry_id);
|
||||
m_entries.push_back(new_entry);
|
||||
return result;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Allocate a new SSA for the given register.
|
||||
* This should only be used to allocate the result of a phi-function.
|
||||
*/
|
||||
VarSSA VarMapSSA::allocate_init_phi(Register reg, int block_id) {
|
||||
Entry new_entry;
|
||||
new_entry.reg = reg;
|
||||
new_entry.entry_id = int(m_entries.size());
|
||||
new_entry.var_id = -block_id;
|
||||
VarSSA result(reg, new_entry.entry_id);
|
||||
m_entries.push_back(new_entry);
|
||||
return result;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the next unused variable id for the given register.
|
||||
*/
|
||||
int VarMapSSA::get_next_var_id(Register reg) {
|
||||
return ++m_reg_next_id[reg];
|
||||
}
|
||||
|
||||
/*!
|
||||
* Combine the two variables into one. The final name is:
|
||||
* - B0, if either is B0
|
||||
* - otherwise b's name.
|
||||
*/
|
||||
void VarMapSSA::merge(const VarSSA& var_a, const VarSSA& var_b) {
|
||||
auto& a = m_entries.at(var_a.m_entry_id);
|
||||
auto& b = m_entries.at(var_b.m_entry_id);
|
||||
assert(a.reg == b.reg);
|
||||
if (b.var_id == 0) {
|
||||
a.var_id = b.var_id;
|
||||
} else {
|
||||
b.var_id = a.var_id;
|
||||
}
|
||||
}
|
||||
|
||||
void VarMapSSA::merge_to_first(const VarSSA& var_a, const VarSSA& var_b) {
|
||||
auto& a = m_entries.at(var_a.m_entry_id);
|
||||
auto& b = m_entries.at(var_b.m_entry_id);
|
||||
assert(a.reg == b.reg);
|
||||
b.var_id = a.var_id;
|
||||
}
|
||||
|
||||
std::string VarMapSSA::to_string(const VarSSA& var) const {
|
||||
auto var_id = m_entries.at(var.m_entry_id).var_id;
|
||||
if (var_id > 0) {
|
||||
return fmt::format("{}-{}", var.m_reg.to_charp(), var_id);
|
||||
} else {
|
||||
return fmt::format("{}-B{}", var.m_reg.to_charp(), -var_id);
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* Do these two SSA variables represent the same "program variable"
|
||||
*/
|
||||
bool VarMapSSA::same(const VarSSA& var_a, const VarSSA& var_b) const {
|
||||
return var_a.m_reg == var_b.m_reg &&
|
||||
m_entries.at(var_a.m_entry_id).var_id == m_entries.at(var_b.m_entry_id).var_id;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get program variable ID from an SSA variable.
|
||||
*/
|
||||
int VarMapSSA::var_id(const VarSSA& var) {
|
||||
return m_entries.at(var.m_entry_id).var_id;
|
||||
}
|
||||
|
||||
/*!
|
||||
* For a given register and map, remap using var_id = remap[var_id]
|
||||
* For variables not in the map, set ID to INT32_MIN.
|
||||
*/
|
||||
void VarMapSSA::remap_reg(Register reg, const std::unordered_map<int, int>& remap) {
|
||||
for (auto& entry : m_entries) {
|
||||
if (entry.reg == reg) {
|
||||
auto kv = remap.find(entry.var_id);
|
||||
if (kv == remap.end()) {
|
||||
entry.var_id = INT32_MIN;
|
||||
} else {
|
||||
entry.var_id = kv->second;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string SSA::Phi::print(const VarMapSSA& var_map) const {
|
||||
std::string result = var_map.to_string(dest);
|
||||
result += " <- phi(";
|
||||
for (auto& s : sources) {
|
||||
result += var_map.to_string(s);
|
||||
result += ' ';
|
||||
}
|
||||
|
||||
if (!sources.empty()) {
|
||||
result.pop_back();
|
||||
}
|
||||
|
||||
result += ')';
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string SSA::Ins::print(const VarMapSSA& var_map) const {
|
||||
std::string result;
|
||||
if (dst.has_value()) {
|
||||
result += var_map.to_string(*dst) + " <- (";
|
||||
} else {
|
||||
result += "read(";
|
||||
}
|
||||
|
||||
for (auto& s : src) {
|
||||
result += var_map.to_string(s);
|
||||
result += ' ';
|
||||
}
|
||||
|
||||
if (!src.empty()) {
|
||||
result.pop_back();
|
||||
}
|
||||
|
||||
result += ')';
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string SSA::Block::print(const VarMapSSA& var_map) const {
|
||||
std::string result;
|
||||
for (auto& phi : phis) {
|
||||
result += " ";
|
||||
result += phi.second.print(var_map);
|
||||
result += '\n';
|
||||
}
|
||||
for (auto& i : ins) {
|
||||
result += " ";
|
||||
result += i.print(var_map);
|
||||
result += '\n';
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the phi function that sets the initial value of the given register in this block
|
||||
* If no phi function exists, it will be created.
|
||||
*/
|
||||
SSA::Phi& SSA::get_phi(int block, Register dest_reg) {
|
||||
auto& phi_map = blocks.at(block).phis;
|
||||
auto kv = phi_map.find(dest_reg);
|
||||
if (kv == phi_map.end()) {
|
||||
auto dest_var = map.allocate_init_phi(dest_reg, block);
|
||||
phi_map.insert(std::make_pair(dest_reg, dest_var));
|
||||
}
|
||||
return phi_map.at(dest_reg);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the result (SSA variable) of the phi function that sets the value of the given register in
|
||||
* this block. If there is no phi which sets this, creates one (empty)
|
||||
*/
|
||||
VarSSA SSA::get_phi_dest(int block, Register dest_reg) {
|
||||
return get_phi(block, dest_reg).dest;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Add a source SSA variable to the phi setting the initial value of dest reg at the top of the
|
||||
* given block. If there is no phi which sets dest_reg, creates one.
|
||||
*/
|
||||
void SSA::add_source_to_phi(int block, Register dest_reg, const VarSSA& src_var) {
|
||||
auto& phi = get_phi(block, dest_reg);
|
||||
phi.sources.push_back(src_var);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/*!
|
||||
* Create a "really crude" SSA, as described in
|
||||
* "Aycock and Horspool Simple Generation of Static Single-Assignment Form"
|
||||
*
|
||||
* Note - we do a few tricks to make this more efficient, inspired by "improvement 1", but
|
||||
* implemented slightly differently. (I couldn't figure out how to efficiently implement their
|
||||
* improvement 1). We also take advantage of precomputed register usage info to avoid creating
|
||||
* totally useless phis that propagate unused values through to the end of the function.
|
||||
*/
|
||||
SSA make_rc_ssa(const Function& function, const RegUsageInfo& rui, const FunctionAtomicOps& ops) {
|
||||
SSA ssa(rui.block_count());
|
||||
for (int block_id = 0; block_id < rui.block_count(); block_id++) {
|
||||
const auto& block = function.basic_blocks.at(block_id);
|
||||
int start_op = ops.block_id_to_first_atomic_op.at(block_id);
|
||||
int end_op = ops.block_id_to_end_atomic_op.at(block_id);
|
||||
if (start_op == end_op) {
|
||||
// later we rely on having > 0 ops in our block, so we must reject 0 size blocks.
|
||||
if (block_id + 1 == rui.block_count()) {
|
||||
// if it's the last block, just ignore it. The expression propagator will ignore it too,
|
||||
// so the return value will safely make it to the end.
|
||||
continue;
|
||||
}
|
||||
// otherwise give up. This is something that should be fixed upstream (#196).
|
||||
throw std::runtime_error("Zero size blocks not yet supported");
|
||||
}
|
||||
|
||||
// local map: current register names at the current op.
|
||||
std::unordered_map<Register, VarSSA, Register::hash> current_regs;
|
||||
|
||||
// initialize phis. this is only done on:
|
||||
// - variables live out at the first op
|
||||
// - variables read by the first op
|
||||
// which should contain at least all live variables at the beginning of the block.
|
||||
// this may accidentally add a phi for a variable that's dead at the block entry but is
|
||||
// defined by the first op. This is no big deal, as it will be trivially eliminated later on.
|
||||
const auto& start_op_info = rui.op.at(start_op);
|
||||
const auto& start_op_op = ops.ops.at(start_op);
|
||||
auto init_regs = start_op_info.live;
|
||||
for (auto reg : start_op_op->read_regs()) {
|
||||
init_regs.insert(reg);
|
||||
}
|
||||
|
||||
for (auto reg : init_regs) {
|
||||
// to avoid operator[]
|
||||
auto it = current_regs.find(reg);
|
||||
if (it != current_regs.end()) {
|
||||
assert(false);
|
||||
it->second = ssa.get_phi_dest(block_id, reg);
|
||||
} else {
|
||||
current_regs.insert(std::make_pair(reg, ssa.get_phi_dest(block_id, reg)));
|
||||
}
|
||||
}
|
||||
|
||||
// loop over ops, creating and reading from variables as needed.
|
||||
for (int op_id = start_op; op_id < end_op; op_id++) {
|
||||
const auto& op = ops.ops.at(op_id);
|
||||
SSA::Ins ssa_i(op_id);
|
||||
// todo - verify no duplicates here?
|
||||
assert(op->write_regs().size() <= 1);
|
||||
// reads:
|
||||
for (auto r : op->read_regs()) {
|
||||
ssa_i.src.push_back(current_regs.at(r));
|
||||
}
|
||||
// writes:
|
||||
if (!op->write_regs().empty()) {
|
||||
auto w = op->write_regs().front();
|
||||
auto var = ssa.map.allocate(w);
|
||||
ssa_i.dst = var;
|
||||
// avoid operator[] again
|
||||
auto it = current_regs.find(w);
|
||||
if (it != current_regs.end()) {
|
||||
it->second = var;
|
||||
} else {
|
||||
current_regs.insert(std::make_pair(w, var));
|
||||
}
|
||||
}
|
||||
|
||||
ssa.blocks.at(block_id).ins.push_back(ssa_i);
|
||||
}
|
||||
|
||||
// process succs:
|
||||
auto& end_op_info = rui.op.at(end_op - 1);
|
||||
for (auto succ : {block.succ_branch, block.succ_ft}) {
|
||||
if (succ != -1) {
|
||||
for (auto reg : end_op_info.live) {
|
||||
// only update phis for variables that are actually live at the next block.
|
||||
ssa.add_source_to_phi(succ, reg, current_regs.at(reg));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ssa;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
std::string SSA::print() const {
|
||||
std::string result;
|
||||
for (int block_id = 0; block_id < int(blocks.size()); block_id++) {
|
||||
result += fmt::format("B-{}\n", block_id);
|
||||
result += blocks.at(block_id).print(map);
|
||||
result += "\n";
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Simplify the SSA while still keeping it in SSA form.
|
||||
* This does only a single pass of simplifications and returns true if it made changes.
|
||||
*/
|
||||
bool SSA::simplify() {
|
||||
bool changed = false;
|
||||
for (auto& block : blocks) {
|
||||
auto it = block.phis.begin();
|
||||
while (it != block.phis.end()) {
|
||||
// first case: all sources are the same as the destination.
|
||||
// note - this will remove all phis with 1 or 0 arguments.
|
||||
bool remove = true;
|
||||
auto& dst = it->second.dest;
|
||||
for (auto& src : it->second.sources) {
|
||||
if (!map.same(src, dst)) {
|
||||
remove = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!remove) {
|
||||
// second case. V_i = phi(combo of i, j)
|
||||
remove = true;
|
||||
auto v_i = it->second.dest;
|
||||
std::optional<VarSSA> v_j;
|
||||
for (auto& src : it->second.sources) {
|
||||
if (!map.same(v_i, src)) {
|
||||
// three cases:
|
||||
if (!v_j.has_value()) {
|
||||
// this is the first time we see j
|
||||
v_j = src;
|
||||
} else {
|
||||
// we know j...
|
||||
if (!map.same(*v_j, src)) {
|
||||
// but it's not a match. three different vars, so give up.
|
||||
remove = false;
|
||||
break;
|
||||
}
|
||||
// else, we know j and matched it, continue checking
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (remove) {
|
||||
assert(v_j.has_value());
|
||||
map.merge(*v_j, v_i);
|
||||
}
|
||||
}
|
||||
|
||||
if (remove) {
|
||||
changed = true;
|
||||
it = block.phis.erase(it);
|
||||
} else {
|
||||
it++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Convert from SSA to a form without phis. This takes advantage of the following properties:
|
||||
* - All phis have all sources and dest in the same HW register.
|
||||
* - Merging variables in the same register is safe because they can't have overlapping use.
|
||||
* - As a bonus, this never merges variables that we _know_ are distinct GOAL variables.
|
||||
*/
|
||||
void SSA::merge_all_phis() {
|
||||
for (auto& block : blocks) {
|
||||
for (auto& phi : block.phis) {
|
||||
for (auto& src : phi.second.sources) {
|
||||
map.merge_to_first(phi.second.dest, src);
|
||||
}
|
||||
}
|
||||
block.phis.clear();
|
||||
}
|
||||
}
|
||||
|
||||
void SSA::remap() {
|
||||
std::unordered_map<Register, std::set<int>, Register::hash> used_vars;
|
||||
for (auto& block : blocks) {
|
||||
assert(block.phis.empty());
|
||||
for (auto& instr : block.ins) {
|
||||
if (instr.dst.has_value()) {
|
||||
used_vars[instr.dst->reg()].insert(map.var_id(*instr.dst));
|
||||
}
|
||||
for (auto& src : instr.src) {
|
||||
used_vars[src.reg()].insert(map.var_id(src));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& reg_vars : used_vars) {
|
||||
std::unordered_map<int, int> var_remap;
|
||||
int i = 0;
|
||||
for (auto var_id : reg_vars.second) {
|
||||
var_remap[var_id] = i++;
|
||||
}
|
||||
map.remap_reg(reg_vars.first, var_remap);
|
||||
program_read_vars[reg_vars.first].resize(i);
|
||||
program_write_vars[reg_vars.first].resize(i);
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
/*!
 * Fold one observation of a variable into its VarInfo record.
 * - First observation (info->initialized is false): store id, register and the type that
 *   ts holds for reg, then mark the record initialized.
 * - Later observations: assert that id and register match the record, then replace the stored
 *   type with tp_lca(stored type, type that ts holds for reg).
 *
 * @param info   record to create or update
 * @param reg    register the variable lives in
 * @param ts     type state to read the register's type from
 * @param var_id ID of the variable within reg
 * @param dts    type system used to combine the types
 */
void update_var_info(VariableNames::VarInfo* info,
                     Register reg,
                     const TypeState& ts,
                     int var_id,
                     const DecompilerTypeSystem& dts) {
  if (!info->initialized) {
    info->id = var_id;
    info->reg = reg;
    info->type = ts.get(reg);
    info->initialized = true;
    return;
  }

  assert(info->id == var_id);
  assert(info->reg == reg);

  // tp_lca reports through this out-parameter. The result isn't needed here, but the variable
  // is given a defined value so the callee never sees an indeterminate bool.
  bool changed = false;
  info->type = dts.tp_lca(info->type, ts.get(reg), &changed);
  (void)changed;
}
|
||||
} // namespace
|
||||
|
||||
void SSA::make_vars(const Function& function, const DecompilerTypeSystem& dts) {
|
||||
for (int block_id = 0; block_id < int(blocks.size()); block_id++) {
|
||||
const auto& block = blocks.at(block_id);
|
||||
const TypeState* init_types = &function.ir2.env.get_types_at_block_entry(block_id);
|
||||
for (auto& instr : block.ins) {
|
||||
auto op_id = instr.op_id;
|
||||
const TypeState* end_types = &function.ir2.env.get_types_after_op(op_id);
|
||||
|
||||
if (instr.dst.has_value()) {
|
||||
auto var_id = map.var_id(*instr.dst);
|
||||
auto* info = &program_write_vars[instr.dst->reg()].at(var_id);
|
||||
update_var_info(info, instr.dst->reg(), *end_types, var_id, dts);
|
||||
}
|
||||
|
||||
for (auto& src : instr.src) {
|
||||
auto var_id = map.var_id(src);
|
||||
auto* info = &program_read_vars[src.reg()].at(var_id);
|
||||
update_var_info(info, src.reg(), *init_types, var_id, dts);
|
||||
}
|
||||
|
||||
init_types = end_types;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
 * Build the VariableNames result: copies of the per-register read/write variable tables, plus
 * per-register tables indexed by op_id that give the variable ID written (destination) or
 * read (source) by that op. Tables are grown on demand to hold the largest op_id seen.
 */
VariableNames SSA::get_vars() {
  VariableNames names;
  names.read_vars = program_read_vars;
  names.write_vars = program_write_vars;

  // grow the table if needed, then store the variable ID in the op's slot.
  const auto store = [](auto& ids, int op_id, int var_id) {
    if (int(ids.size()) <= op_id) {
      ids.resize(op_id + 1);
    }
    ids.at(op_id) = var_id;
  };

  // the read and write tables are independent, so both are filled in a single traversal.
  for (const auto& blk : blocks) {
    for (const auto& op : blk.ins) {
      if (op.dst.has_value()) {
        auto& write_ids = names.write_opid_to_varid[op.dst->reg()];
        store(write_ids, op.op_id, map.var_id(*op.dst));
      }
      for (const auto& source : op.src) {
        auto& read_ids = names.read_opid_to_varid[source.reg()];
        store(read_ids, op.op_id, map.var_id(source));
      }
    }
  }

  return names;
}
|
||||
|
||||
std::optional<VariableNames> run_variable_renaming(const Function& function,
|
||||
const RegUsageInfo& rui,
|
||||
const FunctionAtomicOps& ops,
|
||||
const DecompilerTypeSystem& dts,
|
||||
bool debug_prints) {
|
||||
if (debug_prints) {
|
||||
std::string debug_in;
|
||||
for (int block_id = 0; block_id < rui.block_count(); block_id++) {
|
||||
auto& block_info = rui.block.at(block_id);
|
||||
// const auto& block = function.basic_blocks.at(block_id);
|
||||
int start_op = ops.block_id_to_first_atomic_op.at(block_id);
|
||||
int end_op = ops.block_id_to_end_atomic_op.at(block_id);
|
||||
|
||||
debug_in += fmt::format("Block {}\n", block_id);
|
||||
debug_in += fmt::format(" use: {}\n", reg_to_string(block_info.use));
|
||||
debug_in += fmt::format(" in : {}\n", reg_to_string(block_info.input));
|
||||
debug_in += "pred: ";
|
||||
for (auto p : function.basic_blocks.at(block_id).pred) {
|
||||
debug_in += std::to_string(p);
|
||||
debug_in += ' ';
|
||||
}
|
||||
debug_in += '\n';
|
||||
|
||||
for (int op_id = start_op; op_id < end_op; op_id++) {
|
||||
debug_in +=
|
||||
fmt::format(" [{:03d}] {} : ", op_id, ops.ops.at(op_id)->to_string(function.ir2.env));
|
||||
auto& op_info = rui.op.at(op_id);
|
||||
for (auto reg : op_info.live) {
|
||||
debug_in += reg.to_charp();
|
||||
debug_in += ' ';
|
||||
}
|
||||
debug_in += '\n';
|
||||
}
|
||||
|
||||
debug_in += fmt::format(" def: {}\n", reg_to_string(block_info.defs));
|
||||
debug_in += fmt::format(" out: {}\n\n", reg_to_string(block_info.output));
|
||||
}
|
||||
|
||||
fmt::print("Debug Input\n{}\n----------------------------------\n", debug_in);
|
||||
}
|
||||
|
||||
// Create and convert to SSA
|
||||
auto ssa = make_rc_ssa(function, rui, ops);
|
||||
|
||||
if (debug_prints) {
|
||||
fmt::print("Basic SSA\n{}\n------------------------------------\n", ssa.print());
|
||||
}
|
||||
|
||||
// eliminate PHIs that are stupid.
|
||||
while (ssa.simplify()) {
|
||||
}
|
||||
if (debug_prints) {
|
||||
fmt::print("Simplified SSA\n{}-------------------------------\n", ssa.print());
|
||||
}
|
||||
|
||||
// Merge phis to return to executable code.
|
||||
ssa.merge_all_phis();
|
||||
if (debug_prints) {
|
||||
fmt::print("{}", ssa.print());
|
||||
}
|
||||
|
||||
// merge same vars (decided this made things worse)
|
||||
|
||||
// do rename
|
||||
ssa.remap();
|
||||
if (debug_prints) {
|
||||
fmt::print("{}", ssa.print());
|
||||
}
|
||||
|
||||
if (function.ir2.env.has_type_analysis()) {
|
||||
// make vars
|
||||
ssa.make_vars(function, dts);
|
||||
return ssa.get_vars();
|
||||
} else {
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
} // namespace decompiler
|
149
decompiler/IR2/variable_naming.h
Normal file
149
decompiler/IR2/variable_naming.h
Normal file
@ -0,0 +1,149 @@
|
||||
/*!
|
||||
* @file variable_naming.h
|
||||
* This implements the variable renaming algorithm that splits registers into variables.
|
||||
* Note - this doesn't "merge" in cases where a variable lives in multiple registers.
|
||||
* That will be handled at expression building, as those cases are extremely specific.
|
||||
*
|
||||
* This algorithm has three phases:
|
||||
* 1). Convert to Static Single Assignment (SSA) form.
|
||||
* 2). Merge variables to eliminate phi functions
|
||||
* 3). Perform final variable naming and typing.
|
||||
*
|
||||
* In the future it may be possible to insert a step between 2 and 3 that merges incorrectly
|
||||
* separated variables based on heuristics.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <optional>
|
||||
#include <unordered_map>
|
||||
#include <cassert>
|
||||
#include "decompiler/Disasm/Register.h"
|
||||
#include "decompiler/util/TP_Type.h"
|
||||
#include "decompiler/IR2/Env.h"
|
||||
|
||||
namespace decompiler {
|
||||
|
||||
class Function;
|
||||
class DecompilerTypeSystem;
|
||||
struct RegUsageInfo;
|
||||
struct FunctionAtomicOps;
|
||||
|
||||
/*!
|
||||
* An SSA variable in the variable analysis pass. Can be converted into a register again.
|
||||
* These must be created from a VarMapSSA, which can then remap and merge these.
|
||||
* This remapping/merging functionality is used in the initial conversion to SSA,
|
||||
* the simplification of the SSA, and the merging of variables.
|
||||
*/
|
||||
class VarSSA {
|
||||
public:
|
||||
Register reg() const { return m_reg; }
|
||||
|
||||
private:
|
||||
friend class VarMapSSA;
|
||||
VarSSA(Register reg, int entry_id) : m_reg(reg), m_entry_id(entry_id) {}
|
||||
VarSSA() = default;
|
||||
Register m_reg;
|
||||
int m_entry_id = -1;
|
||||
};
|
||||
|
||||
/*!
|
||||
* A map of VarSSA's to ID's. The ID represents a program variable.
|
||||
* The VarSSA represents an SSA variable during the "rough" SSA phase.
|
||||
* As the algorithm runs, it reduces the number of program variables my merging.
|
||||
*
|
||||
* ID's are given out in order per register in the order of allocation.
|
||||
* All ID's for normal variables are > 0.
|
||||
* Negative/0 ID's correspond to block ending variables (set with remap_to_final_for_block).
|
||||
* The ID is -block_id. It is printed as B{ID}.
|
||||
* Use merge(var, var) to make two variables have the same ID. A wins, unless B is zero, in which
|
||||
* case B wins. This approach is chosen because it
|
||||
* - making A win makes the names match the block for intermediate results
|
||||
* - makes the B0 version of the variable represent the initial value of the variable on function
|
||||
* entry
|
||||
*/
|
||||
class VarMapSSA {
|
||||
public:
|
||||
VarSSA allocate(Register reg);
|
||||
VarSSA allocate_init_phi(Register reg, int block_id);
|
||||
void merge(const VarSSA& var_a, const VarSSA& var_b);
|
||||
void merge_to_first(const VarSSA& var_a, const VarSSA& var_b);
|
||||
std::string to_string(const VarSSA& var) const;
|
||||
bool same(const VarSSA& var_a, const VarSSA& var_b) const;
|
||||
int var_id(const VarSSA& var);
|
||||
void remap_reg(Register reg, const std::unordered_map<int, int>& remap);
|
||||
|
||||
private:
|
||||
int get_next_var_id(Register reg);
|
||||
|
||||
struct Entry {
|
||||
int var_id = -1;
|
||||
int entry_id = -1;
|
||||
Register reg;
|
||||
};
|
||||
|
||||
std::vector<Entry> m_entries;
|
||||
std::unordered_map<Register, int, Register::hash> m_reg_next_id;
|
||||
};
|
||||
|
||||
/*!
|
||||
* Representation of a program used in the variable renaming algorithm.
|
||||
*/
|
||||
struct SSA {
|
||||
struct Phi {
|
||||
// represents a phi node placed at the top of a block.
|
||||
VarSSA dest;
|
||||
std::vector<VarSSA> sources;
|
||||
|
||||
explicit Phi(const VarSSA& d) : dest(d) {}
|
||||
std::string print(const VarMapSSA& var_map) const;
|
||||
};
|
||||
|
||||
struct Ins {
|
||||
explicit Ins(int id) : op_id(id) {}
|
||||
// represents an instruction.
|
||||
std::optional<VarSSA> dst;
|
||||
std::vector<VarSSA> src;
|
||||
int op_id = -1;
|
||||
|
||||
std::string print(const VarMapSSA& var_map) const;
|
||||
};
|
||||
|
||||
struct Block {
|
||||
std::unordered_map<Register, Phi, Register::hash> phis; // stored per dest reg.
|
||||
std::vector<Ins> ins;
|
||||
|
||||
std::string print(const VarMapSSA& var_map) const;
|
||||
};
|
||||
|
||||
explicit SSA(int n_blocks) { blocks.resize(n_blocks); }
|
||||
VarMapSSA map;
|
||||
std::vector<Block> blocks;
|
||||
|
||||
// in terms of reg, var_id
|
||||
std::unordered_map<Register, std::vector<VariableNames::VarInfo>, Register::hash>
|
||||
program_read_vars;
|
||||
std::unordered_map<Register, std::vector<VariableNames::VarInfo>, Register::hash>
|
||||
program_write_vars;
|
||||
|
||||
Phi& get_phi(int block, Register dest_reg);
|
||||
VarSSA get_phi_dest(int block, Register dest_reg);
|
||||
void add_source_to_phi(int block, Register dest_reg, const VarSSA& src_var);
|
||||
|
||||
bool simplify();
|
||||
void merge_all_phis();
|
||||
void remap();
|
||||
void make_vars(const Function& function, const DecompilerTypeSystem& dts);
|
||||
VariableNames get_vars();
|
||||
std::string print() const;
|
||||
};
|
||||
|
||||
std::optional<VariableNames> run_variable_renaming(const Function& function,
|
||||
const RegUsageInfo& rui,
|
||||
const FunctionAtomicOps& ops,
|
||||
const DecompilerTypeSystem& dts,
|
||||
bool debug_prints = false);
|
||||
|
||||
} // namespace decompiler
|
@ -72,6 +72,7 @@ class ObjectFileDB {
|
||||
void ir2_atomic_op_pass();
|
||||
void ir2_type_analysis_pass();
|
||||
void ir2_register_usage_pass();
|
||||
void ir2_variable_pass();
|
||||
void ir2_write_results(const std::string& output_dir);
|
||||
std::string ir2_to_file(ObjectFileData& data);
|
||||
std::string ir2_function_to_string(ObjectFileData& data, Function& function, int seg);
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "common/util/FileUtil.h"
|
||||
#include "decompiler/Function/TypeInspector.h"
|
||||
#include "decompiler/IR2/reg_usage.h"
|
||||
#include "decompiler/IR2/variable_naming.h"
|
||||
|
||||
namespace decompiler {
|
||||
|
||||
@ -29,6 +30,8 @@ void ObjectFileDB::analyze_functions_ir2(const std::string& output_dir) {
|
||||
ir2_type_analysis_pass();
|
||||
lg::info("Register usage analysis...");
|
||||
ir2_register_usage_pass();
|
||||
lg::info("Variable analysis...");
|
||||
ir2_variable_pass();
|
||||
lg::info("Writing results...");
|
||||
ir2_write_results(output_dir);
|
||||
}
|
||||
@ -274,7 +277,7 @@ void ObjectFileDB::ir2_type_analysis_pass() {
|
||||
}
|
||||
});
|
||||
|
||||
lg::info("{}/{}/{}/{} (success/attempted/non-asm/total) in {:.2f} ms", successful_functions,
|
||||
lg::info("{}/{}/{}/{} (success/attempted/non-asm/total) in {:.2f} ms\n", successful_functions,
|
||||
attempted_functions, non_asm_functions, total_functions, timer.getMs());
|
||||
}
|
||||
|
||||
@ -293,8 +296,32 @@ void ObjectFileDB::ir2_register_usage_pass() {
|
||||
}
|
||||
});
|
||||
|
||||
lg::info("{}/{} functions had register usage analyzed in {:.2f} ms", analyzed_funcs, total_funcs,
|
||||
timer.getMs());
|
||||
lg::info("{}/{} functions had register usage analyzed in {:.2f} ms\n", analyzed_funcs,
|
||||
total_funcs, timer.getMs());
|
||||
}
|
||||
|
||||
void ObjectFileDB::ir2_variable_pass() {
|
||||
Timer timer;
|
||||
int attempted = 0;
|
||||
int successful = 0;
|
||||
for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) {
|
||||
(void)segment_id;
|
||||
(void)data;
|
||||
if (!func.suspected_asm && func.ir2.atomic_ops_succeeded) {
|
||||
try {
|
||||
attempted++;
|
||||
auto result = run_variable_renaming(func, func.ir2.reg_use, *func.ir2.atomic_ops, dts);
|
||||
if (result.has_value()) {
|
||||
successful++;
|
||||
func.ir2.env.set_local_vars(*result);
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
lg::warn("variable pass failed on {}: {}", func.guessed_name.to_string(), e.what());
|
||||
}
|
||||
}
|
||||
});
|
||||
lg::info("{}/{} functions out of attempted passed variable pass in {:.2f} ms\n", successful,
|
||||
attempted, timer.getMs());
|
||||
}
|
||||
|
||||
void ObjectFileDB::ir2_write_results(const std::string& output_dir) {
|
||||
@ -403,6 +430,10 @@ std::string ObjectFileDB::ir2_function_to_string(ObjectFileData& data, Function&
|
||||
result += ";;Warnings:\n" + func.warnings + "\n";
|
||||
}
|
||||
|
||||
if (func.ir2.env.has_local_vars()) {
|
||||
result += func.ir2.env.print_local_var_types();
|
||||
}
|
||||
|
||||
bool print_atomics = func.ir2.atomic_ops_succeeded;
|
||||
// print each instruction in the function.
|
||||
bool in_delay_slot = false;
|
||||
|
@ -162,7 +162,9 @@ void DecompilerTypeSystem::add_symbol(const std::string& name, const TypeSpec& t
|
||||
/*!
|
||||
* Compute the least common ancestor of two TP Types.
|
||||
*/
|
||||
TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add, bool* changed) {
|
||||
TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing,
|
||||
const TP_Type& add,
|
||||
bool* changed) const {
|
||||
// starting from most vague to most specific
|
||||
|
||||
// simplist case, no difference.
|
||||
@ -302,7 +304,7 @@ bool DecompilerTypeSystem::tp_lca(TypeState* combined, const TypeState& add) {
|
||||
return result;
|
||||
}
|
||||
|
||||
int DecompilerTypeSystem::get_format_arg_count(const std::string& str) {
|
||||
int DecompilerTypeSystem::get_format_arg_count(const std::string& str) const {
|
||||
int arg_count = 0;
|
||||
for (size_t i = 0; i < str.length(); i++) {
|
||||
if (str.at(i) == '~') {
|
||||
@ -317,7 +319,7 @@ int DecompilerTypeSystem::get_format_arg_count(const std::string& str) {
|
||||
return arg_count;
|
||||
}
|
||||
|
||||
int DecompilerTypeSystem::get_format_arg_count(const TP_Type& type) {
|
||||
int DecompilerTypeSystem::get_format_arg_count(const TP_Type& type) const {
|
||||
if (type.is_constant_string()) {
|
||||
return get_format_arg_count(type.get_string());
|
||||
} else {
|
||||
|
@ -38,11 +38,10 @@ class DecompilerTypeSystem {
|
||||
std::string dump_symbol_types();
|
||||
std::string lookup_parent_from_inspects(const std::string& child) const;
|
||||
bool lookup_flags(const std::string& type, u64* dest) const;
|
||||
TP_Type tp_lca(const TP_Type& existing, const TP_Type& add, bool* changed);
|
||||
TP_Type tp_lca_no_simplify(const TP_Type& existing, const TP_Type& add, bool* changed);
|
||||
TP_Type tp_lca(const TP_Type& existing, const TP_Type& add, bool* changed) const;
|
||||
bool tp_lca(TypeState* combined, const TypeState& add);
|
||||
int get_format_arg_count(const std::string& str);
|
||||
int get_format_arg_count(const TP_Type& type);
|
||||
int get_format_arg_count(const std::string& str) const;
|
||||
int get_format_arg_count(const TP_Type& type) const;
|
||||
struct {
|
||||
bool allow_pair;
|
||||
std::string current_method_type;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <cassert>
|
||||
#include "common/log/log.h"
|
||||
#include "common/type_system/TypeSpec.h"
|
||||
#include "common/common_types.h"
|
||||
#include "decompiler/Disasm/Register.h"
|
||||
@ -190,6 +191,7 @@ struct TypeState {
|
||||
case Reg::FPR:
|
||||
return fpr_types[r.get_fpr()];
|
||||
default:
|
||||
lg::die("Cannot use register {} with TypeState.", r.to_charp());
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user