mirror of
https://github.com/libretro/beetle-psx-libretro.git
synced 2024-11-23 08:49:47 +00:00
781094c597
subrepo: subdir: "deps/lightrec" merged: "0df4ec86" upstream: origin: "https://github.com/pcercuei/lightrec.git" branch: "master" commit: "0df4ec86" git-subrepo: version: "0.4.3" origin: "https://github.com/ingydotnet/git-subrepo" commit: "2f68596"
1412 lines
31 KiB
C
1412 lines
31 KiB
C
// SPDX-License-Identifier: LGPL-2.1-or-later
|
|
/*
|
|
* Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
|
|
*/
|
|
|
|
#include "config.h"
|
|
#include "disassembler.h"
|
|
#include "lightrec.h"
|
|
#include "memmanager.h"
|
|
#include "optimizer.h"
|
|
#include "regcache.h"
|
|
|
|
#include <errno.h>
|
|
#include <stdbool.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#define IF_OPT(opt, ptr) ((opt) ? (ptr) : NULL)
|
|
|
|
struct optimizer_list {
|
|
void (**optimizers)(struct opcode *);
|
|
unsigned int nb_optimizers;
|
|
};
|
|
|
|
bool is_unconditional_jump(union code c)
|
|
{
|
|
switch (c.i.op) {
|
|
case OP_SPECIAL:
|
|
return c.r.op == OP_SPECIAL_JR || c.r.op == OP_SPECIAL_JALR;
|
|
case OP_J:
|
|
case OP_JAL:
|
|
return true;
|
|
case OP_BEQ:
|
|
case OP_BLEZ:
|
|
return c.i.rs == c.i.rt;
|
|
case OP_REGIMM:
|
|
return (c.r.rt == OP_REGIMM_BGEZ ||
|
|
c.r.rt == OP_REGIMM_BGEZAL) && c.i.rs == 0;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool is_syscall(union code c)
|
|
{
|
|
return (c.i.op == OP_SPECIAL && c.r.op == OP_SPECIAL_SYSCALL) ||
|
|
(c.i.op == OP_CP0 && (c.r.rs == OP_CP0_MTC0 ||
|
|
c.r.rs == OP_CP0_CTC0) &&
|
|
(c.r.rd == 12 || c.r.rd == 13));
|
|
}
|
|
|
|
static u64 opcode_read_mask(union code op)
|
|
{
|
|
switch (op.i.op) {
|
|
case OP_SPECIAL:
|
|
switch (op.r.op) {
|
|
case OP_SPECIAL_SYSCALL:
|
|
case OP_SPECIAL_BREAK:
|
|
return 0;
|
|
case OP_SPECIAL_JR:
|
|
case OP_SPECIAL_JALR:
|
|
case OP_SPECIAL_MTHI:
|
|
case OP_SPECIAL_MTLO:
|
|
return BIT(op.r.rs);
|
|
case OP_SPECIAL_MFHI:
|
|
return BIT(REG_HI);
|
|
case OP_SPECIAL_MFLO:
|
|
return BIT(REG_LO);
|
|
case OP_SPECIAL_SLL:
|
|
case OP_SPECIAL_SRL:
|
|
case OP_SPECIAL_SRA:
|
|
return BIT(op.r.rt);
|
|
default:
|
|
return BIT(op.r.rs) | BIT(op.r.rt);
|
|
}
|
|
case OP_CP0:
|
|
switch (op.r.rs) {
|
|
case OP_CP0_MTC0:
|
|
case OP_CP0_CTC0:
|
|
return BIT(op.r.rt);
|
|
default:
|
|
return 0;
|
|
}
|
|
case OP_CP2:
|
|
if (op.r.op == OP_CP2_BASIC) {
|
|
switch (op.r.rs) {
|
|
case OP_CP2_BASIC_MTC2:
|
|
case OP_CP2_BASIC_CTC2:
|
|
return BIT(op.r.rt);
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
return 0;
|
|
case OP_J:
|
|
case OP_JAL:
|
|
case OP_LUI:
|
|
return 0;
|
|
case OP_BEQ:
|
|
case OP_BNE:
|
|
case OP_LWL:
|
|
case OP_LWR:
|
|
case OP_SB:
|
|
case OP_SH:
|
|
case OP_SWL:
|
|
case OP_SW:
|
|
case OP_SWR:
|
|
return BIT(op.i.rs) | BIT(op.i.rt);
|
|
default:
|
|
return BIT(op.i.rs);
|
|
}
|
|
}
|
|
|
|
static u64 opcode_write_mask(union code op)
|
|
{
|
|
u64 flags;
|
|
|
|
switch (op.i.op) {
|
|
case OP_SPECIAL:
|
|
switch (op.r.op) {
|
|
case OP_SPECIAL_JR:
|
|
case OP_SPECIAL_SYSCALL:
|
|
case OP_SPECIAL_BREAK:
|
|
return 0;
|
|
case OP_SPECIAL_MULT:
|
|
case OP_SPECIAL_MULTU:
|
|
case OP_SPECIAL_DIV:
|
|
case OP_SPECIAL_DIVU:
|
|
if (!OPT_FLAG_MULT_DIV)
|
|
return BIT(REG_LO) | BIT(REG_HI);
|
|
|
|
if (op.r.rd)
|
|
flags = BIT(op.r.rd);
|
|
else
|
|
flags = BIT(REG_LO);
|
|
if (op.r.imm)
|
|
flags |= BIT(op.r.imm);
|
|
else
|
|
flags |= BIT(REG_HI);
|
|
return flags;
|
|
case OP_SPECIAL_MTHI:
|
|
return BIT(REG_HI);
|
|
case OP_SPECIAL_MTLO:
|
|
return BIT(REG_LO);
|
|
default:
|
|
return BIT(op.r.rd);
|
|
}
|
|
case OP_ADDI:
|
|
case OP_ADDIU:
|
|
case OP_SLTI:
|
|
case OP_SLTIU:
|
|
case OP_ANDI:
|
|
case OP_ORI:
|
|
case OP_XORI:
|
|
case OP_LUI:
|
|
case OP_LB:
|
|
case OP_LH:
|
|
case OP_LWL:
|
|
case OP_LW:
|
|
case OP_LBU:
|
|
case OP_LHU:
|
|
case OP_LWR:
|
|
return BIT(op.i.rt);
|
|
case OP_JAL:
|
|
return BIT(31);
|
|
case OP_CP0:
|
|
switch (op.r.rs) {
|
|
case OP_CP0_MFC0:
|
|
case OP_CP0_CFC0:
|
|
return BIT(op.i.rt);
|
|
default:
|
|
return 0;
|
|
}
|
|
case OP_CP2:
|
|
if (op.r.op == OP_CP2_BASIC) {
|
|
switch (op.r.rs) {
|
|
case OP_CP2_BASIC_MFC2:
|
|
case OP_CP2_BASIC_CFC2:
|
|
return BIT(op.i.rt);
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
return 0;
|
|
case OP_REGIMM:
|
|
switch (op.r.rt) {
|
|
case OP_REGIMM_BLTZAL:
|
|
case OP_REGIMM_BGEZAL:
|
|
return BIT(31);
|
|
default:
|
|
return 0;
|
|
}
|
|
case OP_META_MOV:
|
|
return BIT(op.r.rd);
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
bool opcode_reads_register(union code op, u8 reg)
|
|
{
|
|
return opcode_read_mask(op) & BIT(reg);
|
|
}
|
|
|
|
bool opcode_writes_register(union code op, u8 reg)
|
|
{
|
|
return opcode_write_mask(op) & BIT(reg);
|
|
}
|
|
|
|
static bool opcode_is_load(union code op)
|
|
{
|
|
switch (op.i.op) {
|
|
case OP_LB:
|
|
case OP_LH:
|
|
case OP_LWL:
|
|
case OP_LW:
|
|
case OP_LBU:
|
|
case OP_LHU:
|
|
case OP_LWR:
|
|
case OP_LWC2:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
static bool opcode_is_store(union code op)
|
|
{
|
|
switch (op.i.op) {
|
|
case OP_SB:
|
|
case OP_SH:
|
|
case OP_SW:
|
|
case OP_SWL:
|
|
case OP_SWR:
|
|
case OP_SWC2:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool opcode_is_io(union code op)
|
|
{
|
|
return opcode_is_load(op) || opcode_is_store(op);
|
|
}
|
|
|
|
/* TODO: Complete */
|
|
static bool is_nop(union code op)
|
|
{
|
|
if (opcode_writes_register(op, 0)) {
|
|
switch (op.i.op) {
|
|
case OP_CP0:
|
|
return op.r.rs != OP_CP0_MFC0;
|
|
case OP_LB:
|
|
case OP_LH:
|
|
case OP_LWL:
|
|
case OP_LW:
|
|
case OP_LBU:
|
|
case OP_LHU:
|
|
case OP_LWR:
|
|
return false;
|
|
default:
|
|
return true;
|
|
}
|
|
}
|
|
|
|
switch (op.i.op) {
|
|
case OP_SPECIAL:
|
|
switch (op.r.op) {
|
|
case OP_SPECIAL_AND:
|
|
return op.r.rd == op.r.rt && op.r.rd == op.r.rs;
|
|
case OP_SPECIAL_ADD:
|
|
case OP_SPECIAL_ADDU:
|
|
return (op.r.rd == op.r.rt && op.r.rs == 0) ||
|
|
(op.r.rd == op.r.rs && op.r.rt == 0);
|
|
case OP_SPECIAL_SUB:
|
|
case OP_SPECIAL_SUBU:
|
|
return op.r.rd == op.r.rs && op.r.rt == 0;
|
|
case OP_SPECIAL_OR:
|
|
if (op.r.rd == op.r.rt)
|
|
return op.r.rd == op.r.rs || op.r.rs == 0;
|
|
else
|
|
return (op.r.rd == op.r.rs) && op.r.rt == 0;
|
|
case OP_SPECIAL_SLL:
|
|
case OP_SPECIAL_SRA:
|
|
case OP_SPECIAL_SRL:
|
|
return op.r.rd == op.r.rt && op.r.imm == 0;
|
|
case OP_SPECIAL_MFHI:
|
|
case OP_SPECIAL_MFLO:
|
|
return op.r.rd == 0;
|
|
default:
|
|
return false;
|
|
}
|
|
case OP_ORI:
|
|
case OP_ADDI:
|
|
case OP_ADDIU:
|
|
return op.i.rt == op.i.rs && op.i.imm == 0;
|
|
case OP_BGTZ:
|
|
return (op.i.rs == 0 || op.i.imm == 1);
|
|
case OP_REGIMM:
|
|
return (op.i.op == OP_REGIMM_BLTZ ||
|
|
op.i.op == OP_REGIMM_BLTZAL) &&
|
|
(op.i.rs == 0 || op.i.imm == 1);
|
|
case OP_BNE:
|
|
return (op.i.rs == op.i.rt || op.i.imm == 1);
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool load_in_delay_slot(union code op)
|
|
{
|
|
switch (op.i.op) {
|
|
case OP_CP0:
|
|
switch (op.r.rs) {
|
|
case OP_CP0_MFC0:
|
|
case OP_CP0_CFC0:
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
break;
|
|
case OP_CP2:
|
|
if (op.r.op == OP_CP2_BASIC) {
|
|
switch (op.r.rs) {
|
|
case OP_CP2_BASIC_MFC2:
|
|
case OP_CP2_BASIC_CFC2:
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
break;
|
|
case OP_LB:
|
|
case OP_LH:
|
|
case OP_LW:
|
|
case OP_LWL:
|
|
case OP_LWR:
|
|
case OP_LBU:
|
|
case OP_LHU:
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v)
|
|
{
|
|
switch (c.i.op) {
|
|
case OP_SPECIAL:
|
|
switch (c.r.op) {
|
|
case OP_SPECIAL_SLL:
|
|
if (known & BIT(c.r.rt)) {
|
|
known |= BIT(c.r.rd);
|
|
v[c.r.rd] = v[c.r.rt] << c.r.imm;
|
|
} else {
|
|
known &= ~BIT(c.r.rd);
|
|
}
|
|
break;
|
|
case OP_SPECIAL_SRL:
|
|
if (known & BIT(c.r.rt)) {
|
|
known |= BIT(c.r.rd);
|
|
v[c.r.rd] = v[c.r.rt] >> c.r.imm;
|
|
} else {
|
|
known &= ~BIT(c.r.rd);
|
|
}
|
|
break;
|
|
case OP_SPECIAL_SRA:
|
|
if (known & BIT(c.r.rt)) {
|
|
known |= BIT(c.r.rd);
|
|
v[c.r.rd] = (s32)v[c.r.rt] >> c.r.imm;
|
|
} else {
|
|
known &= ~BIT(c.r.rd);
|
|
}
|
|
break;
|
|
case OP_SPECIAL_SLLV:
|
|
if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
|
|
known |= BIT(c.r.rd);
|
|
v[c.r.rd] = v[c.r.rt] << (v[c.r.rs] & 0x1f);
|
|
} else {
|
|
known &= ~BIT(c.r.rd);
|
|
}
|
|
break;
|
|
case OP_SPECIAL_SRLV:
|
|
if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
|
|
known |= BIT(c.r.rd);
|
|
v[c.r.rd] = v[c.r.rt] >> (v[c.r.rs] & 0x1f);
|
|
} else {
|
|
known &= ~BIT(c.r.rd);
|
|
}
|
|
break;
|
|
case OP_SPECIAL_SRAV:
|
|
if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
|
|
known |= BIT(c.r.rd);
|
|
v[c.r.rd] = (s32)v[c.r.rt]
|
|
>> (v[c.r.rs] & 0x1f);
|
|
} else {
|
|
known &= ~BIT(c.r.rd);
|
|
}
|
|
break;
|
|
case OP_SPECIAL_ADD:
|
|
case OP_SPECIAL_ADDU:
|
|
if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
|
|
known |= BIT(c.r.rd);
|
|
v[c.r.rd] = (s32)v[c.r.rt] + (s32)v[c.r.rs];
|
|
} else {
|
|
known &= ~BIT(c.r.rd);
|
|
}
|
|
break;
|
|
case OP_SPECIAL_SUB:
|
|
case OP_SPECIAL_SUBU:
|
|
if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
|
|
known |= BIT(c.r.rd);
|
|
v[c.r.rd] = v[c.r.rt] - v[c.r.rs];
|
|
} else {
|
|
known &= ~BIT(c.r.rd);
|
|
}
|
|
break;
|
|
case OP_SPECIAL_AND:
|
|
if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
|
|
known |= BIT(c.r.rd);
|
|
v[c.r.rd] = v[c.r.rt] & v[c.r.rs];
|
|
} else {
|
|
known &= ~BIT(c.r.rd);
|
|
}
|
|
break;
|
|
case OP_SPECIAL_OR:
|
|
if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
|
|
known |= BIT(c.r.rd);
|
|
v[c.r.rd] = v[c.r.rt] | v[c.r.rs];
|
|
} else {
|
|
known &= ~BIT(c.r.rd);
|
|
}
|
|
break;
|
|
case OP_SPECIAL_XOR:
|
|
if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
|
|
known |= BIT(c.r.rd);
|
|
v[c.r.rd] = v[c.r.rt] ^ v[c.r.rs];
|
|
} else {
|
|
known &= ~BIT(c.r.rd);
|
|
}
|
|
break;
|
|
case OP_SPECIAL_NOR:
|
|
if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
|
|
known |= BIT(c.r.rd);
|
|
v[c.r.rd] = ~(v[c.r.rt] | v[c.r.rs]);
|
|
} else {
|
|
known &= ~BIT(c.r.rd);
|
|
}
|
|
break;
|
|
case OP_SPECIAL_SLT:
|
|
if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
|
|
known |= BIT(c.r.rd);
|
|
v[c.r.rd] = (s32)v[c.r.rs] < (s32)v[c.r.rt];
|
|
} else {
|
|
known &= ~BIT(c.r.rd);
|
|
}
|
|
break;
|
|
case OP_SPECIAL_SLTU:
|
|
if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
|
|
known |= BIT(c.r.rd);
|
|
v[c.r.rd] = v[c.r.rs] < v[c.r.rt];
|
|
} else {
|
|
known &= ~BIT(c.r.rd);
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
case OP_REGIMM:
|
|
break;
|
|
case OP_ADDI:
|
|
case OP_ADDIU:
|
|
if (known & BIT(c.i.rs)) {
|
|
known |= BIT(c.i.rt);
|
|
v[c.i.rt] = v[c.i.rs] + (s32)(s16)c.i.imm;
|
|
} else {
|
|
known &= ~BIT(c.i.rt);
|
|
}
|
|
break;
|
|
case OP_SLTI:
|
|
if (known & BIT(c.i.rs)) {
|
|
known |= BIT(c.i.rt);
|
|
v[c.i.rt] = (s32)v[c.i.rs] < (s32)(s16)c.i.imm;
|
|
} else {
|
|
known &= ~BIT(c.i.rt);
|
|
}
|
|
break;
|
|
case OP_SLTIU:
|
|
if (known & BIT(c.i.rs)) {
|
|
known |= BIT(c.i.rt);
|
|
v[c.i.rt] = v[c.i.rs] < (u32)(s32)(s16)c.i.imm;
|
|
} else {
|
|
known &= ~BIT(c.i.rt);
|
|
}
|
|
break;
|
|
case OP_ANDI:
|
|
if (known & BIT(c.i.rs)) {
|
|
known |= BIT(c.i.rt);
|
|
v[c.i.rt] = v[c.i.rs] & c.i.imm;
|
|
} else {
|
|
known &= ~BIT(c.i.rt);
|
|
}
|
|
break;
|
|
case OP_ORI:
|
|
if (known & BIT(c.i.rs)) {
|
|
known |= BIT(c.i.rt);
|
|
v[c.i.rt] = v[c.i.rs] | c.i.imm;
|
|
} else {
|
|
known &= ~BIT(c.i.rt);
|
|
}
|
|
break;
|
|
case OP_XORI:
|
|
if (known & BIT(c.i.rs)) {
|
|
known |= BIT(c.i.rt);
|
|
v[c.i.rt] = v[c.i.rs] ^ c.i.imm;
|
|
} else {
|
|
known &= ~BIT(c.i.rt);
|
|
}
|
|
break;
|
|
case OP_LUI:
|
|
known |= BIT(c.i.rt);
|
|
v[c.i.rt] = c.i.imm << 16;
|
|
break;
|
|
case OP_CP0:
|
|
switch (c.r.rs) {
|
|
case OP_CP0_MFC0:
|
|
case OP_CP0_CFC0:
|
|
known &= ~BIT(c.r.rt);
|
|
break;
|
|
}
|
|
break;
|
|
case OP_CP2:
|
|
if (c.r.op == OP_CP2_BASIC) {
|
|
switch (c.r.rs) {
|
|
case OP_CP2_BASIC_MFC2:
|
|
case OP_CP2_BASIC_CFC2:
|
|
known &= ~BIT(c.r.rt);
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
case OP_LB:
|
|
case OP_LH:
|
|
case OP_LWL:
|
|
case OP_LW:
|
|
case OP_LBU:
|
|
case OP_LHU:
|
|
case OP_LWR:
|
|
case OP_LWC2:
|
|
known &= ~BIT(c.i.rt);
|
|
break;
|
|
case OP_META_MOV:
|
|
if (known & BIT(c.r.rs)) {
|
|
known |= BIT(c.r.rd);
|
|
v[c.r.rd] = v[c.r.rs];
|
|
} else {
|
|
known &= ~BIT(c.r.rd);
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return known;
|
|
}
|
|
|
|
static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
|
|
{
|
|
struct opcode *list;
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < block->nb_ops; i++) {
|
|
list = &block->opcode_list[i];
|
|
|
|
/* Transform all opcodes detected as useless to real NOPs
|
|
* (0x0: SLL r0, r0, #0) */
|
|
if (list->opcode != 0 && is_nop(list->c)) {
|
|
pr_debug("Converting useless opcode 0x%08x to NOP\n",
|
|
list->opcode);
|
|
list->opcode = 0x0;
|
|
}
|
|
|
|
if (!list->opcode)
|
|
continue;
|
|
|
|
switch (list->i.op) {
|
|
/* Transform BEQ / BNE to BEQZ / BNEZ meta-opcodes if one of the
|
|
* two registers is zero. */
|
|
case OP_BEQ:
|
|
if ((list->i.rs == 0) ^ (list->i.rt == 0)) {
|
|
list->i.op = OP_META_BEQZ;
|
|
if (list->i.rs == 0) {
|
|
list->i.rs = list->i.rt;
|
|
list->i.rt = 0;
|
|
}
|
|
} else if (list->i.rs == list->i.rt) {
|
|
list->i.rs = 0;
|
|
list->i.rt = 0;
|
|
}
|
|
break;
|
|
case OP_BNE:
|
|
if (list->i.rs == 0) {
|
|
list->i.op = OP_META_BNEZ;
|
|
list->i.rs = list->i.rt;
|
|
list->i.rt = 0;
|
|
} else if (list->i.rt == 0) {
|
|
list->i.op = OP_META_BNEZ;
|
|
}
|
|
break;
|
|
|
|
/* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU
|
|
* with register $zero to the MOV meta-opcode */
|
|
case OP_ORI:
|
|
case OP_ADDI:
|
|
case OP_ADDIU:
|
|
if (list->i.imm == 0) {
|
|
pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n");
|
|
list->i.op = OP_META_MOV;
|
|
list->r.rd = list->i.rt;
|
|
}
|
|
break;
|
|
case OP_SPECIAL:
|
|
switch (list->r.op) {
|
|
case OP_SPECIAL_SLL:
|
|
case OP_SPECIAL_SRA:
|
|
case OP_SPECIAL_SRL:
|
|
if (list->r.imm == 0) {
|
|
pr_debug("Convert SLL/SRL/SRA #0 to MOV\n");
|
|
list->i.op = OP_META_MOV;
|
|
list->r.rs = list->r.rt;
|
|
}
|
|
break;
|
|
case OP_SPECIAL_OR:
|
|
case OP_SPECIAL_ADD:
|
|
case OP_SPECIAL_ADDU:
|
|
if (list->r.rs == 0) {
|
|
pr_debug("Convert OR/ADD $zero to MOV\n");
|
|
list->i.op = OP_META_MOV;
|
|
list->r.rs = list->r.rt;
|
|
}
|
|
case OP_SPECIAL_SUB: /* fall-through */
|
|
case OP_SPECIAL_SUBU:
|
|
if (list->r.rt == 0) {
|
|
pr_debug("Convert OR/ADD/SUB $zero to MOV\n");
|
|
list->i.op = OP_META_MOV;
|
|
}
|
|
default: /* fall-through */
|
|
break;
|
|
}
|
|
default: /* fall-through */
|
|
break;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block)
|
|
{
|
|
struct opcode *list, *next = &block->opcode_list[0];
|
|
unsigned int i;
|
|
union code op, next_op;
|
|
u8 flags;
|
|
|
|
for (i = 0; i < block->nb_ops - 1; i++) {
|
|
list = next;
|
|
next = &block->opcode_list[i + 1];
|
|
next_op = next->c;
|
|
op = list->c;
|
|
|
|
if (!has_delay_slot(op) ||
|
|
list->flags & (LIGHTREC_NO_DS | LIGHTREC_EMULATE_BRANCH) ||
|
|
op.opcode == 0 || next_op.opcode == 0)
|
|
continue;
|
|
|
|
if (i && has_delay_slot(block->opcode_list[i - 1].c) &&
|
|
!(block->opcode_list[i - 1].flags & LIGHTREC_NO_DS))
|
|
continue;
|
|
|
|
if ((list->flags & LIGHTREC_SYNC) ||
|
|
(next->flags & LIGHTREC_SYNC))
|
|
continue;
|
|
|
|
switch (list->i.op) {
|
|
case OP_SPECIAL:
|
|
switch (op.r.op) {
|
|
case OP_SPECIAL_JALR:
|
|
if (opcode_reads_register(next_op, op.r.rd) ||
|
|
opcode_writes_register(next_op, op.r.rd))
|
|
continue;
|
|
case OP_SPECIAL_JR: /* fall-through */
|
|
if (opcode_writes_register(next_op, op.r.rs))
|
|
continue;
|
|
default: /* fall-through */
|
|
break;
|
|
}
|
|
case OP_J: /* fall-through */
|
|
break;
|
|
case OP_JAL:
|
|
if (opcode_reads_register(next_op, 31) ||
|
|
opcode_writes_register(next_op, 31))
|
|
continue;
|
|
else
|
|
break;
|
|
case OP_BEQ:
|
|
case OP_BNE:
|
|
if (op.i.rt && opcode_writes_register(next_op, op.i.rt))
|
|
continue;
|
|
case OP_BLEZ: /* fall-through */
|
|
case OP_BGTZ:
|
|
case OP_META_BEQZ:
|
|
case OP_META_BNEZ:
|
|
if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
|
|
continue;
|
|
break;
|
|
case OP_REGIMM:
|
|
switch (op.r.rt) {
|
|
case OP_REGIMM_BLTZAL:
|
|
case OP_REGIMM_BGEZAL:
|
|
if (opcode_reads_register(next_op, 31) ||
|
|
opcode_writes_register(next_op, 31))
|
|
continue;
|
|
case OP_REGIMM_BLTZ: /* fall-through */
|
|
case OP_REGIMM_BGEZ:
|
|
if (op.i.rs &&
|
|
opcode_writes_register(next_op, op.i.rs))
|
|
continue;
|
|
break;
|
|
}
|
|
default: /* fall-through */
|
|
break;
|
|
}
|
|
|
|
pr_debug("Swap branch and delay slot opcodes "
|
|
"at offsets 0x%x / 0x%x\n",
|
|
i << 2, (i + 1) << 2);
|
|
|
|
flags = next->flags;
|
|
list->c = next_op;
|
|
next->c = op;
|
|
next->flags = list->flags | LIGHTREC_NO_DS;
|
|
list->flags = flags | LIGHTREC_NO_DS;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size)
|
|
{
|
|
struct opcode *list;
|
|
|
|
if (new_size >= block->nb_ops) {
|
|
pr_err("Invalid shrink size (%u vs %u)\n",
|
|
new_size, block->nb_ops);
|
|
return -EINVAL;
|
|
}
|
|
|
|
|
|
list = lightrec_malloc(state, MEM_FOR_IR,
|
|
sizeof(*list) * new_size);
|
|
if (!list) {
|
|
pr_err("Unable to allocate memory\n");
|
|
return -ENOMEM;
|
|
}
|
|
|
|
memcpy(list, block->opcode_list, sizeof(*list) * new_size);
|
|
|
|
lightrec_free_opcode_list(state, block);
|
|
block->opcode_list = list;
|
|
block->nb_ops = new_size;
|
|
|
|
pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n",
|
|
block->pc, new_size);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int lightrec_detect_impossible_branches(struct lightrec_state *state,
|
|
struct block *block)
|
|
{
|
|
struct opcode *op, *next = &block->opcode_list[0];
|
|
unsigned int i;
|
|
int ret = 0;
|
|
|
|
for (i = 0; i < block->nb_ops - 1; i++) {
|
|
op = next;
|
|
next = &block->opcode_list[i + 1];
|
|
|
|
if (!has_delay_slot(op->c) ||
|
|
(!load_in_delay_slot(next->c) &&
|
|
!has_delay_slot(next->c) &&
|
|
!(next->i.op == OP_CP0 && next->r.rs == OP_CP0_RFE)))
|
|
continue;
|
|
|
|
if (op->c.opcode == next->c.opcode) {
|
|
/* The delay slot is the exact same opcode as the branch
|
|
* opcode: this is effectively a NOP */
|
|
next->c.opcode = 0;
|
|
continue;
|
|
}
|
|
|
|
op->flags |= LIGHTREC_EMULATE_BRANCH;
|
|
|
|
if (op == block->opcode_list) {
|
|
pr_debug("First opcode of block PC 0x%08x is an impossible branch\n",
|
|
block->pc);
|
|
|
|
/* If the first opcode is an 'impossible' branch, we
|
|
* only keep the first two opcodes of the block (the
|
|
* branch itself + its delay slot) */
|
|
if (block->nb_ops > 2)
|
|
ret = shrink_opcode_list(state, block, 2);
|
|
break;
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int lightrec_local_branches(struct lightrec_state *state, struct block *block)
|
|
{
|
|
struct opcode *list;
|
|
unsigned int i;
|
|
s32 offset;
|
|
|
|
for (i = 0; i < block->nb_ops; i++) {
|
|
list = &block->opcode_list[i];
|
|
|
|
if (should_emulate(list))
|
|
continue;
|
|
|
|
switch (list->i.op) {
|
|
case OP_BEQ:
|
|
case OP_BNE:
|
|
case OP_BLEZ:
|
|
case OP_BGTZ:
|
|
case OP_REGIMM:
|
|
case OP_META_BEQZ:
|
|
case OP_META_BNEZ:
|
|
offset = i + 1 + (s16)list->i.imm;
|
|
if (offset >= 0 && offset < block->nb_ops)
|
|
break;
|
|
default: /* fall-through */
|
|
continue;
|
|
}
|
|
|
|
pr_debug("Found local branch to offset 0x%x\n", offset << 2);
|
|
|
|
if (should_emulate(&block->opcode_list[offset])) {
|
|
pr_debug("Branch target must be emulated - skip\n");
|
|
continue;
|
|
}
|
|
|
|
if (offset && has_delay_slot(block->opcode_list[offset - 1].c)) {
|
|
pr_debug("Branch target is a delay slot - skip\n");
|
|
continue;
|
|
}
|
|
|
|
pr_debug("Adding sync at offset 0x%x\n", offset << 2);
|
|
|
|
block->opcode_list[offset].flags |= LIGHTREC_SYNC;
|
|
list->flags |= LIGHTREC_LOCAL_BRANCH;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
bool has_delay_slot(union code op)
|
|
{
|
|
switch (op.i.op) {
|
|
case OP_SPECIAL:
|
|
switch (op.r.op) {
|
|
case OP_SPECIAL_JR:
|
|
case OP_SPECIAL_JALR:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
case OP_J:
|
|
case OP_JAL:
|
|
case OP_BEQ:
|
|
case OP_BNE:
|
|
case OP_BLEZ:
|
|
case OP_BGTZ:
|
|
case OP_REGIMM:
|
|
case OP_META_BEQZ:
|
|
case OP_META_BNEZ:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool should_emulate(const struct opcode *list)
|
|
{
|
|
return has_delay_slot(list->c) &&
|
|
(list->flags & LIGHTREC_EMULATE_BRANCH);
|
|
}
|
|
|
|
static void lightrec_add_unload(struct opcode *op, u8 reg)
|
|
{
|
|
if (op->i.op == OP_SPECIAL && reg == op->r.rd)
|
|
op->flags |= LIGHTREC_UNLOAD_RD;
|
|
|
|
if (op->i.rs == reg)
|
|
op->flags |= LIGHTREC_UNLOAD_RS;
|
|
if (op->i.rt == reg)
|
|
op->flags |= LIGHTREC_UNLOAD_RT;
|
|
}
|
|
|
|
static int lightrec_early_unload(struct lightrec_state *state, struct block *block)
|
|
{
|
|
unsigned int i, offset;
|
|
struct opcode *op;
|
|
u8 reg;
|
|
|
|
for (reg = 1; reg < 34; reg++) {
|
|
int last_r_id = -1, last_w_id = -1;
|
|
|
|
for (i = 0; i < block->nb_ops; i++) {
|
|
union code c = block->opcode_list[i].c;
|
|
|
|
if (opcode_reads_register(c, reg))
|
|
last_r_id = i;
|
|
if (opcode_writes_register(c, reg))
|
|
last_w_id = i;
|
|
}
|
|
|
|
if (last_w_id > last_r_id)
|
|
offset = (unsigned int)last_w_id;
|
|
else if (last_r_id >= 0)
|
|
offset = (unsigned int)last_r_id;
|
|
else
|
|
continue;
|
|
|
|
op = &block->opcode_list[offset];
|
|
|
|
if (has_delay_slot(op->c) && (op->flags & LIGHTREC_NO_DS))
|
|
offset++;
|
|
|
|
if (offset == block->nb_ops)
|
|
continue;
|
|
|
|
lightrec_add_unload(&block->opcode_list[offset], reg);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int lightrec_flag_stores(struct lightrec_state *state, struct block *block)
|
|
{
|
|
struct opcode *list;
|
|
u32 known = BIT(0);
|
|
u32 values[32] = { 0 };
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < block->nb_ops; i++) {
|
|
list = &block->opcode_list[i];
|
|
|
|
/* Register $zero is always, well, zero */
|
|
known |= BIT(0);
|
|
values[0] = 0;
|
|
|
|
switch (list->i.op) {
|
|
case OP_SB:
|
|
case OP_SH:
|
|
case OP_SW:
|
|
/* Mark all store operations that target $sp or $gp
|
|
* as not requiring code invalidation. This is based
|
|
* on the heuristic that stores using one of these
|
|
* registers as address will never hit a code page. */
|
|
if (list->i.rs >= 28 && list->i.rs <= 29 &&
|
|
!state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
|
|
pr_debug("Flaging opcode 0x%08x as not requiring invalidation\n",
|
|
list->opcode);
|
|
list->flags |= LIGHTREC_NO_INVALIDATE;
|
|
}
|
|
|
|
/* Detect writes whose destination address is inside the
|
|
* current block, using constant propagation. When these
|
|
* occur, we mark the blocks as not compilable. */
|
|
if ((known & BIT(list->i.rs)) &&
|
|
kunseg(values[list->i.rs]) >= kunseg(block->pc) &&
|
|
kunseg(values[list->i.rs]) < (kunseg(block->pc) +
|
|
block->nb_ops * 4)) {
|
|
pr_debug("Self-modifying block detected\n");
|
|
block->flags |= BLOCK_NEVER_COMPILE;
|
|
list->flags |= LIGHTREC_SMC;
|
|
}
|
|
default: /* fall-through */
|
|
break;
|
|
}
|
|
|
|
known = lightrec_propagate_consts(list->c, known, values);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
|
|
const struct opcode *last,
|
|
u32 mask, bool sync, bool mflo, bool another)
|
|
{
|
|
const struct opcode *op, *next = &block->opcode_list[offset];
|
|
u32 old_mask;
|
|
u8 reg2, reg = mflo ? REG_LO : REG_HI;
|
|
u16 branch_offset;
|
|
unsigned int i;
|
|
|
|
for (i = offset; i < block->nb_ops; i++) {
|
|
op = next;
|
|
next = &block->opcode_list[i + 1];
|
|
old_mask = mask;
|
|
|
|
/* If any other opcode writes or reads to the register
|
|
* we'd use, then we cannot use it anymore. */
|
|
mask |= opcode_read_mask(op->c);
|
|
mask |= opcode_write_mask(op->c);
|
|
|
|
if (op->flags & LIGHTREC_SYNC)
|
|
sync = true;
|
|
|
|
switch (op->i.op) {
|
|
case OP_BEQ:
|
|
case OP_BNE:
|
|
case OP_BLEZ:
|
|
case OP_BGTZ:
|
|
case OP_REGIMM:
|
|
case OP_META_BEQZ:
|
|
case OP_META_BNEZ:
|
|
/* TODO: handle backwards branches too */
|
|
if (!last &&
|
|
(op->flags & LIGHTREC_LOCAL_BRANCH) &&
|
|
(s16)op->c.i.imm >= 0) {
|
|
branch_offset = i + 1 + (s16)op->c.i.imm
|
|
- !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
|
|
|
|
reg = get_mfhi_mflo_reg(block, branch_offset, NULL,
|
|
mask, sync, mflo, false);
|
|
reg2 = get_mfhi_mflo_reg(block, offset + 1, next,
|
|
mask, sync, mflo, false);
|
|
if (reg > 0 && reg == reg2)
|
|
return reg;
|
|
if (!reg && !reg2)
|
|
return 0;
|
|
}
|
|
|
|
return mflo ? REG_LO : REG_HI;
|
|
case OP_SPECIAL:
|
|
switch (op->r.op) {
|
|
case OP_SPECIAL_MULT:
|
|
case OP_SPECIAL_MULTU:
|
|
case OP_SPECIAL_DIV:
|
|
case OP_SPECIAL_DIVU:
|
|
return 0;
|
|
case OP_SPECIAL_MTHI:
|
|
if (!mflo)
|
|
return 0;
|
|
continue;
|
|
case OP_SPECIAL_MTLO:
|
|
if (mflo)
|
|
return 0;
|
|
continue;
|
|
case OP_SPECIAL_JR:
|
|
if (op->r.rs != 31)
|
|
return reg;
|
|
|
|
if (!sync &&
|
|
!(op->flags & LIGHTREC_NO_DS) &&
|
|
(next->i.op == OP_SPECIAL) &&
|
|
((!mflo && next->r.op == OP_SPECIAL_MFHI) ||
|
|
(mflo && next->r.op == OP_SPECIAL_MFLO)))
|
|
return next->r.rd;
|
|
|
|
return 0;
|
|
case OP_SPECIAL_JALR:
|
|
return reg;
|
|
case OP_SPECIAL_MFHI:
|
|
if (!mflo) {
|
|
if (another)
|
|
return op->r.rd;
|
|
/* Must use REG_HI if there is another MFHI target*/
|
|
reg2 = get_mfhi_mflo_reg(block, i + 1, next,
|
|
0, sync, mflo, true);
|
|
if (reg2 > 0 && reg2 != REG_HI)
|
|
return REG_HI;
|
|
|
|
if (!sync && !(old_mask & BIT(op->r.rd)))
|
|
return op->r.rd;
|
|
else
|
|
return REG_HI;
|
|
}
|
|
continue;
|
|
case OP_SPECIAL_MFLO:
|
|
if (mflo) {
|
|
if (another)
|
|
return op->r.rd;
|
|
/* Must use REG_LO if there is another MFLO target*/
|
|
reg2 = get_mfhi_mflo_reg(block, i + 1, next,
|
|
0, sync, mflo, true);
|
|
if (reg2 > 0 && reg2 != REG_LO)
|
|
return REG_LO;
|
|
|
|
if (!sync && !(old_mask & BIT(op->r.rd)))
|
|
return op->r.rd;
|
|
else
|
|
return REG_LO;
|
|
}
|
|
continue;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
/* fall-through */
|
|
default:
|
|
continue;
|
|
}
|
|
}
|
|
|
|
return reg;
|
|
}
|
|
|
|
static void lightrec_replace_lo_hi(struct block *block, u16 offset,
|
|
u16 last, bool lo)
|
|
{
|
|
unsigned int i;
|
|
u32 branch_offset;
|
|
|
|
/* This function will remove the following MFLO/MFHI. It must be called
|
|
* only if get_mfhi_mflo_reg() returned a non-zero value. */
|
|
|
|
for (i = offset; i < last; i++) {
|
|
struct opcode *op = &block->opcode_list[i];
|
|
|
|
switch (op->i.op) {
|
|
case OP_BEQ:
|
|
case OP_BNE:
|
|
case OP_BLEZ:
|
|
case OP_BGTZ:
|
|
case OP_REGIMM:
|
|
case OP_META_BEQZ:
|
|
case OP_META_BNEZ:
|
|
/* TODO: handle backwards branches too */
|
|
if ((op->flags & LIGHTREC_LOCAL_BRANCH) &&
|
|
(s16)op->c.i.imm >= 0) {
|
|
branch_offset = i + 1 + (s16)op->c.i.imm
|
|
- !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
|
|
|
|
lightrec_replace_lo_hi(block, branch_offset, last, lo);
|
|
lightrec_replace_lo_hi(block, i + 1, branch_offset, lo);
|
|
}
|
|
break;
|
|
|
|
case OP_SPECIAL:
|
|
if (lo && op->r.op == OP_SPECIAL_MFLO) {
|
|
pr_debug("Removing MFLO opcode at offset 0x%x\n",
|
|
i << 2);
|
|
op->opcode = 0;
|
|
return;
|
|
} else if (!lo && op->r.op == OP_SPECIAL_MFHI) {
|
|
pr_debug("Removing MFHI opcode at offset 0x%x\n",
|
|
i << 2);
|
|
op->opcode = 0;
|
|
return;
|
|
}
|
|
|
|
/* fall-through */
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block)
|
|
{
|
|
struct opcode *list;
|
|
u8 reg_hi, reg_lo;
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < block->nb_ops - 1; i++) {
|
|
list = &block->opcode_list[i];
|
|
|
|
if (list->i.op != OP_SPECIAL)
|
|
continue;
|
|
|
|
switch (list->r.op) {
|
|
case OP_SPECIAL_MULT:
|
|
case OP_SPECIAL_MULTU:
|
|
case OP_SPECIAL_DIV:
|
|
case OP_SPECIAL_DIVU:
|
|
break;
|
|
default:
|
|
continue;
|
|
}
|
|
|
|
/* Don't support opcodes in delay slots */
|
|
if ((i && has_delay_slot(block->opcode_list[i - 1].c)) ||
|
|
(list->flags & LIGHTREC_NO_DS))
|
|
continue;
|
|
|
|
reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false);
|
|
if (reg_lo == 0) {
|
|
pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
|
|
" not writing LO\n", i << 2);
|
|
list->flags |= LIGHTREC_NO_LO;
|
|
}
|
|
|
|
reg_hi = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, false, false);
|
|
if (reg_hi == 0) {
|
|
pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
|
|
" not writing HI\n", i << 2);
|
|
list->flags |= LIGHTREC_NO_HI;
|
|
}
|
|
|
|
if (!reg_lo && !reg_hi) {
|
|
pr_debug("Both LO/HI unused in this block, they will "
|
|
"probably be used in parent block - removing "
|
|
"flags.\n");
|
|
list->flags &= ~(LIGHTREC_NO_LO | LIGHTREC_NO_HI);
|
|
}
|
|
|
|
if (reg_lo > 0 && reg_lo != REG_LO) {
|
|
pr_debug("Found register %s to hold LO (rs = %u, rt = %u)\n",
|
|
lightrec_reg_name(reg_lo), list->r.rs, list->r.rt);
|
|
|
|
lightrec_replace_lo_hi(block, i + 1, block->nb_ops, true);
|
|
list->r.rd = reg_lo;
|
|
} else {
|
|
list->r.rd = 0;
|
|
}
|
|
|
|
if (reg_hi > 0 && reg_hi != REG_HI) {
|
|
pr_debug("Found register %s to hold HI (rs = %u, rt = %u)\n",
|
|
lightrec_reg_name(reg_hi), list->r.rs, list->r.rt);
|
|
|
|
lightrec_replace_lo_hi(block, i + 1, block->nb_ops, false);
|
|
list->r.imm = reg_hi;
|
|
} else {
|
|
list->r.imm = 0;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static bool remove_div_sequence(struct block *block, unsigned int offset)
|
|
{
|
|
struct opcode *op;
|
|
unsigned int i, found = 0;
|
|
|
|
/*
|
|
* Scan for the zero-checking sequence that GCC automatically introduced
|
|
* after most DIV/DIVU opcodes. This sequence checks the value of the
|
|
* divisor, and if zero, executes a BREAK opcode, causing the BIOS
|
|
* handler to crash the PS1.
|
|
*
|
|
* For DIV opcodes, this sequence additionally checks that the signed
|
|
* operation does not overflow.
|
|
*
|
|
* With the assumption that the games never crashed the PS1, we can
|
|
* therefore assume that the games never divided by zero or overflowed,
|
|
* and these sequences can be removed.
|
|
*/
|
|
|
|
for (i = offset; i < block->nb_ops; i++) {
|
|
op = &block->opcode_list[i];
|
|
|
|
if (!found) {
|
|
if (op->i.op == OP_SPECIAL &&
|
|
(op->r.op == OP_SPECIAL_DIV || op->r.op == OP_SPECIAL_DIVU))
|
|
break;
|
|
|
|
if ((op->opcode & 0xfc1fffff) == 0x14000002) {
|
|
/* BNE ???, zero, +8 */
|
|
found++;
|
|
} else {
|
|
offset++;
|
|
}
|
|
} else if (found == 1 && !op->opcode) {
|
|
/* NOP */
|
|
found++;
|
|
} else if (found == 2 && op->opcode == 0x0007000d) {
|
|
/* BREAK 0x1c00 */
|
|
found++;
|
|
} else if (found == 3 && op->opcode == 0x2401ffff) {
|
|
/* LI at, -1 */
|
|
found++;
|
|
} else if (found == 4 && (op->opcode & 0xfc1fffff) == 0x14010004) {
|
|
/* BNE ???, at, +16 */
|
|
found++;
|
|
} else if (found == 5 && op->opcode == 0x3c018000) {
|
|
/* LUI at, 0x8000 */
|
|
found++;
|
|
} else if (found == 6 && (op->opcode & 0x141fffff) == 0x14010002) {
|
|
/* BNE ???, at, +16 */
|
|
found++;
|
|
} else if (found == 7 && !op->opcode) {
|
|
/* NOP */
|
|
found++;
|
|
} else if (found == 8 && op->opcode == 0x0006000d) {
|
|
/* BREAK 0x1800 */
|
|
found++;
|
|
break;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (found >= 3) {
|
|
if (found != 9)
|
|
found = 3;
|
|
|
|
pr_debug("Removing DIV%s sequence at offset 0x%x\n",
|
|
found == 9 ? "" : "U", offset << 2);
|
|
|
|
for (i = 0; i < found; i++)
|
|
block->opcode_list[offset + i].opcode = 0;
|
|
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static int lightrec_remove_div_by_zero_check_sequence(struct lightrec_state *state,
|
|
struct block *block)
|
|
{
|
|
struct opcode *op;
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < block->nb_ops; i++) {
|
|
op = &block->opcode_list[i];
|
|
|
|
if (op->i.op == OP_SPECIAL &&
|
|
(op->r.op == OP_SPECIAL_DIVU || op->r.op == OP_SPECIAL_DIV) &&
|
|
remove_div_sequence(block, i + 1))
|
|
op->flags |= LIGHTREC_NO_DIV_CHECK;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static const u32 memset_code[] = {
|
|
0x10a00006, // beqz a1, 2f
|
|
0x24a2ffff, // addiu v0,a1,-1
|
|
0x2403ffff, // li v1,-1
|
|
0xac800000, // 1: sw zero,0(a0)
|
|
0x2442ffff, // addiu v0,v0,-1
|
|
0x1443fffd, // bne v0,v1, 1b
|
|
0x24840004, // addiu a0,a0,4
|
|
0x03e00008, // 2: jr ra
|
|
0x00000000, // nop
|
|
};
|
|
|
|
static int lightrec_replace_memset(struct lightrec_state *state, struct block *block)
|
|
{
|
|
unsigned int i;
|
|
union code c;
|
|
|
|
for (i = 0; i < block->nb_ops; i++) {
|
|
c = block->opcode_list[i].c;
|
|
|
|
if (c.opcode != memset_code[i])
|
|
return 0;
|
|
|
|
if (i == ARRAY_SIZE(memset_code) - 1) {
|
|
/* success! */
|
|
pr_debug("Block at PC 0x%x is a memset\n", block->pc);
|
|
block->flags |= BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE;
|
|
|
|
/* Return non-zero to skip other optimizers. */
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *) = {
|
|
IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence),
|
|
IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset),
|
|
IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches),
|
|
IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops),
|
|
IF_OPT(OPT_LOCAL_BRANCHES, &lightrec_local_branches),
|
|
IF_OPT(OPT_SWITCH_DELAY_SLOTS, &lightrec_switch_delay_slots),
|
|
IF_OPT(OPT_FLAG_STORES, &lightrec_flag_stores),
|
|
IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs),
|
|
IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload),
|
|
};
|
|
|
|
int lightrec_optimize(struct lightrec_state *state, struct block *block)
|
|
{
|
|
unsigned int i;
|
|
int ret;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) {
|
|
if (lightrec_optimizers[i]) {
|
|
ret = (*lightrec_optimizers[i])(state, block);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|