Merge pull request #1021 from FioraAeterna/optimizeca3

JIT: Carry optimizations!
This commit is contained in:
comex 2014-09-14 15:08:08 -04:00
commit db7617248f
12 changed files with 361 additions and 317 deletions

View File

@ -34,7 +34,7 @@ static GekkoOPTemplate primarytable[] =
{10, Interpreter::cmpli, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
{11, Interpreter::cmpi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
{12, Interpreter::addic, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA, 1, 0, 0, 0}},
{13, Interpreter::addic_rc, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0, 1, 0, 0, 0}},
{13, Interpreter::addic_rc, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA | FL_SET_CR0, 1, 0, 0, 0}},
{14, Interpreter::addi, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
{15, Interpreter::addis, {"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
@ -180,8 +180,8 @@ static GekkoOPTemplate table31[] =
{922, Interpreter::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
{954, Interpreter::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
{536, Interpreter::srwx, {"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
{792, Interpreter::srawx, {"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
{824, Interpreter::srawix, {"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
{792, Interpreter::srawx, {"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{824, Interpreter::srawix, {"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{24, Interpreter::slwx, {"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
{54, Interpreter::dcbst, {"dcbst", OPTYPE_DCACHE, 0, 5, 0, 0, 0}},
@ -260,7 +260,7 @@ static GekkoOPTemplate table31[] =
{339, Interpreter::mfspr, {"mfspr", OPTYPE_SPR, FL_OUT_D, 1, 0, 0, 0}},
{467, Interpreter::mtspr, {"mtspr", OPTYPE_SPR, 0, 2, 0, 0, 0}},
{371, Interpreter::mftb, {"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER, 1, 0, 0, 0}},
{512, Interpreter::mcrxr, {"mcrxr", OPTYPE_SYSTEM, 0, 1, 0, 0, 0}},
{512, Interpreter::mcrxr, {"mcrxr", OPTYPE_SYSTEM, FL_READ_CA | FL_SET_CA, 1, 0, 0, 0}},
{595, Interpreter::mfsr, {"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
{659, Interpreter::mfsrin, {"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
@ -280,26 +280,26 @@ static GekkoOPTemplate table31[] =
static GekkoOPTemplate table31_2[] =
{
{266, Interpreter::addx, {"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}},
{778, Interpreter::addx, {"addox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}},
{778, Interpreter::addx, {"addox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}},
{10, Interpreter::addcx, {"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{522, Interpreter::addcx, {"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{522, Interpreter::addcx, {"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{138, Interpreter::addex, {"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{650, Interpreter::addex, {"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{650, Interpreter::addex, {"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT | FL_SET_OE, 1, 0, 0, 0}},
{234, Interpreter::addmex, {"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{202, Interpreter::addzex, {"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{491, Interpreter::divwx, {"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 40, 0, 0, 0}},
{1003, Interpreter::divwx, {"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 40, 0, 0, 0}},
{1003, Interpreter::divwx, {"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT | FL_SET_OE, 40, 0, 0, 0}},
{459, Interpreter::divwux, {"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 40, 0, 0, 0}},
{971, Interpreter::divwux, {"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 40, 0, 0, 0}},
{971, Interpreter::divwux, {"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT | FL_SET_OE, 40, 0, 0, 0}},
{75, Interpreter::mulhwx, {"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 5, 0, 0, 0}},
{11, Interpreter::mulhwux, {"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 5, 0, 0, 0}},
{235, Interpreter::mullwx, {"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 5, 0, 0, 0}},
{747, Interpreter::mullwx, {"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 5, 0, 0, 0}},
{747, Interpreter::mullwx, {"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT | FL_SET_OE, 5, 0, 0, 0}},
{104, Interpreter::negx, {"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}},
{40, Interpreter::subfx, {"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}},
{552, Interpreter::subfx, {"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}},
{552, Interpreter::subfx, {"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT | FL_SET_OE, 1, 0, 0, 0}},
{8, Interpreter::subfcx, {"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{520, Interpreter::subfcx, {"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{520, Interpreter::subfcx, {"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT | FL_SET_OE, 1, 0, 0, 0}},
{136, Interpreter::subfex, {"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{232, Interpreter::subfmex, {"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{200, Interpreter::subfzex, {"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},

View File

@ -178,6 +178,8 @@ void Jit64::Init()
code_block.m_gpa = &js.gpa;
code_block.m_fpa = &js.fpa;
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
}
void Jit64::ClearCache()
@ -461,6 +463,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address);
js.skipnext = false;
js.carryFlagSet = false;
js.carryFlagInverted = false;
js.compilerPC = nextPC;
// Translate instructions
for (u32 i = 0; i < code_block.m_num_instructions; i++)
@ -492,6 +496,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address;
js.next_op = &ops[i + 1];
}
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)

View File

@ -101,6 +101,8 @@ public:
void GenerateConstantOverflow(s64 val);
void GenerateOverflow();
void FinalizeCarryOverflow(bool oe, bool inv = false);
void FinalizeCarry(Gen::CCFlags cond);
void FinalizeCarry(bool ca);
void ComputeRC(const Gen::OpArg & arg);
// Use to extract bytes from a register using the regcache. offset is in bytes.
@ -139,7 +141,7 @@ public:
void DynaRunTable63(UGeckoInstruction _inst);
void addx(UGeckoInstruction inst);
void addcx(UGeckoInstruction inst);
void arithcx(UGeckoInstruction inst);
void mulli(UGeckoInstruction inst);
void mulhwXx(UGeckoInstruction inst);
void mullwx(UGeckoInstruction inst);
@ -147,9 +149,7 @@ public:
void divwx(UGeckoInstruction inst);
void srawix(UGeckoInstruction inst);
void srawx(UGeckoInstruction inst);
void addex(UGeckoInstruction inst);
void addmex(UGeckoInstruction inst);
void addzex(UGeckoInstruction inst);
void arithXex(UGeckoInstruction inst);
void extsXx(UGeckoInstruction inst);
@ -217,11 +217,7 @@ public:
void dcbz(UGeckoInstruction inst);
void subfic(UGeckoInstruction inst);
void subfcx(UGeckoInstruction inst);
void subfx(UGeckoInstruction inst);
void subfex(UGeckoInstruction inst);
void subfmex(UGeckoInstruction inst);
void subfzex(UGeckoInstruction inst);
void twx(UGeckoInstruction inst);

View File

@ -48,7 +48,7 @@ static GekkoOPTemplate primarytable[] =
{10, &Jit64::cmpXX}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{11, &Jit64::cmpXX}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{12, &Jit64::reg_imm}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
{13, &Jit64::reg_imm}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}},
{13, &Jit64::reg_imm}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA | FL_SET_CR0}},
{14, &Jit64::reg_imm}, //"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
{15, &Jit64::reg_imm}, //"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
@ -193,8 +193,8 @@ static GekkoOPTemplate table31[] =
{922, &Jit64::extsXx}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{954, &Jit64::extsXx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{536, &Jit64::srwx}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{792, &Jit64::srawx}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{824, &Jit64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{792, &Jit64::srawx}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT}},
{824, &Jit64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT}},
{24, &Jit64::slwx}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{54, &Jit64::dcbst}, //"dcbst", OPTYPE_DCACHE, 0, 4}},
@ -273,7 +273,7 @@ static GekkoOPTemplate table31[] =
{339, &Jit64::mfspr}, //"mfspr", OPTYPE_SPR, FL_OUT_D}},
{467, &Jit64::mtspr}, //"mtspr", OPTYPE_SPR, 0, 2}},
{371, &Jit64::mftb}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}},
{512, &Jit64::mcrxr}, //"mcrxr", OPTYPE_SYSTEM, 0}},
{512, &Jit64::mcrxr}, //"mcrxr", OPTYPE_SYSTEM, FL_READ_CA | FL_SET_CA}},
{595, &Jit64::FallBackToInterpreter}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}},
{659, &Jit64::FallBackToInterpreter}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}},
@ -294,12 +294,12 @@ static GekkoOPTemplate table31_2[] =
{
{266, &Jit64::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{778, &Jit64::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{10, &Jit64::addcx}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{522, &Jit64::addcx}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{138, &Jit64::addex}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{650, &Jit64::addex}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{234, &Jit64::addmex}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{202, &Jit64::addzex}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{10, &Jit64::arithcx}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{522, &Jit64::arithcx}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{138, &Jit64::arithXex}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{650, &Jit64::arithXex}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{234, &Jit64::arithXex}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{202, &Jit64::arithXex}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{491, &Jit64::divwx}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{1003, &Jit64::divwx}, //"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{459, &Jit64::divwux}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
@ -311,11 +311,11 @@ static GekkoOPTemplate table31_2[] =
{104, &Jit64::negx}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{40, &Jit64::subfx}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{552, &Jit64::subfx}, //"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{8, &Jit64::subfcx}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{520, &Jit64::subfcx}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{136, &Jit64::subfex}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{232, &Jit64::subfmex}, //"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{200, &Jit64::subfzex}, //"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{8, &Jit64::arithcx}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{520, &Jit64::arithcx}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{136, &Jit64::arithXex}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{232, &Jit64::arithXex}, //"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{200, &Jit64::arithXex}, //"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
};
static GekkoOPTemplate table59[] =

View File

@ -44,28 +44,76 @@ void Jit64::GenerateOverflow()
SetJumpTarget(exit);
}
// Commits the carry-out of the instruction currently being compiled.
//
// cond: the x86 condition code under which the emulated carry bit must end up set.
//
// js.carryFlagSet / js.carryFlagInverted are always reset first. Nothing else is emitted unless
// the analyzer marked the carry as wanted (js.op->wantsCA). When it is wanted:
//  - if the following instruction takes its carry-in from the host flags (wantsCAInFlags), the
//    carry is left in the x86 carry flag and js.carryFlagSet is raised; js.carryFlagInverted
//    records that the flag holds the complement (cond == CC_NC);
//  - otherwise the carry is written to XER through JitSetCAIf(cond).
void Jit64::FinalizeCarry(CCFlags cond)
{
	js.carryFlagSet = false;
	js.carryFlagInverted = false;
	if (!js.op->wantsCA)
		return;

	// js.next_op only describes a real successor while another instruction follows in this block.
	// On the last instruction nothing in this block can consume the host flag, so the carry must
	// go to XER.
	// NOTE(review): assumes js.isLastInstruction is maintained by the block compile loop - confirm.
	const bool keepInHostFlags = !js.isLastInstruction && js.next_op->wantsCAInFlags;
	if (!keepInHostFlags)
	{
		JitSetCAIf(cond);
		return;
	}

	if (cond == CC_C || cond == CC_NC)
	{
		// The carry flag already holds the answer, possibly inverted; just remember which.
		js.carryFlagInverted = cond == CC_NC;
	}
	else
	{
		// convert the condition to a carry flag (is there a better way?)
		SETcc(cond, R(RSCRATCH));
		BT(8, R(RSCRATCH), Imm8(0));
	}
	js.carryFlagSet = true;
}
// Unconditional version: the carry-out is known at compile time.
//
// ca: the constant value the emulated carry bit must take.
//
// Resets js.carryFlagSet / js.carryFlagInverted, then (only if js.op->wantsCA) either loads the
// constant into the x86 carry flag with STC/CLC for a following instruction that wants its
// carry-in in host flags, or updates XER.CA through JitSetCA() / JitClearCAOV(true, false).
void Jit64::FinalizeCarry(bool ca)
{
	js.carryFlagSet = false;
	js.carryFlagInverted = false;
	if (!js.op->wantsCA)
		return;

	// js.next_op only describes a real successor while another instruction follows in this block.
	// On the last instruction nothing in this block can consume the host flag, so the carry must
	// go to XER.
	// NOTE(review): assumes js.isLastInstruction is maintained by the block compile loop - confirm.
	if (!js.isLastInstruction && js.next_op->wantsCAInFlags)
	{
		if (ca)
			STC();
		else
			CLC();
		js.carryFlagSet = true;
	}
	else if (ca)
	{
		JitSetCA();
	}
	else
	{
		// Clear CA only; OV is left untouched.
		JitClearCAOV(true, false);
	}
}
// Assumes CA,OV are clear
void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
{
// USES_XER
if (oe)
{
// this is slightly messy because JitSetCAIf modifies x86 flags, so we have to do it in both
// sides of the branch.
// Make sure not to lose the carry flags (not a big deal, this path is rare).
PUSHF();
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~(XER_SO_MASK | XER_OV_MASK)));
FixupBranch jno = J_CC(CC_NO);
JitSetCAIf(inv ? CC_NC : CC_C);
//XER[OV/SO] = 1
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
FixupBranch exit = J();
SetJumpTarget(jno);
JitSetCAIf(inv ? CC_NC : CC_C);
SetJumpTarget(exit);
}
else
{
// Do carry
JitSetCAIf(inv ? CC_NC : CC_C);
POPF();
}
// Do carry
FinalizeCarry(inv ? CC_NC : CC_C);
}
void Jit64::ComputeRC(const Gen::OpArg & arg)
@ -129,10 +177,10 @@ static u32 Xor(u32 a, u32 b)
void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry)
{
gpr.Lock(d, a);
if (a || binary || carry) // yeh nasty special case addic
// Be careful; addic treats r0 as r0, but addi treats r0 as zero.
if (a || binary || carry)
{
if (carry)
JitClearCAOV(false);
carry &= js.op->wantsCA;
if (gpr.R(a).IsImm() && !carry)
{
gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value));
@ -156,7 +204,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
}
}
if (carry)
JitSetCAIf(CC_C);
FinalizeCarry(CC_C);
if (Rc)
ComputeRC(gpr.R(d));
}
@ -239,6 +287,9 @@ void Jit64::reg_imm(UGeckoInstruction inst)
bool Jit64::CheckMergedBranch(int crf)
{
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE))
return false;
const UGeckoInstruction& next = js.next_inst;
return (((next.OPCD == 16 /* bcx */) ||
((next.OPCD == 19) && (next.SUBOP10 == 528) /* bcctrx */) ||
@ -721,148 +772,36 @@ void Jit64::subfic(UGeckoInstruction inst)
{
if (imm == 0)
{
JitClearCAOV(false);
// Flags act exactly like subtracting from 0
NEG(32, gpr.R(d));
// Output carry is inverted
JitSetCAIf(CC_NC);
FinalizeCarry(CC_NC);
}
else if (imm == -1)
{
// CA is always set in this case
JitSetCA();
NOT(32, gpr.R(d));
// CA is always set in this case
FinalizeCarry(true);
}
else
{
JitClearCAOV(false);
NOT(32, gpr.R(d));
ADD(32, gpr.R(d), Imm32(imm+1));
// Output carry is normal
JitSetCAIf(CC_C);
FinalizeCarry(CC_C);
}
}
else
{
JitClearCAOV(false);
MOV(32, gpr.R(d), Imm32(imm));
SUB(32, gpr.R(d), gpr.R(a));
// Output carry is inverted
JitSetCAIf(CC_NC);
FinalizeCarry(CC_NC);
}
gpr.UnlockAll();
// This instruction has no RC flag
}
// Emits rD = rB - rA with an x86 SUB and passes the resulting host flags to
// FinalizeCarryOverflow with inv = true, i.e. the emulated carry is taken from "no carry" (CC_NC).
void Jit64::subfcx(UGeckoInstruction inst)
{
INSTRUCTION_START;
JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d);
gpr.BindToRegister(d, (d == a || d == b), true);
// Clears XER.CA (and XER.OV when OE is set) up front; FinalizeCarryOverflow assumes they are clear.
JitClearCAOV(inst.OE);
if (d == b)
{
// d already holds b: subtract a in place.
SUB(32, gpr.R(d), gpr.R(a));
}
else if (d == a)
{
// d aliases a, so a must be saved in the scratch register before d is overwritten with b.
MOV(32, R(RSCRATCH), gpr.R(a));
MOV(32, gpr.R(d), gpr.R(b));
SUB(32, gpr.R(d), R(RSCRATCH));
}
else
{
MOV(32, gpr.R(d), gpr.R(b));
SUB(32, gpr.R(d), gpr.R(a));
}
// NOTE(review): ComputeRC is emitted between the SUB and the carry finalization here, unlike
// subfex/subfmex/subfzex; this presumably relies on ComputeRC leaving the host flags intact - confirm.
if (inst.Rc)
ComputeRC(gpr.R(d));
FinalizeCarryOverflow(inst.OE, true);
gpr.UnlockAll();
}
// Emits rD = ~rA + rB + carry-in, either as NOT followed by ADC or (when d aliases b) as
// CMC followed by SBB, then finalizes carry/overflow and the optional record form.
void Jit64::subfex(UGeckoInstruction inst)
{
INSTRUCTION_START;
JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d);
gpr.BindToRegister(d, (d == a || d == b), true);
// Presumably moves XER.CA into the host carry flag and clears CA (plus OV when OE is set);
// the helper's body is not part of this view - confirm.
JitGetAndClearCAOV(inst.OE);
// Set when the host carry flag ends up holding the complement of the emulated carry.
bool invertedCarry = false;
if (d == b)
{
// Convert carry to borrow
CMC();
SBB(32, gpr.R(d), gpr.R(a));
invertedCarry = true;
}
else if (d == a)
{
// d already holds a: complement it in place, then add b plus the carry-in.
NOT(32, gpr.R(d));
ADC(32, gpr.R(d), gpr.R(b));
}
else
{
MOV(32, gpr.R(d), gpr.R(a));
NOT(32, gpr.R(d));
ADC(32, gpr.R(d), gpr.R(b));
}
FinalizeCarryOverflow(inst.OE, invertedCarry);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
// Emits rD = ~rA + 0xFFFFFFFF + carry-in using NOT followed by ADC, then finalizes
// carry/overflow (carry not inverted) and the optional record form.
void Jit64::subfmex(UGeckoInstruction inst)
{
// USES_XER
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
int a = inst.RA, d = inst.RD;
gpr.Lock(a, d);
gpr.BindToRegister(d, d == a);
// Presumably moves XER.CA into the host carry flag and clears CA (plus OV when OE is set) - confirm.
JitGetAndClearCAOV(inst.OE);
if (d != a)
MOV(32, gpr.R(d), gpr.R(a));
NOT(32, gpr.R(d));
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
FinalizeCarryOverflow(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
// Emits rD = ~rA + 0 + carry-in using NOT followed by ADC with a zero immediate, then finalizes
// carry/overflow (carry not inverted) and the optional record form.
void Jit64::subfzex(UGeckoInstruction inst)
{
// USES_XER
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
int a = inst.RA, d = inst.RD;
gpr.Lock(a, d);
gpr.BindToRegister(d, d == a);
// Presumably moves XER.CA into the host carry flag and clears CA (plus OV when OE is set) - confirm.
JitGetAndClearCAOV(inst.OE);
if (d != a)
MOV(32, gpr.R(d), gpr.R(a));
NOT(32, gpr.R(d));
ADC(32, gpr.R(d), Imm8(0));
FinalizeCarryOverflow(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
void Jit64::subfx(UGeckoInstruction inst)
{
INSTRUCTION_START
@ -1329,96 +1268,93 @@ void Jit64::addx(UGeckoInstruction inst)
}
}
void Jit64::addex(UGeckoInstruction inst)
void Jit64::arithXex(UGeckoInstruction inst)
{
// USES_XER
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD;
bool regsource = !(inst.SUBOP10 & 64); // addex or subfex
bool mex = !!(inst.SUBOP10 & 32); // addmex/subfmex or addzex/subfzex
bool add = !!(inst.SUBOP10 & 2); // add or sub
int a = inst.RA;
int b = regsource ? inst.RB : a;
int d = inst.RD;
bool same_input_sub = !add && regsource && a == b;
gpr.Lock(a, b, d);
gpr.BindToRegister(d, (d == a) || (d == b));
JitGetAndClearCAOV(inst.OE);
if ((d == a) || (d == b))
gpr.BindToRegister(d, !same_input_sub && (d == a || d == b));
if (!js.carryFlagSet)
JitGetAndClearCAOV(inst.OE);
bool invertedCarry = false;
// Special case: subfe A, B, B is a common compiler idiom
if (same_input_sub)
{
ADC(32, gpr.R(d), gpr.R((d == a) ? b : a));
// Convert carry to borrow
if (!js.carryFlagInverted)
CMC();
SBB(32, gpr.R(d), gpr.R(d));
invertedCarry = true;
}
else if (!add && regsource && d == b)
{
if (!js.carryFlagInverted)
CMC();
if (d != b)
MOV(32, gpr.R(d), gpr.R(b));
SBB(32, gpr.R(d), gpr.R(a));
invertedCarry = true;
}
else
{
MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), gpr.R(b));
OpArg source = regsource ? gpr.R(d == b ? a : b) : Imm32(mex ? 0xFFFFFFFF : 0);
if (js.carryFlagInverted)
CMC();
if (d != a && d != b)
MOV(32, gpr.R(d), gpr.R(a));
if (!add)
NOT(32, gpr.R(d));
ADC(32, gpr.R(d), source);
}
FinalizeCarryOverflow(inst.OE);
FinalizeCarryOverflow(inst.OE, invertedCarry);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
void Jit64::addcx(UGeckoInstruction inst)
void Jit64::arithcx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
bool add = !!(inst.SUBOP10 & 2); // add or sub
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d);
gpr.BindToRegister(d, d == a || d == b, true);
if ((d == a) || (d == b))
if (d == a && d != b)
{
int operand = ((d == a) ? b : a);
gpr.Lock(a, b, d);
gpr.BindToRegister(d, true);
JitClearCAOV(inst.OE);
ADD(32, gpr.R(d), gpr.R(operand));
FinalizeCarryOverflow(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
if (add)
{
ADD(32, gpr.R(d), gpr.R(b));
}
else
{
// special case, because sub isn't reversible
MOV(32, R(RSCRATCH), gpr.R(a));
MOV(32, gpr.R(d), gpr.R(b));
SUB(32, gpr.R(d), R(RSCRATCH));
}
}
else
{
gpr.Lock(a, b, d);
gpr.BindToRegister(d, false);
JitClearCAOV(inst.OE);
MOV(32, gpr.R(d), gpr.R(a));
ADD(32, gpr.R(d), gpr.R(b));
FinalizeCarryOverflow(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
if (d != b)
MOV(32, gpr.R(d), gpr.R(b));
if (add)
ADD(32, gpr.R(d), gpr.R(a));
else
SUB(32, gpr.R(d), gpr.R(a));
}
}
// Emits rD = rA + 0xFFFFFFFF + carry-in with a single ADC, then finalizes carry/overflow
// (carry not inverted) and the optional record form.
void Jit64::addmex(UGeckoInstruction inst)
{
// USES_XER
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
int a = inst.RA, d = inst.RD;
// NOTE(review): only d is locked here although a is read below; subfmex locks both - confirm intent.
gpr.Lock(d);
gpr.BindToRegister(d, d == a);
// Presumably moves XER.CA into the host carry flag and clears CA (plus OV when OE is set) - confirm.
JitGetAndClearCAOV(inst.OE);
if (d != a)
MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
FinalizeCarryOverflow(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
void Jit64::addzex(UGeckoInstruction inst)
{
// USES_XER
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
int a = inst.RA, d = inst.RD;
gpr.Lock(d);
gpr.BindToRegister(d, d == a);
JitGetAndClearCAOV(inst.OE);
if (d != a)
MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), Imm8(0));
FinalizeCarryOverflow(inst.OE);
FinalizeCarryOverflow(inst.OE, !add);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
@ -1811,16 +1747,22 @@ void Jit64::srawx(UGeckoInstruction inst)
gpr.FlushLockX(ECX);
gpr.Lock(a, s, b);
gpr.BindToRegister(a, (a == s || a == b), true);
JitClearCAOV(false);
MOV(32, R(ECX), gpr.R(b));
if (a != s)
MOV(32, gpr.R(a), gpr.R(s));
SHL(64, gpr.R(a), Imm8(32));
SAR(64, gpr.R(a), R(ECX));
MOV(32, R(RSCRATCH), gpr.R(a));
SHR(64, gpr.R(a), Imm8(32));
TEST(32, gpr.R(a), R(RSCRATCH));
JitSetCAIf(CC_NZ);
if (js.op->wantsCA)
{
MOV(32, R(RSCRATCH), gpr.R(a));
SHR(64, gpr.R(a), Imm8(32));
TEST(32, gpr.R(a), R(RSCRATCH));
}
else
{
SHR(64, gpr.R(a), Imm8(32));
}
FinalizeCarry(CC_NZ);
gpr.UnlockAll();
gpr.UnlockAllX();
if (inst.Rc)
@ -1838,41 +1780,50 @@ void Jit64::srawix(UGeckoInstruction inst)
{
gpr.Lock(a, s);
gpr.BindToRegister(a, a == s, true);
MOV(32, R(RSCRATCH), gpr.R(s));
if (a != s)
MOV(32, gpr.R(a), R(RSCRATCH));
// some optimized common cases that can be done in slightly fewer ops
if (amount == 31)
if (!js.op->wantsCA)
{
JitSetCA();
SAR(32, gpr.R(a), Imm8(31));
NEG(32, R(RSCRATCH)); // RSCRATCH = input == INT_MIN ? INT_MIN : -input;
AND(32, R(RSCRATCH), Imm32(0x80000000)); // RSCRATCH = input < 0 && input != INT_MIN ? 0 : 0x80000000
SHR(32, R(RSCRATCH), Imm8(31 - XER_CA_SHIFT));
XOR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = (input < 0 && input != INT_MIN)
}
else if (amount == 1)
{
JitClearCAOV(false);
SHR(32, R(RSCRATCH), Imm8(31)); // sign
AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry)
SAR(32, gpr.R(a), Imm8(1));
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
if (a != s)
MOV(32, gpr.R(a), gpr.R(s));
SAR(32, gpr.R(a), Imm8(amount));
}
else
{
JitClearCAOV(false);
SAR(32, gpr.R(a), Imm8(amount));
SHL(32, R(RSCRATCH), Imm8(32 - amount));
TEST(32, R(RSCRATCH), gpr.R(a));
JitSetCAIf(CC_NZ);
MOV(32, R(RSCRATCH), gpr.R(s));
if (a != s)
MOV(32, gpr.R(a), R(RSCRATCH));
// some optimized common cases that can be done in slightly fewer ops
if (amount == 31)
{
JitSetCA();
SAR(32, gpr.R(a), Imm8(31));
NEG(32, R(RSCRATCH)); // RSCRATCH = input == INT_MIN ? INT_MIN : -input;
AND(32, R(RSCRATCH), Imm32(0x80000000)); // RSCRATCH = input < 0 && input != INT_MIN ? 0 : 0x80000000
SHR(32, R(RSCRATCH), Imm8(31 - XER_CA_SHIFT));
XOR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = (input < 0 && input != INT_MIN)
}
else if (amount == 1)
{
JitClearCAOV(true, false);
SHR(32, R(RSCRATCH), Imm8(31)); // sign
AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry)
SAR(32, gpr.R(a), Imm8(1));
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
}
else
{
JitClearCAOV(true, false);
SAR(32, gpr.R(a), Imm8(amount));
SHL(32, R(RSCRATCH), Imm8(32 - amount));
TEST(32, R(RSCRATCH), gpr.R(a));
FinalizeCarry(CC_NZ);
}
}
}
else
{
gpr.Lock(a, s);
JitClearCAOV(false);
FinalizeCarry(false);
gpr.BindToRegister(a, a == s, true);
if (a != s)

View File

@ -1106,7 +1106,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
Jit->JitSetCA();
FixupBranch cont = Jit->J();
Jit->SetJumpTarget(nocarry);
Jit->JitClearCAOV(false);
Jit->JitClearCAOV(true, false);
Jit->SetJumpTarget(cont);
regNormalRegClear(RI, I);
break;

View File

@ -81,13 +81,16 @@ protected:
bool isLastInstruction;
bool memcheck;
bool skipnext;
bool carryFlagSet;
bool carryFlagInverted;
int fifoBytesThisBlock;
PPCAnalyst::BlockStats st;
PPCAnalyst::BlockRegStats gpa;
PPCAnalyst::BlockRegStats fpa;
PPCAnalyst::CodeOp *op;
PPCAnalyst::CodeOp* op;
PPCAnalyst::CodeOp* next_op;
u8* rewriteStart;
JitBlock *curBlock;

View File

@ -845,13 +845,14 @@ void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
SETcc(conditionCode, R(RSCRATCH));
MOVZX(32, 8, RSCRATCH, R(RSCRATCH));
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK));
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); //XER.CA = 1
}
void EmuCodeBlock::JitClearCAOV(bool oe)
void EmuCodeBlock::JitClearCAOV(bool ca, bool oe)
{
if (oe)
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK & ~XER_OV_MASK)); //XER.CA, XER.OV = 0
else
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
u32 mask = (ca ? ~XER_CA_MASK : 0xFFFFFFFF) & (oe ? ~XER_OV_MASK : 0xFFFFFFFF);
if (mask == 0xFFFFFFFF)
return;
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(mask));
}

View File

@ -111,7 +111,7 @@ public:
void JitGetAndClearCAOV(bool oe);
void JitSetCA();
void JitSetCAIf(Gen::CCFlags conditionCode);
void JitClearCAOV(bool oe);
void JitClearCAOV(bool ca, bool oe);
void ForceSinglePrecisionS(Gen::X64Reg xmm);
void ForceSinglePrecisionP(Gen::X64Reg xmm);

View File

@ -213,14 +213,17 @@ static void AnalyzeFunction2(Symbol *func)
func->flags = flags;
}
// IMPORTANT - CURRENTLY ASSUMES THAT A IS A COMPARE
static bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
{
const GekkoOPInfo *a_info = a.opinfo;
const GekkoOPInfo *b_info = b.opinfo;
int a_flags = a_info->flags;
int b_flags = b_info->flags;
if (b_flags & (FL_SET_CRx | FL_ENDBLOCK | FL_TIMER | FL_EVIL))
if (b_flags & (FL_SET_CRx | FL_ENDBLOCK | FL_TIMER | FL_EVIL | FL_SET_OE))
return false;
if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.hex & 1))
if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.Rc))
return false;
if ((a_flags & (FL_SET_CA | FL_READ_CA)) && (b_flags & (FL_SET_CA | FL_READ_CA)))
return false;
switch (b.inst.OPCD)
@ -250,20 +253,16 @@ static bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
{
int regInA = a.regsIn[j];
int regInB = b.regsIn[j];
if (regInA >= 0 &&
(b.regsOut[0] == regInA ||
b.regsOut[1] == regInA))
{
// reg collision! don't swap
// register collision: b outputs to one of a's inputs
if (regInA >= 0 && (b.regsOut[0] == regInA || b.regsOut[1] == regInA))
return false;
}
if (regInB >= 0 &&
(a.regsOut[0] == regInB ||
a.regsOut[1] == regInB))
{
// reg collision! don't swap
// register collision: a outputs to one of b's inputs
if (regInB >= 0 && (a.regsOut[0] == regInB || a.regsOut[1] == regInB))
return false;
}
// register collision: b outputs to one of a's outputs (overwriting it)
for (int k = 0; k < 2; k++)
if (b.regsOut[k] >= 0 && (b.regsOut[k] == a.regsOut[0] || b.regsOut[k] == a.regsOut[1]))
return false;
}
return true;
@ -403,34 +402,84 @@ void FindFunctions(u32 startAddr, u32 endAddr, PPCSymbolDB *func_db)
leafSize, niceSize, unniceSize);
}
void PPCAnalyzer::ReorderInstructions(u32 instructions, CodeOp *code)
static bool isCmp(const CodeOp& a)
{
// Instruction Reordering Pass
// Bubble down compares towards branches, so that they can be merged.
// -2: -1 for the pair, -1 for not swapping with the final instruction which is probably the branch.
for (u32 i = 0; i < (instructions - 2); ++i)
return (a.inst.OPCD == 10 || a.inst.OPCD == 11) || (a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32));
}
// Reports whether `a` has primary opcode 21 and its Rc bit set (the record form the
// reordering pass treats like a compare).
static bool isRlwinm_rc(const CodeOp& a)
{
	if (a.inst.OPCD != 21)
		return false;
	return a.inst.Rc ? true : false;
}
// Reports whether `a` is an integer-type instruction flagged as setting CA but not flagged
// with FL_SET_OE; these are the instructions the carry reordering pass bubbles together.
static bool isCarryOp(const CodeOp& a)
{
	const bool setsCarry = (a.opinfo->flags & FL_SET_CA) ? true : false;
	if (!setsCarry)
		return false;

	const bool setsOverflow = (a.opinfo->flags & FL_SET_OE) ? true : false;
	if (setsOverflow)
		return false;

	return a.opinfo->type == OPTYPE_INTEGER;
}
void PPCAnalyzer::ReorderInstructionsCore(u32 instructions, CodeOp* code, bool reverse, ReorderType type)
{
// Bubbling an instruction sometimes reveals another opportunity to bubble an instruction, so do
// multiple passes.
while (true)
{
CodeOp &a = code[i];
CodeOp &b = code[i + 1];
// All integer compares can be reordered.
if ((a.inst.OPCD == 10 || a.inst.OPCD == 11) ||
(a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32)))
// Instruction Reordering Pass
// Carry pass: bubble carry-using instructions as close to each other as possible, so we can avoid
// storing the carry flag.
// Compare pass: bubble compare instructions next to branches, so they can be merged.
bool swapped = false;
int increment = reverse ? -1 : 1;
int start = reverse ? instructions - 1 : 0;
int end = reverse ? 0 : instructions - 1;
for (int i = start; i != end; i += increment)
{
// Got a compare instruction.
if (CanSwapAdjacentOps(a, b))
CodeOp &a = code[i];
CodeOp &b = code[i + increment];
// Reorder integer compares, rlwinm., and carry-affecting ops
// (if we add more merged branch instructions, add them here!)
if ((type == REORDER_CARRY && isCarryOp(a)) || (type == REORDER_CMP && (isCmp(a) || isRlwinm_rc(a))))
{
// Alright, let's bubble it down!
std::swap(a, b);
// once we're next to a carry instruction, don't move away!
if (type == REORDER_CARRY && i != start)
{
// if we read the CA flag, and the previous instruction sets it, don't move away.
if (!reverse && (a.opinfo->flags & FL_READ_CA) && (code[i - increment].opinfo->flags & FL_SET_CA))
continue;
// if we set the CA flag, and the next instruction reads it, don't move away.
if (reverse && (a.opinfo->flags & FL_SET_CA) && (code[i - increment].opinfo->flags & FL_READ_CA))
continue;
}
if (CanSwapAdjacentOps(a, b))
{
// Alright, let's bubble it!
std::swap(a, b);
swapped = true;
}
}
}
if (!swapped)
return;
}
}
// Runs the enabled instruction-reordering passes over code[0 .. instructions - 1].
//
// instructions: number of entries in `code`.
// code:         the block's instruction array, reordered in place.
//
// With OPTION_CARRY_MERGE, carry-affecting instructions are bubbled in both directions;
// with OPTION_BRANCH_MERGE, compare-like instructions are bubbled forward.
void PPCAnalyzer::ReorderInstructions(u32 instructions, CodeOp *code)
{
	// Nothing can be swapped with fewer than two instructions. ReorderInstructionsCore walks from
	// index 0 or instructions - 1 towards the opposite end using a "!=" bound, so an empty block
	// must never reach it.
	if (instructions < 2)
		return;

	// For carry, bubble instructions *towards* each other; one direction often isn't enough
	// to get pairs like addc/adde next to each other.
	if (HasOption(OPTION_CARRY_MERGE))
	{
		ReorderInstructionsCore(instructions, code, true, REORDER_CARRY);
		ReorderInstructionsCore(instructions, code, false, REORDER_CARRY);
	}

	if (HasOption(OPTION_BRANCH_MERGE))
		ReorderInstructionsCore(instructions, code, false, REORDER_CMP);
}
void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInfo *opinfo, u32 index)
{
code->wantsCR0 = false;
code->wantsCR1 = false;
code->wantsPS1 = false;
if (opinfo->flags & FL_USE_FPU)
block->m_fpa->any = true;
@ -458,6 +507,24 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) ? true : false;
code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) ? true : false;
code->wantsCA = (opinfo->flags & FL_READ_CA) ? true : false;
code->outputCA = (opinfo->flags & FL_SET_CA) ? true : false;
// We're going to try to avoid storing carry in XER if we can avoid it -- keep it in the x86 carry flag!
// If the instruction reads CA but doesn't write it, we still need to store CA in XER; we can't
// leave it in flags.
if (HasOption(OPTION_CARRY_MERGE))
code->wantsCAInFlags = code->wantsCA && code->outputCA && opinfo->type == OPTYPE_INTEGER;
else
code->wantsCAInFlags = false;
// mfspr/mtspr can affect/use XER, so be super careful here
// we need to note specifically that mfspr needs CA in XER, not in the x86 carry flag
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 339) // mfspr
code->wantsCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 467) // mtspr
code->outputCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
int numOut = 0;
int numIn = 0;
if (opinfo->flags & FL_OUT_A)
@ -715,26 +782,30 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
block->m_broken = true;
}
// Scan for CR0 dependency
// assume next block wants flags to be safe
// Scan for flag dependencies; assume the next block (or any branch that can leave the block)
// wants flags, to be safe.
bool wantsCR0 = true;
bool wantsCR1 = true;
bool wantsPS1 = true;
bool wantsFPRF = true;
bool wantsCA = true;
for (int i = block->m_num_instructions - 1; i >= 0; i--)
{
wantsCR0 |= code[i].wantsCR0 || code[i].canEndBlock;
wantsCR1 |= code[i].wantsCR1 || code[i].canEndBlock;
wantsPS1 |= code[i].wantsPS1 || code[i].canEndBlock;
wantsFPRF |= code[i].wantsFPRF || code[i].canEndBlock;
code[i].wantsCR0 = wantsCR0;
code[i].wantsCR1 = wantsCR1;
code[i].wantsPS1 = wantsPS1;
code[i].wantsFPRF = wantsFPRF;
wantsCR0 &= !code[i].outputCR0;
wantsCR1 &= !code[i].outputCR1;
wantsPS1 &= !code[i].outputPS1;
wantsFPRF &= !code[i].outputFPRF;
bool opWantsCR0 = code[i].wantsCR0;
bool opWantsCR1 = code[i].wantsCR1;
bool opWantsFPRF = code[i].wantsFPRF;
bool opWantsCA = code[i].wantsCA;
code[i].wantsCR0 = wantsCR0 || code[i].canEndBlock;
code[i].wantsCR1 = wantsCR1 || code[i].canEndBlock;
code[i].wantsFPRF = wantsFPRF || code[i].canEndBlock;
code[i].wantsCA = wantsCA || code[i].canEndBlock;
wantsCR0 |= opWantsCR0 || code[i].canEndBlock;
wantsCR1 |= opWantsCR1 || code[i].canEndBlock;
wantsFPRF |= opWantsFPRF || code[i].canEndBlock;
wantsCA |= opWantsCA || code[i].canEndBlock;
wantsCR0 &= !code[i].outputCR0 || opWantsCR0;
wantsCR1 &= !code[i].outputCR1 || opWantsCR1;
wantsFPRF &= !code[i].outputFPRF || opWantsFPRF;
wantsCA &= !code[i].outputCA || opWantsCA;
}
return address;
}

View File

@ -33,12 +33,13 @@ struct CodeOp //16B
bool isBranchTarget;
bool wantsCR0;
bool wantsCR1;
bool wantsPS1;
bool wantsFPRF;
bool wantsCA;
bool wantsCAInFlags;
bool outputCR0;
bool outputCR1;
bool outputPS1;
bool outputFPRF;
bool outputCA;
bool canEndBlock;
bool skip; // followed BL-s for example
};
@ -143,6 +144,13 @@ class PPCAnalyzer
{
private:
enum ReorderType
{
REORDER_CARRY,
REORDER_CMP
};
void ReorderInstructionsCore(u32 instructions, CodeOp* code, bool reverse, ReorderType type);
void ReorderInstructions(u32 instructions, CodeOp *code);
void SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInfo *opinfo, u32 index);
@ -175,6 +183,14 @@ public:
// Requires JIT support to work.
// XXX: NOT COMPLETE
OPTION_FORWARD_JUMP = (1 << 3),
// Reorder compare/Rc instructions next to their associated branches and
// merge in the JIT (for common cases, anyway).
OPTION_BRANCH_MERGE = (1 << 4),
// Reorder carry-using instructions next to each other and pass the
// carry flag in the x86 flags between them, instead of in XER.
OPTION_CARRY_MERGE = (1 << 5),
};

View File

@ -38,6 +38,7 @@ enum
FL_LOADSTORE = (1<<19),
FL_SET_FPRF = (1<<20),
FL_READ_FPRF = (1<<21),
FL_SET_OE = (1<<22),
};
enum