Armjit-FPU: Fix lots of bugs, impl some stuff. Still nothing working.

This commit is contained in:
Henrik Rydgard 2013-02-11 23:10:11 +01:00
parent 4eb89e6aec
commit 4bdb2045a7
10 changed files with 91 additions and 101 deletions

View File

@ -651,11 +651,18 @@ void ARMXEmitter::VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
// VFP Specific
void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, Operand2 op)
void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, u16 offset)
{
_assert_msg_(DYNA_REC, Dest >= S0 && Dest <= D31, "Passed Invalid dest register to VLDR");
_assert_msg_(DYNA_REC, Base <= R15, "Passed invalid Base register to VLDR");
_assert_msg_(DYNA_REC, !(op.Imm12() & 4), "VLDR: Offset needs to be word aligned");
_assert_msg_(DYNA_REC, (offset & 0xC003) == 0, "VLDR: Offset needs to be word aligned and small enough");
if (offset & 0xC03) {
ERROR_LOG(DYNA_REC, "VLDR: Bad offset %08x", offset);
}
ERROR_LOG(DYNA_REC, "VLDR: s%i, r%i + %i", Dest - S0, Base, offset);
bool single_reg = Dest < D0;
Dest = SubBase(Dest);
@ -663,20 +670,26 @@ void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, Operand2 op)
if (single_reg)
{
Write32(NO_COND | (0x1B << 23) | ((Dest & 0x1) << 22) | (1 << 20) | (Base << 16) \
| ((Dest & 0x1E) << 11) | (10 << 8) | (op.Imm12() >> 2));
| ((Dest & 0x1E) << 11) | (10 << 8) | (offset >> 2));
}
else
{
Write32(NO_COND | (0x1B << 23) | ((Dest & 0x10) << 18) | (1 << 20) | (Base << 16) \
| ((Dest & 0xF) << 12) | (11 << 8) | (op.Imm12() >> 2));
| ((Dest & 0xF) << 12) | (11 << 8) | (offset >> 2));
}
}
void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, Operand2 op)
void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, u16 offset)
{
_assert_msg_(DYNA_REC, Src >= S0 && Src <= D31, "Passed invalid src register to VSTR");
_assert_msg_(DYNA_REC, Base <= R15, "Passed invalid base register to VSTR");
_assert_msg_(DYNA_REC, !(op.Imm12() & 4), "VSTR: Offset needs to be word aligned");
_assert_msg_(DYNA_REC, (offset & 0xC003) == 0, "VSTR: Offset needs to be word aligned");
if (offset & 0xC03) {
ERROR_LOG(DYNA_REC, "VSTR: Bad offset %08x", offset);
}
ERROR_LOG(DYNA_REC, "VSTR: s%i, r%i + %i", Src - S0, Base, offset);
bool single_reg = Src < D0;
Src = SubBase(Src);
@ -684,13 +697,13 @@ void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, Operand2 op)
if (single_reg)
{
Write32(NO_COND | (0x1B << 23) | ((Src & 0x1) << 22) | (Base << 16) \
| ((Src & 0x1E) << 11) | (10 << 8) | (op.Imm12() >> 2));
| ((Src & 0x1E) << 11) | (10 << 8) | (offset >> 2));
}
else
{
Write32(NO_COND | (0x1B << 23) | ((Src & 0x10) << 18) | (Base << 16) \
| ((Src & 0xF) << 12) | (11 << 8) | (op.Imm12() >> 2));
| ((Src & 0xF) << 12) | (11 << 8) | (offset >> 2));
}
}
void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm)
@ -791,7 +804,7 @@ void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm)
if (single_reg)
{
Write32(NO_COND | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x3 << 20) \
| ((Vn & 0x1E) << 15) | ((Vd & 0x1E) << 12) | (0x5 << 9) \
| ((Vn & 0x1E) << 15) | ((Vd & 0x1E) << 11) | (0x5 << 9) \
| ((Vn & 0x1) << 7) | ((Vm & 0x1) << 5) | (Vm >> 1));
}
else

View File

@ -499,8 +499,8 @@ public:
void VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
// VFP Only
void VLDR(ARMReg Dest, ARMReg Base, Operand2 op);
void VSTR(ARMReg Src, ARMReg Base, Operand2 op);
void VLDR(ARMReg Dest, ARMReg Base, u16 offset);
void VSTR(ARMReg Src, ARMReg Base, u16 offset);
void VCMP(ARMReg Vd, ARMReg Vm);
// Compares against zero
void VCMP(ARMReg Vd);

View File

@ -41,38 +41,46 @@ void Jit::Comp_FPU3op(u32 op)
int ft = _FT;
int fs = _FS;
int fd = _FD;
fpr.MapDirtyInIn(fd, fs, ft);
switch (op & 0x3f)
{
case 0: VADD(fpr.R(fd), fpr.R(fs), fpr.R(fd)); break; //F(fd) = F(fs) + F(ft); //add
case 1: VSUB(fpr.R(fd), fpr.R(fs), fpr.R(fd)); break; //F(fd) = F(fs) - F(ft); //sub
case 2: VMUL(fpr.R(fd), fpr.R(fs), fpr.R(fd)); break; //F(fd) = F(fs) * F(ft); //mul
case 3: VDIV(fpr.R(fd), fpr.R(fs), fpr.R(fd)); break; //F(fd) = F(fs) / F(ft); //div
case 0:
fpr.MapDirtyInIn(fd, fs, ft);
INFO_LOG(HLE,"add.s %i %i %i -> VADD %i %i %i", fd, fs, ft, fpr.R(fd) - S0, fpr.R(fs) - S0, fpr.R(ft) - S0);
VADD(fpr.R(fd), fpr.R(fs), fpr.R(fd)); break; //F(fd) = F(fs) + F(ft); //add
//case 1: VSUB(fpr.R(fd), fpr.R(fs), fpr.R(fd)); break; //F(fd) = F(fs) - F(ft); //sub
//case 2: VMUL(fpr.R(fd), fpr.R(fs), fpr.R(fd)); break; //F(fd) = F(fs) * F(ft); //mul
//case 3: VDIV(fpr.R(fd), fpr.R(fs), fpr.R(fd)); break; //F(fd) = F(fs) / F(ft); //div
default:
Comp_Generic(op);
DISABLE;
return;
}
}
extern int logBlocks;
void Jit::Comp_FPULS(u32 op)
{
DISABLE
s32 offset = (s16)(op&0xFFFF);
int ft = ((op>>16)&0x1f);
s32 offset = (s16)(op & 0xFFFF);
int ft = _FT;
int rs = _RS;
// u32 addr = R(rs) + offset;
logBlocks = 1;
switch(op >> 26)
{
case 49: //FI(ft) = Memory::Read_U32(addr); break; //lwc1
gpr.MapReg(rs);
fpr.MapReg(ft, MAP_NOINIT | MAP_DIRTY);
ERROR_LOG(HLE, "lwc1 rs=%i offset=%i armr=%i", rs, offset, fpr.R(ft) - S0);
SetR0ToEffectiveAddress(rs, offset);
VLDR(fpr.R(ft), R0, 0);
break;
case 57: //Memory::Write_U32(FI(ft), addr); break; //swc1
DISABLE;
fpr.MapReg(ft, 0);
gpr.MapReg(rs);
SetR0ToEffectiveAddress(rs, offset);
VSTR(fpr.R(ft), R0, 0);
break;

View File

@ -124,6 +124,8 @@ void Jit::RunLoopUntil(u64 globalticks)
// TODO: copy globalticks somewhere
((void (*)())enterCode)();
}
static int dontLogBlocks = 20;
int logBlocks = 40;
const u8 *Jit::DoJit(u32 em_address, ArmJitBlock *b)
{
@ -152,8 +154,6 @@ const u8 *Jit::DoJit(u32 em_address, ArmJitBlock *b)
int numInstructions = 0;
int cycles = 0;
static int dontLogBlocks = 20;
static int logBlocks = 40;
if (logBlocks > 0) logBlocks--;
if (dontLogBlocks > 0) dontLogBlocks--;
@ -164,24 +164,21 @@ const u8 *Jit::DoJit(u32 em_address, ArmJitBlock *b)
while (js.compiling)
{
gpr.SetCompilerPC(js.compilerPC); // Let it know for log messages
fpr.SetCompilerPC(js.compilerPC);
u32 inst = Memory::Read_Instruction(js.compilerPC);
#ifdef LOGASM
if (logBlocks > 0 && dontLogBlocks == 0) {
MIPSDisAsm(inst, js.compilerPC, temp, true);
INFO_LOG(DYNA_REC, "M: %08x %s", js.compilerPC, temp);
}
#endif
js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);
MIPSCompileOp(inst);
// FlushAll(); ///HACKK
FlushAll(); ///HACKK
js.compilerPC += 4;
numInstructions++;
}
#ifdef LOGASM
if (logBlocks > 0 && dontLogBlocks == 0) {
MIPSDisAsm(Memory::Read_Instruction(js.compilerPC), js.compilerPC, temp, true);
INFO_LOG(DYNA_REC, "M: %08x %s", js.compilerPC, temp);
for (u32 cpc = em_address; cpc != js.compilerPC; cpc += 4) {
MIPSDisAsm(Memory::Read_Instruction(cpc), cpc, temp, true);
INFO_LOG(DYNA_REC, "M: %08x %s", cpc, temp);
}
}
#endif

View File

@ -20,8 +20,6 @@
using namespace ArmGen;
#define CTXREG (R10)
// Constructs the register cache, storing the supplied MIPSState pointer in
// mips_. No other initialization is done in the constructor body.
ArmRegCache::ArmRegCache(MIPSState *mips) : mips_(mips) {
}

View File

@ -23,6 +23,8 @@
using namespace ArmGen;
#define CTXREG (R10)
// R2 to R8: mapped MIPS regs
// R9 = code pointers
// R10 = MIPS context

View File

@ -21,7 +21,6 @@
using namespace ArmGen;
#define CTXREG (R10)
// Constructs the FPU register cache, storing the supplied MIPSState pointer
// in mips_. No other initialization is done in the constructor body.
ArmRegCacheFPU::ArmRegCacheFPU(MIPSState *mips) : mips_(mips) {
}
@ -38,7 +37,6 @@ void ArmRegCacheFPU::Start(MIPSAnalyst::AnalysisResults &stats) {
for (int i = 0; i < NUM_MIPSFPUREG; i++) {
mr[i].loc = ML_MEM;
mr[i].reg = INVALID_REG;
mr[i].imm = -1;
mr[i].spillLock = false;
}
}
@ -64,6 +62,7 @@ ARMReg ArmRegCacheFPU::MapReg(MIPSReg mipsReg, int mapFlags) {
if (mapFlags & MAP_DIRTY) {
ar[mr[mipsReg].reg].isDirty = true;
}
INFO_LOG(HLE, "Already mapped %i to %i", mipsReg, mr[mipsReg].reg);
return (ARMReg)(mr[mipsReg].reg + S0);
}
@ -87,10 +86,12 @@ allocate:
ar[reg].mipsReg = mipsReg;
mr[mipsReg].loc = ML_ARMREG;
mr[mipsReg].reg = reg;
INFO_LOG(HLE, "Mapped %i to %i", mipsReg, mr[mipsReg].reg);
return (ARMReg)(reg + S0);
}
}
// Still nothing. Let's spill a reg and goto 10.
// TODO: Use age or something to choose which register to spill?
// TODO: Spill dirty regs first? or opposite?
@ -104,8 +105,9 @@ allocate:
}
if (bestToSpill != -1) {
INFO_LOG(HLE, "Spillin! %i", bestToSpill);
// ERROR_LOG(JIT, "Out of registers at PC %08x - spills register %i.", mips_->pc, bestToSpill);
FlushArmReg((ARMReg)bestToSpill);
FlushArmReg((ARMReg)(S0 + bestToSpill));
goto allocate;
}
@ -146,11 +148,14 @@ void ArmRegCacheFPU::FlushArmReg(ARMReg r) {
}
if (ar[reg].mipsReg != -1) {
if (ar[reg].isDirty && mr[ar[reg].mipsReg].loc == ML_ARMREG)
emit->VSTR(CTXREG, r, GetMipsRegOffset(ar[reg].mipsReg));
{
INFO_LOG(HLE, "Flushing ARM reg %i", reg);
emit->VSTR(r, CTXREG, GetMipsRegOffset(ar[reg].mipsReg));
}
// IMMs won't be in an ARM reg.
mr[ar[reg].mipsReg].loc = ML_MEM;
mr[ar[reg].mipsReg].reg = INVALID_REG;
mr[ar[reg].mipsReg].imm = 0;
} else {
ERROR_LOG(HLE, "Dirty but no mipsreg?");
}
@ -171,7 +176,8 @@ void ArmRegCacheFPU::FlushMipsReg(MIPSReg r) {
ERROR_LOG(HLE, "FlushMipsReg: MipsReg had bad ArmReg");
}
if (ar[mr[r].reg].isDirty) {
emit->VSTR(CTXREG, (ARMReg)(mr[r].reg + S0), GetMipsRegOffset(r));
INFO_LOG(HLE, "Flushing dirty reg %i", mr[r].reg);
emit->VSTR((ARMReg)(mr[r].reg + S0), CTXREG, GetMipsRegOffset(r));
ar[mr[r].reg].isDirty = false;
}
ar[mr[r].reg].mipsReg = -1;
@ -187,7 +193,6 @@ void ArmRegCacheFPU::FlushMipsReg(MIPSReg r) {
}
mr[r].loc = ML_MEM;
mr[r].reg = (int)INVALID_REG;
mr[r].imm = 0;
}
void ArmRegCacheFPU::FlushAll() {
@ -202,34 +207,10 @@ void ArmRegCacheFPU::FlushAll() {
}
}
void ArmRegCacheFPU::SetImm(MIPSReg r, u32 immVal) {
// Zap existing value if cached in a reg
if (mr[r].loc == ML_ARMREG) {
ar[mr[r].reg].mipsReg = -1;
ar[mr[r].reg].isDirty = false;
}
mr[r].loc = ML_IMM;
mr[r].imm = immVal;
mr[r].reg = INVALID_REG;
}
bool ArmRegCacheFPU::IsImm(MIPSReg r) const {
return mr[r].loc == ML_IMM;
}
u32 ArmRegCacheFPU::GetImm(MIPSReg r) const {
if (mr[r].loc != ML_IMM) {
ERROR_LOG(JIT, "Trying to get imm from non-imm register %i", r);
}
return mr[r].imm;
}
int ArmRegCacheFPU::GetMipsRegOffset(MIPSReg r) {
// These are offsets within the MIPSState structure. First there are the GPRS, then FPRS, then the "VFPURs".
if (r < 32)
return (r + 32) * 4;
else if (r < 32 + 128)
return (r + 64) * 4;
if (r < 32 + 128)
return (r + 32) << 2;
ERROR_LOG(JIT, "bad mips register %i", r);
return 0; // or what?
}

View File

@ -39,7 +39,6 @@ struct FPURegMIPS {
// Where is this MIPS register?
RegMIPSLoc loc;
// Data (only one of these is used, depending on loc. Could make a union).
u32 imm;
ARMReg reg;
bool spillLock; // if true, this register cannot be spilled.
// If loc == ML_MEM, it's back in its location in the CPU context struct.

View File

@ -20,61 +20,53 @@ TestCode::TestCode()
AllocCodeSpace(0x10000);
}
// Three-argument leaf callee for the emitter test: hands a, b and c to ILOG
// and then returns a fixed marker value.
u32 TestLeaf(u32 a, u32 b, u32 c)
{
	const u32 marker = 0xFF;
	ILOG("TestLeaf: %08x %08x %08x\n", a, b, c);
	return marker;
}
// Single-argument leaf callee for the emitter test.
// The "%08x" conversion in the format string requires a matching argument,
// so `a` is passed explicitly; omitting it makes the varargs read undefined.
void TestLeaf2(u32 a)
{
	ILOG("TestLeaf2 %08x\n", a);
}
static float abc[256] = {1.0f, 2.0f, 0.0f};
void TestCode::Generate()
{
testCodePtr = this->GetCodePtr();
// Sonic1 commented that R11 is the frame pointer in debug mode, whatever "debug mode" means.
PUSH(2, R11, _LR);
MOVI2R(R0, 0x13371338);
AND(R1, R0, Operand2(0xFC, 4));
BIC(R0, R0, Operand2(0xFC, 4));
CMP(R1, Operand2(0x10, 4));
SetCC(CC_EQ);
MOV(R2, Operand2(0x99, 0));
SetCC(CC_NEQ);
MOV(R2, Operand2(0xFF, 0));
SetCC();
QuickCallFunction(R3, (void*)&TestLeaf);
MOVI2R(R11, (u32)&abc[0]);
MOVI2R(R1, 0x3f800000);
STR(R11, R1, 4 * (32 + 31));
VLDR(S0, R11, 0);
VLDR(S1, R11, 4);
VADD(S12, S0, S1);
VSTR(S0, R11, 4 * (32 + 31));
VSTR(S12, R11, 4 * (32 + 31));
//VSTR(S2, R0, 8);
POP(2, R11, _PC); // Yup, this is how you return.
//VLDR(S1, R0, 4);
//VADD(S2, S0, S1);
//VSTR(S2, R0, 8);
//QuickCallFunction(R3, (void*)&TestLeaf);
//ARMABI_CallFunctionCCC((void*)&TestLeaf, 0x1, 0x100, 0x1337);
//ARMABI_CallFunctionCCC((void*)&TestLeaf, 0x2, 0x100, 0x31337);
//ARMABI_CallFunctionCCC((void*)&TestLeaf, 0x3, 0x100, 0x1337);
POP(2, R11, _PC); // Yup, this is how you return.
testCodePtr2 = this->GetCodePtr();
PUSH(2, R11, _LR);
QuickCallFunction(R3, (void*)&TestLeaf2);
POP(2, R11, _PC);
}
void CallPtr(const void *ptr)
u32 CallPtr(const void *ptr)
{
((void(*)())ptr)();
return ((u32(*)())ptr)();
}
extern void DisassembleArm(const u8 *data, int size);
void ArmEmitterTest()
{
// return;
ILOG("Running ARM emitter test!");
TestCode gen;
gen.ReserveCodeSpace(0x4000);
gen.ReserveCodeSpace(0x1000);
const u8 *codeStart = gen.GetCodePtr();
gen.Generate();
DisassembleArm(codeStart, gen.GetCodePtr()-codeStart);
CallPtr(gen.testCodePtr);
ILOG("ARM emitter test 1 passed!");
u32 retval = CallPtr(gen.testCodePtr);
ILOG("ARM emitter test 1 passed if %f == 3.0! retval = %08x", abc[32 + 31], retval);
}

View File

@ -155,7 +155,7 @@ void NativeGetAppInfo(std::string *app_dir_name, std::string *app_nice_name, boo
*app_dir_name = "ppsspp";
*landscape = true;
// ArmEmitterTest();
ArmEmitterTest();
}
void NativeInit(int argc, const char *argv[], const char *savegame_directory, const char *external_directory, const char *installID)