Merge pull request #2189 from magumagu/paired-loadstore-cleanup

Fix paired loadstore to use correct load/store calls.
This commit is contained in:
skidau 2015-03-17 11:26:01 +11:00
commit b103aa7122
5 changed files with 205 additions and 253 deletions

View File

@ -305,6 +305,9 @@ union UGeckoInstruction
enum EQuantizeType : u32
{
QUANTIZE_FLOAT = 0,
QUANTIZE_INVALID1 = 1,
QUANTIZE_INVALID2 = 2,
QUANTIZE_INVALID3 = 3,
QUANTIZE_U8 = 4,
QUANTIZE_U16 = 5,
QUANTIZE_S8 = 6,

View File

@ -311,8 +311,8 @@ private:
static u32 Helper_Get_EA_UX(const UGeckoInstruction _inst);
// paired helper
static float Helper_Dequantize(const u32 _Addr, const EQuantizeType _quantizeType, const unsigned int _uScale);
static void Helper_Quantize (const u32 _Addr, const double _fValue, const EQuantizeType _quantizeType, const unsigned _uScale);
static void Helper_Dequantize(u32 addr, u32 instI, u32 instRD, u32 instW);
static void Helper_Quantize(u32 addr, u32 instI, u32 instRS, u32 instW);
// other helper
static u32 Helper_Mask(int mb, int me);

View File

@ -48,212 +48,260 @@ const float m_quantizeTable[] =
1.0 / (1ULL << 4), 1.0 / (1ULL << 3), 1.0 / (1ULL << 2), 1.0 / (1ULL << 1),
};
void Interpreter::Helper_Quantize(const u32 _Addr, const double _fValue, const EQuantizeType _quantizeType, const unsigned int _uScale)
template<typename SType> SType ScaleAndClamp(double ps, u32 stScale)
{
switch (_quantizeType)
float convPS = (float)ps * m_quantizeTable[stScale];
float min = (float)std::numeric_limits<SType>::min();
float max = (float)std::numeric_limits<SType>::max();
MathUtil::Clamp(&convPS, min, max);
return (SType)convPS;
}
// Loads one element of type T from guest memory at addr.
// Only the explicit specializations below exist; each one forwards to the
// PowerPC read routine of the matching width.
template <typename T>
static T ReadUnpaired(u32 addr);

// 8-bit element.
template <>
u8 ReadUnpaired<u8>(u32 addr)
{
	const u8 loaded = PowerPC::Read_U8(addr);
	return loaded;
}

// 16-bit element.
template <>
u16 ReadUnpaired<u16>(u32 addr)
{
	const u16 loaded = PowerPC::Read_U16(addr);
	return loaded;
}

// 32-bit element.
template <>
u32 ReadUnpaired<u32>(u32 addr)
{
	const u32 loaded = PowerPC::Read_U32(addr);
	return loaded;
}
// Loads two adjacent elements of type T from guest memory with a single
// double-width PowerPC read. The element taken from the upper half of the
// wide value is returned as .first, the one from the lower half as .second.
template <typename T>
static std::pair<T, T> ReadPair(u32 addr);

// Two 8-bit elements from one 16-bit read.
template <>
std::pair<u8, u8> ReadPair<u8>(u32 addr)
{
	const u16 packed = PowerPC::Read_U16(addr);
	const u8 upper = static_cast<u8>(packed >> 8);
	const u8 lower = static_cast<u8>(packed);
	return std::pair<u8, u8>(upper, lower);
}

// Two 16-bit elements from one 32-bit read.
template <>
std::pair<u16, u16> ReadPair<u16>(u32 addr)
{
	const u32 packed = PowerPC::Read_U32(addr);
	const u16 upper = static_cast<u16>(packed >> 16);
	const u16 lower = static_cast<u16>(packed);
	return std::pair<u16, u16>(upper, lower);
}

// Two 32-bit elements from one 64-bit read.
template <>
std::pair<u32, u32> ReadPair<u32>(u32 addr)
{
	const u64 packed = PowerPC::Read_U64(addr);
	const u32 upper = static_cast<u32>(packed >> 32);
	const u32 lower = static_cast<u32>(packed);
	return std::pair<u32, u32>(upper, lower);
}
// Stores one element of type T to guest memory at addr.
// Only the explicit specializations below exist; each one forwards to the
// PowerPC write routine of the matching width.
template <typename T>
static void WriteUnpaired(T val, u32 addr);

// 8-bit element.
template <>
void WriteUnpaired<u8>(u8 val, u32 addr)
{
	PowerPC::Write_U8(val, addr);
}

// 16-bit element.
template <>
void WriteUnpaired<u16>(u16 val, u32 addr)
{
	PowerPC::Write_U16(val, addr);
}

// 32-bit element.
template <>
void WriteUnpaired<u32>(u32 val, u32 addr)
{
	PowerPC::Write_U32(val, addr);
}
// Stores two elements of type T to guest memory with a single double-width
// PowerPC write. val1 is placed in the upper half of the wide value and val2
// in the lower half.
template <typename T>
static void WritePair(T val1, T val2, u32 addr);

// Two 8-bit elements packed into one 16-bit write.
template <>
void WritePair<u8>(u8 val1, u8 val2, u32 addr)
{
	const u16 upper = static_cast<u16>(val1);
	const u16 lower = static_cast<u16>(val2);
	PowerPC::Write_U16((upper << 8) | lower, addr);
}

// Two 16-bit elements packed into one 32-bit write.
template <>
void WritePair<u16>(u16 val1, u16 val2, u32 addr)
{
	const u32 upper = static_cast<u32>(val1);
	const u32 lower = static_cast<u32>(val2);
	PowerPC::Write_U32((upper << 16) | lower, addr);
}

// Two 32-bit elements packed into one 64-bit write.
template <>
void WritePair<u32>(u32 val1, u32 val2, u32 addr)
{
	const u64 upper = static_cast<u64>(val1);
	const u64 lower = static_cast<u64>(val2);
	PowerPC::Write_U64((upper << 32) | lower, addr);
}
// Quantizes paired-single values to integer type T and stores them at addr.
//
// ps0, ps1 - the two source values; ps1 is only converted when it is stored.
// addr     - guest address to write to.
// instW    - nonzero: store ps0 alone; zero: store ps0 and ps1 as a pair.
// stScale  - scale index handed through to ScaleAndClamp.
//
// Each value goes through ScaleAndClamp<T> and is then cast to T's unsigned
// counterpart, which is the type the write helpers are specialized for.
template <typename T>
void QuantizeAndStore(double ps0, double ps1, u32 addr, u32 instW, u32 stScale)
{
	using Raw = typename std::make_unsigned<T>::type;

	const Raw first = static_cast<Raw>(ScaleAndClamp<T>(ps0, stScale));
	if (instW != 0)
	{
		// Single-element store: ps1 is never touched.
		WriteUnpaired<Raw>(first, addr);
		return;
	}

	const Raw second = static_cast<Raw>(ScaleAndClamp<T>(ps1, stScale));
	WritePair<Raw>(first, second, addr);
}
void Interpreter::Helper_Quantize(u32 addr, u32 instI, u32 instRS, u32 instW)
{
const UGQR gqr(rSPR(SPR_GQR0 + instI));
const EQuantizeType stType = gqr.st_type;
const unsigned int stScale = gqr.st_scale;
double ps0 = rPS0(instRS);
double ps1 = rPS1(instRS);
switch (stType)
{
case QUANTIZE_FLOAT:
PowerPC::Write_U32(ConvertToSingleFTZ(*(u64*)&_fValue), _Addr);
break;
// used for THP player
case QUANTIZE_U8:
{
u32 convPS0 = ConvertToSingleFTZ(MathUtil::IntDouble(ps0).i);
if (instW)
{
float fResult = (float)_fValue * m_quantizeTable[_uScale];
MathUtil::Clamp(&fResult, 0.0f, 255.0f);
PowerPC::Write_U8((u8)fResult, _Addr);
WriteUnpaired<u32>(convPS0, addr);
}
else
{
u32 convPS1 = ConvertToSingleFTZ(MathUtil::IntDouble(ps1).i);
WritePair<u32>(convPS0, convPS1, addr);
}
break;
}
case QUANTIZE_U8:
QuantizeAndStore<u8>(ps0, ps1, addr, instW, stScale);
break;
case QUANTIZE_U16:
{
float fResult = (float)_fValue * m_quantizeTable[_uScale];
MathUtil::Clamp(&fResult, 0.0f, 65535.0f);
PowerPC::Write_U16((u16)fResult, _Addr);
}
QuantizeAndStore<u16>(ps0, ps1, addr, instW, stScale);
break;
case QUANTIZE_S8:
{
float fResult = (float)_fValue * m_quantizeTable[_uScale];
MathUtil::Clamp(&fResult, -128.0f, 127.0f);
PowerPC::Write_U8((u8)(s8)fResult, _Addr);
}
QuantizeAndStore<s8>(ps0, ps1, addr, instW, stScale);
break;
case QUANTIZE_S16:
{
float fResult = (float)_fValue * m_quantizeTable[_uScale];
MathUtil::Clamp(&fResult, -32768.0f, 32767.0f);
PowerPC::Write_U16((u16)(s16)fResult, _Addr);
}
QuantizeAndStore<s16>(ps0, ps1, addr, instW, stScale);
break;
default:
_dbg_assert_msg_(POWERPC, 0, "PS dequantize - unknown type to read");
case QUANTIZE_INVALID1:
case QUANTIZE_INVALID2:
case QUANTIZE_INVALID3:
_assert_msg_(POWERPC, 0, "PS dequantize - unknown type to read");
break;
}
}
float Interpreter::Helper_Dequantize(const u32 _Addr, const EQuantizeType _quantizeType, const unsigned int _uScale)
template<typename T>
std::pair<float, float> LoadAndDequantize(u32 addr, u32 instW, u32 ldScale)
{
// dequantize the value
float fResult;
switch (_quantizeType)
typedef typename std::make_unsigned<T>::type U;
float ps0, ps1;
if (instW)
{
U value = ReadUnpaired<U>(addr);
ps0 = (float)(T)(value) * m_dequantizeTable[ldScale];
ps1 = 1.0f;
}
else
{
std::pair<U, U> value = ReadPair<U>(addr);
ps0 = (float)(T)(value.first) * m_dequantizeTable[ldScale];
ps1 = (float)(T)(value.second) * m_dequantizeTable[ldScale];
}
return { ps0, ps1 };
}
void Interpreter::Helper_Dequantize(u32 addr, u32 instI, u32 instRD, u32 instW)
{
UGQR gqr(rSPR(SPR_GQR0 + instI));
EQuantizeType ldType = gqr.ld_type;
unsigned int ldScale = gqr.ld_scale;
float ps0, ps1;
switch (ldType)
{
case QUANTIZE_FLOAT:
if (instW)
{
u32 dwValue = PowerPC::Read_U32(_Addr);
fResult = *(float*)&dwValue;
u32 value = ReadUnpaired<u32>(addr);
ps0 = MathUtil::IntFloat(value).f;
ps1 = 1.0f;
}
else
{
std::pair<u32, u32> value = ReadPair<u32>(addr);
ps0 = MathUtil::IntFloat(value.first).f;
ps1 = MathUtil::IntFloat(value.second).f;
}
break;
case QUANTIZE_U8:
fResult = static_cast<float>(PowerPC::Read_U8(_Addr)) * m_dequantizeTable[_uScale];
std::tie(ps0, ps1) = LoadAndDequantize<u8>(addr, instW, ldScale);
break;
case QUANTIZE_U16:
fResult = static_cast<float>(PowerPC::Read_U16(_Addr)) * m_dequantizeTable[_uScale];
std::tie(ps0, ps1) = LoadAndDequantize<u16>(addr, instW, ldScale);
break;
case QUANTIZE_S8:
fResult = static_cast<float>((s8)PowerPC::Read_U8(_Addr)) * m_dequantizeTable[_uScale];
std::tie(ps0, ps1) = LoadAndDequantize<s8>(addr, instW, ldScale);
break;
// used for THP player
case QUANTIZE_S16:
fResult = static_cast<float>((s16)PowerPC::Read_U16(_Addr)) * m_dequantizeTable[_uScale];
std::tie(ps0, ps1) = LoadAndDequantize<s16>(addr, instW, ldScale);
break;
default:
_dbg_assert_msg_(POWERPC, 0, "PS dequantize - unknown type to read");
fResult = 0;
case QUANTIZE_INVALID1:
case QUANTIZE_INVALID2:
case QUANTIZE_INVALID3:
_assert_msg_(POWERPC, 0, "PS dequantize - unknown type to read");
ps0 = 0.f;
ps1 = 0.f;
break;
}
return fResult;
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(instRD) = ps0;
rPS1(instRD) = ps1;
}
void Interpreter::psq_l(UGeckoInstruction _inst)
{
const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
const EQuantizeType ldType = gqr.ld_type;
const unsigned int ldScale = gqr.ld_scale;
const u32 EA = _inst.RA ?
(rGPR[_inst.RA] + _inst.SIMM_12) : (u32)_inst.SIMM_12;
int c = 4;
if (ldType == QUANTIZE_U8 || ldType == QUANTIZE_S8)
c = 0x1;
else if (ldType == QUANTIZE_U16 || ldType == QUANTIZE_S16)
c = 0x2;
if (_inst.W == 0)
{
float ps0 = Helper_Dequantize(EA, ldType, ldScale);
float ps1 = Helper_Dequantize(EA + c, ldType, ldScale);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(_inst.RD) = ps0;
rPS1(_inst.RD) = ps1;
}
else
{
float ps0 = Helper_Dequantize(EA, ldType, ldScale);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(_inst.RD) = ps0;
rPS1(_inst.RD) = 1.0f;
}
Helper_Dequantize(EA, _inst.I, _inst.RD, _inst.W);
}
void Interpreter::psq_lu(UGeckoInstruction _inst)
{
const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
const EQuantizeType ldType = gqr.ld_type;
const unsigned int ldScale = gqr.ld_scale;
const u32 EA = rGPR[_inst.RA] + _inst.SIMM_12;
Helper_Dequantize(EA, _inst.I, _inst.RD, _inst.W);
int c = 4;
if (ldType == QUANTIZE_U8 || ldType == QUANTIZE_S8)
c = 0x1;
else if (ldType == QUANTIZE_U16 || ldType == QUANTIZE_S16)
c = 0x2;
if (_inst.W == 0)
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
float ps0 = Helper_Dequantize(EA, ldType, ldScale);
float ps1 = Helper_Dequantize(EA + c, ldType, ldScale);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(_inst.RD) = ps0;
rPS1(_inst.RD) = ps1;
}
else
{
float ps0 = Helper_Dequantize(EA, ldType, ldScale);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(_inst.RD) = ps0;
rPS1(_inst.RD) = 1.0f;
return;
}
rGPR[_inst.RA] = EA;
}
void Interpreter::psq_st(UGeckoInstruction _inst)
{
const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
const EQuantizeType stType = gqr.st_type;
const unsigned int stScale = gqr.st_scale;
const u32 EA = _inst.RA ?
(rGPR[_inst.RA] + _inst.SIMM_12) : (u32)_inst.SIMM_12;
int c = 4;
if (stType == QUANTIZE_U8 || stType == QUANTIZE_S8)
c = 0x1;
else if (stType == QUANTIZE_U16 || stType == QUANTIZE_S16)
c = 0x2;
if (_inst.W == 0)
{
Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
Helper_Quantize(EA + c, rPS1(_inst.RS), stType, stScale);
}
else
{
Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
}
Helper_Quantize(EA, _inst.I, _inst.RS, _inst.W);
}
void Interpreter::psq_stu(UGeckoInstruction _inst)
{
const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
const EQuantizeType stType = gqr.st_type;
const unsigned int stScale = gqr.st_scale;
const u32 EA = rGPR[_inst.RA] + _inst.SIMM_12;
Helper_Quantize(EA, _inst.I, _inst.RS, _inst.W);
int c = 4;
if (stType == QUANTIZE_U8 || stType == QUANTIZE_S8)
c = 0x1;
else if (stType == QUANTIZE_U16 || stType == QUANTIZE_S16)
c = 0x2;
if (_inst.W == 0)
{
Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
Helper_Quantize(EA + c, rPS1(_inst.RS), stType, stScale);
}
else
{
Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
}
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
@ -263,132 +311,36 @@ void Interpreter::psq_stu(UGeckoInstruction _inst)
void Interpreter::psq_lx(UGeckoInstruction _inst)
{
const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
const EQuantizeType ldType = gqr.ld_type;
const unsigned int ldScale = gqr.ld_scale;
const u32 EA = _inst.RA ? (rGPR[_inst.RA] + rGPR[_inst.RB]) : rGPR[_inst.RB];
int c = 4;
if (ldType == QUANTIZE_U8 || ldType == QUANTIZE_S8)
c = 0x1;
else if (ldType == QUANTIZE_U16 || ldType == QUANTIZE_S16)
c = 0x2;
if (_inst.Wx == 0)
{
float ps0 = Helper_Dequantize(EA, ldType, ldScale);
float ps1 = Helper_Dequantize(EA + c, ldType, ldScale);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(_inst.RD) = ps0;
rPS1(_inst.RD) = ps1;
}
else
{
float ps0 = Helper_Dequantize(EA, ldType, ldScale);
float ps1 = 1.0f;
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(_inst.RD) = ps0;
rPS1(_inst.RD) = ps1;
}
Helper_Dequantize(EA, _inst.Ix, _inst.RD, _inst.Wx);
}
void Interpreter::psq_stx(UGeckoInstruction _inst)
{
const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
const EQuantizeType stType = gqr.st_type;
const unsigned int stScale = gqr.st_scale;
const u32 EA = _inst.RA ? (rGPR[_inst.RA] + rGPR[_inst.RB]) : rGPR[_inst.RB];
int c = 4;
if (stType == QUANTIZE_U8 || stType == QUANTIZE_S8)
c = 0x1;
else if (stType == QUANTIZE_U16 || stType == QUANTIZE_S16)
c = 0x2;
if (_inst.Wx == 0)
{
Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
Helper_Quantize(EA + c, rPS1(_inst.RS), stType, stScale);
}
else
{
Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
}
Helper_Quantize(EA, _inst.Ix, _inst.RS, _inst.Wx);
}
void Interpreter::psq_lux(UGeckoInstruction _inst)
{
const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
const EQuantizeType ldType = gqr.ld_type;
const unsigned int ldScale = gqr.ld_scale;
const u32 EA = rGPR[_inst.RA] + rGPR[_inst.RB];
Helper_Dequantize(EA, _inst.Ix, _inst.RD, _inst.Wx);
int c = 4;
if (ldType == QUANTIZE_U8 || ldType == QUANTIZE_S8)
c = 0x1;
else if (ldType == QUANTIZE_U16 || ldType == QUANTIZE_S16)
c = 0x2;
if (_inst.Wx == 0)
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
float ps0 = Helper_Dequantize(EA, ldType, ldScale);
float ps1 = Helper_Dequantize(EA + c, ldType, ldScale);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(_inst.RD) = ps0;
rPS1(_inst.RD) = ps1;
}
else
{
float ps0 = Helper_Dequantize(EA, ldType, ldScale);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(_inst.RD) = ps0;
rPS1(_inst.RD) = 1.0f;
return;
}
rGPR[_inst.RA] = EA;
}
void Interpreter::psq_stux(UGeckoInstruction _inst)
{
const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
const EQuantizeType stType = gqr.st_type;
const unsigned int stScale = gqr.st_scale;
const u32 EA = rGPR[_inst.RA] + rGPR[_inst.RB];
Helper_Quantize(EA, _inst.Ix, _inst.RS, _inst.Wx);
int c = 4;
if (stType == QUANTIZE_U8 || stType == QUANTIZE_S8)
c = 0x1;
else if (stType == QUANTIZE_U16 || stType == QUANTIZE_S16)
c = 0x2;
if (_inst.Wx == 0)
{
Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
Helper_Quantize(EA + c, rPS1(_inst.RS), stType, stScale);
}
else
{
Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
}
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rGPR[_inst.RA] = EA;
} // namespace=======
}

View File

@ -27,22 +27,19 @@ using namespace ArmGen;
JitArmAsmRoutineManager asm_routines;
static void WriteDual32(u32 value1, u32 value2, u32 address)
static void WriteDual8(u32 val1, u32 val2, u32 addr)
{
PowerPC::Write_U32(value1, address);
PowerPC::Write_U32(value2, address + 4);
PowerPC::Write_U16(((u16)(u8)val1 << 8) | (u16)(u8)val2, addr);
}
static void WriteDual16(u32 value1, u32 value2, u32 address)
static void WriteDual16(u32 val1, u32 val2, u32 addr)
{
PowerPC::Write_U16(value1, address);
PowerPC::Write_U16(value2, address + 2);
PowerPC::Write_U32(((u32)(u16)val1 << 16) | (u32)(u16)val2, addr);
}
static void WriteDual8(u32 value1, u32 value2, u32 address)
static void WriteDual32(u32 val1, u32 val2, u32 addr)
{
PowerPC::Write_U8(value1, address);
PowerPC::Write_U8(value2, address + 1);
PowerPC::Write_U64(((u64)val1 << 32) | (u64)val2, addr);
}
void JitArmAsmRoutineManager::Generate()

View File

@ -238,7 +238,7 @@ __forceinline static void WriteToHardware(u32 em_address, const T data)
{
// First, let's check for FIFO writes, since they are probably the most common
// reason we end up in this function:
if (flag == FLAG_WRITE && (em_address & 0xFFFFF000) == 0xCC008000)
if (flag == FLAG_WRITE && em_address == 0xCC008000)
{
switch (sizeof(T))
{