Replaced uses of x87 instructions by SSE instructions

git-svn-id: http://svn.purei.org/purei/trunk@201 b36208d7-6611-0410-8bec-b1987f11c4a2
This commit is contained in:
jpd002 2007-12-14 01:41:48 +00:00
parent c336504aab
commit 985fc1728b
9 changed files with 288 additions and 80 deletions

View File

@ -212,28 +212,28 @@ void CCOP_FPU::BC1TL()
//00
void CCOP_FPU::ADD_S()
{
m_codeGen->FPU_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFS * 2]));
m_codeGen->FPU_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFT * 2]));
m_codeGen->FPU_Add();
m_codeGen->FPU_PullSingle(offsetof(CMIPS, m_State.nCOP10[m_nFD * 2]));
m_codeGen->FP_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFS * 2]));
m_codeGen->FP_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFT * 2]));
m_codeGen->FP_Add();
m_codeGen->FP_PullSingle(offsetof(CMIPS, m_State.nCOP10[m_nFD * 2]));
}
//01
void CCOP_FPU::SUB_S()
{
m_codeGen->FPU_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFS * 2]));
m_codeGen->FPU_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFT * 2]));
m_codeGen->FPU_Sub();
m_codeGen->FPU_PullSingle(offsetof(CMIPS, m_State.nCOP10[m_nFD * 2]));
m_codeGen->FP_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFS * 2]));
m_codeGen->FP_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFT * 2]));
m_codeGen->FP_Sub();
m_codeGen->FP_PullSingle(offsetof(CMIPS, m_State.nCOP10[m_nFD * 2]));
}
//02
void CCOP_FPU::MUL_S()
{
m_codeGen->FPU_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFS * 2]));
m_codeGen->FPU_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFT * 2]));
m_codeGen->FPU_Mul();
m_codeGen->FPU_PullSingle(offsetof(CMIPS, m_State.nCOP10[m_nFD * 2]));
m_codeGen->FP_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFS * 2]));
m_codeGen->FP_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFT * 2]));
m_codeGen->FP_Mul();
m_codeGen->FP_PullSingle(offsetof(CMIPS, m_State.nCOP10[m_nFD * 2]));
}
//03
@ -251,10 +251,10 @@ void CCOP_FPU::DIV_S()
}
m_codeGen->BeginIfElseAlt();
{
m_codeGen->FPU_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFS * 2]));
m_codeGen->FPU_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFT * 2]));
m_codeGen->FPU_Div();
m_codeGen->FPU_PullSingle(offsetof(CMIPS, m_State.nCOP10[m_nFD * 2]));
m_codeGen->FP_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFS * 2]));
m_codeGen->FP_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFT * 2]));
m_codeGen->FP_Div();
m_codeGen->FP_PullSingle(offsetof(CMIPS, m_State.nCOP10[m_nFD * 2]));
}
m_codeGen->EndIf();
}
@ -367,8 +367,8 @@ void CCOP_FPU::CVT_W_S()
{
//Load the rounding mode from FCSR?
//PS2 only supports truncate rounding mode
m_codeGen->FPU_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFS * 2]));
m_codeGen->FPU_PullWordTruncate(offsetof(CMIPS, m_State.nCOP10[m_nFD * 2]));
m_codeGen->FP_PushSingle(offsetof(CMIPS, m_State.nCOP10[m_nFS * 2]));
m_codeGen->FP_PullWordTruncate(offsetof(CMIPS, m_State.nCOP10[m_nFD * 2]));
}
//32
@ -420,8 +420,8 @@ void CCOP_FPU::C_LE_S()
//20
void CCOP_FPU::CVT_S_W()
{
m_codeGen->FPU_PushWord(offsetof(CMIPS, m_State.nCOP10[m_nFS * 2]));
m_codeGen->FPU_PullSingle(offsetof(CMIPS, m_State.nCOP10[m_nFD * 2]));
m_codeGen->FP_PushWord(offsetof(CMIPS, m_State.nCOP10[m_nFS * 2]));
m_codeGen->FP_PullSingle(offsetof(CMIPS, m_State.nCOP10[m_nFD * 2]));
}
//////////////////////////////////////////////////

View File

@ -9,6 +9,7 @@
using namespace boost;
using namespace Framework;
using namespace std;
bool CCodeGen::m_nBlockStarted = false;
CCacheBlock* CCodeGen::m_pBlock = NULL;
@ -25,6 +26,7 @@ CX86Assembler CCodeGen::m_Assembler
);
bool CCodeGen::m_nRegisterAllocated[MAX_REGISTER];
bool CCodeGen::m_xmmRegisterAllocated[MAX_XMM_REGISTER];
CStream* CCodeGen::m_stream = NULL;
CX86Assembler::REGISTER CCodeGen::g_nBaseRegister = CX86Assembler::rBP;
@ -91,6 +93,16 @@ CX86Assembler::REGISTER CCodeGen::m_nRegisterLookupEx[MAX_REGISTER] =
#endif
//Default constructor. The body is empty: no per-instance initialization is performed here.
CCodeGen::CCodeGen()
{
}
//Destructor (declared virtual in the class declaration). The body is empty: nothing is released here.
CCodeGen::~CCodeGen()
{
}
void CCodeGen::SetStream(CStream* stream)
{
m_stream = stream;
@ -112,6 +124,11 @@ void CCodeGen::Begin(CCacheBlock* pBlock)
{
m_nRegisterAllocated[i] = false;
}
for(unsigned int i = 0; i < MAX_XMM_REGISTER; i++)
{
m_xmmRegisterAllocated[i] = false;
}
}
void CCodeGen::End()
@ -332,6 +349,25 @@ void CCodeGen::FreeRegister(unsigned int nRegister)
m_nRegisterAllocated[nRegister] = false;
}
//Reserves the lowest-numbered XMM register that is not currently marked as
//allocated and returns its identifier.
//Throws runtime_error when every one of the MAX_XMM_REGISTER slots is in use.
CCodeGen::XMMREGISTER CCodeGen::AllocateXmmRegister()
{
    //Skip over every slot that is already taken
    unsigned int candidate = 0;
    while((candidate < MAX_XMM_REGISTER) && m_xmmRegisterAllocated[candidate])
    {
        candidate++;
    }

    //Ran off the end of the table: nothing is free
    if(candidate == MAX_XMM_REGISTER)
    {
        throw runtime_error("All registers exhausted.");
    }

    m_xmmRegisterAllocated[candidate] = true;
    return static_cast<XMMREGISTER>(candidate);
}
//Returns an XMM register to the pool by clearing its entry in the allocation table.
//registerId is used directly as the index into m_xmmRegisterAllocated; no range check is done.
void CCodeGen::FreeXmmRegister(XMMREGISTER registerId)
{
    bool& inUse = m_xmmRegisterAllocated[registerId];
    inUse = false;
}
void CCodeGen::LoadVariableInRegister(unsigned int nRegister, uint32 nVariable)
{
//mov reg, dword ptr[Variable]

View File

@ -19,7 +19,9 @@ namespace CodeGen
class CCodeGen
{
public:
enum CONDITION
typedef CX86Assembler::XMMREGISTER XMMREGISTER;
enum CONDITION
{
CONDITION_EQ,
CONDITION_NE,
@ -41,6 +43,8 @@ public:
#ifdef AMD64
REGISTER64,
#endif
FP_SINGLE_RELATIVE,
FP_SINGLE_REGISTER,
};
enum ROUNDMODE
@ -53,6 +57,9 @@ public:
friend class CodeGen::CFPU;
CCodeGen();
virtual ~CCodeGen();
static void Begin(CCacheBlock*);
static void End();
@ -100,20 +107,17 @@ public:
static void Xor();
//FPU
void FPU_PushWord(size_t);
void FPU_PushSingle(size_t);
void FPU_PullWord(size_t);
void FPU_PullWordTruncate(size_t);
void FPU_PullSingle(size_t);
void FP_PushWord(size_t);
void FP_PushSingle(size_t);
void FP_PullWordTruncate(size_t);
void FP_PullSingle(size_t);
void FP_PushSingleReg(XMMREGISTER);
void FP_LoadSingleRelativeInRegister(XMMREGISTER, uint32);
void FPU_Add();
void FPU_Sub();
void FPU_Mul();
void FPU_Div();
void FPU_PushRoundingMode();
void FPU_PullRoundingMode();
void FPU_SetRoundingMode(ROUNDMODE);
void FP_Add();
void FP_Sub();
void FP_Mul();
void FP_Div();
void SetStream(Framework::CStream*);
static CX86Assembler m_Assembler;
@ -139,6 +143,15 @@ private:
#endif
};
//Number of XMM registers tracked by the XMM register allocator.
//Used as the size of m_xmmRegisterAllocated and as the loop bound when
//resetting or scanning that table.
enum MAX_XMM_REGISTER
{
#ifdef AMD64
//AMD64 builds track 16 registers
MAX_XMM_REGISTER = 16,
#else
//Other builds track 8 registers
MAX_XMM_REGISTER = 8,
#endif
};
enum REL_REGISTER
{
REL_REGISTER = 5,
@ -178,6 +191,9 @@ private:
static void LoadConstantInRegister64(unsigned int, uint64);
#endif
XMMREGISTER AllocateXmmRegister();
void FreeXmmRegister(XMMREGISTER);
static void LoadConditionInRegister(unsigned int, CONDITION);
static void ReduceToRegister();
@ -247,6 +263,7 @@ private:
static unsigned int m_nRegisterLookup[MAX_REGISTER];
static CX86Assembler::REGISTER m_nRegisterLookupEx[MAX_REGISTER];
static CCacheBlock* m_pBlock;
static bool m_xmmRegisterAllocated[MAX_XMM_REGISTER];
static Framework::CStream* m_stream;
static CX86Assembler::REGISTER g_nBaseRegister;

View File

@ -1,5 +1,6 @@
#include <assert.h>
#include "CodeGen_FPU.h"
#include "CodeGen_StackPatterns.h"
#include "PtrMacro.h"
using namespace CodeGen;
@ -226,76 +227,129 @@ void CFPU::Round()
//New stuff
void CCodeGen::FPU_PushSingle(size_t offset)
void CCodeGen::FP_PushSingleReg(XMMREGISTER registerId)
{
m_Assembler.FldEd(CX86Assembler::MakeIndRegOffAddress(g_nBaseRegister, static_cast<uint32>(offset)));
m_Shadow.Push(registerId);
m_Shadow.Push(FP_SINGLE_REGISTER);
}
void CCodeGen::FPU_PushWord(size_t offset)
void CCodeGen::FP_PushSingle(size_t offset)
{
m_Assembler.FildEd(CX86Assembler::MakeIndRegOffAddress(g_nBaseRegister, static_cast<uint32>(offset)));
m_Shadow.Push(static_cast<uint32>(offset));
m_Shadow.Push(FP_SINGLE_RELATIVE);
}
void CCodeGen::FPU_PullSingle(size_t offset)
void CCodeGen::FP_LoadSingleRelativeInRegister(XMMREGISTER destination, uint32 source)
{
m_Assembler.FstpEd(CX86Assembler::MakeIndRegOffAddress(g_nBaseRegister, static_cast<uint32>(offset)));
m_Assembler.MovssEd(destination,
CX86Assembler::MakeIndRegOffAddress(g_nBaseRegister, source));
}
void CCodeGen::FPU_PullWord(size_t offset)
void CCodeGen::FP_PushWord(size_t offset)
{
m_Assembler.FistpEd(CX86Assembler::MakeIndRegOffAddress(g_nBaseRegister, static_cast<uint32>(offset)));
XMMREGISTER resultRegister = AllocateXmmRegister();
m_Assembler.Cvtsi2ssEd(resultRegister,
CX86Assembler::MakeIndRegOffAddress(g_nBaseRegister, static_cast<uint32>(offset)));
FP_PushSingleReg(resultRegister);
}
void CCodeGen::FPU_PullWordTruncate(size_t offset)
void CCodeGen::FP_PullSingle(size_t offset)
{
m_Assembler.FisttpEd(CX86Assembler::MakeIndRegOffAddress(g_nBaseRegister, static_cast<uint32>(offset)));
if(FitsPattern<SingleFpSingleRegister>())
{
XMMREGISTER valueRegister = static_cast<XMMREGISTER>(GetPattern<SingleFpSingleRegister>());
m_Assembler.MovssEd(CX86Assembler::MakeIndRegOffAddress(g_nBaseRegister, static_cast<uint32>(offset)),
valueRegister);
FreeXmmRegister(valueRegister);
}
else
{
assert(0);
}
}
void CCodeGen::FPU_PushRoundingMode()
void CCodeGen::FP_PullWordTruncate(size_t offset)
{
m_Assembler.SubId(CX86Assembler::MakeRegisterAddress(CX86Assembler::rSP), 4);
m_Assembler.Fwait();
m_Assembler.FnstcwEw(CX86Assembler::MakeIndRegAddress(CX86Assembler::rSP));
if(FitsPattern<SingleFpSingleRelative>())
{
SingleFpSingleRelative::PatternValue op = GetPattern<SingleFpSingleRelative>();
unsigned int valueRegister = AllocateRegister();
m_Assembler.Cvttss2siEd(m_nRegisterLookupEx[valueRegister],
CX86Assembler::MakeIndRegOffAddress(g_nBaseRegister, op));
m_Assembler.MovGd(CX86Assembler::MakeIndRegOffAddress(g_nBaseRegister, static_cast<uint32>(offset)),
m_nRegisterLookupEx[valueRegister]);
FreeRegister(valueRegister);
}
else
{
assert(0);
}
}
void CCodeGen::FPU_PullRoundingMode()
void CCodeGen::FP_Add()
{
m_Assembler.FldcwEw(CX86Assembler::MakeIndRegAddress(CX86Assembler::rSP));
m_Assembler.AddId(CX86Assembler::MakeRegisterAddress(CX86Assembler::rSP), 4);
if(FitsPattern<DualFpSingleRelative>())
{
DualFpSingleRelative::PatternValue ops = GetPattern<DualFpSingleRelative>();
XMMREGISTER resultRegister = AllocateXmmRegister();
FP_LoadSingleRelativeInRegister(resultRegister, ops.first);
m_Assembler.AddssEd(resultRegister,
CX86Assembler::MakeIndRegOffAddress(g_nBaseRegister, ops.second));
FP_PushSingleReg(resultRegister);
}
else
{
assert(0);
}
}
void CCodeGen::FPU_SetRoundingMode(ROUNDMODE roundingMode)
void CCodeGen::FP_Sub()
{
//Load current control word
m_Assembler.SubId(CX86Assembler::MakeRegisterAddress(CX86Assembler::rSP), 4);
m_Assembler.Fwait();
m_Assembler.FnstcwEw(CX86Assembler::MakeIndRegAddress(CX86Assembler::rSP));
//Set new rounding mode
m_Assembler.AndId(CX86Assembler::MakeIndRegAddress(CX86Assembler::rSP),
0xFFFFF3FF);
m_Assembler.OrId(CX86Assembler::MakeIndRegAddress(CX86Assembler::rSP),
roundingMode << 10);
//Save control word
m_Assembler.FldcwEw(CX86Assembler::MakeIndRegAddress(CX86Assembler::rSP));
m_Assembler.AddId(CX86Assembler::MakeRegisterAddress(CX86Assembler::rSP), 4);
if(FitsPattern<DualFpSingleRelative>())
{
DualFpSingleRelative::PatternValue ops = GetPattern<DualFpSingleRelative>();
XMMREGISTER resultRegister = AllocateXmmRegister();
FP_LoadSingleRelativeInRegister(resultRegister, ops.first);
m_Assembler.SubssEd(resultRegister,
CX86Assembler::MakeIndRegOffAddress(g_nBaseRegister, ops.second));
FP_PushSingleReg(resultRegister);
}
else
{
assert(0);
}
}
void CCodeGen::FPU_Add()
void CCodeGen::FP_Mul()
{
m_Assembler.FaddpSt(1);
if(FitsPattern<DualFpSingleRelative>())
{
DualFpSingleRelative::PatternValue ops = GetPattern<DualFpSingleRelative>();
XMMREGISTER resultRegister = AllocateXmmRegister();
FP_LoadSingleRelativeInRegister(resultRegister, ops.first);
m_Assembler.MulssEd(resultRegister,
CX86Assembler::MakeIndRegOffAddress(g_nBaseRegister, ops.second));
FP_PushSingleReg(resultRegister);
}
else
{
assert(0);
}
}
void CCodeGen::FPU_Sub()
void CCodeGen::FP_Div()
{
m_Assembler.FsubpSt(1);
}
void CCodeGen::FPU_Mul()
{
m_Assembler.FmulpSt(1);
}
void CCodeGen::FPU_Div()
{
m_Assembler.FdivpSt(1);
if(FitsPattern<DualFpSingleRelative>())
{
DualFpSingleRelative::PatternValue ops = GetPattern<DualFpSingleRelative>();
XMMREGISTER resultRegister = AllocateXmmRegister();
FP_LoadSingleRelativeInRegister(resultRegister, ops.first);
m_Assembler.DivssEd(resultRegister,
CX86Assembler::MakeIndRegOffAddress(g_nBaseRegister, ops.second));
FP_PushSingleReg(resultRegister);
}
else
{
assert(0);
}
}

View File

@ -294,11 +294,15 @@ struct ZeroWithSomethingCommutative64
typedef GenericOneArgument<CCodeGen::RELATIVE> SingleRelative;
typedef GenericOneArgument<CCodeGen::REGISTER> SingleRegister;
typedef GenericOneArgument<CCodeGen::CONSTANT> SingleConstant;
typedef GenericOneArgument<CCodeGen::FP_SINGLE_REGISTER> SingleFpSingleRegister;
typedef GenericOneArgument<CCodeGen::FP_SINGLE_RELATIVE> SingleFpSingleRelative;
typedef GenericTwoArguments<CCodeGen::RELATIVE, CCodeGen::CONSTANT> RelativeConstant;
typedef GenericTwoArguments<CCodeGen::REGISTER, CCodeGen::CONSTANT> RegisterConstant;
typedef GenericTwoArguments<CCodeGen::CONSTANT, CCodeGen::RELATIVE> ConstantRelative;
typedef GenericTwoArguments<CCodeGen::CONSTANT, CCodeGen::CONSTANT> ConstantConstant;
typedef GenericTwoArguments<CCodeGen::RELATIVE, CCodeGen::RELATIVE> RelativeRelative;
typedef GenericTwoArguments<CCodeGen::FP_SINGLE_REGISTER, CCodeGen::FP_SINGLE_REGISTER> DualFpSingleRegister;
typedef GenericTwoArguments<CCodeGen::FP_SINGLE_RELATIVE, CCodeGen::FP_SINGLE_RELATIVE> DualFpSingleRelative;
typedef GenericCommutative<CCodeGen::REGISTER, CCodeGen::CONSTANT> CommutativeRegisterConstant;
typedef GenericCommutative<CCodeGen::RELATIVE, CCodeGen::CONSTANT> CommutativeRelativeConstant;
typedef GenericOneArgument64<CCodeGen::CONSTANT> SingleConstant64;

View File

@ -624,7 +624,7 @@ void CDMAC::SetRegister(uint32 nAddress, uint32 nData)
}
#ifdef _DEBUG
DisassembleSet(nAddress, nData);
// DisassembleSet(nAddress, nData);
#endif
#ifdef PROFILE

View File

@ -427,7 +427,7 @@ void CGSHandler::WriteRegisterImpl(uint8 nRegister, uint64 nData)
}
#ifdef _DEBUG
DisassembleWrite(nRegister, nData);
// DisassembleWrite(nRegister, nData);
#endif
}

View File

@ -28,6 +28,26 @@ public:
r15,
};
//Identifiers for the SSE XMM registers.
//The numeric value of each enumerator is the register's index (xMM0 == 0, and
//each following enumerator is one higher); WriteEdVdOp casts this value
//directly to REGISTER to fill the ModRM register field.
//NOTE(review): CCodeGen only hands out indices 8-15 when AMD64 is defined
//(MAX_XMM_REGISTER is 8 otherwise) - confirm no other caller passes xMM8+ in 32-bit builds.
enum XMMREGISTER
{
xMM0 = 0,
xMM1,
xMM2,
xMM3,
xMM4,
xMM5,
xMM6,
xMM7,
xMM8,
xMM9,
xMM10,
xMM11,
xMM12,
xMM13,
xMM14,
xMM15,
};
typedef std::tr1::function<void (uint8)> WriteFunctionType;
typedef std::tr1::function<void (unsigned int, uint8)> WriteAtFunctionType;
typedef std::tr1::function<size_t ()> TellFunctionType;
@ -143,6 +163,16 @@ public:
void FnstcwEw(const CAddress&);
void FldcwEw(const CAddress&);
//SSE
void MovssEd(const CAddress&, XMMREGISTER);
void MovssEd(XMMREGISTER, const CAddress&);
void AddssEd(XMMREGISTER, const CAddress&);
void SubssEd(XMMREGISTER, const CAddress&);
void MulssEd(XMMREGISTER, const CAddress&);
void DivssEd(XMMREGISTER, const CAddress&);
void Cvtsi2ssEd(XMMREGISTER, const CAddress&);
void Cvttss2siEd(REGISTER, const CAddress&);
private:
struct LABELREF
{
@ -159,6 +189,7 @@ private:
void WriteEvGvOp(uint8, bool, const CAddress&, REGISTER);
void WriteEvId(uint8, const CAddress&, uint32);
void WriteEvIq(uint8, const CAddress&, uint64);
void WriteEdVdOp(uint8, const CAddress&, XMMREGISTER);
void WriteStOp(uint8, uint8, uint8);
void CreateLabelReference(LABEL, unsigned int);

View File

@ -69,6 +69,62 @@ void CX86Assembler::FldcwEw(const CAddress& address)
WriteEvOp(0xD9, 0x05, false, address);
}
void CX86Assembler::MovssEd(const CAddress& address, XMMREGISTER registerId)
{
WriteByte(0xF3);
WriteByte(0x0F);
WriteEdVdOp(0x11, address, registerId);
}
//movss xmm, [address] - loads a single-precision value from memory into registerId.
//Encoding: F3 [REX] 0F 10 /r
//The bytes are emitted here rather than through WriteEdVdOp so that any REX
//byte produced by WriteRexByte lands between the mandatory F3 prefix and the
//0x0F escape byte; emitting 0x0F first would put REX after the escape, which
//is not a valid encoding in 64-bit mode.
void CX86Assembler::MovssEd(XMMREGISTER registerId, const CAddress& address)
{
    REGISTER regField = static_cast<REGISTER>(registerId);

    //Mandatory prefix selecting the scalar single-precision form
    WriteByte(0xF3);
    //Invoked exactly as WriteEdVdOp invokes it
    WriteRexByte(false, address, regField);
    //Two-byte opcode: escape + load form of movss
    WriteByte(0x0F);
    WriteByte(0x10);

    //ModRM (and whatever else CAddress::Write emits) with the XMM register in the reg field
    CAddress encodedAddress(address);
    encodedAddress.ModRm.nFnReg = regField;
    encodedAddress.Write(m_WriteFunction);
}
//addss xmm, [address] - adds the single-precision value at address to registerId.
//Encoding: F3 [REX] 0F 58 /r
//The bytes are emitted here rather than through WriteEdVdOp so that any REX
//byte produced by WriteRexByte lands between the mandatory F3 prefix and the
//0x0F escape byte; emitting 0x0F first would put REX after the escape, which
//is not a valid encoding in 64-bit mode.
void CX86Assembler::AddssEd(XMMREGISTER registerId, const CAddress& address)
{
    REGISTER regField = static_cast<REGISTER>(registerId);

    //Mandatory prefix selecting the scalar single-precision form
    WriteByte(0xF3);
    //Invoked exactly as WriteEdVdOp invokes it
    WriteRexByte(false, address, regField);
    //Two-byte opcode: escape + addss
    WriteByte(0x0F);
    WriteByte(0x58);

    //ModRM (and whatever else CAddress::Write emits) with the XMM register in the reg field
    CAddress encodedAddress(address);
    encodedAddress.ModRm.nFnReg = regField;
    encodedAddress.Write(m_WriteFunction);
}
//subss xmm, [address] - subtracts the single-precision value at address from registerId.
//Encoding: F3 [REX] 0F 5C /r
//The bytes are emitted here rather than through WriteEdVdOp so that any REX
//byte produced by WriteRexByte lands between the mandatory F3 prefix and the
//0x0F escape byte; emitting 0x0F first would put REX after the escape, which
//is not a valid encoding in 64-bit mode.
void CX86Assembler::SubssEd(XMMREGISTER registerId, const CAddress& address)
{
    REGISTER regField = static_cast<REGISTER>(registerId);

    //Mandatory prefix selecting the scalar single-precision form
    WriteByte(0xF3);
    //Invoked exactly as WriteEdVdOp invokes it
    WriteRexByte(false, address, regField);
    //Two-byte opcode: escape + subss
    WriteByte(0x0F);
    WriteByte(0x5C);

    //ModRM (and whatever else CAddress::Write emits) with the XMM register in the reg field
    CAddress encodedAddress(address);
    encodedAddress.ModRm.nFnReg = regField;
    encodedAddress.Write(m_WriteFunction);
}
//mulss xmm, [address] - multiplies registerId by the single-precision value at address.
//Encoding: F3 [REX] 0F 59 /r
//The bytes are emitted here rather than through WriteEdVdOp so that any REX
//byte produced by WriteRexByte lands between the mandatory F3 prefix and the
//0x0F escape byte; emitting 0x0F first would put REX after the escape, which
//is not a valid encoding in 64-bit mode.
void CX86Assembler::MulssEd(XMMREGISTER registerId, const CAddress& address)
{
    REGISTER regField = static_cast<REGISTER>(registerId);

    //Mandatory prefix selecting the scalar single-precision form
    WriteByte(0xF3);
    //Invoked exactly as WriteEdVdOp invokes it
    WriteRexByte(false, address, regField);
    //Two-byte opcode: escape + mulss
    WriteByte(0x0F);
    WriteByte(0x59);

    //ModRM (and whatever else CAddress::Write emits) with the XMM register in the reg field
    CAddress encodedAddress(address);
    encodedAddress.ModRm.nFnReg = regField;
    encodedAddress.Write(m_WriteFunction);
}
//divss xmm, [address] - divides registerId by the single-precision value at address.
//Encoding: F3 [REX] 0F 5E /r
//The bytes are emitted here rather than through WriteEdVdOp so that any REX
//byte produced by WriteRexByte lands between the mandatory F3 prefix and the
//0x0F escape byte; emitting 0x0F first would put REX after the escape, which
//is not a valid encoding in 64-bit mode.
void CX86Assembler::DivssEd(XMMREGISTER registerId, const CAddress& address)
{
    REGISTER regField = static_cast<REGISTER>(registerId);

    //Mandatory prefix selecting the scalar single-precision form
    WriteByte(0xF3);
    //Invoked exactly as WriteEdVdOp invokes it
    WriteRexByte(false, address, regField);
    //Two-byte opcode: escape + divss
    WriteByte(0x0F);
    WriteByte(0x5E);

    //ModRM (and whatever else CAddress::Write emits) with the XMM register in the reg field
    CAddress encodedAddress(address);
    encodedAddress.ModRm.nFnReg = regField;
    encodedAddress.Write(m_WriteFunction);
}
//cvtsi2ss xmm, [address] - converts the 32-bit integer at address to single
//precision and places it in registerId.
//Encoding: F3 [REX] 0F 2A /r
//The bytes are emitted here rather than through WriteEdVdOp so that any REX
//byte produced by WriteRexByte lands between the mandatory F3 prefix and the
//0x0F escape byte; emitting 0x0F first would put REX after the escape, which
//is not a valid encoding in 64-bit mode.
void CX86Assembler::Cvtsi2ssEd(XMMREGISTER registerId, const CAddress& address)
{
    REGISTER regField = static_cast<REGISTER>(registerId);

    //Mandatory prefix selecting the scalar single-precision form
    WriteByte(0xF3);
    //Invoked exactly as WriteEdVdOp invokes it (first argument false: no 64-bit operand size)
    WriteRexByte(false, address, regField);
    //Two-byte opcode: escape + cvtsi2ss
    WriteByte(0x0F);
    WriteByte(0x2A);

    //ModRM (and whatever else CAddress::Write emits) with the XMM register in the reg field
    CAddress encodedAddress(address);
    encodedAddress.ModRm.nFnReg = regField;
    encodedAddress.Write(m_WriteFunction);
}
//cvttss2si reg, [address] - emits the bytes F3 0F and then hands opcode 0x2C,
//the address and the destination general-purpose register to WriteEvGvOp.
//NOTE(review): WriteEvGvOp is defined elsewhere; if it emits a REX byte before
//the opcode (as WriteEdVdOp does), that byte would follow the 0x0F escape
//written here instead of preceding it - confirm for 64-bit builds.
void CX86Assembler::Cvttss2siEd(REGISTER registerId, const CAddress& address)
{
//Mandatory prefix, then the two-byte opcode escape
WriteByte(0xF3);
WriteByte(0x0F);
//Second argument is passed as false here, matching the other non-64-bit callers
WriteEvGvOp(0x2C, false, address, registerId);
}
void CX86Assembler::WriteStOp(uint8 opcode, uint8 subOpcode, uint8 stackId)
{
CAddress address;
@ -78,3 +134,13 @@ void CX86Assembler::WriteStOp(uint8 opcode, uint8 subOpcode, uint8 stackId)
WriteByte(opcode);
WriteByte(address.ModRm.nByte);
}
//Emits the tail of an SSE instruction whose operands are a memory/ModRM
//address and an XMM register: a call to WriteRexByte, the opcode byte, then
//the address with the XMM register index placed in the ModRM register field.
//  opcode        - final opcode byte of the instruction
//  address       - addressing form to encode (copied, the argument is not modified)
//  xmmRegisterId - XMM register placed in the ModRM register field
//NOTE(review): WriteRexByte is called here, i.e. after anything the caller has
//already written. A caller that emits a mandatory prefix and the 0x0F escape
//before calling this helper would end up with any REX byte after the escape,
//which is not a valid position in 64-bit mode - confirm the call sites.
void CX86Assembler::WriteEdVdOp(uint8 opcode, const CAddress& address, XMMREGISTER xmmRegisterId)
{
//The XMM index is reused as a general REGISTER value for encoding purposes
REGISTER registerId = static_cast<REGISTER>(xmmRegisterId);
WriteRexByte(false, address, registerId);
//Work on a copy so the register field can be filled in without touching the caller's address
CAddress NewAddress(address);
NewAddress.ModRm.nFnReg = registerId;
WriteByte(opcode);
NewAddress.Write(m_WriteFunction);
}