mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 21:39:52 +00:00
samplerjit: Add an alternate profiling method.
This is more useful to group common operations together for profiling.
This commit is contained in:
parent
3aedea89eb
commit
28cfbe0e5a
@ -250,18 +250,35 @@ std::string SamplerJitCache::DescribeSamplerID(const SamplerID &id) {
|
||||
return name;
|
||||
}
|
||||
|
||||
std::string SamplerJitCache::DescribeCodePtr(const u8 *ptr) {
|
||||
ptrdiff_t dist = 0x7FFFFFFF;
|
||||
SamplerID found{};
|
||||
for (const auto &it : addresses_) {
|
||||
ptrdiff_t it_dist = ptr - it.second;
|
||||
if (it_dist >= 0 && it_dist < dist) {
|
||||
found = it.first;
|
||||
dist = it_dist;
|
||||
}
|
||||
}
|
||||
void SamplerJitCache::Describe(const std::string &message) {
|
||||
descriptions_[GetCodePointer()] = message;
|
||||
}
|
||||
|
||||
return DescribeSamplerID(found);
|
||||
std::string SamplerJitCache::DescribeCodePtr(const u8 *ptr) {
|
||||
constexpr bool USE_IDS = false;
|
||||
ptrdiff_t dist = 0x7FFFFFFF;
|
||||
if (USE_IDS) {
|
||||
SamplerID found{};
|
||||
for (const auto &it : addresses_) {
|
||||
ptrdiff_t it_dist = ptr - it.second;
|
||||
if (it_dist >= 0 && it_dist < dist) {
|
||||
found = it.first;
|
||||
dist = it_dist;
|
||||
}
|
||||
}
|
||||
|
||||
return DescribeSamplerID(found);
|
||||
} else {
|
||||
std::string found;
|
||||
for (const auto &it : descriptions_) {
|
||||
ptrdiff_t it_dist = ptr - it.first;
|
||||
if (it_dist >= 0 && it_dist < dist) {
|
||||
found = it.second;
|
||||
dist = it_dist;
|
||||
}
|
||||
}
|
||||
return found;
|
||||
}
|
||||
}
|
||||
|
||||
NearestFunc SamplerJitCache::GetNearest(const SamplerID &id) {
|
||||
|
@ -73,6 +73,8 @@ private:
|
||||
NearestFunc Compile(const SamplerID &id);
|
||||
LinearFunc CompileLinear(const SamplerID &id);
|
||||
|
||||
void Describe(const std::string &message);
|
||||
|
||||
Rasterizer::RegCache::Reg GetZeroVec();
|
||||
Rasterizer::RegCache::Reg GetGState();
|
||||
|
||||
@ -116,6 +118,7 @@ private:
|
||||
|
||||
std::unordered_map<SamplerID, NearestFunc> cache_;
|
||||
std::unordered_map<SamplerID, const u8 *> addresses_;
|
||||
std::unordered_map<const u8 *, std::string> descriptions_;
|
||||
Rasterizer::RegCache regCache_;
|
||||
};
|
||||
|
||||
|
@ -45,6 +45,7 @@ NearestFunc SamplerJitCache::Compile(const SamplerID &id) {
|
||||
regCache_.ChangeReg(XMM0, RegCache::VEC_RESULT);
|
||||
|
||||
BeginWrite();
|
||||
Describe("Init");
|
||||
const u8 *start = AlignCode16();
|
||||
|
||||
// Early exit on !srcPtr.
|
||||
@ -87,6 +88,7 @@ NearestFunc SamplerJitCache::Compile(const SamplerID &id) {
|
||||
}
|
||||
regCache_.Unlock(vecResultReg, RegCache::VEC_RESULT);
|
||||
|
||||
Describe("Init");
|
||||
if (id.hasInvalidPtr) {
|
||||
SetJumpTarget(zeroSrc);
|
||||
}
|
||||
@ -105,6 +107,7 @@ alignas(16) static const float ones[4] = { 1.0f, 1.0f, 1.0f, 1.0f, };
|
||||
LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
|
||||
_assert_msg_(id.linear, "Linear should be set on sampler id");
|
||||
BeginWrite();
|
||||
Describe("Init");
|
||||
|
||||
// Set the stackArgPos_ so we can use it in the nearest part too.
|
||||
#if PPSSPP_PLATFORM(WINDOWS)
|
||||
@ -155,6 +158,7 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Describe("Init");
|
||||
RET();
|
||||
|
||||
regCache_.ForceRelease(RegCache::VEC_ARG_U);
|
||||
@ -222,6 +226,7 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
|
||||
|
||||
// Now the actual linear func, which is exposed externally.
|
||||
const u8 *start = AlignCode16();
|
||||
Describe("Init");
|
||||
|
||||
regCache_.SetupABI({
|
||||
RegCache::VEC_ARG_S,
|
||||
@ -321,6 +326,7 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
|
||||
// Early exit on !srcPtr (either one.)
|
||||
FixupBranch zeroSrc;
|
||||
if (id.hasInvalidPtr) {
|
||||
Describe("NullCheck");
|
||||
X64Reg srcReg = regCache_.Find(RegCache::GEN_ARG_TEXPTR);
|
||||
|
||||
if (id.hasAnyMips) {
|
||||
@ -349,6 +355,7 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
|
||||
regCache_.Unlock(vReg, vPurpose);
|
||||
};
|
||||
|
||||
Describe("DataOffsets");
|
||||
prepareDataOffsets(RegCache::VEC_ARG_U, RegCache::VEC_ARG_V);
|
||||
if (id.hasAnyMips)
|
||||
prepareDataOffsets(RegCache::VEC_U1, RegCache::VEC_V1);
|
||||
@ -418,12 +425,14 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
|
||||
regCache_.Unlock(vecResultReg, level1 ? RegCache::VEC_RESULT1 : RegCache::VEC_RESULT);
|
||||
};
|
||||
|
||||
Describe("Calls");
|
||||
doNearestCall(0, false);
|
||||
doNearestCall(4, false);
|
||||
doNearestCall(8, false);
|
||||
doNearestCall(12, false);
|
||||
|
||||
if (id.hasAnyMips) {
|
||||
Describe("MipsCalls");
|
||||
if (regCache_.Has(RegCache::GEN_ARG_LEVELFRAC)) {
|
||||
X64Reg levelFracReg = regCache_.Find(RegCache::GEN_ARG_LEVELFRAC);
|
||||
CMP(8, R(levelFracReg), Imm8(0));
|
||||
@ -465,6 +474,7 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
|
||||
|
||||
success = success && Jit_BlendQuad(id, false);
|
||||
if (id.hasAnyMips) {
|
||||
Describe("BlendMips");
|
||||
if (!regCache_.Has(RegCache::GEN_ARG_LEVELFRAC)) {
|
||||
X64Reg levelFracReg = regCache_.Alloc(RegCache::GEN_ARG_LEVELFRAC);
|
||||
MOVZX(32, 8, levelFracReg, MDisp(RSP, stackArgPos_ + 24));
|
||||
@ -478,6 +488,7 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
|
||||
|
||||
success = success && Jit_BlendQuad(id, true);
|
||||
|
||||
Describe("BlendMips");
|
||||
// First, broadcast the levelFrac value into an XMM.
|
||||
X64Reg fracReg = regCache_.Alloc(RegCache::VEC_TEMP0);
|
||||
MOVD_xmm(fracReg, R(levelFracReg));
|
||||
@ -514,6 +525,7 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
|
||||
success = success && Jit_ApplyTextureFunc(id);
|
||||
|
||||
// Last of all, convert to 32-bit channels.
|
||||
Describe("Init");
|
||||
if (cpu_info.bSSE4_1) {
|
||||
PMOVZXWD(XMM0, R(XMM0));
|
||||
} else {
|
||||
@ -574,6 +586,7 @@ RegCache::Reg SamplerJitCache::GetGState() {
|
||||
}
|
||||
|
||||
bool SamplerJitCache::Jit_BlendQuad(const SamplerID &id, bool level1) {
|
||||
Describe(level1 ? "BlendQuadMips" : "BlendQuad");
|
||||
// First put the top RRRRRRRR LLLLLLLL into topReg, bottom into bottomReg.
|
||||
// Start with XXXX XXXX RRRR LLLL, and then expand 8 bits to 16 bits.
|
||||
X64Reg topReg = regCache_.Alloc(RegCache::VEC_TEMP0);
|
||||
@ -704,6 +717,7 @@ bool SamplerJitCache::Jit_ApplyTextureFunc(const SamplerID &id) {
|
||||
// Note: color is in DWORDs, but result is in WORDs.
|
||||
switch (id.TexFunc()) {
|
||||
case GE_TEXFUNC_MODULATE:
|
||||
Describe("Modulate");
|
||||
PACKSSDW(primColorReg, R(primColorReg));
|
||||
MOVDQA(tempReg, M(constOnes16_));
|
||||
PADDW(tempReg, R(primColorReg));
|
||||
@ -726,6 +740,7 @@ bool SamplerJitCache::Jit_ApplyTextureFunc(const SamplerID &id) {
|
||||
break;
|
||||
|
||||
case GE_TEXFUNC_DECAL:
|
||||
Describe("Decal");
|
||||
PACKSSDW(primColorReg, R(primColorReg));
|
||||
if (id.useTextureAlpha) {
|
||||
// Get alpha into the tempReg.
|
||||
@ -756,6 +771,7 @@ bool SamplerJitCache::Jit_ApplyTextureFunc(const SamplerID &id) {
|
||||
|
||||
case GE_TEXFUNC_BLEND:
|
||||
{
|
||||
Describe("EnvBlend");
|
||||
PACKSSDW(primColorReg, R(primColorReg));
|
||||
|
||||
// Start out with the prim color side. Materialize a 255 to inverse resultReg and round.
|
||||
@ -809,6 +825,7 @@ bool SamplerJitCache::Jit_ApplyTextureFunc(const SamplerID &id) {
|
||||
}
|
||||
|
||||
case GE_TEXFUNC_REPLACE:
|
||||
Describe("Replace");
|
||||
if (id.useColorDoubling && id.useTextureAlpha) {
|
||||
// We can abuse primColorReg as a temp.
|
||||
MOVDQA(primColorReg, R(resultReg));
|
||||
@ -832,6 +849,7 @@ bool SamplerJitCache::Jit_ApplyTextureFunc(const SamplerID &id) {
|
||||
case GE_TEXFUNC_UNKNOWN1:
|
||||
case GE_TEXFUNC_UNKNOWN2:
|
||||
case GE_TEXFUNC_UNKNOWN3:
|
||||
Describe("Add");
|
||||
PACKSSDW(primColorReg, R(primColorReg));
|
||||
if (id.useTextureAlpha) {
|
||||
MOVDQA(tempReg, M(constOnes16_));
|
||||
@ -946,6 +964,7 @@ bool SamplerJitCache::Jit_ReadTextureFormat(const SamplerID &id) {
|
||||
|
||||
// Note: afterward, srcReg points at the block, and uReg/vReg have offset into block.
|
||||
bool SamplerJitCache::Jit_GetDXT1Color(const SamplerID &id, int blockSize, int alpha) {
|
||||
Describe("DXT1");
|
||||
// Like Jit_GetTexData, this gets the color into resultReg.
|
||||
// Note: color low bits are red, high bits are blue.
|
||||
_assert_msg_(blockSize == 8 || blockSize == 16, "Invalid DXT block size");
|
||||
@ -1174,6 +1193,7 @@ bool SamplerJitCache::Jit_ApplyDXTAlpha(const SamplerID &id) {
|
||||
|
||||
bool success = false;
|
||||
if (fmt == GE_TFMT_DXT3) {
|
||||
Describe("DXT3A");
|
||||
X64Reg srcReg = regCache_.Find(RegCache::GEN_ARG_TEXPTR);
|
||||
X64Reg uReg = regCache_.Find(RegCache::GEN_ARG_U);
|
||||
X64Reg vReg = regCache_.Find(RegCache::GEN_ARG_V);
|
||||
@ -1203,6 +1223,7 @@ bool SamplerJitCache::Jit_ApplyDXTAlpha(const SamplerID &id) {
|
||||
regCache_.Unlock(vReg, RegCache::GEN_ARG_V);
|
||||
regCache_.ForceRelease(RegCache::GEN_ARG_V);
|
||||
} else if (fmt == GE_TFMT_DXT5) {
|
||||
Describe("DXT5A");
|
||||
X64Reg uReg = regCache_.Find(RegCache::GEN_ARG_U);
|
||||
X64Reg vReg = regCache_.Find(RegCache::GEN_ARG_V);
|
||||
|
||||
@ -1333,6 +1354,7 @@ bool SamplerJitCache::Jit_GetTexData(const SamplerID &id, int bitsPerTexel) {
|
||||
return Jit_GetTexDataSwizzled(id, bitsPerTexel);
|
||||
}
|
||||
if (id.linear) {
|
||||
Describe("TexDataL");
|
||||
// We can throw away bufw immediately. Maybe even earlier?
|
||||
regCache_.ForceRelease(RegCache::GEN_ARG_BUFW);
|
||||
|
||||
@ -1380,6 +1402,7 @@ bool SamplerJitCache::Jit_GetTexData(const SamplerID &id, int bitsPerTexel) {
|
||||
return success;
|
||||
}
|
||||
|
||||
Describe("TexData");
|
||||
X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP1);
|
||||
X64Reg temp2Reg = regCache_.Alloc(RegCache::GEN_TEMP2);
|
||||
|
||||
@ -1463,6 +1486,7 @@ bool SamplerJitCache::Jit_GetTexData(const SamplerID &id, int bitsPerTexel) {
|
||||
|
||||
bool SamplerJitCache::Jit_GetTexDataSwizzled4(const SamplerID &id) {
|
||||
if (id.linear) {
|
||||
Describe("TexDataS4L");
|
||||
// We can throw away bufw immediately. Maybe even earlier?
|
||||
regCache_.ForceRelease(RegCache::GEN_ARG_BUFW);
|
||||
|
||||
@ -1493,6 +1517,7 @@ bool SamplerJitCache::Jit_GetTexDataSwizzled4(const SamplerID &id) {
|
||||
return true;
|
||||
}
|
||||
|
||||
Describe("TexDataS4");
|
||||
X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP1);
|
||||
X64Reg temp2Reg = regCache_.Alloc(RegCache::GEN_TEMP2);
|
||||
X64Reg uReg = regCache_.Find(RegCache::GEN_ARG_U);
|
||||
@ -1557,6 +1582,7 @@ bool SamplerJitCache::Jit_GetTexDataSwizzled(const SamplerID &id, int bitsPerTex
|
||||
|
||||
bool success = true;
|
||||
if (id.linear) {
|
||||
Describe("TexDataSL");
|
||||
// We can throw away bufw immediately. Maybe even earlier?
|
||||
regCache_.ForceRelease(RegCache::GEN_ARG_BUFW);
|
||||
// We've also baked uReg into vReg.
|
||||
@ -1591,6 +1617,7 @@ bool SamplerJitCache::Jit_GetTexDataSwizzled(const SamplerID &id, int bitsPerTex
|
||||
return success;
|
||||
}
|
||||
|
||||
Describe("TexDataS");
|
||||
X64Reg resultReg = regCache_.Find(RegCache::GEN_RESULT);
|
||||
X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP1);
|
||||
X64Reg temp2Reg = regCache_.Alloc(RegCache::GEN_TEMP2);
|
||||
@ -1672,6 +1699,7 @@ bool SamplerJitCache::Jit_GetTexDataSwizzled(const SamplerID &id, int bitsPerTex
|
||||
}
|
||||
|
||||
bool SamplerJitCache::Jit_GetTexelCoordsQuad(const SamplerID &id) {
|
||||
Describe("TexelQuad");
|
||||
// RCX ought to be free, it was either bufw or never used.
|
||||
bool success = regCache_.ChangeReg(RCX, RegCache::GEN_SHIFTVAL);
|
||||
_assert_msg_(success, "Should have RCX free");
|
||||
@ -1972,6 +2000,7 @@ bool SamplerJitCache::Jit_PrepareDataOffsets(const SamplerID &id, RegCache::Reg
|
||||
}
|
||||
|
||||
bool SamplerJitCache::Jit_PrepareDataDirectOffsets(const SamplerID &id, RegCache::Reg uReg, RegCache::Reg vReg, int bitsPerTexel) {
|
||||
Describe("DataOff");
|
||||
X64Reg bufwVecReg = regCache_.Alloc(RegCache::VEC_TEMP0);
|
||||
if (!id.useStandardBufw || id.hasAnyMips) {
|
||||
// Spread bufw into each lane.
|
||||
@ -2040,6 +2069,7 @@ bool SamplerJitCache::Jit_PrepareDataDirectOffsets(const SamplerID &id, RegCache
|
||||
}
|
||||
|
||||
bool SamplerJitCache::Jit_PrepareDataSwizzledOffsets(const SamplerID &id, RegCache::Reg uReg, RegCache::Reg vReg, int bitsPerTexel) {
|
||||
Describe("DataOffS");
|
||||
// See Jit_GetTexDataSwizzled() for usage of this offset.
|
||||
|
||||
X64Reg bufwVecReg = regCache_.Alloc(RegCache::VEC_TEMP0);
|
||||
@ -2122,6 +2152,7 @@ bool SamplerJitCache::Jit_PrepareDataSwizzledOffsets(const SamplerID &id, RegCac
|
||||
}
|
||||
|
||||
bool SamplerJitCache::Jit_Decode5650() {
|
||||
Describe("5650");
|
||||
X64Reg resultReg = regCache_.Find(RegCache::GEN_RESULT);
|
||||
X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP1);
|
||||
X64Reg temp2Reg = regCache_.Alloc(RegCache::GEN_TEMP2);
|
||||
@ -2163,6 +2194,7 @@ bool SamplerJitCache::Jit_Decode5650() {
|
||||
}
|
||||
|
||||
bool SamplerJitCache::Jit_Decode5551() {
|
||||
Describe("5551");
|
||||
X64Reg resultReg = regCache_.Find(RegCache::GEN_RESULT);
|
||||
X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP1);
|
||||
X64Reg temp2Reg = regCache_.Alloc(RegCache::GEN_TEMP2);
|
||||
@ -2205,6 +2237,7 @@ bool SamplerJitCache::Jit_Decode5551() {
|
||||
alignas(16) static const u32 color4444mask[4] = { 0xf00ff00f, 0xf00ff00f, 0xf00ff00f, 0xf00ff00f, };
|
||||
|
||||
bool SamplerJitCache::Jit_Decode4444() {
|
||||
Describe("4444");
|
||||
X64Reg resultReg = regCache_.Find(RegCache::GEN_RESULT);
|
||||
X64Reg vecTemp1Reg = regCache_.Alloc(RegCache::VEC_TEMP1);
|
||||
X64Reg vecTemp2Reg = regCache_.Alloc(RegCache::VEC_TEMP2);
|
||||
@ -2236,6 +2269,7 @@ bool SamplerJitCache::Jit_Decode4444() {
|
||||
}
|
||||
|
||||
bool SamplerJitCache::Jit_TransformClutIndex(const SamplerID &id, int bitsPerIndex) {
|
||||
Describe("TrCLUT");
|
||||
GEPaletteFormat fmt = id.ClutFmt();
|
||||
if (!id.hasClutShift && !id.hasClutMask && !id.hasClutOffset) {
|
||||
// This is simple - just mask if necessary.
|
||||
@ -2297,6 +2331,7 @@ bool SamplerJitCache::Jit_TransformClutIndex(const SamplerID &id, int bitsPerInd
|
||||
}
|
||||
|
||||
bool SamplerJitCache::Jit_ReadClutColor(const SamplerID &id) {
|
||||
Describe("ReadCLUT");
|
||||
X64Reg resultReg = regCache_.Find(RegCache::GEN_RESULT);
|
||||
|
||||
if (!id.useSharedClut) {
|
||||
|
Loading…
Reference in New Issue
Block a user