Merge pull request #15180 from unknownbrackets/softjit-args

Refactor software renderer jit cache to be shared
This commit is contained in:
Henrik Rydgård 2021-11-28 22:12:29 +01:00 committed by GitHub
commit 2a9300698e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 800 additions and 533 deletions

View File

@ -1595,6 +1595,8 @@ set(GPU_SOURCES
GPU/Software/Rasterizer.h
GPU/Software/RasterizerRectangle.cpp
GPU/Software/RasterizerRectangle.h
GPU/Software/RasterizerRegCache.cpp
GPU/Software/RasterizerRegCache.h
GPU/Software/Sampler.cpp
GPU/Software/Sampler.h
GPU/Software/SoftGpu.cpp

View File

@ -458,6 +458,7 @@
<ClInclude Include="Software\FuncId.h" />
<ClInclude Include="Software\Rasterizer.h" />
<ClInclude Include="Software\RasterizerRectangle.h" />
<ClInclude Include="Software\RasterizerRegCache.h" />
<ClInclude Include="Software\Sampler.h" />
<ClInclude Include="Software\SoftGpu.h" />
<ClInclude Include="Software\TransformUnit.h" />
@ -636,6 +637,7 @@
<ClCompile Include="Software\FuncId.cpp" />
<ClCompile Include="Software\Rasterizer.cpp" />
<ClCompile Include="Software\RasterizerRectangle.cpp" />
<ClCompile Include="Software\RasterizerRegCache.cpp" />
<ClCompile Include="Software\Sampler.cpp" />
<ClCompile Include="Software\SamplerX86.cpp" />
<ClCompile Include="Software\SoftGpu.cpp" />

View File

@ -270,6 +270,9 @@
<ClInclude Include="Software\DrawPixel.h">
<Filter>Software</Filter>
</ClInclude>
<ClInclude Include="Software\RasterizerRegCache.h">
<Filter>Software</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Math3D.cpp">
@ -545,5 +548,8 @@
<ClCompile Include="Software\DrawPixelX86.cpp">
<Filter>Software</Filter>
</ClCompile>
<ClCompile Include="Software\RasterizerRegCache.cpp">
<Filter>Software</Filter>
</ClCompile>
</ItemGroup>
</Project>

View File

@ -64,6 +64,9 @@ public:
#if defined(_M_SSE)
__m128i ivec;
__m128 vec;
#elif PPSSPP_ARCH(ARM64)
int32x4_t ivec;
float32x4_t vec;
#endif
};
@ -76,6 +79,11 @@ public:
#if defined(_M_SSE)
Vec2(const __m128 &_vec) : vec(_vec) {}
Vec2(const __m128i &_ivec) : ivec(_ivec) {}
#elif PPSSPP_ARCH(ARM64)
Vec2(const float32x4_t &_vec) : vec(_vec) {}
#if !defined(_MSC_VER)
Vec2(const int32x4_t &_ivec) : ivec(_ivec) {}
#endif
#endif
template<typename T2>
@ -204,6 +212,9 @@ public:
#if defined(_M_SSE)
__m128i ivec;
__m128 vec;
#elif PPSSPP_ARCH(ARM64)
int32x4_t ivec;
float32x4_t vec;
#endif
};
@ -220,6 +231,14 @@ public:
Vec3(const Vec3Packed<T> &_xyz) {
vec = _mm_loadu_ps(_xyz.AsArray());
}
#elif PPSSPP_ARCH(ARM64)
Vec3(const float32x4_t &_vec) : vec(_vec) {}
#if !defined(_MSC_VER)
Vec3(const int32x4_t &_ivec) : ivec(_ivec) {}
#endif
Vec3(const Vec3Packed<T> &_xyz) {
vec = vld1q_f32(_xyz.AsArray());
}
#else
Vec3(const Vec3Packed<T> &_xyz) : x(_xyz.x), y(_xyz.y), z(_xyz.z) {}
#endif
@ -552,6 +571,9 @@ public:
#if defined(_M_SSE)
__m128i ivec;
__m128 vec;
#elif PPSSPP_ARCH(ARM64)
int32x4_t ivec;
float32x4_t vec;
#endif
};
@ -566,6 +588,11 @@ public:
#if defined(_M_SSE)
Vec4(const __m128 &_vec) : vec(_vec) {}
Vec4(const __m128i &_ivec) : ivec(_ivec) {}
#elif PPSSPP_ARCH(ARM64)
Vec4(const float32x4_t &_vec) : vec(_vec) {}
#if !defined(_MSC_VER)
Vec4(const int32x4_t &_ivec) : ivec(_ivec) {}
#endif
#endif
template<typename T2>

View File

@ -378,7 +378,7 @@ static inline u32 ApplyLogicOp(GELogicOp op, u32 old_color, u32 new_color) {
}
template <bool clearMode, GEBufferFormat fbFormat>
void SOFTPIXEL_CALL DrawSinglePixel(int x, int y, int z, int fog, SOFTPIXEL_VEC4I color_in, const PixelFuncID &pixelID) {
void SOFTRAST_CALL DrawSinglePixel(int x, int y, int z, int fog, Vec4IntArg color_in, const PixelFuncID &pixelID) {
Vec4<int> prim_color = Vec4<int>(color_in).Clamp(0, 255);
// Depth range test - applied in clear mode, if not through mode.
if (pixelID.applyDepthRange)
@ -635,142 +635,4 @@ void ComputePixelBlendState(PixelBlendState &state, const PixelFuncID &id) {
}
}
void PixelRegCache::Reset() {
regs.clear();
}
void PixelRegCache::Release(PixelRegCache::Reg r, PixelRegCache::Type t, PixelRegCache::Purpose p) {
RegStatus *status = FindReg(r, t);
if (status) {
_assert_msg_(status->locked > 0, "softjit Release() reg that isn't locked");
_assert_msg_(!status->forceLocked, "softjit Release() reg that is force locked");
status->purpose = p;
status->locked--;
return;
}
RegStatus newStatus;
newStatus.reg = r;
newStatus.purpose = p;
newStatus.type = t;
regs.push_back(newStatus);
}
void PixelRegCache::Unlock(PixelRegCache::Reg r, PixelRegCache::Type t) {
RegStatus *status = FindReg(r, t);
if (status) {
_assert_msg_(status->locked > 0, "softjit Unlock() reg that isn't locked");
status->locked--;
return;
}
_assert_msg_(false, "softjit Unlock() reg that isn't there");
}
bool PixelRegCache::Has(PixelRegCache::Purpose p, PixelRegCache::Type t) {
for (auto &reg : regs) {
if (reg.purpose == p && reg.type == t) {
return true;
}
}
return false;
}
PixelRegCache::Reg PixelRegCache::Find(PixelRegCache::Purpose p, PixelRegCache::Type t) {
for (auto &reg : regs) {
if (reg.purpose == p && reg.type == t) {
_assert_msg_(reg.locked <= 255, "softjit Find() reg has lots of locks");
reg.locked++;
return reg.reg;
}
}
_assert_msg_(false, "softjit Find() reg that isn't there (%d)", p);
return Reg(-1);
}
PixelRegCache::Reg PixelRegCache::Alloc(PixelRegCache::Purpose p, PixelRegCache::Type t) {
_assert_msg_(!Has(p, t), "softjit Alloc() reg duplicate");
RegStatus *best = nullptr;
for (auto &reg : regs) {
if (reg.locked != 0 || reg.forceLocked || reg.type != t)
continue;
if (best == nullptr)
best = &reg;
// Prefer a free/purposeless reg.
if (reg.purpose == INVALID || reg.purpose >= TEMP0) {
best = &reg;
break;
}
// But also prefer a lower priority reg.
if (reg.purpose < best->purpose)
best = &reg;
}
if (best) {
best->locked = 1;
best->purpose = p;
return best->reg;
}
_assert_msg_(false, "softjit Alloc() reg with none free (%d)", p);
return Reg();
}
void PixelRegCache::ForceLock(PixelRegCache::Purpose p, PixelRegCache::Type t, bool state) {
for (auto &reg : regs) {
if (reg.purpose == p && reg.type == t) {
reg.forceLocked = state;
return;
}
}
_assert_msg_(false, "softjit ForceLock() reg that isn't there");
}
void PixelRegCache::GrabReg(PixelRegCache::Reg r, PixelRegCache::Purpose p, PixelRegCache::Type t, bool &needsSwap, PixelRegCache::Reg swapReg) {
for (auto &reg : regs) {
if (reg.reg != r || reg.type != t)
continue;
// Easy version, it's free.
if (reg.locked == 0 && !reg.forceLocked) {
needsSwap = false;
reg.purpose = p;
reg.locked = 1;
return;
}
// Okay, we need to swap. Find that reg.
needsSwap = true;
RegStatus *swap = FindReg(swapReg, t);
if (swap) {
swap->purpose = reg.purpose;
swap->forceLocked = reg.forceLocked;
swap->locked = reg.locked;
} else {
RegStatus newStatus = reg;
newStatus.reg = swapReg;
regs.push_back(newStatus);
}
reg.purpose = p;
reg.locked = 1;
reg.forceLocked = false;
return;
}
_assert_msg_(false, "softjit GrabReg() reg that isn't there");
}
PixelRegCache::RegStatus *PixelRegCache::FindReg(PixelRegCache::Reg r, PixelRegCache::Type t) {
for (auto &reg : regs) {
if (reg.reg == r && reg.type == t) {
return &reg;
}
}
return nullptr;
}
};

View File

@ -22,37 +22,13 @@
#include <string>
#include <vector>
#include <unordered_map>
#if PPSSPP_ARCH(ARM)
#include "Common/ArmEmitter.h"
#elif PPSSPP_ARCH(ARM64)
#include "Common/Arm64Emitter.h"
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
#include "Common/x64Emitter.h"
#elif PPSSPP_ARCH(MIPS)
#include "Common/MipsEmitter.h"
#else
#include "Common/FakeEmitter.h"
#endif
#include "GPU/Math3D.h"
#include "GPU/Software/FuncId.h"
#include "GPU/Software/RasterizerRegCache.h"
namespace Rasterizer {
#if PPSSPP_ARCH(AMD64) && PPSSPP_PLATFORM(WINDOWS) && (defined(_MSC_VER) || defined(__clang__))
#define SOFTPIXEL_CALL __vectorcall
#define SOFTPIXEL_VEC4I __m128i
#define SOFTPIXEL_TO_VEC4I(x) (x).ivec
#elif PPSSPP_ARCH(AMD64)
#define SOFTPIXEL_CALL
#define SOFTPIXEL_VEC4I __m128i
#define SOFTPIXEL_TO_VEC4I(x) (x).ivec
#else
#define SOFTPIXEL_CALL
#define SOFTPIXEL_VEC4I const Math3D::Vec4<int> &
#define SOFTPIXEL_TO_VEC4I(x) (x)
#endif
typedef void (SOFTPIXEL_CALL *SingleFunc)(int x, int y, int z, int fog, SOFTPIXEL_VEC4I color_in, const PixelFuncID &pixelID);
typedef void (SOFTRAST_CALL *SingleFunc)(int x, int y, int z, int fog, Vec4IntArg color_in, const PixelFuncID &pixelID);
SingleFunc GetSingleFunc(const PixelFuncID &id);
void Init();
@ -60,62 +36,6 @@ void Shutdown();
bool DescribeCodePtr(const u8 *ptr, std::string &name);
struct PixelRegCache {
enum Purpose {
INVALID,
ZERO,
SRC_ALPHA,
GSTATE,
CONST_BASE,
STENCIL,
COLOR_OFF,
DEPTH_OFF,
// Above this can only be temps.
TEMP0,
TEMP1,
TEMP2,
TEMP3,
TEMP4,
TEMP5,
TEMP_HELPER,
};
enum Type {
T_GEN,
T_VEC,
};
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
typedef Gen::X64Reg Reg;
#else
typedef int Reg;
#endif
struct RegStatus {
Reg reg;
Purpose purpose;
Type type;
uint8_t locked = 0;
bool forceLocked = false;
};
void Reset();
void Release(Reg r, Type t, Purpose p = INVALID);
void Unlock(Reg r, Type t);
bool Has(Purpose p, Type t);
Reg Find(Purpose p, Type t);
Reg Alloc(Purpose p, Type t);
void ForceLock(Purpose p, Type t, bool state = true);
// For getting a specific reg. WARNING: May return a locked reg, so you have to check.
void GrabReg(Reg r, Purpose p, Type t, bool &needsSwap, Reg swapReg);
private:
RegStatus *FindReg(Reg r, Type t);
std::vector<RegStatus> regs;
};
struct PixelBlendState {
bool usesFactors = false;
bool usesDstAlpha = false;
@ -123,17 +43,7 @@ struct PixelBlendState {
};
void ComputePixelBlendState(PixelBlendState &state, const PixelFuncID &id);
#if PPSSPP_ARCH(ARM)
class PixelJitCache : public ArmGen::ARMXCodeBlock {
#elif PPSSPP_ARCH(ARM64)
class PixelJitCache : public Arm64Gen::ARM64CodeBlock {
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
class PixelJitCache : public Gen::XCodeBlock {
#elif PPSSPP_ARCH(MIPS)
class PixelJitCache : public MIPSGen::MIPSCodeBlock {
#else
class PixelJitCache : public FakeGen::FakeXCodeBlock {
#endif
class PixelJitCache : public Rasterizer::CodeBlock {
public:
PixelJitCache();
@ -151,41 +61,41 @@ private:
Arm64Gen::ARM64FloatEmitter fp;
#endif
PixelRegCache::Reg GetGState();
PixelRegCache::Reg GetConstBase();
PixelRegCache::Reg GetZeroVec();
RegCache::Reg GetGState();
RegCache::Reg GetConstBase();
RegCache::Reg GetZeroVec();
// Note: these may require a temporary reg.
PixelRegCache::Reg GetColorOff(const PixelFuncID &id);
PixelRegCache::Reg GetDepthOff(const PixelFuncID &id);
PixelRegCache::Reg GetDestStencil(const PixelFuncID &id);
RegCache::Reg GetColorOff(const PixelFuncID &id);
RegCache::Reg GetDepthOff(const PixelFuncID &id);
RegCache::Reg GetDestStencil(const PixelFuncID &id);
bool Jit_ApplyDepthRange(const PixelFuncID &id);
bool Jit_AlphaTest(const PixelFuncID &id);
bool Jit_ApplyFog(const PixelFuncID &id);
bool Jit_ColorTest(const PixelFuncID &id);
bool Jit_StencilAndDepthTest(const PixelFuncID &id);
bool Jit_StencilTest(const PixelFuncID &id, PixelRegCache::Reg stencilReg, PixelRegCache::Reg maskedReg);
bool Jit_DepthTestForStencil(const PixelFuncID &id, PixelRegCache::Reg stencilReg);
bool Jit_ApplyStencilOp(const PixelFuncID &id, GEStencilOp op, PixelRegCache::Reg stencilReg);
bool Jit_WriteStencilOnly(const PixelFuncID &id, PixelRegCache::Reg stencilReg);
bool Jit_StencilTest(const PixelFuncID &id, RegCache::Reg stencilReg, RegCache::Reg maskedReg);
bool Jit_DepthTestForStencil(const PixelFuncID &id, RegCache::Reg stencilReg);
bool Jit_ApplyStencilOp(const PixelFuncID &id, GEStencilOp op, RegCache::Reg stencilReg);
bool Jit_WriteStencilOnly(const PixelFuncID &id, RegCache::Reg stencilReg);
bool Jit_DepthTest(const PixelFuncID &id);
bool Jit_WriteDepth(const PixelFuncID &id);
bool Jit_AlphaBlend(const PixelFuncID &id);
bool Jit_BlendFactor(const PixelFuncID &id, PixelRegCache::Reg factorReg, PixelRegCache::Reg dstReg, GEBlendSrcFactor factor);
bool Jit_DstBlendFactor(const PixelFuncID &id, PixelRegCache::Reg srcFactorReg, PixelRegCache::Reg dstFactorReg, PixelRegCache::Reg dstReg);
bool Jit_BlendFactor(const PixelFuncID &id, RegCache::Reg factorReg, RegCache::Reg dstReg, GEBlendSrcFactor factor);
bool Jit_DstBlendFactor(const PixelFuncID &id, RegCache::Reg srcFactorReg, RegCache::Reg dstFactorReg, RegCache::Reg dstReg);
bool Jit_Dither(const PixelFuncID &id);
bool Jit_WriteColor(const PixelFuncID &id);
bool Jit_ApplyLogicOp(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg maskReg);
bool Jit_ConvertTo565(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg);
bool Jit_ConvertTo5551(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha);
bool Jit_ConvertTo4444(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha);
bool Jit_ConvertFrom565(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg);
bool Jit_ConvertFrom5551(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha);
bool Jit_ConvertFrom4444(const PixelFuncID &id, PixelRegCache::Reg colorReg, PixelRegCache::Reg temp1Reg, PixelRegCache::Reg temp2Reg, bool keepAlpha);
bool Jit_ApplyLogicOp(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg maskReg);
bool Jit_ConvertTo565(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg temp1Reg, RegCache::Reg temp2Reg);
bool Jit_ConvertTo5551(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg temp1Reg, RegCache::Reg temp2Reg, bool keepAlpha);
bool Jit_ConvertTo4444(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg temp1Reg, RegCache::Reg temp2Reg, bool keepAlpha);
bool Jit_ConvertFrom565(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg temp1Reg, RegCache::Reg temp2Reg);
bool Jit_ConvertFrom5551(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg temp1Reg, RegCache::Reg temp2Reg, bool keepAlpha);
bool Jit_ConvertFrom4444(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg temp1Reg, RegCache::Reg temp2Reg, bool keepAlpha);
std::unordered_map<PixelFuncID, SingleFunc> cache_;
std::unordered_map<PixelFuncID, const u8 *> addresses_;
PixelRegCache regCache_;
RegCache regCache_;
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
void Discard();
@ -195,6 +105,7 @@ private:
std::vector<Gen::FixupBranch> discards_;
// Used in Jit_ApplyLogicOp() to skip the standard MOV/OR write.
std::vector<Gen::FixupBranch> skipStandardWrites_;
int stackIDOffset_ = 0;
bool colorIs16Bit_ = false;
#endif
};

File diff suppressed because it is too large Load Diff

View File

@ -906,7 +906,7 @@ void DrawTriangleSlice(
subp.x = p.x + (i & 1);
subp.y = p.y + (i / 2);
drawPixel(subp.x, subp.y, z[i], fog[i], SOFTPIXEL_TO_VEC4I(prim_color[i]), pixelID);
drawPixel(subp.x, subp.y, z[i], fog[i], ToVec4IntArg(prim_color[i]), pixelID);
}
}
}
@ -1054,7 +1054,7 @@ void DrawPoint(const VertexData &v0)
fog = ClampFogDepth(v0.fogdepth);
}
drawPixel(p.x, p.y, z, fog, SOFTPIXEL_TO_VEC4I(prim_color), pixelID);
drawPixel(p.x, p.y, z, fog, ToVec4IntArg(prim_color), pixelID);
}
void ClearRectangle(const VertexData &v0, const VertexData &v1)
@ -1344,7 +1344,7 @@ void DrawLine(const VertexData &v0, const VertexData &v1)
ScreenCoords pprime = ScreenCoords((int)x, (int)y, (int)z);
DrawingCoords p = TransformUnit::ScreenToDrawing(pprime);
drawPixel(p.x, p.y, z, fog, SOFTPIXEL_TO_VEC4I(prim_color), pixelID);
drawPixel(p.x, p.y, z, fog, ToVec4IntArg(prim_color), pixelID);
}
x += xinc;

View File

@ -193,7 +193,7 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) {
Vec4<int> prim_color = v1.color0;
Vec4<int> tex_color = Vec4<int>::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0));
prim_color = GetTextureFunctionOutput(prim_color, tex_color);
drawPixel(x, y, z, 255, SOFTPIXEL_TO_VEC4I(prim_color), pixelID);
drawPixel(x, y, z, 255, ToVec4IntArg(prim_color), pixelID);
s += ds;
}
t += dt;
@ -237,7 +237,7 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) {
for (int y = y1; y < y2; y++) {
for (int x = pos0.x; x < pos1.x; x++) {
Vec4<int> prim_color = v1.color0;
drawPixel(x, y, z, fog, SOFTPIXEL_TO_VEC4I(prim_color), pixelID);
drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);
}
}
}, pos0.y, pos1.y, MIN_LINES_PER_THREAD);

View File

@ -0,0 +1,214 @@
// Copyright (c) 2021- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "GPU/Software/RasterizerRegCache.h"
namespace Rasterizer {
void RegCache::Reset(bool validate) {
if (validate) {
for (auto &reg : regs) {
_assert_msg_(reg.locked == 0, "softjit: Reset() with reg still locked (%04X)", reg.purpose);
_assert_msg_(!reg.forceRetained, "softjit: Reset() with reg force retained (%04X)", reg.purpose);
}
}
regs.clear();
}
void RegCache::Add(Reg r, Purpose p) {
for (auto &reg : regs) {
if (reg.reg == r && (reg.purpose & FLAG_GEN) == (p & FLAG_GEN)) {
_assert_msg_(false, "softjit Add() reg duplicate (%04X)", p);
}
}
_assert_msg_(r != REG_INVALID_VALUE, "softjit Add() invalid reg (%04X)", p);
RegStatus newStatus;
newStatus.reg = r;
newStatus.purpose = p;
regs.push_back(newStatus);
}
void RegCache::Change(Purpose history, Purpose destiny) {
for (auto &reg : regs) {
if (reg.purpose == history) {
reg.purpose = destiny;
return;
}
}
_assert_msg_(false, "softjit Change() reg that isn't there (%04X)", history);
}
void RegCache::Release(Reg &r, Purpose p) {
RegStatus *status = FindReg(r, p);
_assert_msg_(status != nullptr, "softjit Release() reg that isn't there (%04X)", p);
_assert_msg_(status->locked > 0, "softjit Release() reg that isn't locked (%04X)", p);
_assert_msg_(!status->forceRetained, "softjit Release() reg that is force retained (%04X)", p);
status->locked--;
if (status->locked == 0) {
if ((status->purpose & FLAG_GEN) != 0)
status->purpose = GEN_INVALID;
else
status->purpose = VEC_INVALID;
}
r = REG_INVALID_VALUE;
}
void RegCache::Unlock(Reg &r, Purpose p) {
RegStatus *status = FindReg(r, p);
if (status) {
_assert_msg_(status->locked > 0, "softjit Unlock() reg that isn't locked (%04X)", p);
status->locked--;
r = REG_INVALID_VALUE;
return;
}
_assert_msg_(false, "softjit Unlock() reg that isn't there (%04X)", p);
}
bool RegCache::Has(Purpose p) {
for (auto &reg : regs) {
if (reg.purpose == p) {
return true;
}
}
return false;
}
RegCache::Reg RegCache::Find(Purpose p) {
for (auto &reg : regs) {
if (reg.purpose == p) {
_assert_msg_(reg.locked <= 255, "softjit Find() reg has lots of locks (%04X)", p);
reg.locked++;
return reg.reg;
}
}
_assert_msg_(false, "softjit Find() reg that isn't there (%04X)", p);
return REG_INVALID_VALUE;
}
RegCache::Reg RegCache::Alloc(Purpose p) {
_assert_msg_(!Has(p), "softjit Alloc() reg duplicate (%04X)", p);
RegStatus *best = nullptr;
for (auto &reg : regs) {
if (reg.locked != 0 || reg.forceRetained)
continue;
// Needs to be the same type.
if ((reg.purpose & FLAG_GEN) != (p & FLAG_GEN))
continue;
if (best == nullptr)
best = &reg;
// Prefer a free/purposeless reg (includes INVALID.)
if ((reg.purpose & FLAG_TEMP) != 0) {
best = &reg;
break;
}
// But also prefer a lower priority reg.
if (reg.purpose < best->purpose)
best = &reg;
}
if (best) {
best->locked = 1;
best->purpose = p;
return best->reg;
}
_assert_msg_(false, "softjit Alloc() reg with none free (%04X)", p);
return REG_INVALID_VALUE;
}
void RegCache::ForceRetain(Purpose p) {
for (auto &reg : regs) {
if (reg.purpose == p) {
reg.forceRetained = true;
return;
}
}
_assert_msg_(false, "softjit ForceRetain() reg that isn't there (%04X)", p);
}
void RegCache::ForceRelease(Purpose p) {
for (auto &reg : regs) {
if (reg.purpose == p) {
_assert_msg_(reg.locked == 0, "softjit ForceRelease() while locked (%04X)", p);
reg.forceRetained = false;
if ((reg.purpose & FLAG_GEN) != 0)
reg.purpose = GEN_INVALID;
else
reg.purpose = VEC_INVALID;
return;
}
}
_assert_msg_(false, "softjit ForceRelease() reg that isn't there (%04X)", p);
}
void RegCache::GrabReg(Reg r, Purpose p, bool &needsSwap, Reg swapReg, Purpose swapPurpose) {
for (auto &reg : regs) {
if (reg.reg != r)
continue;
if ((reg.purpose & FLAG_GEN) != (p & FLAG_GEN))
continue;
// Easy version, it's free.
if (reg.locked == 0 && !reg.forceRetained) {
needsSwap = false;
reg.purpose = p;
reg.locked = 1;
return;
}
// Okay, we need to swap. Find that reg.
needsSwap = true;
RegStatus *swap = FindReg(swapReg, swapPurpose);
if (swap) {
swap->purpose = reg.purpose;
swap->forceRetained = reg.forceRetained;
swap->locked = reg.locked;
} else {
_assert_msg_(!Has(swapPurpose), "softjit GrabReg() wrong purpose (%04X)", swapPurpose);
RegStatus newStatus = reg;
newStatus.reg = swapReg;
regs.push_back(newStatus);
}
reg.purpose = p;
reg.locked = 1;
reg.forceRetained = false;
return;
}
_assert_msg_(false, "softjit GrabReg() reg that isn't there");
}
RegCache::RegStatus *RegCache::FindReg(Reg r, Purpose p) {
for (auto &reg : regs) {
if (reg.reg == r && reg.purpose == p) {
return &reg;
}
}
return nullptr;
}
};

View File

@ -0,0 +1,167 @@
// Copyright (c) 2021- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "ppsspp_config.h"
#include <cstdint>
#include <vector>
#if defined(_M_SSE)
#include <emmintrin.h>
#endif
#if PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
#endif
#if PPSSPP_ARCH(ARM)
#include "Common/ArmEmitter.h"
#elif PPSSPP_ARCH(ARM64)
#include "Common/Arm64Emitter.h"
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
#include "Common/x64Emitter.h"
#elif PPSSPP_ARCH(MIPS)
#include "Common/MipsEmitter.h"
#else
#include "Common/FakeEmitter.h"
#endif
#include "GPU/Math3D.h"
namespace Rasterizer {
// While not part of the reg cache proper, this is the type it is built for.
#if PPSSPP_ARCH(ARM)
typedef ArmGen::ARMXCodeBlock CodeBlock;
#elif PPSSPP_ARCH(ARM64)
typedef Arm64Gen::ARM64CodeBlock CodeBlock;
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
typedef Gen::XCodeBlock CodeBlock;
#elif PPSSPP_ARCH(MIPS)
typedef MIPSGen::MIPSCodeBlock CodeBlock;
#else
typedef FakeGen::FakeXCodeBlock CodeBlock;
#endif
// We also have the types of things that end up in regs.
#if PPSSPP_ARCH(ARM64)
typedef int32x4_t Vec4IntArg;
static inline Vec4IntArg ToVec4IntArg(const Math3D::Vec4<int> &a) { return vld1q_s32(a.AsArray()); }
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
typedef __m128i Vec4IntArg;
static inline Vec4IntArg ToVec4IntArg(const Math3D::Vec4<int> &a) { return a.ivec; }
#else
typedef const Math3D::Vec4<int> &Vec4IntArg;
static inline Vec4IntArg ToVec4IntArg(const Math3D::Vec4<int> &a) { return a; }
#endif
#if PPSSPP_ARCH(AMD64) && PPSSPP_PLATFORM(WINDOWS) && (defined(_MSC_VER) || defined(__clang__) || defined(__INTEL_COMPILER))
#define SOFTRAST_CALL __vectorcall
#else
#define SOFTRAST_CALL
#endif
struct RegCache {
enum Purpose {
FLAG_GEN = 0x0100,
FLAG_TEMP = 0x1000,
VEC_ZERO = 0x0000,
GEN_SRC_ALPHA = 0x0100,
GEN_GSTATE = 0x0101,
GEN_CONST_BASE = 0x0102,
GEN_STENCIL = 0x0103,
GEN_COLOR_OFF = 0x0104,
GEN_DEPTH_OFF = 0x0105,
GEN_ARG_X = 0x0180,
GEN_ARG_Y = 0x0181,
GEN_ARG_Z = 0x0182,
GEN_ARG_FOG = 0x0183,
GEN_ARG_ID = 0x0184,
VEC_ARG_COLOR = 0x0080,
VEC_ARG_MASK = 0x0081,
VEC_TEMP0 = 0x1000,
VEC_TEMP1 = 0x1001,
VEC_TEMP2 = 0x1002,
VEC_TEMP3 = 0x1003,
VEC_TEMP4 = 0x1004,
VEC_TEMP5 = 0x1005,
GEN_TEMP0 = 0x1100,
GEN_TEMP1 = 0x1101,
GEN_TEMP2 = 0x1102,
GEN_TEMP3 = 0x1103,
GEN_TEMP4 = 0x1104,
GEN_TEMP5 = 0x1105,
GEN_TEMP_HELPER = 0x1106,
VEC_INVALID = 0xFEFF,
GEN_INVALID = 0xFFFF,
};
#if PPSSPP_ARCH(ARM)
typedef ArmGen::ARMReg Reg;
static constexpr Reg REG_INVALID_VALUE = ArmGen::INVALID_REG;
#elif PPSSPP_ARCH(ARM64)
typedef Arm64Gen::ARM64Reg Reg;
static constexpr Reg REG_INVALID_VALUE = Arm64Gen::INVALID_REG;
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
typedef Gen::X64Reg Reg;
static constexpr Reg REG_INVALID_VALUE = Gen::INVALID_REG;
#elif PPSSPP_ARCH(MIPS)
typedef MIPSGen::MIPSReg Reg;
static constexpr Reg REG_INVALID_VALUE = MIPSGen::INVALID_REG;
#else
typedef int Reg;
static constexpr Reg REG_INVALID_VALUE = -1;
#endif
struct RegStatus {
Reg reg;
Purpose purpose;
uint8_t locked = 0;
bool forceRetained = false;
};
void Reset(bool validate);
void Add(Reg r, Purpose p);
void Change(Purpose history, Purpose destiny);
void Release(Reg &r, Purpose p);
void Unlock(Reg &r, Purpose p);
bool Has(Purpose p);
Reg Find(Purpose p);
Reg Alloc(Purpose p);
void ForceRetain(Purpose p);
void ForceRelease(Purpose p);
// For getting a specific reg. WARNING: May return a locked reg, so you have to check.
void GrabReg(Reg r, Purpose p, bool &needsSwap, Reg swapReg, Purpose swapPurpose);
private:
RegStatus *FindReg(Reg r, Purpose p);
std::vector<RegStatus> regs;
};
};

View File

@ -241,13 +241,14 @@ LinearFunc SamplerJitCache::GetLinear(const SamplerID &id) {
}
#if PPSSPP_ARCH(AMD64) && !PPSSPP_PLATFORM(UWP)
addresses_[id] = GetCodePointer();
LinearFunc func = CompileLinear(id);
cache_[id] = (NearestFunc)func;
return func;
#else
return nullptr;
if (g_Config.bSoftwareRenderingJit) {
addresses_[id] = GetCodePointer();
LinearFunc func = CompileLinear(id);
cache_[id] = (NearestFunc)func;
return func;
}
#endif
return nullptr;
}
template <unsigned int texel_size_bits>

View File

@ -20,19 +20,9 @@
#include "ppsspp_config.h"
#include <unordered_map>
#if PPSSPP_ARCH(ARM)
#include "Common/ArmEmitter.h"
#elif PPSSPP_ARCH(ARM64)
#include "Common/Arm64Emitter.h"
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
#include "Common/x64Emitter.h"
#elif PPSSPP_ARCH(MIPS)
#include "Common/MipsEmitter.h"
#else
#include "Common/FakeEmitter.h"
#endif
#include "GPU/Math3D.h"
#include "GPU/Software/FuncId.h"
#include "GPU/Software/RasterizerRegCache.h"
namespace Sampler {
@ -58,17 +48,7 @@ void Shutdown();
bool DescribeCodePtr(const u8 *ptr, std::string &name);
#if PPSSPP_ARCH(ARM)
class SamplerJitCache : public ArmGen::ARMXCodeBlock {
#elif PPSSPP_ARCH(ARM64)
class SamplerJitCache : public Arm64Gen::ARM64CodeBlock {
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
class SamplerJitCache : public Gen::XCodeBlock {
#elif PPSSPP_ARCH(MIPS)
class SamplerJitCache : public MIPSGen::MIPSCodeBlock {
#else
class SamplerJitCache : public FakeGen::FakeXCodeBlock {
#endif
class SamplerJitCache : public Rasterizer::CodeBlock {
public:
SamplerJitCache();

View File

@ -431,6 +431,7 @@
<ClInclude Include="..\..\GPU\Software\Lighting.h" />
<ClInclude Include="..\..\GPU\Software\Rasterizer.h" />
<ClInclude Include="..\..\GPU\Software\RasterizerRectangle.h" />
<ClInclude Include="..\..\GPU\Software\RasterizerRegCache.h" />
<ClInclude Include="..\..\GPU\Software\Sampler.h" />
<ClInclude Include="..\..\GPU\Software\SoftGpu.h" />
<ClInclude Include="..\..\GPU\Software\TransformUnit.h" />
@ -491,6 +492,7 @@
<ClCompile Include="..\..\GPU\Software\Lighting.cpp" />
<ClCompile Include="..\..\GPU\Software\Rasterizer.cpp" />
<ClCompile Include="..\..\GPU\Software\RasterizerRectangle.cpp" />
<ClCompile Include="..\..\GPU\Software\RasterizerRegCache.cpp" />
<ClCompile Include="..\..\GPU\Software\Sampler.cpp" />
<ClCompile Include="..\..\GPU\Software\SoftGpu.cpp" />
<ClCompile Include="..\..\GPU\Software\TransformUnit.cpp" />

View File

@ -55,6 +55,7 @@
<ClCompile Include="..\..\GPU\Software\TransformUnit.cpp" />
<ClCompile Include="pch.cpp" />
<ClCompile Include="..\..\GPU\Software\RasterizerRectangle.cpp" />
<ClCompile Include="..\..\GPU\Software\RasterizerRegCache.cpp" />
<ClCompile Include="..\..\GPU\Common\FragmentShaderGenerator.cpp" />
<ClCompile Include="..\..\GPU\Common\VertexShaderGenerator.cpp" />
<ClCompile Include="..\..\GPU\Common\ReinterpretFramebuffer.cpp" />
@ -114,6 +115,7 @@
<ClInclude Include="pch.h" />
<ClInclude Include="targetver.h" />
<ClInclude Include="..\..\GPU\Software\RasterizerRectangle.h" />
<ClInclude Include="..\..\GPU\Software\RasterizerRegCache.h" />
<ClInclude Include="..\..\GPU\Common\FragmentShaderGenerator.h" />
<ClInclude Include="..\..\GPU\Common\VertexShaderGenerator.h" />
<ClInclude Include="..\..\GPU\Common\ReinterpretFramebuffer.h" />

View File

@ -369,6 +369,7 @@ EXEC_AND_LIB_FILES := \
$(SRC)/GPU/Software/Lighting.cpp \
$(SRC)/GPU/Software/Rasterizer.cpp.arm \
$(SRC)/GPU/Software/RasterizerRectangle.cpp.arm \
$(SRC)/GPU/Software/RasterizerRegCache.cpp \
$(SRC)/GPU/Software/Sampler.cpp \
$(SRC)/GPU/Software/SoftGpu.cpp \
$(SRC)/GPU/Software/TransformUnit.cpp \

View File

@ -356,6 +356,7 @@ SOURCES_CXX += \
$(GPUDIR)/Software/Lighting.cpp \
$(GPUDIR)/Software/Rasterizer.cpp \
$(GPUDIR)/Software/RasterizerRectangle.cpp \
$(GPUDIR)/Software/RasterizerRegCache.cpp \
$(GPUDIR)/GLES/DepalettizeShaderGLES.cpp \
$(GPUDIR)/GLES/DepthBufferGLES.cpp \
$(GPUDIR)/GLES/DrawEngineGLES.cpp \