mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-28 07:50:49 +00:00
Merge branch 'master' into platform_openxr_pico
This commit is contained in:
commit
0c2120d596
@ -48,16 +48,16 @@ struct FrameData {
|
||||
std::condition_variable fenceCondVar;
|
||||
bool readyForFence = true;
|
||||
|
||||
VkFence fence;
|
||||
VkFence readbackFence; // Strictly speaking we might only need one global of these.
|
||||
VkFence fence = VK_NULL_HANDLE;
|
||||
VkFence readbackFence = VK_NULL_HANDLE; // Strictly speaking we might only need one global of these.
|
||||
|
||||
// These are on different threads so need separate pools.
|
||||
VkCommandPool cmdPoolInit; // Written to from main thread
|
||||
VkCommandPool cmdPoolMain; // Written to from render thread, which also submits
|
||||
VkCommandPool cmdPoolInit = VK_NULL_HANDLE; // Written to from main thread
|
||||
VkCommandPool cmdPoolMain = VK_NULL_HANDLE; // Written to from render thread, which also submits
|
||||
|
||||
VkCommandBuffer initCmd;
|
||||
VkCommandBuffer mainCmd;
|
||||
VkCommandBuffer presentCmd;
|
||||
VkCommandBuffer initCmd = VK_NULL_HANDLE;
|
||||
VkCommandBuffer mainCmd = VK_NULL_HANDLE;
|
||||
VkCommandBuffer presentCmd = VK_NULL_HANDLE;
|
||||
|
||||
bool hasInitCommands = false;
|
||||
bool hasMainCommands = false;
|
||||
@ -73,7 +73,7 @@ struct FrameData {
|
||||
|
||||
// Profiling.
|
||||
QueueProfileContext profile;
|
||||
bool profilingEnabled_;
|
||||
bool profilingEnabled_ = false;
|
||||
|
||||
void Init(VulkanContext *vulkan, int index);
|
||||
void Destroy(VulkanContext *vulkan);
|
||||
|
@ -109,6 +109,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) {
|
||||
CheckSetting(iniFile, gameID, "SplitFramebufferMargin", &flags_.SplitFramebufferMargin);
|
||||
CheckSetting(iniFile, gameID, "ForceLowerResolutionForEffectsOn", &flags_.ForceLowerResolutionForEffectsOn);
|
||||
CheckSetting(iniFile, gameID, "AllowDownloadCLUT", &flags_.AllowDownloadCLUT);
|
||||
CheckSetting(iniFile, gameID, "NearestFilteringOnFramebufferCreate", &flags_.NearestFilteringOnFramebufferCreate);
|
||||
}
|
||||
|
||||
void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) {
|
||||
|
@ -89,6 +89,7 @@ struct CompatFlags {
|
||||
bool SplitFramebufferMargin;
|
||||
bool ForceLowerResolutionForEffectsOn;
|
||||
bool AllowDownloadCLUT;
|
||||
bool NearestFilteringOnFramebufferCreate;
|
||||
};
|
||||
|
||||
struct VRCompat {
|
||||
|
@ -125,7 +125,7 @@ struct SceMpegLLI
|
||||
};
|
||||
|
||||
void SceMpegAu::read(u32 addr) {
|
||||
Memory::Memcpy(this, addr, sizeof(this), "SceMpegAu");
|
||||
Memory::Memcpy(this, addr, sizeof(*this), "SceMpegAu");
|
||||
pts = (pts & 0xFFFFFFFFULL) << 32 | (((u64)pts) >> 32);
|
||||
dts = (dts & 0xFFFFFFFFULL) << 32 | (((u64)dts) >> 32);
|
||||
}
|
||||
@ -133,7 +133,7 @@ void SceMpegAu::read(u32 addr) {
|
||||
void SceMpegAu::write(u32 addr) {
|
||||
pts = (pts & 0xFFFFFFFFULL) << 32 | (((u64)pts) >> 32);
|
||||
dts = (dts & 0xFFFFFFFFULL) << 32 | (((u64)dts) >> 32);
|
||||
Memory::Memcpy(addr, this, sizeof(this), "SceMpegAu");
|
||||
Memory::Memcpy(addr, this, sizeof(*this), "SceMpegAu");
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -717,6 +717,16 @@ uint64_t DrawEngineCommon::ComputeHash() {
|
||||
return fullhash;
|
||||
}
|
||||
|
||||
// Cheap bit scrambler from https://nullprogram.com/blog/2018/07/31/
|
||||
inline uint32_t lowbias32_r(uint32_t x) {
|
||||
x ^= x >> 16;
|
||||
x *= 0x43021123U;
|
||||
x ^= x >> 15 ^ x >> 30;
|
||||
x *= 0x1d69e2a5U;
|
||||
x ^= x >> 16;
|
||||
return x;
|
||||
}
|
||||
|
||||
// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
|
||||
void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
|
||||
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
|
||||
@ -745,10 +755,10 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
|
||||
if (g_Config.bVertexCache) {
|
||||
u32 dhash = dcid_;
|
||||
dhash = __rotl(dhash ^ (u32)(uintptr_t)verts, 13);
|
||||
dhash = __rotl(dhash ^ (u32)(uintptr_t)inds, 13);
|
||||
dhash = __rotl(dhash ^ (u32)vertTypeID, 13);
|
||||
dhash = __rotl(dhash ^ (u32)vertexCount, 13);
|
||||
dcid_ = dhash ^ (u32)prim;
|
||||
dhash = __rotl(dhash ^ (u32)(uintptr_t)inds, 19);
|
||||
dhash = __rotl(dhash ^ (u32)vertTypeID, 7);
|
||||
dhash = __rotl(dhash ^ (u32)vertexCount, 11);
|
||||
dcid_ = lowbias32_r(dhash ^ (u32)prim);
|
||||
}
|
||||
|
||||
DeferredDrawCall &dc = drawCalls[numDrawCalls];
|
||||
|
@ -183,7 +183,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
|
||||
WRITE(p, "int roundAndScaleTo255i(in highp float x) { return int(floor(x * 255.0 + 0.5)); }\n");
|
||||
}
|
||||
if (enableColorTest && !colorTestAgainstZero) {
|
||||
WRITE(p, "ivec3 roundAndScaleTo255iv(in highp vec3 x) { return ivec3(floor(x * 255.0 + 0.5)); }\n");
|
||||
WRITE(p, "uint roundAndScaleTo8x4(in highp vec3 x) { uvec3 u = uvec3(floor(x * 255.0 + 0.5)); return u.r | (u.g << 8) | (u.b << 16); }\n");
|
||||
WRITE(p, "uint packFloatsTo8x4(in vec3 x) { uvec3 u = uvec3(x); return u.r | (u.g << 8) | (u.b << 16); }\n");
|
||||
}
|
||||
|
||||
WRITE(p, "layout (location = 0, index = 0) out vec4 fragColor0;\n");
|
||||
@ -262,7 +263,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
|
||||
}
|
||||
if (enableColorTest) {
|
||||
if (compat.shaderLanguage == HLSL_D3D11) {
|
||||
WRITE(p, "uvec3 roundAndScaleTo255iv(float3 x) { return (floor(x * 255.0f + 0.5f)); }\n");
|
||||
WRITE(p, "uint roundAndScaleTo8x4(float3 x) { uvec3 u = (floor(x * 255.0f + 0.5f)); return u.r | (u.g << 8) | (u.b << 16); }\n");
|
||||
WRITE(p, "uint packFloatsTo8x4(in vec3 x) { uvec3 u = uvec3(x); return u.r | (u.g << 8) | (u.b << 16); }\n");
|
||||
} else {
|
||||
WRITE(p, "vec3 roundAndScaleTo255v(float3 x) { return floor(x * 255.0f + 0.5f); }\n");
|
||||
}
|
||||
@ -354,7 +356,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
|
||||
WRITE(p, "uniform vec4 u_alphacolorref;\n");
|
||||
if (compat.bitwiseOps && ((enableColorTest && !colorTestAgainstZero) || (enableAlphaTest && !alphaTestAgainstZero))) {
|
||||
*uniformMask |= DIRTY_ALPHACOLORMASK;
|
||||
WRITE(p, "uniform ivec4 u_alphacolormask;\n");
|
||||
WRITE(p, "uniform uint u_alphacolormask;\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -408,7 +410,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
|
||||
}
|
||||
if (enableColorTest && !colorTestAgainstZero) {
|
||||
if (compat.bitwiseOps) {
|
||||
WRITE(p, "ivec3 roundAndScaleTo255iv(in vec3 x) { return ivec3(floor(x * 255.0 + 0.5)); }\n");
|
||||
WRITE(p, "uint roundAndScaleTo8x4(in vec3 x) { uvec3 u = uvec3(floor(x * 255.99)); return u.r | (u.g << 8) | (u.b << 16); }\n");
|
||||
WRITE(p, "uint packFloatsTo8x4(in vec3 x) { uvec3 u = uvec3(x); return u.r | (u.g << 8) | (u.b << 16); }\n");
|
||||
} else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
|
||||
WRITE(p, "vec3 roundTo255thv(in vec3 x) { vec3 y = x + (0.5/255.0); return y - fract(y * 255.0) * (1.0 / 255.0); }\n");
|
||||
} else {
|
||||
@ -458,6 +461,12 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
|
||||
WRITE(p, "}\n");
|
||||
}
|
||||
|
||||
if (compat.bitwiseOps && enableColorTest) {
|
||||
p.C("uvec3 unpackUVec3(highp uint x) {\n");
|
||||
p.C(" return uvec3(x & 0xFF, (x >> 8) & 0xFF, (x >> 16) & 0xFF);\n");
|
||||
p.C("}\n");
|
||||
}
|
||||
|
||||
// PowerVR needs a custom modulo function. For some reason, this has far higher precision than the builtin one.
|
||||
if ((gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) && needShaderTexClamp) {
|
||||
WRITE(p, "float mymod(float a, float b) { return a - b * floor(a / b); }\n");
|
||||
@ -873,7 +882,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
|
||||
const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " };
|
||||
if (alphaTestFuncs[alphaTestFunc][0] != '#') {
|
||||
if (compat.bitwiseOps) {
|
||||
WRITE(p, " if ((roundAndScaleTo255i(v.a) & u_alphacolormask.a) %s int(u_alphacolorref.a)) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement);
|
||||
WRITE(p, " if ((roundAndScaleTo255i(v.a) & int(u_alphacolormask >> 24)) %s int(u_alphacolorref.a)) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement);
|
||||
} else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
|
||||
// Work around bad PVR driver problem where equality check + discard just doesn't work.
|
||||
if (alphaTestFunc != GE_COMP_NOTEQUAL) {
|
||||
@ -927,34 +936,22 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
|
||||
}
|
||||
} else {
|
||||
const char *colorTestFuncs[] = { "#", "#", " != ", " == " };
|
||||
if (colorTestFuncs[colorTestFunc][0] != '#') {
|
||||
const char *test = colorTestFuncs[colorTestFunc];
|
||||
if (test[0] != '#') {
|
||||
// TODO: Unify these paths better.
|
||||
if (compat.shaderLanguage == HLSL_D3D11) {
|
||||
const char *test = colorTestFuncs[colorTestFunc];
|
||||
WRITE(p, " uvec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
|
||||
WRITE(p, " uvec3 v_masked = v_scaled & u_alphacolormask.rgb;\n");
|
||||
WRITE(p, " uvec3 colorTestRef = u_alphacolorref.rgb & u_alphacolormask.rgb;\n");
|
||||
// We have to test the components separately, or we get incorrect results. See #10629.
|
||||
WRITE(p, " if (v_masked.r %s colorTestRef.r && v_masked.g %s colorTestRef.g && v_masked.b %s colorTestRef.b) %s\n", test, test, test, discardStatement);
|
||||
} else if (compat.shaderLanguage == HLSL_D3D9) {
|
||||
const char *test = colorTestFuncs[colorTestFunc];
|
||||
if (compat.shaderLanguage == HLSL_D3D9) {
|
||||
// TODO: Use a texture to lookup bitwise ops instead?
|
||||
WRITE(p, " vec3 colortest = roundAndScaleTo255v(v.rgb);\n");
|
||||
WRITE(p, " if ((colortest.r %s u_alphacolorref.r) && (colortest.g %s u_alphacolorref.g) && (colortest.b %s u_alphacolorref.b)) %s\n", test, test, test, discardStatement);
|
||||
} else if (compat.bitwiseOps) {
|
||||
WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
|
||||
if (compat.shaderLanguage == GLSL_VULKAN) {
|
||||
// Apparently GLES3 does not support vector bitwise ops, but Vulkan does?
|
||||
WRITE(p, " if ((v_scaled & u_alphacolormask.rgb) %s (u_alphacolorref.rgb & u_alphacolormask.rgb)) %s\n", colorTestFuncs[colorTestFunc], discardStatement);
|
||||
} else {
|
||||
const char *maskedFragColor = "ivec3(v_scaled.r & u_alphacolormask.r, v_scaled.g & u_alphacolormask.g, v_scaled.b & u_alphacolormask.b)";
|
||||
const char *maskedColorRef = "ivec3(int(u_alphacolorref.r) & u_alphacolormask.r, int(u_alphacolorref.g) & u_alphacolormask.g, int(u_alphacolorref.b) & u_alphacolormask.b)";
|
||||
WRITE(p, " if (%s %s %s) %s\n", maskedFragColor, colorTestFuncs[colorTestFunc], maskedColorRef, discardStatement);
|
||||
}
|
||||
WRITE(p, " uint v_uint = roundAndScaleTo8x4(v.rgb);\n");
|
||||
WRITE(p, " uint v_masked = v_uint & u_alphacolormask;\n");
|
||||
WRITE(p, " uint colorTestRef = packFloatsTo8x4(u_alphacolorref.rgb) & u_alphacolormask;\n");
|
||||
WRITE(p, " if (v_masked %s colorTestRef) %s\n", test, discardStatement);
|
||||
} else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
|
||||
WRITE(p, " if (roundTo255thv(v.rgb) %s u_alphacolorref.rgb) %s\n", colorTestFuncs[colorTestFunc], discardStatement);
|
||||
WRITE(p, " if (roundTo255thv(v.rgb) %s u_alphacolorref.rgb) %s\n", test, discardStatement);
|
||||
} else {
|
||||
WRITE(p, " if (roundAndScaleTo255v(v.rgb) %s u_alphacolorref.rgb) %s\n", colorTestFuncs[colorTestFunc], discardStatement);
|
||||
WRITE(p, " if (roundAndScaleTo255v(v.rgb) %s u_alphacolorref.rgb) %s\n", test, discardStatement);
|
||||
}
|
||||
} else {
|
||||
WRITE(p, " %s\n", discardStatement);
|
||||
|
@ -1061,7 +1061,11 @@ void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int
|
||||
|
||||
DrawTextureFlags flags;
|
||||
if (useBufferedRendering_ && vfb && vfb->fbo) {
|
||||
flags = channel == RASTER_COLOR ? DRAWTEX_LINEAR : DRAWTEX_NEAREST;
|
||||
if (channel == RASTER_DEPTH || PSP_CoreParameter().compat.flags().NearestFilteringOnFramebufferCreate) {
|
||||
flags = DRAWTEX_NEAREST;
|
||||
} else {
|
||||
flags = DRAWTEX_LINEAR;
|
||||
}
|
||||
draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag);
|
||||
SetViewport2D(0, 0, vfb->renderWidth, vfb->renderHeight);
|
||||
draw_->SetScissorRect(0, 0, vfb->renderWidth, vfb->renderHeight);
|
||||
|
@ -80,7 +80,7 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
|
||||
Uint8x3ToInt4_Alpha(ub->alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
|
||||
}
|
||||
if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {
|
||||
Uint8x3ToInt4_Alpha(ub->colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask());
|
||||
ub->colorTestMask = gstate.getColorTestMask() | (gstate.getAlphaTestMask() << 24);
|
||||
}
|
||||
if (dirtyUniforms & DIRTY_FOGCOLOR) {
|
||||
Uint8x3ToFloat4(ub->fogColor, gstate.fogcolor);
|
||||
|
@ -17,10 +17,9 @@ enum : uint64_t {
|
||||
DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT,
|
||||
};
|
||||
|
||||
// TODO: Split into two structs, one for software transform and one for hardware transform, to save space.
|
||||
// Currently 512 bytes. Probably can't get to 256 (nVidia's UBO alignment).
|
||||
// Currently 480 bytes. Probably can't get to 256 (nVidia's UBO alignment, also common in other vendors).
|
||||
// Every line here is a 4-float.
|
||||
struct UB_VS_FS_Base {
|
||||
struct alignas(16) UB_VS_FS_Base {
|
||||
float proj[16];
|
||||
float proj_through[16];
|
||||
float view[12];
|
||||
@ -29,21 +28,19 @@ struct UB_VS_FS_Base {
|
||||
float uvScaleOffset[4];
|
||||
float depthRange[4];
|
||||
// Rotation is used only for software transform.
|
||||
float fogCoef[2]; float stencil; float rotation;
|
||||
float matAmbient[4];
|
||||
float cullRangeMin[4];
|
||||
float cullRangeMax[4];
|
||||
uint32_t spline_counts; uint32_t depal_mask_shift_off_fmt; // 4 params packed into one.
|
||||
uint32_t colorWriteMask; float mipBias;
|
||||
// Fragment data
|
||||
float fogColor[4];
|
||||
float texEnvColor[4]; // .w is unused
|
||||
float fogColor[4]; // .w is unused
|
||||
float texEnvColor[3]; uint32_t colorTestMask;
|
||||
int alphaColorRef[4];
|
||||
int colorTestMask[4];
|
||||
float blendFixA[4]; // .w is unused
|
||||
float blendFixB[4]; // .w is unused
|
||||
float blendFixA[3]; float stencil;
|
||||
float blendFixB[3]; float rotation;
|
||||
float texClamp[4];
|
||||
float texClampOffset[4]; // .zw are unused
|
||||
float texClampOffset[2]; float fogCoef[2];
|
||||
};
|
||||
|
||||
static const char * const ub_baseStr =
|
||||
@ -54,9 +51,6 @@ R"( mat4 u_proj;
|
||||
mat3x4 u_texmtx;
|
||||
vec4 u_uvscaleoffset;
|
||||
vec4 u_depthRange;
|
||||
vec2 u_fogcoef;
|
||||
float u_stencilReplaceValue;
|
||||
float u_rotation;
|
||||
vec4 u_matambientalpha;
|
||||
vec4 u_cullRangeMin;
|
||||
vec4 u_cullRangeMax;
|
||||
@ -66,17 +60,18 @@ R"( mat4 u_proj;
|
||||
float u_mipBias;
|
||||
vec3 u_fogcolor;
|
||||
vec3 u_texenv;
|
||||
uint u_alphacolormask;
|
||||
ivec4 u_alphacolorref;
|
||||
ivec4 u_alphacolormask;
|
||||
vec3 u_blendFixA;
|
||||
vec3 u_blendFixB;
|
||||
vec3 u_blendFixA; float u_stencilReplaceValue;
|
||||
vec3 u_blendFixB; float u_rotation;
|
||||
vec4 u_texclamp;
|
||||
vec2 u_texclampoff;
|
||||
vec2 u_fogcoef;
|
||||
)";
|
||||
|
||||
// 512 bytes. Would like to shrink more. Some colors only have 8-bit precision and we expand
|
||||
// them to float unnecessarily, could just as well expand in the shader.
|
||||
struct UB_VS_Lights {
|
||||
struct alignas(16) UB_VS_Lights {
|
||||
float ambientColor[4];
|
||||
float materialDiffuse[4];
|
||||
float materialSpecular[4];
|
||||
@ -129,7 +124,7 @@ R"( vec4 u_ambient;
|
||||
|
||||
// With some cleverness, we could get away with uploading just half this when only the four or five first
|
||||
// bones are being used. This is 384b.
|
||||
struct UB_VS_Bones {
|
||||
struct alignas(16) UB_VS_Bones {
|
||||
float bones[8][12];
|
||||
};
|
||||
|
||||
|
@ -168,6 +168,29 @@ void SoftwareTransform::SetProjMatrix(float mtx[14], bool invertedX, bool invert
|
||||
projMatrix_.translateAndScale(trans, scale);
|
||||
}
|
||||
|
||||
static void ReadWeightedNormal(Vec3f &source, VertexReader &reader, u32 vertType, bool skinningEnabled) {
|
||||
if (reader.hasNormal())
|
||||
reader.ReadNrm(source.AsArray());
|
||||
if (skinningEnabled) {
|
||||
float weights[8];
|
||||
reader.ReadWeights(weights);
|
||||
|
||||
// Have to recalculate this, unfortunately. Please use software skinning...
|
||||
Vec3f nsum(0, 0, 0);
|
||||
for (int i = 0; i < vertTypeGetNumBoneWeights(vertType); i++) {
|
||||
if (weights[i] != 0.0f) {
|
||||
Vec3f norm;
|
||||
Norm3ByMatrix43(norm.AsArray(), source.AsArray(), gstate.boneMatrix + i * 12);
|
||||
nsum += norm * weights[i];
|
||||
}
|
||||
}
|
||||
|
||||
source = nsum;
|
||||
}
|
||||
if (gstate.areNormalsReversed())
|
||||
source = -source;
|
||||
}
|
||||
|
||||
void SoftwareTransform::Decode(int prim, u32 vertType, const DecVtxFormat &decVtxFormat, int maxIndex, SoftwareTransformResult *result) {
|
||||
u8 *decoded = params_.decoded;
|
||||
TransformedVertex *transformed = params_.transformed;
|
||||
@ -284,7 +307,7 @@ void SoftwareTransform::Decode(int prim, u32 vertType, const DecVtxFormat &decVt
|
||||
}
|
||||
} else {
|
||||
float weights[8];
|
||||
// TODO: For flat, are weights from the provoking used for color/normal?
|
||||
// For flat, we need the vertex weights.
|
||||
reader.Goto(index);
|
||||
reader.ReadWeights(weights);
|
||||
|
||||
@ -358,10 +381,8 @@ void SoftwareTransform::Decode(int prim, u32 vertType, const DecVtxFormat &decVt
|
||||
|
||||
case GE_TEXMAP_TEXTURE_MATRIX:
|
||||
{
|
||||
// TODO: What's the correct behavior with flat shading? Provoked normal or real normal?
|
||||
|
||||
// Projection mapping
|
||||
Vec3f source;
|
||||
Vec3f source(0.0f, 0.0f, 1.0f);
|
||||
switch (gstate.getUVProjMode()) {
|
||||
case GE_PROJMAP_POSITION: // Use model space XYZ as source
|
||||
source = pos;
|
||||
@ -372,14 +393,28 @@ void SoftwareTransform::Decode(int prim, u32 vertType, const DecVtxFormat &decVt
|
||||
break;
|
||||
|
||||
case GE_PROJMAP_NORMALIZED_NORMAL: // Use normalized normal as source
|
||||
source = normal.NormalizedOr001(cpu_info.bSSE4_1);
|
||||
// Flat uses the vertex normal, not provoking.
|
||||
if (provokeIndOffset == 0) {
|
||||
source = normal.Normalized(cpu_info.bSSE4_1);
|
||||
} else {
|
||||
reader.Goto(index);
|
||||
ReadWeightedNormal(source, reader, vertType, skinningEnabled);
|
||||
source.Normalize();
|
||||
}
|
||||
if (!reader.hasNormal()) {
|
||||
ERROR_LOG_REPORT(G3D, "Normal projection mapping without normal?");
|
||||
}
|
||||
break;
|
||||
|
||||
case GE_PROJMAP_NORMAL: // Use non-normalized normal as source!
|
||||
source = normal;
|
||||
// Flat uses the vertex normal, not provoking.
|
||||
if (provokeIndOffset == 0) {
|
||||
source = normal;
|
||||
} else {
|
||||
// Need to read the normal for this vertex and weight it again..
|
||||
reader.Goto(index);
|
||||
ReadWeightedNormal(source, reader, vertType, skinningEnabled);
|
||||
}
|
||||
if (!reader.hasNormal()) {
|
||||
ERROR_LOG_REPORT(G3D, "Normal projection mapping without normal?");
|
||||
}
|
||||
|
@ -1231,9 +1231,9 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
|
||||
break;
|
||||
case GE_PROJMAP_NORMALIZED_NORMAL: // Use normalized transformed normal as source
|
||||
if ((doBezier || doSpline) && hasNormalTess)
|
||||
temp_tc = StringFromFormat("length(tess.nrm) == 0.0 ? vec4(0.0, 0.0, 1.0, 1.0) : vec4(normalize(%stess.nrm), 1.0)", flipNormalTess ? "-" : "");
|
||||
temp_tc = StringFromFormat("length(tess.nrm) == 0.0 ? vec4(0.0, 0.0, 0.0, 1.0) : vec4(normalize(%stess.nrm), 1.0)", flipNormalTess ? "-" : "");
|
||||
else if (hasNormal)
|
||||
temp_tc = StringFromFormat("length(normal) == 0.0 ? vec4(0.0, 0.0, 1.0, 1.0) : vec4(normalize(%snormal), 1.0)", flipNormal ? "-" : "");
|
||||
temp_tc = StringFromFormat("length(normal) == 0.0 ? vec4(0.0, 0.0, 0.0, 1.0) : vec4(normalize(%snormal), 1.0)", flipNormal ? "-" : "");
|
||||
else
|
||||
temp_tc = "vec4(0.0, 0.0, 1.0, 1.0)";
|
||||
break;
|
||||
|
@ -448,7 +448,7 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu
|
||||
SetColorUniform3Alpha255(render_, &u_alphacolorref, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
|
||||
}
|
||||
if (dirty & DIRTY_ALPHACOLORMASK) {
|
||||
SetColorUniform3iAlpha(render_, &u_alphacolormask, gstate.colortestmask, gstate.getAlphaTestMask());
|
||||
render_->SetUniformUI1(&u_alphacolormask, gstate.getColorTestMask() | (gstate.getAlphaTestMask() << 24));
|
||||
}
|
||||
if (dirty & DIRTY_COLORWRITEMASK) {
|
||||
render_->SetUniformUI1(&u_colorWriteMask, ~((gstate.pmska << 24) | (gstate.pmskc & 0xFFFFFF)));
|
||||
|
@ -45,7 +45,7 @@ inline bool different_signs(float x, float y) {
|
||||
return ((x <= 0 && y > 0) || (x > 0 && y <= 0));
|
||||
}
|
||||
|
||||
inline float clip_dotprod(const VertexData &vert, float A, float B, float C, float D) {
|
||||
inline float clip_dotprod(const ClipVertexData &vert, float A, float B, float C, float D) {
|
||||
return (vert.clippos.x * A + vert.clippos.y * B + vert.clippos.z * C + vert.clippos.w * D);
|
||||
}
|
||||
|
||||
@ -131,7 +131,61 @@ static inline bool CheckOutsideZ(ClipCoords p, int &pos, int &neg) {
|
||||
return false;
|
||||
}
|
||||
|
||||
void ProcessRect(const VertexData &v0, const VertexData &v1, BinManager &binner) {
|
||||
static void RotateUV(const VertexData &tl, const VertexData &br, VertexData &tr, VertexData &bl) {
|
||||
const int x1 = tl.screenpos.x;
|
||||
const int x2 = br.screenpos.x;
|
||||
const int y1 = tl.screenpos.y;
|
||||
const int y2 = br.screenpos.y;
|
||||
|
||||
if ((x1 < x2 && y1 > y2) || (x1 > x2 && y1 < y2)) {
|
||||
std::swap(bl.texturecoords, tr.texturecoords);
|
||||
}
|
||||
}
|
||||
|
||||
// This is used for rectangle texture projection, which is very uncommon.
|
||||
// To avoid complicating the common rectangle path, this just uses triangles.
|
||||
static void AddTriangleRect(const VertexData &v0, const VertexData &v1, BinManager &binner) {
|
||||
VertexData buf[4];
|
||||
buf[0] = v1;
|
||||
buf[0].screenpos = ScreenCoords(v0.screenpos.x, v0.screenpos.y, v1.screenpos.z);
|
||||
buf[0].texturecoords = v0.texturecoords;
|
||||
|
||||
buf[1] = v1;
|
||||
buf[1].screenpos = ScreenCoords(v0.screenpos.x, v1.screenpos.y, v1.screenpos.z);
|
||||
buf[1].texturecoords = Vec3Packed<float>(v0.texturecoords.x, v1.texturecoords.y, v0.texturecoords.z);
|
||||
|
||||
buf[2] = v1;
|
||||
buf[2].screenpos = ScreenCoords(v1.screenpos.x, v0.screenpos.y, v1.screenpos.z);
|
||||
buf[2].texturecoords = Vec3Packed<float>(v1.texturecoords.x, v0.texturecoords.y, v1.texturecoords.z);
|
||||
|
||||
buf[3] = v1;
|
||||
|
||||
VertexData *topleft = &buf[0];
|
||||
VertexData *topright = &buf[1];
|
||||
VertexData *bottomleft = &buf[2];
|
||||
VertexData *bottomright = &buf[3];
|
||||
|
||||
// DrawTriangle always culls, so sort out the drawing order.
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (buf[i].screenpos.x < topleft->screenpos.x && buf[i].screenpos.y < topleft->screenpos.y)
|
||||
topleft = &buf[i];
|
||||
if (buf[i].screenpos.x > topright->screenpos.x && buf[i].screenpos.y < topright->screenpos.y)
|
||||
topright = &buf[i];
|
||||
if (buf[i].screenpos.x < bottomleft->screenpos.x && buf[i].screenpos.y > bottomleft->screenpos.y)
|
||||
bottomleft = &buf[i];
|
||||
if (buf[i].screenpos.x > bottomright->screenpos.x && buf[i].screenpos.y > bottomright->screenpos.y)
|
||||
bottomright = &buf[i];
|
||||
}
|
||||
|
||||
RotateUV(v0, v1, *topright, *bottomleft);
|
||||
|
||||
binner.AddTriangle(*topleft, *topright, *bottomleft);
|
||||
binner.AddTriangle(*bottomleft, *topright, *topleft);
|
||||
binner.AddTriangle(*topright, *bottomright, *bottomleft);
|
||||
binner.AddTriangle(*bottomleft, *bottomright, *topright);
|
||||
}
|
||||
|
||||
void ProcessRect(const ClipVertexData &v0, const ClipVertexData &v1, BinManager &binner) {
|
||||
if (!binner.State().throughMode) {
|
||||
// If any verts were outside range, throw the entire prim away.
|
||||
if (v0.OutsideRange() || v1.OutsideRange())
|
||||
@ -149,37 +203,44 @@ void ProcessRect(const VertexData &v0, const VertexData &v1, BinManager &binner)
|
||||
else if (outsidePos >= 2 || outsideNeg >= 2)
|
||||
return;
|
||||
|
||||
if (v0.fogdepth != v1.fogdepth) {
|
||||
if (v0.v.fogdepth != v1.v.fogdepth) {
|
||||
// Rectangles seem to always use nearest along X for fog depth, but reversed.
|
||||
// TODO: Check exactness of middle.
|
||||
VertexData vhalf0 = v1;
|
||||
vhalf0.screenpos.x = v0.screenpos.x + (v1.screenpos.x - v0.screenpos.x) / 2;
|
||||
VertexData vhalf0 = v1.v;
|
||||
vhalf0.screenpos.x = v0.v.screenpos.x + (v1.v.screenpos.x - v0.v.screenpos.x) / 2;
|
||||
|
||||
VertexData vhalf1 = v1;
|
||||
vhalf1.screenpos.x = v0.screenpos.x + (v1.screenpos.x - v0.screenpos.x) / 2;
|
||||
vhalf1.screenpos.y = v0.screenpos.y;
|
||||
VertexData vhalf1 = v1.v;
|
||||
vhalf1.screenpos.x = v0.v.screenpos.x + (v1.v.screenpos.x - v0.v.screenpos.x) / 2;
|
||||
vhalf1.screenpos.y = v0.v.screenpos.y;
|
||||
|
||||
VertexData vrev1 = v1;
|
||||
vrev1.fogdepth = v0.fogdepth;
|
||||
VertexData vrev1 = v1.v;
|
||||
vrev1.fogdepth = v0.v.fogdepth;
|
||||
|
||||
binner.AddRect(v0, vhalf0);
|
||||
binner.AddRect(vhalf1, vrev1);
|
||||
if (binner.State().textureProj) {
|
||||
AddTriangleRect(v0.v, vhalf0, binner);
|
||||
AddTriangleRect(vhalf1, vrev1, binner);
|
||||
} else {
|
||||
binner.AddRect(v0.v, vhalf0);
|
||||
binner.AddRect(vhalf1, vrev1);
|
||||
}
|
||||
} else if (binner.State().textureProj) {
|
||||
AddTriangleRect(v0.v, v1.v, binner);
|
||||
} else {
|
||||
binner.AddRect(v0, v1);
|
||||
binner.AddRect(v0.v, v1.v);
|
||||
}
|
||||
} else {
|
||||
// through mode handling
|
||||
if (Rasterizer::RectangleFastPath(v0, v1, binner)) {
|
||||
if (Rasterizer::RectangleFastPath(v0.v, v1.v, binner)) {
|
||||
return;
|
||||
} else if (gstate.isModeClear() && !gstate.isDitherEnabled()) {
|
||||
binner.AddClearRect(v0, v1);
|
||||
binner.AddClearRect(v0.v, v1.v);
|
||||
} else {
|
||||
binner.AddRect(v0, v1);
|
||||
binner.AddRect(v0.v, v1.v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ProcessPoint(const VertexData &v0, BinManager &binner) {
|
||||
void ProcessPoint(const ClipVertexData &v0, BinManager &binner) {
|
||||
// If any verts were outside range, throw the entire prim away.
|
||||
if (!binner.State().throughMode) {
|
||||
if (v0.OutsideRange())
|
||||
@ -187,13 +248,13 @@ void ProcessPoint(const VertexData &v0, BinManager &binner) {
|
||||
}
|
||||
|
||||
// Points need no clipping. Will be bounds checked in the rasterizer (which seems backwards?)
|
||||
binner.AddPoint(v0);
|
||||
binner.AddPoint(v0.v);
|
||||
}
|
||||
|
||||
void ProcessLine(const VertexData &v0, const VertexData &v1, BinManager &binner) {
|
||||
void ProcessLine(const ClipVertexData &v0, const ClipVertexData &v1, BinManager &binner) {
|
||||
if (binner.State().throughMode) {
|
||||
// Actually, should clip this one too so we don't need to do bounds checks in the rasterizer.
|
||||
binner.AddLine(v0, v1);
|
||||
binner.AddLine(v0.v, v1.v);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -216,24 +277,26 @@ void ProcessLine(const VertexData &v0, const VertexData &v1, BinManager &binner)
|
||||
int mask1 = CalcClipMask(v1.clippos);
|
||||
int mask = mask0 | mask1;
|
||||
if ((mask & CLIP_NEG_Z_BIT) == 0) {
|
||||
binner.AddLine(v0, v1);
|
||||
binner.AddLine(v0.v, v1.v);
|
||||
return;
|
||||
}
|
||||
|
||||
VertexData ClippedVertices[2] = { v0, v1 };
|
||||
VertexData *Vertices[2] = { &ClippedVertices[0], &ClippedVertices[1] };
|
||||
ClipVertexData ClippedVertices[2] = { v0, v1 };
|
||||
ClipVertexData *Vertices[2] = { &ClippedVertices[0], &ClippedVertices[1] };
|
||||
bool clipped = false;
|
||||
CLIP_LINE(CLIP_NEG_Z_BIT, 0, 0, 1, 1);
|
||||
|
||||
VertexData data[2] = { *Vertices[0], *Vertices[1] };
|
||||
ClipVertexData data[2] = { *Vertices[0], *Vertices[1] };
|
||||
if (clipped) {
|
||||
data[0].screenpos = TransformUnit::ClipToScreen(data[0].clippos);
|
||||
data[1].screenpos = TransformUnit::ClipToScreen(data[1].clippos);
|
||||
data[0].v.screenpos = TransformUnit::ClipToScreen(data[0].clippos);
|
||||
data[1].v.screenpos = TransformUnit::ClipToScreen(data[1].clippos);
|
||||
data[0].v.clipw = data[0].clippos.w;
|
||||
data[1].v.clipw = data[1].clippos.w;
|
||||
}
|
||||
binner.AddLine(data[0], data[1]);
|
||||
binner.AddLine(data[0].v, data[1].v);
|
||||
}
|
||||
|
||||
void ProcessTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2, const VertexData &provoking, BinManager &binner) {
|
||||
void ProcessTriangle(const ClipVertexData &v0, const ClipVertexData &v1, const ClipVertexData &v2, const ClipVertexData &provoking, BinManager &binner) {
|
||||
int mask = 0;
|
||||
if (!binner.State().throughMode) {
|
||||
// If any verts were outside range, throw the entire prim away.
|
||||
@ -262,20 +325,20 @@ void ProcessTriangle(const VertexData &v0, const VertexData &v1, const VertexDat
|
||||
if ((mask & CLIP_NEG_Z_BIT) == 0) {
|
||||
if (gstate.getShadeMode() == GE_SHADE_FLAT) {
|
||||
// So that the order of clipping doesn't matter...
|
||||
VertexData corrected2 = v2;
|
||||
corrected2.color0 = provoking.color0;
|
||||
corrected2.color1 = provoking.color1;
|
||||
binner.AddTriangle(v0, v1, corrected2);
|
||||
VertexData corrected2 = v2.v;
|
||||
corrected2.color0 = provoking.v.color0;
|
||||
corrected2.color1 = provoking.v.color1;
|
||||
binner.AddTriangle(v0.v, v1.v, corrected2);
|
||||
} else {
|
||||
binner.AddTriangle(v0, v1, v2);
|
||||
binner.AddTriangle(v0.v, v1.v, v2.v);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
enum { NUM_CLIPPED_VERTICES = 3, NUM_INDICES = NUM_CLIPPED_VERTICES + 3 };
|
||||
|
||||
VertexData* Vertices[NUM_INDICES];
|
||||
VertexData ClippedVertices[NUM_INDICES];
|
||||
ClipVertexData* Vertices[NUM_INDICES];
|
||||
ClipVertexData ClippedVertices[NUM_INDICES];
|
||||
for (int i = 0; i < NUM_INDICES; ++i)
|
||||
Vertices[i] = &ClippedVertices[i];
|
||||
|
||||
@ -319,22 +382,25 @@ void ProcessTriangle(const VertexData &v0, const VertexData &v1, const VertexDat
|
||||
|
||||
for (int i = 0; i + 3 <= numIndices; i += 3) {
|
||||
if (indices[i] != SKIP_FLAG) {
|
||||
VertexData &subv0 = *Vertices[indices[i + 0]];
|
||||
VertexData &subv1 = *Vertices[indices[i + 1]];
|
||||
VertexData &subv2 = *Vertices[indices[i + 2]];
|
||||
ClipVertexData &subv0 = *Vertices[indices[i + 0]];
|
||||
ClipVertexData &subv1 = *Vertices[indices[i + 1]];
|
||||
ClipVertexData &subv2 = *Vertices[indices[i + 2]];
|
||||
if (clipped) {
|
||||
subv0.screenpos = TransformUnit::ClipToScreen(subv0.clippos);
|
||||
subv1.screenpos = TransformUnit::ClipToScreen(subv1.clippos);
|
||||
subv2.screenpos = TransformUnit::ClipToScreen(subv2.clippos);
|
||||
subv0.v.screenpos = TransformUnit::ClipToScreen(subv0.clippos);
|
||||
subv1.v.screenpos = TransformUnit::ClipToScreen(subv1.clippos);
|
||||
subv2.v.screenpos = TransformUnit::ClipToScreen(subv2.clippos);
|
||||
subv0.v.clipw = subv0.clippos.w;
|
||||
subv1.v.clipw = subv1.clippos.w;
|
||||
subv2.v.clipw = subv2.clippos.w;
|
||||
}
|
||||
|
||||
if (gstate.getShadeMode() == GE_SHADE_FLAT) {
|
||||
// So that the order of clipping doesn't matter...
|
||||
subv2.color0 = provoking.color0;
|
||||
subv2.color1 = provoking.color1;
|
||||
subv2.v.color0 = provoking.v.color0;
|
||||
subv2.v.color1 = provoking.v.color1;
|
||||
}
|
||||
|
||||
binner.AddTriangle(subv0, subv1, subv2);
|
||||
binner.AddTriangle(subv0.v, subv1.v, subv2.v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -26,9 +26,9 @@ class BinManager;
|
||||
|
||||
namespace Clipper {
|
||||
|
||||
void ProcessPoint(const VertexData &v0, BinManager &binner);
|
||||
void ProcessLine(const VertexData &v0, const VertexData &v1, BinManager &binner);
|
||||
void ProcessTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2, const VertexData &provoking, BinManager &binner);
|
||||
void ProcessRect(const VertexData &v0, const VertexData &v1, BinManager &binner);
|
||||
void ProcessPoint(const ClipVertexData &v0, BinManager &binner);
|
||||
void ProcessLine(const ClipVertexData &v0, const ClipVertexData &v1, BinManager &binner);
|
||||
void ProcessTriangle(const ClipVertexData &v0, const ClipVertexData &v1, const ClipVertexData &v2, const ClipVertexData &provoking, BinManager &binner);
|
||||
void ProcessRect(const ClipVertexData &v0, const ClipVertexData &v1, BinManager &binner);
|
||||
|
||||
}
|
||||
|
@ -129,6 +129,7 @@ void ComputeRasterizerState(RasterizerState *state) {
|
||||
state->mipFilt = gstate.isMipmapFilteringEnabled();
|
||||
state->minFilt = gstate.isMinifyFilteringEnabled();
|
||||
state->magFilt = gstate.isMagnifyFilteringEnabled();
|
||||
state->textureProj = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX;
|
||||
}
|
||||
|
||||
state->shadeGouraud = gstate.getShadeMode() == GE_SHADE_GOURAUD;
|
||||
@ -224,12 +225,9 @@ static inline u8 ClampFogDepth(float fogdepth) {
|
||||
}
|
||||
|
||||
static inline void GetTextureCoordinates(const VertexData& v0, const VertexData& v1, const float p, float &s, float &t) {
|
||||
// All UV gen modes, by the time they get here, behave the same.
|
||||
|
||||
// TODO: What happens if vertex has no texture coordinates?
|
||||
// Note that for environment mapping, texture coordinates have been calculated during lighting
|
||||
float q0 = 1.f / v0.clippos.w;
|
||||
float q1 = 1.f / v1.clippos.w;
|
||||
float q0 = 1.f / v0.clipw;
|
||||
float q1 = 1.f / v1.clipw;
|
||||
float wq0 = p * q0;
|
||||
float wq1 = (1.0f - p) * q1;
|
||||
|
||||
@ -238,14 +236,26 @@ static inline void GetTextureCoordinates(const VertexData& v0, const VertexData&
|
||||
t = (v0.texturecoords.t() * wq0 + v1.texturecoords.t() * wq1) * q_recip;
|
||||
}
|
||||
|
||||
static inline void GetTextureCoordinates(const VertexData &v0, const VertexData &v1, const VertexData &v2, const Vec4<int> &w0, const Vec4<int> &w1, const Vec4<int> &w2, const Vec4<float> &wsum_recip, Vec4<float> &s, Vec4<float> &t) {
|
||||
// All UV gen modes, by the time they get here, behave the same.
|
||||
static inline void GetTextureCoordinatesProj(const VertexData& v0, const VertexData& v1, const float p, float &s, float &t) {
|
||||
// This is for texture matrix projection.
|
||||
float q0 = 1.f / v0.clipw;
|
||||
float q1 = 1.f / v1.clipw;
|
||||
float wq0 = p * q0;
|
||||
float wq1 = (1.0f - p) * q1;
|
||||
|
||||
// TODO: What happens if vertex has no texture coordinates?
|
||||
float q_recip = 1.0f / (wq0 + wq1);
|
||||
float q = (v0.texturecoords.q() * wq0 + v1.texturecoords.q() * wq1) * q_recip;
|
||||
q_recip *= 1.0f / q;
|
||||
|
||||
s = (v0.texturecoords.s() * wq0 + v1.texturecoords.s() * wq1) * q_recip;
|
||||
t = (v0.texturecoords.t() * wq0 + v1.texturecoords.t() * wq1) * q_recip;
|
||||
}
|
||||
|
||||
static inline void GetTextureCoordinates(const VertexData &v0, const VertexData &v1, const VertexData &v2, const Vec4<int> &w0, const Vec4<int> &w1, const Vec4<int> &w2, const Vec4<float> &wsum_recip, Vec4<float> &s, Vec4<float> &t) {
|
||||
// Note that for environment mapping, texture coordinates have been calculated during lighting.
|
||||
float q0 = 1.f / v0.clippos.w;
|
||||
float q1 = 1.f / v1.clippos.w;
|
||||
float q2 = 1.f / v2.clippos.w;
|
||||
float q0 = 1.f / v0.clipw;
|
||||
float q1 = 1.f / v1.clipw;
|
||||
float q2 = 1.f / v2.clipw;
|
||||
Vec4<float> wq0 = w0.Cast<float>() * q0;
|
||||
Vec4<float> wq1 = w1.Cast<float>() * q1;
|
||||
Vec4<float> wq2 = w2.Cast<float>() * q2;
|
||||
@ -255,6 +265,23 @@ static inline void GetTextureCoordinates(const VertexData &v0, const VertexData
|
||||
t = Interpolate(v0.texturecoords.t(), v1.texturecoords.t(), v2.texturecoords.t(), wq0, wq1, wq2, q_recip);
|
||||
}
|
||||
|
||||
static inline void GetTextureCoordinatesProj(const VertexData &v0, const VertexData &v1, const VertexData &v2, const Vec4<int> &w0, const Vec4<int> &w1, const Vec4<int> &w2, const Vec4<float> &wsum_recip, Vec4<float> &s, Vec4<float> &t) {
|
||||
// This is for texture matrix projection.
|
||||
float q0 = 1.f / v0.clipw;
|
||||
float q1 = 1.f / v1.clipw;
|
||||
float q2 = 1.f / v2.clipw;
|
||||
Vec4<float> wq0 = w0.Cast<float>() * q0;
|
||||
Vec4<float> wq1 = w1.Cast<float>() * q1;
|
||||
Vec4<float> wq2 = w2.Cast<float>() * q2;
|
||||
|
||||
Vec4<float> q_recip = (wq0 + wq1 + wq2).Reciprocal();
|
||||
Vec4<float> q = Interpolate(v0.texturecoords.q(), v1.texturecoords.q(), v2.texturecoords.q(), wq0, wq1, wq2, q_recip);
|
||||
q_recip = q_recip * q.Reciprocal();
|
||||
|
||||
s = Interpolate(v0.texturecoords.s(), v1.texturecoords.s(), v2.texturecoords.s(), wq0, wq1, wq2, q_recip);
|
||||
t = Interpolate(v0.texturecoords.t(), v1.texturecoords.t(), v2.texturecoords.t(), wq0, wq1, wq2, q_recip);
|
||||
}
|
||||
|
||||
static inline void SetPixelDepth(int x, int y, int stride, u16 value) {
|
||||
depthbuf.Set16(x, y, stride, value);
|
||||
}
|
||||
@ -676,6 +703,9 @@ void DrawTriangleSlice(
|
||||
// For levels > 0, mipmapping is always based on level 0. Simpler to scale first.
|
||||
s *= 1.0f / (float)(1 << state.samplerID.width0Shift);
|
||||
t *= 1.0f / (float)(1 << state.samplerID.height0Shift);
|
||||
} else if (state.textureProj) {
|
||||
// Texture coordinate interpolation must definitely be perspective-correct.
|
||||
GetTextureCoordinatesProj(v0, v1, v2, w0, w1, w2, wsum_recip, s, t);
|
||||
} else {
|
||||
// Texture coordinate interpolation must definitely be perspective-correct.
|
||||
GetTextureCoordinates(v0, v1, v2, w0, w1, w2, wsum_recip, s, t);
|
||||
@ -772,8 +802,9 @@ void DrawRectangle(const VertexData &v0, const VertexData &v1, const BinCoords &
|
||||
Vec2f stx(0.0f, 0.0f);
|
||||
Vec2f sty(0.0f, 0.0f);
|
||||
if (state.enableTextures) {
|
||||
Vec2f tc0 = v0.texturecoords;
|
||||
Vec2f tc1 = v1.texturecoords;
|
||||
// Note: texture projection is not handled here, those always turn into triangles.
|
||||
Vec2f tc0 = v0.texturecoords.uv();
|
||||
Vec2f tc1 = v1.texturecoords.uv();
|
||||
if (state.throughMode) {
|
||||
// For levels > 0, mipmapping is always based on level 0. Simpler to scale first.
|
||||
tc0.s() *= 1.0f / (float)(1 << state.samplerID.width0Shift);
|
||||
@ -960,6 +991,8 @@ void DrawPoint(const VertexData &v0, const BinCoords &range, const RasterizerSta
|
||||
if (state.throughMode) {
|
||||
s *= 1.0f / (float)(1 << state.samplerID.width0Shift);
|
||||
t *= 1.0f / (float)(1 << state.samplerID.height0Shift);
|
||||
} else if (state.textureProj) {
|
||||
GetTextureCoordinatesProj(v0, v0, 0.0f, s, t);
|
||||
} else {
|
||||
// Texture coordinate interpolation must definitely be perspective-correct.
|
||||
GetTextureCoordinates(v0, v0, 0.0f, s, t);
|
||||
@ -1270,13 +1303,16 @@ void DrawLine(const VertexData &v0, const VertexData &v1, const BinCoords &range
|
||||
float s, s1;
|
||||
float t, t1;
|
||||
if (state.throughMode) {
|
||||
Vec2<float> tc = (v0.texturecoords * (float)(steps - i) + v1.texturecoords * (float)i) / steps1;
|
||||
Vec2<float> tc1 = (v0.texturecoords * (float)(steps - i - 1) + v1.texturecoords * (float)(i + 1)) / steps1;
|
||||
Vec2<float> tc = (v0.texturecoords.uv() * (float)(steps - i) + v1.texturecoords.uv() * (float)i) / steps1;
|
||||
Vec2<float> tc1 = (v0.texturecoords.uv() * (float)(steps - i - 1) + v1.texturecoords.uv() * (float)(i + 1)) / steps1;
|
||||
|
||||
s = tc.s() * (1.0f / (float)(1 << state.samplerID.width0Shift));
|
||||
s1 = tc1.s() * (1.0f / (float)(1 << state.samplerID.width0Shift));
|
||||
t = tc.t() * (1.0f / (float)(1 << state.samplerID.height0Shift));
|
||||
t1 = tc1.t() * (1.0f / (float)(1 << state.samplerID.height0Shift));
|
||||
} else if (state.textureProj) {
|
||||
GetTextureCoordinatesProj(v0, v1, (float)(steps - i) / steps1, s, t);
|
||||
GetTextureCoordinatesProj(v0, v1, (float)(steps - i - 1) / steps1, s1, t1);
|
||||
} else {
|
||||
// Texture coordinate interpolation must definitely be perspective-correct.
|
||||
GetTextureCoordinates(v0, v1, (float)(steps - i) / steps1, s, t);
|
||||
|
@ -54,6 +54,7 @@ struct RasterizerState {
|
||||
bool minFilt : 1;
|
||||
bool magFilt : 1;
|
||||
bool antialiasLines : 1;
|
||||
bool textureProj : 1;
|
||||
};
|
||||
|
||||
#if defined(SOFTGPU_MEMORY_TAGGING_DETAILED) || defined(SOFTGPU_MEMORY_TAGGING_BASIC)
|
||||
|
@ -356,7 +356,7 @@ bool RectangleFastPath(const VertexData &v0, const VertexData &v1, BinManager &b
|
||||
// Currently only works for TL/BR, which is the most common but not required.
|
||||
bool orient_check = xdiff >= 0 && ydiff >= 0;
|
||||
// We already have a fast path for clear in ClearRectangle.
|
||||
bool state_check = state.throughMode && !state.pixelID.clearMode && !state.samplerID.hasAnyMips && NoClampOrWrap(state, v0.texturecoords) && NoClampOrWrap(state, v1.texturecoords);
|
||||
bool state_check = state.throughMode && !state.pixelID.clearMode && !state.samplerID.hasAnyMips && NoClampOrWrap(state, v0.texturecoords.uv()) && NoClampOrWrap(state, v1.texturecoords.uv());
|
||||
// This doesn't work well with offset drawing, see #15876. Through never has a subpixel offset.
|
||||
bool subpixel_check = ((v0.screenpos.x | v0.screenpos.y | v1.screenpos.x | v1.screenpos.y) & 0xF) == 0;
|
||||
if ((coord_check || !state.enableTextures) && orient_check && state_check && subpixel_check) {
|
||||
@ -393,16 +393,16 @@ bool RectangleFastPath(const VertexData &v0, const VertexData &v1, BinManager &b
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool AreCoordsRectangleCompatible(const RasterizerState &state, const VertexData &data0, const VertexData &data1) {
|
||||
if (data1.color0 != data0.color0)
|
||||
static bool AreCoordsRectangleCompatible(const RasterizerState &state, const ClipVertexData &data0, const ClipVertexData &data1) {
|
||||
if (data1.v.color0 != data0.v.color0)
|
||||
return false;
|
||||
if (data1.screenpos.z != data0.screenpos.z) {
|
||||
if (data1.v.screenpos.z != data0.v.screenpos.z) {
|
||||
// Sometimes, we don't actually care about z.
|
||||
if (state.pixelID.depthWrite || state.pixelID.DepthTestFunc() != GE_COMP_ALWAYS)
|
||||
return false;
|
||||
}
|
||||
if (!state.throughMode) {
|
||||
if (data1.color1 != data0.color1)
|
||||
if (data1.v.color1 != data0.v.color1)
|
||||
return false;
|
||||
// This means it should be culled, outside range.
|
||||
if (data1.OutsideRange() || data0.OutsideRange())
|
||||
@ -414,26 +414,29 @@ static bool AreCoordsRectangleCompatible(const RasterizerState &state, const Ver
|
||||
if (data1.clippos.w - halftexel > data0.clippos.w || data1.clippos.w + halftexel < data0.clippos.w)
|
||||
return false;
|
||||
}
|
||||
if (state.pixelID.applyFog && data1.fogdepth != data0.fogdepth) {
|
||||
// If we're projecting textures, only allow an exact match for simplicity.
|
||||
if (state.enableTextures && data1.v.texturecoords.q() != data0.v.texturecoords.q())
|
||||
return false;
|
||||
if (state.pixelID.applyFog && data1.v.fogdepth != data0.v.fogdepth) {
|
||||
// Similar to w, this only matters if they're farther apart than 1/255.
|
||||
static constexpr float foghalfstep = 0.5f / 255.0f;
|
||||
if (data1.fogdepth - foghalfstep > data0.fogdepth || data1.fogdepth + foghalfstep < data0.fogdepth)
|
||||
if (data1.v.fogdepth - foghalfstep > data0.v.fogdepth || data1.v.fogdepth + foghalfstep < data0.v.fogdepth)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DetectRectangleFromStrip(const RasterizerState &state, const VertexData data[4], int *tlIndex, int *brIndex) {
|
||||
bool DetectRectangleFromStrip(const RasterizerState &state, const ClipVertexData data[4], int *tlIndex, int *brIndex) {
|
||||
// Color and Z must be flat. Also find the TL and BR meanwhile.
|
||||
int tl = 0, br = 0;
|
||||
for (int i = 1; i < 4; ++i) {
|
||||
if (!AreCoordsRectangleCompatible(state, data[i], data[0]))
|
||||
return false;
|
||||
|
||||
if (data[i].screenpos.x <= data[tl].screenpos.x && data[i].screenpos.y <= data[tl].screenpos.y)
|
||||
if (data[i].v.screenpos.x <= data[tl].v.screenpos.x && data[i].v.screenpos.y <= data[tl].v.screenpos.y)
|
||||
tl = i;
|
||||
if (data[i].screenpos.x >= data[br].screenpos.x && data[i].screenpos.y >= data[br].screenpos.y)
|
||||
if (data[i].v.screenpos.x >= data[br].v.screenpos.x && data[i].v.screenpos.y >= data[br].v.screenpos.y)
|
||||
br = i;
|
||||
}
|
||||
|
||||
@ -442,36 +445,36 @@ bool DetectRectangleFromStrip(const RasterizerState &state, const VertexData dat
|
||||
|
||||
// OK, now let's look at data to detect rectangles. There are a few possibilities
|
||||
// but we focus on Darkstalkers for now.
|
||||
if (data[0].screenpos.x == data[1].screenpos.x &&
|
||||
data[0].screenpos.y == data[2].screenpos.y &&
|
||||
data[2].screenpos.x == data[3].screenpos.x &&
|
||||
data[1].screenpos.y == data[3].screenpos.y) {
|
||||
if (data[0].v.screenpos.x == data[1].v.screenpos.x &&
|
||||
data[0].v.screenpos.y == data[2].v.screenpos.y &&
|
||||
data[2].v.screenpos.x == data[3].v.screenpos.x &&
|
||||
data[1].v.screenpos.y == data[3].v.screenpos.y) {
|
||||
// Okay, this is in the shape of a rectangle, but what about texture?
|
||||
if (!state.enableTextures)
|
||||
return true;
|
||||
|
||||
if (data[0].texturecoords.x == data[1].texturecoords.x &&
|
||||
data[0].texturecoords.y == data[2].texturecoords.y &&
|
||||
data[2].texturecoords.x == data[3].texturecoords.x &&
|
||||
data[1].texturecoords.y == data[3].texturecoords.y) {
|
||||
if (data[0].v.texturecoords.x == data[1].v.texturecoords.x &&
|
||||
data[0].v.texturecoords.y == data[2].v.texturecoords.y &&
|
||||
data[2].v.texturecoords.x == data[3].v.texturecoords.x &&
|
||||
data[1].v.texturecoords.y == data[3].v.texturecoords.y) {
|
||||
// It's a rectangle!
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// There's the other vertex order too...
|
||||
if (data[0].screenpos.x == data[2].screenpos.x &&
|
||||
data[0].screenpos.y == data[1].screenpos.y &&
|
||||
data[1].screenpos.x == data[3].screenpos.x &&
|
||||
data[2].screenpos.y == data[3].screenpos.y) {
|
||||
if (data[0].v.screenpos.x == data[2].v.screenpos.x &&
|
||||
data[0].v.screenpos.y == data[1].v.screenpos.y &&
|
||||
data[1].v.screenpos.x == data[3].v.screenpos.x &&
|
||||
data[2].v.screenpos.y == data[3].v.screenpos.y) {
|
||||
// Okay, this is in the shape of a rectangle, but what about texture?
|
||||
if (!state.enableTextures)
|
||||
return true;
|
||||
|
||||
if (data[0].texturecoords.x == data[2].texturecoords.x &&
|
||||
data[0].texturecoords.y == data[1].texturecoords.y &&
|
||||
data[1].texturecoords.x == data[3].texturecoords.x &&
|
||||
data[2].texturecoords.y == data[3].texturecoords.y) {
|
||||
if (data[0].v.texturecoords.x == data[2].v.texturecoords.x &&
|
||||
data[0].v.texturecoords.y == data[1].v.texturecoords.y &&
|
||||
data[1].v.texturecoords.x == data[3].v.texturecoords.x &&
|
||||
data[2].v.texturecoords.y == data[3].v.texturecoords.y) {
|
||||
// It's a rectangle!
|
||||
return true;
|
||||
}
|
||||
@ -480,7 +483,7 @@ bool DetectRectangleFromStrip(const RasterizerState &state, const VertexData dat
|
||||
return false;
|
||||
}
|
||||
|
||||
bool DetectRectangleFromFan(const RasterizerState &state, const VertexData *data, int c, int *tlIndex, int *brIndex) {
|
||||
bool DetectRectangleFromFan(const RasterizerState &state, const ClipVertexData *data, int c, int *tlIndex, int *brIndex) {
|
||||
// Color and Z must be flat.
|
||||
for (int i = 1; i < c; ++i) {
|
||||
if (!AreCoordsRectangleCompatible(state, data[i], data[0]))
|
||||
@ -489,8 +492,8 @@ bool DetectRectangleFromFan(const RasterizerState &state, const VertexData *data
|
||||
|
||||
// Check for the common case: a single TL-TR-BR-BL.
|
||||
if (c == 4) {
|
||||
const auto &pos0 = data[0].screenpos, &pos1 = data[1].screenpos;
|
||||
const auto &pos2 = data[2].screenpos, &pos3 = data[3].screenpos;
|
||||
const auto &pos0 = data[0].v.screenpos, &pos1 = data[1].v.screenpos;
|
||||
const auto &pos2 = data[2].v.screenpos, &pos3 = data[3].v.screenpos;
|
||||
if (pos0.x == pos3.x && pos1.x == pos2.x && pos0.y == pos1.y && pos3.y == pos2.y) {
|
||||
// Looking like yes. Set TL/BR based on y order first...
|
||||
*tlIndex = pos0.y > pos3.y ? 2 : 0;
|
||||
@ -505,13 +508,13 @@ bool DetectRectangleFromFan(const RasterizerState &state, const VertexData *data
|
||||
if (!state.enableTextures)
|
||||
return true;
|
||||
|
||||
const auto &textl = data[*tlIndex].texturecoords, &textr = data[*tlIndex ^ 1].texturecoords;
|
||||
const auto &texbl = data[*brIndex ^ 1].texturecoords, &texbr = data[*brIndex].texturecoords;
|
||||
const auto &textl = data[*tlIndex].v.texturecoords, &textr = data[*tlIndex ^ 1].v.texturecoords;
|
||||
const auto &texbl = data[*brIndex ^ 1].v.texturecoords, &texbr = data[*brIndex].v.texturecoords;
|
||||
|
||||
if (textl.x == texbl.x && textr.x == texbr.x && textl.y == textr.y && texbl.y == texbr.y) {
|
||||
// Okay, the texture is also good, but let's avoid rotation issues.
|
||||
const auto &postl = data[*tlIndex].screenpos;
|
||||
const auto &posbr = data[*brIndex].screenpos;
|
||||
const auto &postl = data[*tlIndex].v.screenpos;
|
||||
const auto &posbr = data[*brIndex].v.screenpos;
|
||||
return textl.y < texbr.y && postl.y < posbr.y && textl.x < texbr.x && postl.x < posbr.x;
|
||||
}
|
||||
}
|
||||
@ -520,26 +523,26 @@ bool DetectRectangleFromFan(const RasterizerState &state, const VertexData *data
|
||||
return false;
|
||||
}
|
||||
|
||||
bool DetectRectangleFromPair(const RasterizerState &state, const VertexData data[6], int *tlIndex, int *brIndex) {
|
||||
bool DetectRectangleFromPair(const RasterizerState &state, const ClipVertexData data[6], int *tlIndex, int *brIndex) {
|
||||
// Color and Z must be flat. Also find the TL and BR meanwhile.
|
||||
int tl = 0, br = 0;
|
||||
for (int i = 1; i < 6; ++i) {
|
||||
if (!AreCoordsRectangleCompatible(state, data[i], data[0]))
|
||||
return false;
|
||||
|
||||
if (data[i].screenpos.x <= data[tl].screenpos.x && data[i].screenpos.y <= data[tl].screenpos.y)
|
||||
if (data[i].v.screenpos.x <= data[tl].v.screenpos.x && data[i].v.screenpos.y <= data[tl].v.screenpos.y)
|
||||
tl = i;
|
||||
if (data[i].screenpos.x >= data[br].screenpos.x && data[i].screenpos.y >= data[br].screenpos.y)
|
||||
if (data[i].v.screenpos.x >= data[br].v.screenpos.x && data[i].v.screenpos.y >= data[br].v.screenpos.y)
|
||||
br = i;
|
||||
}
|
||||
|
||||
*tlIndex = tl;
|
||||
*brIndex = br;
|
||||
|
||||
auto xat = [&](int i) { return data[i].screenpos.x; };
|
||||
auto yat = [&](int i) { return data[i].screenpos.y; };
|
||||
auto uat = [&](int i) { return data[i].texturecoords.x; };
|
||||
auto vat = [&](int i) { return data[i].texturecoords.y; };
|
||||
auto xat = [&](int i) { return data[i].v.screenpos.x; };
|
||||
auto yat = [&](int i) { return data[i].v.screenpos.y; };
|
||||
auto uat = [&](int i) { return data[i].v.texturecoords.x; };
|
||||
auto vat = [&](int i) { return data[i].v.texturecoords.y; };
|
||||
|
||||
// A likely order would be: TL, TR, BR, TL, BR, BL. We'd have the last index of each.
|
||||
// TODO: Make more generic.
|
||||
@ -567,12 +570,12 @@ bool DetectRectangleFromPair(const RasterizerState &state, const VertexData data
|
||||
return false;
|
||||
}
|
||||
|
||||
bool DetectRectangleThroughModeSlices(const RasterizerState &state, const VertexData data[4]) {
|
||||
bool DetectRectangleThroughModeSlices(const RasterizerState &state, const ClipVertexData data[4]) {
|
||||
// Color and Z must be flat.
|
||||
for (int i = 1; i < 4; ++i) {
|
||||
if (!(data[i].color0 == data[0].color0))
|
||||
if (!(data[i].v.color0 == data[0].v.color0))
|
||||
return false;
|
||||
if (!(data[i].screenpos.z == data[0].screenpos.z)) {
|
||||
if (!(data[i].v.screenpos.z == data[0].v.screenpos.z)) {
|
||||
// Sometimes, we don't actually care about z.
|
||||
if (state.pixelID.depthWrite || state.pixelID.DepthTestFunc() != GE_COMP_ALWAYS)
|
||||
return false;
|
||||
@ -580,15 +583,15 @@ bool DetectRectangleThroughModeSlices(const RasterizerState &state, const Vertex
|
||||
}
|
||||
|
||||
// Games very commonly use vertical strips of rectangles. Detect and combine.
|
||||
const auto &tl1 = data[0].screenpos, &br1 = data[1].screenpos;
|
||||
const auto &tl2 = data[2].screenpos, &br2 = data[3].screenpos;
|
||||
const auto &tl1 = data[0].v.screenpos, &br1 = data[1].v.screenpos;
|
||||
const auto &tl2 = data[2].v.screenpos, &br2 = data[3].v.screenpos;
|
||||
if (tl1.y == tl2.y && br1.y == br2.y && br1.y > tl1.y) {
|
||||
if (br1.x == tl2.x && tl1.x < br1.x && tl2.x < br2.x) {
|
||||
if (!state.enableTextures)
|
||||
return true;
|
||||
|
||||
const auto &textl1 = data[0].texturecoords, &texbr1 = data[1].texturecoords;
|
||||
const auto &textl2 = data[2].texturecoords, &texbr2 = data[3].texturecoords;
|
||||
const auto &textl1 = data[0].v.texturecoords, &texbr1 = data[1].v.texturecoords;
|
||||
const auto &textl2 = data[2].v.texturecoords, &texbr2 = data[3].v.texturecoords;
|
||||
if (textl1.y != textl2.y || texbr1.y != texbr2.y || textl1.y > texbr1.y)
|
||||
return false;
|
||||
if (texbr1.x != textl2.x || textl1.x > texbr1.x || textl2.x > texbr2.x)
|
||||
|
@ -20,8 +20,8 @@ namespace Rasterizer {
|
||||
bool RectangleFastPath(const VertexData &v0, const VertexData &v1, BinManager &binner);
|
||||
void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &range, const RasterizerState &state);
|
||||
|
||||
bool DetectRectangleFromStrip(const RasterizerState &state, const VertexData data[4], int *tlIndex, int *brIndex);
|
||||
bool DetectRectangleFromFan(const RasterizerState &state, const VertexData *data, int c, int *tlIndex, int *brIndex);
|
||||
bool DetectRectangleFromPair(const RasterizerState &state, const VertexData data[6], int *tlIndex, int *brIndex);
|
||||
bool DetectRectangleThroughModeSlices(const RasterizerState &state, const VertexData data[4]);
|
||||
bool DetectRectangleFromStrip(const RasterizerState &state, const ClipVertexData data[4], int *tlIndex, int *brIndex);
|
||||
bool DetectRectangleFromFan(const RasterizerState &state, const ClipVertexData *data, int c, int *tlIndex, int *brIndex);
|
||||
bool DetectRectangleFromPair(const RasterizerState &state, const ClipVertexData data[6], int *tlIndex, int *brIndex);
|
||||
bool DetectRectangleThroughModeSlices(const RasterizerState &state, const ClipVertexData data[4]);
|
||||
}
|
||||
|
@ -160,7 +160,7 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = {
|
||||
{ GE_CMD_LOGICOP, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED },
|
||||
{ GE_CMD_LOGICOPENABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED },
|
||||
|
||||
{ GE_CMD_TEXMAPMODE, 0, SoftDirty::TRANSFORM_BASIC },
|
||||
{ GE_CMD_TEXMAPMODE, 0, SoftDirty::TRANSFORM_BASIC | SoftDirty::RAST_TEX },
|
||||
|
||||
// These are read on every SubmitPrim, no need for dirtying or flushing.
|
||||
{ GE_CMD_TEXSCALEU },
|
||||
|
@ -102,22 +102,24 @@ void SoftwareDrawEngine::DispatchSubmitImm(GEPrimitiveType prim, TransformedVert
|
||||
transformUnit.SubmitPrimitive(nullptr, nullptr, prim, 0, vertTypeID, nullptr, this);
|
||||
|
||||
for (int i = 0; i < vertexCount; i++) {
|
||||
VertexData vert;
|
||||
ClipVertexData vert;
|
||||
vert.clippos = ClipCoords(buffer[i].pos);
|
||||
vert.texturecoords.x = buffer[i].u;
|
||||
vert.texturecoords.y = buffer[i].v;
|
||||
vert.v.texturecoords.x = buffer[i].u;
|
||||
vert.v.texturecoords.y = buffer[i].v;
|
||||
vert.v.texturecoords.z = buffer[i].uv_w;
|
||||
if (gstate.isModeThrough()) {
|
||||
vert.texturecoords.x *= gstate.getTextureWidth(0);
|
||||
vert.texturecoords.y *= gstate.getTextureHeight(0);
|
||||
vert.v.texturecoords.x *= gstate.getTextureWidth(0);
|
||||
vert.v.texturecoords.y *= gstate.getTextureHeight(0);
|
||||
} else {
|
||||
vert.clippos.z *= 1.0f / 65535.0f;
|
||||
}
|
||||
vert.color0 = buffer[i].color0_32;
|
||||
vert.color1 = gstate.isUsingSecondaryColor() && !gstate.isModeThrough() ? buffer[i].color1_32 : 0;
|
||||
vert.fogdepth = buffer[i].fog;
|
||||
vert.screenpos.x = (int)(buffer[i].x * 16.0f);
|
||||
vert.screenpos.y = (int)(buffer[i].y * 16.0f);
|
||||
vert.screenpos.z = (u16)(u32)buffer[i].z;
|
||||
vert.v.clipw = buffer[i].pos_w;
|
||||
vert.v.color0 = buffer[i].color0_32;
|
||||
vert.v.color1 = gstate.isUsingSecondaryColor() && !gstate.isModeThrough() ? buffer[i].color1_32 : 0;
|
||||
vert.v.fogdepth = buffer[i].fog;
|
||||
vert.v.screenpos.x = (int)(buffer[i].x * 16.0f);
|
||||
vert.v.screenpos.y = (int)(buffer[i].y * 16.0f);
|
||||
vert.v.screenpos.z = (u16)(u32)buffer[i].z;
|
||||
|
||||
transformUnit.SubmitImmVertex(vert, this);
|
||||
}
|
||||
@ -259,6 +261,8 @@ void ComputeTransformState(TransformState *state, const VertexReader &vreader) {
|
||||
state->negateNormals = gstate.areNormalsReversed();
|
||||
|
||||
state->uvGenMode = gstate.getUVGenMode();
|
||||
if (state->uvGenMode == GE_TEXMAP_UNKNOWN)
|
||||
state->uvGenMode = GE_TEXMAP_TEXTURE_COORDS;
|
||||
|
||||
if (state->enableTransform) {
|
||||
bool canSkipWorldPos = true;
|
||||
@ -315,26 +319,34 @@ void ComputeTransformState(TransformState *state, const VertexReader &vreader) {
|
||||
state->roundToScreen = &ClipToScreenInternal<false, true>;
|
||||
}
|
||||
|
||||
VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState &state) {
|
||||
ClipVertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState &state) {
|
||||
PROFILE_THIS_SCOPE("read_vert");
|
||||
VertexData vertex;
|
||||
// If we ever thread this, we'll have to change this.
|
||||
ClipVertexData vertex;
|
||||
|
||||
ModelCoords pos;
|
||||
// VertexDecoder normally scales z, but we want it unscaled.
|
||||
vreader.ReadPosThroughZ16(pos.AsArray());
|
||||
|
||||
static Vec3Packedf lastTC;
|
||||
if (state.readUV) {
|
||||
vreader.ReadUV(vertex.texturecoords.AsArray());
|
||||
vreader.ReadUV(vertex.v.texturecoords.AsArray());
|
||||
vertex.v.texturecoords.q() = 0.0f;
|
||||
lastTC = vertex.v.texturecoords;
|
||||
} else {
|
||||
vertex.texturecoords.SetZero();
|
||||
vertex.v.texturecoords = lastTC;
|
||||
}
|
||||
|
||||
Vec3<float> normal;
|
||||
Vec3f normal;
|
||||
static Vec3f lastnormal;
|
||||
if (vreader.hasNormal()) {
|
||||
vreader.ReadNrm(normal.AsArray());
|
||||
lastnormal = normal;
|
||||
|
||||
if (state.negateNormals)
|
||||
normal = -normal;
|
||||
} else {
|
||||
normal = lastnormal;
|
||||
}
|
||||
|
||||
if (state.readWeights) {
|
||||
@ -359,12 +371,12 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState
|
||||
}
|
||||
|
||||
if (vreader.hasColor0()) {
|
||||
vreader.ReadColor0_8888((u8 *)&vertex.color0);
|
||||
vreader.ReadColor0_8888((u8 *)&vertex.v.color0);
|
||||
} else {
|
||||
vertex.color0 = gstate.getMaterialAmbientRGBA();
|
||||
vertex.v.color0 = gstate.getMaterialAmbientRGBA();
|
||||
}
|
||||
|
||||
vertex.color1 = 0;
|
||||
vertex.v.color1 = 0;
|
||||
|
||||
if (state.enableTransform) {
|
||||
WorldCoords worldpos;
|
||||
@ -389,18 +401,19 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState
|
||||
screenScaled = vertex.clippos.xyz() * state.screenScale / vertex.clippos.w + state.screenAdd;
|
||||
#endif
|
||||
bool outside_range_flag = false;
|
||||
vertex.screenpos = state.roundToScreen(screenScaled, vertex.clippos, &outside_range_flag);
|
||||
vertex.v.screenpos = state.roundToScreen(screenScaled, vertex.clippos, &outside_range_flag);
|
||||
if (outside_range_flag) {
|
||||
// We use this, essentially, as the flag.
|
||||
vertex.screenpos.x = 0x7FFFFFFF;
|
||||
vertex.v.screenpos.x = 0x7FFFFFFF;
|
||||
return vertex;
|
||||
}
|
||||
|
||||
if (state.enableFog) {
|
||||
vertex.fogdepth = Dot(state.posToFog, Vec4f(pos, 1.0f));
|
||||
vertex.v.fogdepth = Dot(state.posToFog, Vec4f(pos, 1.0f));
|
||||
} else {
|
||||
vertex.fogdepth = 1.0f;
|
||||
vertex.v.fogdepth = 1.0f;
|
||||
}
|
||||
vertex.v.clipw = vertex.clippos.w;
|
||||
|
||||
Vec3<float> worldnormal;
|
||||
if (vreader.hasNormal()) {
|
||||
@ -419,40 +432,35 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, const TransformState
|
||||
break;
|
||||
|
||||
case GE_PROJMAP_UV:
|
||||
source = Vec3f(vertex.texturecoords, 0.0f);
|
||||
source = Vec3f(vertex.v.texturecoords.uv(), 0.0f);
|
||||
break;
|
||||
|
||||
case GE_PROJMAP_NORMALIZED_NORMAL:
|
||||
source = normal.NormalizedOr001(cpu_info.bSSE4_1);
|
||||
// This does not use 0, 0, 1 if length is zero.
|
||||
source = normal.Normalized(cpu_info.bSSE4_1);
|
||||
break;
|
||||
|
||||
case GE_PROJMAP_NORMAL:
|
||||
source = normal;
|
||||
break;
|
||||
|
||||
default:
|
||||
source = Vec3f::AssignToAll(0.0f);
|
||||
ERROR_LOG_REPORT(G3D, "Software: Unsupported UV projection mode %x", gstate.getUVProjMode());
|
||||
break;
|
||||
}
|
||||
|
||||
// TODO: What about uv scale and offset?
|
||||
// Note that UV scale/offset are not used in this mode.
|
||||
Vec3<float> stq = Vec3ByMatrix43(source, gstate.tgenMatrix);
|
||||
float z_recip = 1.0f / stq.z;
|
||||
vertex.texturecoords = Vec2f(stq.x * z_recip, stq.y * z_recip);
|
||||
vertex.v.texturecoords = Vec3Packedf(stq.x, stq.y, stq.z);
|
||||
} else if (state.uvGenMode == GE_TEXMAP_ENVIRONMENT_MAP) {
|
||||
Lighting::GenerateLightST(vertex, worldnormal);
|
||||
Lighting::GenerateLightST(vertex.v, worldnormal);
|
||||
}
|
||||
|
||||
PROFILE_THIS_SCOPE("light");
|
||||
if (state.enableLighting)
|
||||
Lighting::Process(vertex, worldpos, worldnormal, state.lightingState);
|
||||
Lighting::Process(vertex.v, worldpos, worldnormal, state.lightingState);
|
||||
} else {
|
||||
vertex.screenpos.x = (int)(pos[0] * SCREEN_SCALE_FACTOR);
|
||||
vertex.screenpos.y = (int)(pos[1] * SCREEN_SCALE_FACTOR);
|
||||
vertex.screenpos.z = pos[2];
|
||||
vertex.clippos.w = 1.f;
|
||||
vertex.fogdepth = 1.f;
|
||||
vertex.v.screenpos.x = (int)(pos[0] * SCREEN_SCALE_FACTOR);
|
||||
vertex.v.screenpos.y = (int)(pos[1] * SCREEN_SCALE_FACTOR);
|
||||
vertex.v.screenpos.z = pos[2];
|
||||
vertex.v.clipw = 1.0f;
|
||||
vertex.v.fogdepth = 1.0f;
|
||||
}
|
||||
|
||||
return vertex;
|
||||
@ -503,7 +511,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
inline VertexData Read(int vtx) {
|
||||
inline ClipVertexData Read(int vtx) {
|
||||
if (useIndices_) {
|
||||
if (useCache_) {
|
||||
return cached_[conv_(vtx) - lowerBound_];
|
||||
@ -523,13 +531,13 @@ protected:
|
||||
TransformUnit &transform_;
|
||||
uint16_t lowerBound_;
|
||||
uint16_t upperBound_;
|
||||
static std::vector<VertexData> cached_;
|
||||
static std::vector<ClipVertexData> cached_;
|
||||
bool useIndices_ = false;
|
||||
bool useCache_ = false;
|
||||
};
|
||||
|
||||
// Static to reduce allocations mid-frame.
|
||||
std::vector<VertexData> SoftwareVertexReader::cached_;
|
||||
std::vector<ClipVertexData> SoftwareVertexReader::cached_;
|
||||
|
||||
void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, GEPrimitiveType prim_type, int vertex_count, u32 vertex_type, int *bytesRead, SoftwareDrawEngine *drawEngine)
|
||||
{
|
||||
@ -572,7 +580,7 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
|
||||
if (vreader.IsThrough() && cullType == CullType::OFF && prim_type == GE_PRIM_TRIANGLES && data_index_ == 0 && vertex_count >= 6 && ((vertex_count) % 6) == 0) {
|
||||
// Some games send rectangles as a series of regular triangles.
|
||||
// We look for this, but only in throughmode.
|
||||
VertexData buf[6];
|
||||
ClipVertexData buf[6];
|
||||
int buf_index = data_index_;
|
||||
for (int i = 0; i < data_index_; ++i) {
|
||||
buf[i] = data_[i];
|
||||
@ -823,7 +831,7 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
|
||||
}
|
||||
}
|
||||
|
||||
void TransformUnit::SubmitImmVertex(const VertexData &vert, SoftwareDrawEngine *drawEngine) {
|
||||
void TransformUnit::SubmitImmVertex(const ClipVertexData &vert, SoftwareDrawEngine *drawEngine) {
|
||||
// Where we put it is different for STRIP/FAN types.
|
||||
switch (prev_prim_) {
|
||||
case GE_PRIM_POINTS:
|
||||
@ -864,7 +872,7 @@ void TransformUnit::SubmitImmVertex(const VertexData &vert, SoftwareDrawEngine *
|
||||
isImmDraw_ = false;
|
||||
}
|
||||
|
||||
void TransformUnit::SendTriangle(CullType cullType, const VertexData *verts, int provoking) {
|
||||
void TransformUnit::SendTriangle(CullType cullType, const ClipVertexData *verts, int provoking) {
|
||||
if (cullType == CullType::OFF) {
|
||||
Clipper::ProcessTriangle(verts[0], verts[1], verts[2], verts[provoking], *binner_);
|
||||
Clipper::ProcessTriangle(verts[2], verts[1], verts[0], verts[provoking], *binner_);
|
||||
|
@ -78,28 +78,33 @@ struct DrawingCoords {
|
||||
s16 y;
|
||||
};
|
||||
|
||||
struct VertexData {
|
||||
void Lerp(float t, const VertexData &a, const VertexData &b) {
|
||||
struct alignas(16) VertexData {
|
||||
Vec3Packedf texturecoords;
|
||||
float clipw;
|
||||
uint32_t color0;
|
||||
uint32_t color1;
|
||||
ScreenCoords screenpos;
|
||||
float fogdepth;
|
||||
};
|
||||
|
||||
struct ClipVertexData {
|
||||
void Lerp(float t, const ClipVertexData &a, const ClipVertexData &b) {
|
||||
clippos = ::Lerp(a.clippos, b.clippos, t);
|
||||
// Ignore screenpos because Lerp() is only used pre-calculation of screenpos.
|
||||
texturecoords = ::Lerp(a.texturecoords, b.texturecoords, t);
|
||||
fogdepth = ::Lerp(a.fogdepth, b.fogdepth, t);
|
||||
v.texturecoords = ::Lerp(a.v.texturecoords, b.v.texturecoords, t);
|
||||
v.fogdepth = ::Lerp(a.v.fogdepth, b.v.fogdepth, t);
|
||||
|
||||
u16 t_int = (u16)(t * 256);
|
||||
color0 = LerpInt<Vec4<int>, 256>(Vec4<int>::FromRGBA(a.color0), Vec4<int>::FromRGBA(b.color0), t_int).ToRGBA();
|
||||
color1 = LerpInt<Vec3<int>, 256>(Vec3<int>::FromRGB(a.color1), Vec3<int>::FromRGB(b.color1), t_int).ToRGB();
|
||||
v.color0 = LerpInt<Vec4<int>, 256>(Vec4<int>::FromRGBA(a.v.color0), Vec4<int>::FromRGBA(b.v.color0), t_int).ToRGBA();
|
||||
v.color1 = LerpInt<Vec3<int>, 256>(Vec3<int>::FromRGB(a.v.color1), Vec3<int>::FromRGB(b.v.color1), t_int).ToRGB();
|
||||
}
|
||||
|
||||
bool OutsideRange() const {
|
||||
return screenpos.x == 0x7FFFFFFF;
|
||||
return v.screenpos.x == 0x7FFFFFFF;
|
||||
}
|
||||
|
||||
ClipCoords clippos;
|
||||
Vec2<float> texturecoords;
|
||||
uint32_t color0;
|
||||
uint32_t color1;
|
||||
ScreenCoords screenpos; // TODO: Shouldn't store this ?
|
||||
float fogdepth;
|
||||
VertexData v;
|
||||
};
|
||||
|
||||
class VertexReader;
|
||||
@ -130,7 +135,7 @@ public:
|
||||
static ScreenCoords DrawingToScreen(const DrawingCoords &coords, u16 z);
|
||||
|
||||
void SubmitPrimitive(const void* vertices, const void* indices, GEPrimitiveType prim_type, int vertex_count, u32 vertex_type, int *bytesRead, SoftwareDrawEngine *drawEngine);
|
||||
void SubmitImmVertex(const VertexData &vert, SoftwareDrawEngine *drawEngine);
|
||||
void SubmitImmVertex(const ClipVertexData &vert, SoftwareDrawEngine *drawEngine);
|
||||
|
||||
bool GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices);
|
||||
|
||||
@ -144,14 +149,14 @@ public:
|
||||
SoftDirty GetDirty();
|
||||
|
||||
private:
|
||||
VertexData ReadVertex(VertexReader &vreader, const TransformState &state);
|
||||
void SendTriangle(CullType cullType, const VertexData *verts, int provoking = 2);
|
||||
ClipVertexData ReadVertex(VertexReader &vreader, const TransformState &state);
|
||||
void SendTriangle(CullType cullType, const ClipVertexData *verts, int provoking = 2);
|
||||
|
||||
u8 *decoded_ = nullptr;
|
||||
BinManager *binner_ = nullptr;
|
||||
|
||||
// Normally max verts per prim is 3, but we temporarily need 4 to detect rectangles from strips.
|
||||
VertexData data_[4];
|
||||
ClipVertexData data_[4];
|
||||
// This is the index of the next vert in data (or higher, may need modulus.)
|
||||
int data_index_ = 0;
|
||||
GEPrimitiveType prev_prim_ = GE_PRIM_POINTS;
|
||||
|
@ -1283,6 +1283,27 @@ ULJM05494 = true
|
||||
NPJH50143 = true
|
||||
ULJM05738 = true
|
||||
|
||||
[NearestFilteringOnFramebufferCreate]
|
||||
# Ridge Racer speedometer dynamic CLUT problem - they rely on some palette entries
|
||||
# from memory, and render to the rest of the palette. The palette entries loaded from memory
|
||||
# must not be blurred by filtering, so nearest it is. See issue #8509
|
||||
|
||||
# Ridge Racer
|
||||
ULJS00001 = true
|
||||
ULUS10001 = true
|
||||
UCKS45002 = true
|
||||
UCES00002 = true
|
||||
ULJS19002 = true
|
||||
UCKS45053 = true
|
||||
NPJH50140 = true
|
||||
|
||||
# Ridge Racer 2
|
||||
ULJS00080 = true
|
||||
UCKS45032 = true
|
||||
UCES00422 = true
|
||||
UCAS40273 = true
|
||||
NPJH50366 = true
|
||||
|
||||
[AllowDownloadCLUT]
|
||||
# Temporary compatibility option, while working on the GPU CLUT-from-framebuffer path.
|
||||
# Not required for any games now that it works, but might be useful for development.
|
||||
|
Loading…
Reference in New Issue
Block a user