Merge remote-tracking branch 'TASVideos/master'

This commit is contained in:
Brandon Wright 2019-01-01 11:37:16 -06:00
commit 5c8ba82e33
10 changed files with 514 additions and 281 deletions

View File

@ -54,7 +54,6 @@ typedef struct
static OGLVersion _OGLDriverVersion = {0, 0, 0};
// Lookup Tables
static CACHE_ALIGN GLfloat material_8bit_to_float[256] = {0};
CACHE_ALIGN const GLfloat divide5bitBy31_LUT[32] = {0.0, 0.0322580645161, 0.0645161290323, 0.0967741935484,
0.1290322580645, 0.1612903225806, 0.1935483870968, 0.2258064516129,
0.2580645161290, 0.2903225806452, 0.3225806451613, 0.3548387096774,
@ -292,7 +291,7 @@ void main() \n\
\n\
vtxPosition = inPosition; \n\
vtxTexCoord = texScaleMtx * inTexCoord0; \n\
vtxColor = vec4(inColor * 4.0, polyAlpha); \n\
vtxColor = vec4(inColor / 63.0, polyAlpha); \n\
\n\
gl_Position = vtxPosition; \n\
} \n\
@ -330,18 +329,6 @@ void main()\n\
#endif\n\
#if ENABLE_FOG\n\
vec4 newFogAttributes = vec4(0.0, 0.0, 0.0, 0.0);\n\
#endif\n\
\n\
#if USE_NDS_DEPTH_CALCULATION || ENABLE_FOG\n\
float depthOffset = (polyDepthOffsetMode == 0) ? 0.0 : ((polyDepthOffsetMode == 1) ? -DEPTH_EQUALS_TEST_TOLERANCE : DEPTH_EQUALS_TEST_TOLERANCE);\n\
\n\
#if ENABLE_W_DEPTH\n\
float newFragDepthValue = clamp( ( (vtxPosition.w * 4096.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#else\n\
float vertW = (vtxPosition.w == 0.0) ? 0.00000001 : vtxPosition.w;\n\
// hack: when using z-depth, drop some LSBs so that the overworld map in Dragon Quest IV shows up correctly\n\
float newFragDepthValue = clamp( ( (floor(((vtxPosition.z/vertW) * 0.5 + 0.5) * 4194303.0) * 4.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#endif\n\
#endif\n\
\n\
if ((polyMode != 3) || polyDrawShadow)\n\
@ -416,6 +403,27 @@ void main()\n\
gl_FragData[2] = newFogAttributes;\n\
#endif\n\
#if USE_NDS_DEPTH_CALCULATION || ENABLE_FOG\n\
// It is tempting to perform the NDS depth calculation in the vertex shader rather than in the fragment shader.\n\
// Resist this temptation! It is much more reliable to do the depth calculation in the fragment shader due to\n\
// subtle interpolation differences between various GPUs and/or drivers. If the depth calculation is not done\n\
// here, then it is very possible for the user to experience Z-fighting in certain rendering situations.\n\
\n\
#if NEEDS_DEPTH_EQUALS_TEST\n\
float depthOffset = (polyDepthOffsetMode == 0) ? 0.0 : ((polyDepthOffsetMode == 1) ? -DEPTH_EQUALS_TEST_TOLERANCE : DEPTH_EQUALS_TEST_TOLERANCE);\n\
#if ENABLE_W_DEPTH\n\
float newFragDepthValue = clamp( ( (vtxPosition.w * 4096.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#else\n\
float newFragDepthValue = clamp( ( (floor(gl_FragCoord.z * 4194303.0) * 4.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#endif\n\
#else\n\
#if ENABLE_W_DEPTH\n\
float newFragDepthValue = clamp( (vtxPosition.w * 4096.0) / 16777215.0, 0.0, 1.0 );\n\
#else\n\
// hack: when using z-depth, drop some LSBs so that the overworld map in Dragon Quest IV shows up correctly\n\
float newFragDepthValue = clamp( (floor(gl_FragCoord.z * 4194303.0) * 4.0) / 16777215.0, 0.0, 1.0 );\n\
#endif\n\
#endif\n\
\n\
gl_FragDepth = newFragDepthValue;\n\
#endif\n\
}\n\
@ -1226,6 +1234,8 @@ OpenGLRenderer::OpenGLRenderer()
isMultisampledFBOSupported = false;
isShaderSupported = false;
isSampleShadingSupported = false;
isConservativeDepthSupported = false;
isConservativeDepthAMDSupported = false;
isVAOSupported = false;
willFlipOnlyFramebufferOnGPU = false;
willFlipAndConvertFramebufferOnGPU = false;
@ -1244,6 +1254,7 @@ OpenGLRenderer::OpenGLRenderer()
_workingTextureUnpackBuffer = (FragmentColor *)malloc_alignedCacheLine(1024 * 1024 * sizeof(FragmentColor));
_pixelReadNeedsFinish = false;
_needsZeroDstAlphaPass = true;
_renderNeedsDepthEqualsTest = false;
_currentPolyIndex = 0;
_lastTextureDrawTarget = OGLTextureUnitID_GColor;
_geometryProgramFlags.value = 0;
@ -1908,7 +1919,7 @@ size_t OpenGLRenderer::DrawPolygonsForIndexRange(const POLYLIST *polyList, const
polyPrimitive != GL_LINE_STRIP &&
oglPrimitiveType[nextPoly.vtxFormat] != GL_LINE_LOOP &&
oglPrimitiveType[nextPoly.vtxFormat] != GL_LINE_STRIP &&
this->_isPolyFrontFacing[i] != this->_isPolyFrontFacing[i+1])
this->_isPolyFrontFacing[i] == this->_isPolyFrontFacing[i+1])
{
continue;
}
@ -2113,76 +2124,73 @@ Render3DError OpenGLRenderer::DrawAlphaTexturePolygon(const GLenum polyPrimitive
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
}
}
else
else if (DRAWMODE != OGLPolyDrawMode_DrawOpaquePolys)
{
if (DRAWMODE != OGLPolyDrawMode_DrawOpaquePolys)
// Draw the translucent fragments.
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
// Draw the opaque fragments if they might exist.
if (canHaveOpaqueFragments)
{
// Draw the translucent fragments.
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
// Draw the opaque fragments if they might exist.
if (canHaveOpaqueFragments)
if (DRAWMODE != OGLPolyDrawMode_ZeroAlphaPass)
{
if (DRAWMODE != OGLPolyDrawMode_ZeroAlphaPass)
{
glStencilFunc(GL_ALWAYS, opaquePolyID, 0x3F);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
glDepthMask(GL_TRUE);
}
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_TRUE);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
if (DRAWMODE != OGLPolyDrawMode_ZeroAlphaPass)
{
glStencilFunc(GL_NOTEQUAL, 0x40 | opaquePolyID, 0x7F);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
glDepthMask((enableAlphaDepthWrite) ? GL_TRUE : GL_FALSE);
}
glStencilFunc(GL_ALWAYS, opaquePolyID, 0x3F);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
glDepthMask(GL_TRUE);
}
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_TRUE);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
if (DRAWMODE != OGLPolyDrawMode_ZeroAlphaPass)
{
glStencilFunc(GL_NOTEQUAL, 0x40 | opaquePolyID, 0x7F);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
glDepthMask((enableAlphaDepthWrite) ? GL_TRUE : GL_FALSE);
}
}
else // Draw the polygon as completely opaque.
}
else // Draw the polygon as completely opaque.
{
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_TRUE);
if (this->_emulateDepthLEqualPolygonFacing)
{
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_TRUE);
if (this->_emulateDepthLEqualPolygonFacing)
if (isPolyFrontFacing)
{
if (isPolyFrontFacing)
{
glDepthFunc(GL_EQUAL);
glStencilFunc(GL_EQUAL, 0x40 | opaquePolyID, 0x40);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
glDepthMask(GL_FALSE);
glStencilOp(GL_KEEP, GL_KEEP, GL_ZERO);
glStencilMask(0x40);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glDepthMask(GL_TRUE);
glDepthFunc(GL_LESS);
glStencilFunc(GL_ALWAYS, opaquePolyID, 0x3F);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
glStencilMask(0xFF);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
}
else
{
glStencilFunc(GL_ALWAYS, 0x40 | opaquePolyID, 0x40);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glStencilFunc(GL_ALWAYS, opaquePolyID, 0x3F);
}
glDepthFunc(GL_EQUAL);
glStencilFunc(GL_EQUAL, 0x40 | opaquePolyID, 0x40);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
glDepthMask(GL_FALSE);
glStencilOp(GL_KEEP, GL_KEEP, GL_ZERO);
glStencilMask(0x40);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glDepthMask(GL_TRUE);
glDepthFunc(GL_LESS);
glStencilFunc(GL_ALWAYS, opaquePolyID, 0x3F);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
glStencilMask(0xFF);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
}
else
{
glStencilFunc(GL_ALWAYS, 0x40 | opaquePolyID, 0x40);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glStencilFunc(GL_ALWAYS, opaquePolyID, 0x3F);
}
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
}
else
{
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
}
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
}
}
else
@ -2442,9 +2450,6 @@ Render3DError OpenGLRenderer_1_2::InitExtensions()
glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &maxAnisotropyOGL);
this->_deviceInfo.maxAnisotropy = maxAnisotropyOGL;
// Initialize OpenGL
this->InitTables();
this->isShaderSupported = this->IsExtensionPresent(&oglExtensionSet, "GL_ARB_shader_objects") &&
this->IsExtensionPresent(&oglExtensionSet, "GL_ARB_vertex_shader") &&
this->IsExtensionPresent(&oglExtensionSet, "GL_ARB_fragment_shader") &&
@ -2765,7 +2770,7 @@ Render3DError OpenGLRenderer_1_2::CreateVAOs()
glEnableVertexAttribArray(OGLVertexAttributeID_Color);
glVertexAttribPointer(OGLVertexAttributeID_Position, 4, GL_FLOAT, GL_FALSE, sizeof(VERT), (const GLvoid *)offsetof(VERT, coord));
glVertexAttribPointer(OGLVertexAttributeID_TexCoord0, 2, GL_FLOAT, GL_FALSE, sizeof(VERT), (const GLvoid *)offsetof(VERT, texcoord));
glVertexAttribPointer(OGLVertexAttributeID_Color, 3, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(VERT), (const GLvoid *)offsetof(VERT, color));
glVertexAttribPointer(OGLVertexAttributeID_Color, 3, GL_UNSIGNED_BYTE, GL_FALSE, sizeof(VERT), (const GLvoid *)offsetof(VERT, color));
glBindVertexArray(0);
@ -3090,11 +3095,11 @@ Render3DError OpenGLRenderer_1_2::CreateGeometryPrograms()
OGLGeometryFlags programFlags;
programFlags.value = 0;
std::stringstream shaderHeader;
shaderHeader << "#define DEPTH_EQUALS_TEST_TOLERANCE " << DEPTH_EQUALS_TEST_TOLERANCE << ".0 \n";
shaderHeader << "\n";
std::stringstream fragShaderHeader;
fragShaderHeader << "#define DEPTH_EQUALS_TEST_TOLERANCE " << DEPTH_EQUALS_TEST_TOLERANCE << ".0 \n";
fragShaderHeader << "\n";
for (size_t flagsValue = 0; flagsValue < 64; flagsValue++, programFlags.value++)
for (size_t flagsValue = 0; flagsValue < 128; flagsValue++, programFlags.value++)
{
std::stringstream shaderFlags;
shaderFlags << "#define USE_TEXTURE_SMOOTHING " << ((this->_enableTextureSmoothing) ? 1 : 0) << "\n";
@ -3106,9 +3111,10 @@ Render3DError OpenGLRenderer_1_2::CreateGeometryPrograms()
shaderFlags << "#define ENABLE_FOG " << ((programFlags.EnableFog) ? 1 : 0) << "\n";
shaderFlags << "#define ENABLE_EDGE_MARK " << ((programFlags.EnableEdgeMark) ? 1 : 0) << "\n";
shaderFlags << "#define TOON_SHADING_MODE " << ((programFlags.ToonShadingMode) ? 1 : 0) << "\n";
shaderFlags << "#define NEEDS_DEPTH_EQUALS_TEST " << ((programFlags.NeedsDepthEqualsTest) ? 1 : 0) << "\n";
shaderFlags << "\n";
std::string fragShaderCode = shaderHeader.str() + shaderFlags.str() + std::string(GeometryFragShader_100);
std::string fragShaderCode = fragShaderHeader.str() + shaderFlags.str() + std::string(GeometryFragShader_100);
error = this->ShaderProgramCreate(OGLRef.vertexGeometryShaderID,
OGLRef.fragmentGeometryShaderID[flagsValue],
@ -3176,7 +3182,7 @@ void OpenGLRenderer_1_2::DestroyGeometryPrograms()
OGLRenderRef &OGLRef = *this->ref;
for (size_t flagsValue = 0; flagsValue < 64; flagsValue++)
for (size_t flagsValue = 0; flagsValue < 128; flagsValue++)
{
if (OGLRef.programGeometryID[flagsValue] == 0)
{
@ -3752,21 +3758,6 @@ Render3DError OpenGLRenderer_1_2::InitFinalRenderStates(const std::set<std::stri
return OGLERROR_NOERR;
}
// Fills the material_8bit_to_float lookup table the first time it is called;
// every later call leaves the table untouched. Entry i holds (i * 4) / 255
// as a GLfloat. Always returns OGLERROR_NOERR.
Render3DError OpenGLRenderer_1_2::InitTables()
{
	// Function-local flag that records whether the table has been built yet.
	static bool tablesAreReady = false;
	
	if (!tablesAreReady)
	{
		size_t entry = 0;
		while (entry < 256)
		{
			material_8bit_to_float[entry] = (GLfloat)(entry * 4) / 255.0f;
			entry++;
		}
		
		tablesAreReady = true;
	}
	
	return OGLERROR_NOERR;
}
Render3DError OpenGLRenderer_1_2::InitPostprocessingPrograms(const char *edgeMarkVtxShaderCString,
const char *edgeMarkFragShaderCString,
const char *framebufferOutputVtxShaderCString,
@ -3931,7 +3922,7 @@ Render3DError OpenGLRenderer_1_2::EnableVertexAttributes()
glEnableVertexAttribArray(OGLVertexAttributeID_Color);
glVertexAttribPointer(OGLVertexAttributeID_Position, 4, GL_FLOAT, GL_FALSE, sizeof(VERT), OGLRef.vtxPtrPosition);
glVertexAttribPointer(OGLVertexAttributeID_TexCoord0, 2, GL_FLOAT, GL_FALSE, sizeof(VERT), OGLRef.vtxPtrTexCoord);
glVertexAttribPointer(OGLVertexAttributeID_Color, 3, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(VERT), OGLRef.vtxPtrColor);
glVertexAttribPointer(OGLVertexAttributeID_Color, 3, GL_UNSIGNED_BYTE, GL_FALSE, sizeof(VERT), OGLRef.vtxPtrColor);
}
else
{
@ -4259,37 +4250,6 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
return OGLERROR_BEGINGL_FAILED;
}
if (this->isShaderSupported)
{
this->_geometryProgramFlags.EnableWDepth = (engine.renderState.wbuffer) ? 1 : 0;
this->_geometryProgramFlags.EnableAlphaTest = (engine.renderState.enableAlphaTest) ? 1 : 0;
this->_geometryProgramFlags.EnableTextureSampling = (this->_enableTextureSampling) ? 1 : 0;
this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0;
this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0;
this->_geometryProgramFlags.ToonShadingMode = (engine.renderState.shading) ? 1 : 0;
glUseProgram(OGLRef.programGeometryID[this->_geometryProgramFlags.value]);
glUniform1i(OGLRef.uniformStateClearPolyID, this->_clearAttributes.opaquePolyID);
glUniform1f(OGLRef.uniformStateClearDepth, (GLfloat)this->_clearAttributes.depth / (GLfloat)0x00FFFFFF);
glUniform1f(OGLRef.uniformStateAlphaTestRef[this->_geometryProgramFlags.value], divide5bitBy31_LUT[engine.renderState.alphaTestRef]);
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
glUniform1i(OGLRef.uniformPolyDrawShadow[this->_geometryProgramFlags.value], GL_FALSE);
}
else
{
if(engine.renderState.enableAlphaTest && (engine.renderState.alphaTestRef > 0))
{
glAlphaFunc(GL_GEQUAL, divide5bitBy31_LUT[engine.renderState.alphaTestRef]);
}
else
{
glAlphaFunc(GL_GREATER, 0);
}
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
}
GLushort *indexPtr = NULL;
if (this->isVBOSupported)
@ -4307,6 +4267,7 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
indexPtr = OGLRef.vertIndexBuffer;
}
this->_renderNeedsDepthEqualsTest = false;
size_t vertIndexCount = 0;
for (size_t i = 0; i < engine.polylist->count; i++)
@ -4356,9 +4317,9 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
// Consolidate the vertex color and the poly alpha to our internal color buffer
// so that OpenGL can use it.
const VERT *vertForAlpha = &engine.vertList[vertIndex];
OGLRef.color4fBuffer[colorIndex+0] = material_8bit_to_float[vertForAlpha->color[0]];
OGLRef.color4fBuffer[colorIndex+1] = material_8bit_to_float[vertForAlpha->color[1]];
OGLRef.color4fBuffer[colorIndex+2] = material_8bit_to_float[vertForAlpha->color[2]];
OGLRef.color4fBuffer[colorIndex+0] = divide6bitBy63_LUT[vertForAlpha->color[0]];
OGLRef.color4fBuffer[colorIndex+1] = divide6bitBy63_LUT[vertForAlpha->color[1]];
OGLRef.color4fBuffer[colorIndex+2] = divide6bitBy63_LUT[vertForAlpha->color[2]];
OGLRef.color4fBuffer[colorIndex+3] = thePolyAlpha;
// While we're looping through our vertices, add each vertex index to a
@ -4382,15 +4343,16 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
// Get this polygon's facing.
const size_t n = polyType - 1;
float facing = (vert[0].y + vert[n].y) * (vert[0].x - vert[n].x)
+ (vert[1].y + vert[0].y) * (vert[1].x - vert[0].x)
+ (vert[2].y + vert[1].y) * (vert[2].x - vert[1].x);
float facing = (vert[0].y + vert[n].y) * (vert[0].x - vert[n].x) +
(vert[1].y + vert[0].y) * (vert[1].x - vert[0].x) +
(vert[2].y + vert[1].y) * (vert[2].x - vert[1].x);
for (size_t j = 2; j < n; j++)
{
facing += (vert[j+1].y + vert[j].y) * (vert[j+1].x - vert[j].x);
}
this->_renderNeedsDepthEqualsTest = this->_renderNeedsDepthEqualsTest || (thePoly.attribute.DepthEqualTest_Enable != 0);
this->_isPolyFrontFacing[i] = (facing < 0);
// Get the texture that is to be attached to this polygon.
@ -4403,6 +4365,38 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(VERT) * engine.vertListCount, engine.vertList);
}
if (this->isShaderSupported)
{
this->_geometryProgramFlags.EnableWDepth = (engine.renderState.wbuffer) ? 1 : 0;
this->_geometryProgramFlags.EnableAlphaTest = (engine.renderState.enableAlphaTest) ? 1 : 0;
this->_geometryProgramFlags.EnableTextureSampling = (this->_enableTextureSampling) ? 1 : 0;
this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0;
this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0;
this->_geometryProgramFlags.ToonShadingMode = (engine.renderState.shading) ? 1 : 0;
this->_geometryProgramFlags.NeedsDepthEqualsTest = (this->_renderNeedsDepthEqualsTest) ? 1 : 0;
glUseProgram(OGLRef.programGeometryID[this->_geometryProgramFlags.value]);
glUniform1i(OGLRef.uniformStateClearPolyID, this->_clearAttributes.opaquePolyID);
glUniform1f(OGLRef.uniformStateClearDepth, (GLfloat)this->_clearAttributes.depth / (GLfloat)0x00FFFFFF);
glUniform1f(OGLRef.uniformStateAlphaTestRef[this->_geometryProgramFlags.value], divide5bitBy31_LUT[engine.renderState.alphaTestRef]);
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
glUniform1i(OGLRef.uniformPolyDrawShadow[this->_geometryProgramFlags.value], GL_FALSE);
}
else
{
if(engine.renderState.enableAlphaTest && (engine.renderState.alphaTestRef > 0))
{
glAlphaFunc(GL_GEQUAL, divide5bitBy31_LUT[engine.renderState.alphaTestRef]);
}
else
{
glAlphaFunc(GL_GREATER, 0);
}
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
}
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glDepthMask(GL_TRUE);
@ -5107,7 +5101,7 @@ Render3DError OpenGLRenderer_1_2::DrawShadowPolygon(const GLenum polyPrimitive,
// 1st pass: Create the shadow volume.
if (opaquePolyID == 0)
{
if (performDepthEqualTest && this->isShaderSupported)
if (performDepthEqualTest && this->_emulateNDSDepthCalculation && this->isShaderSupported)
{
// Use the stencil buffer to determine which fragments fail the depth test using the lower-side tolerance.
glUniform1i(OGLRef.uniformPolyDepthOffsetMode[this->_geometryProgramFlags.value], 1);
@ -5124,6 +5118,8 @@ Render3DError OpenGLRenderer_1_2::DrawShadowPolygon(const GLenum polyPrimitive,
glStencilOp(GL_KEEP, GL_REPLACE, GL_KEEP);
glStencilMask(0x80);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glUniform1i(OGLRef.uniformPolyDepthOffsetMode[this->_geometryProgramFlags.value], 0);
}
else
{
@ -5134,7 +5130,7 @@ Render3DError OpenGLRenderer_1_2::DrawShadowPolygon(const GLenum polyPrimitive,
}
// 2nd pass: Do the polygon ID check.
if (performDepthEqualTest && this->isShaderSupported)
if (performDepthEqualTest && this->_emulateNDSDepthCalculation && this->isShaderSupported)
{
// Use the stencil buffer to determine which fragments pass the lower-side tolerance.
glUniform1i(OGLRef.uniformPolyDepthOffsetMode[this->_geometryProgramFlags.value], 1);
@ -5248,6 +5244,7 @@ Render3DError OpenGLRenderer_1_2::Reset()
memset(OGLRef.vertIndexBuffer, 0, OGLRENDER_VERT_INDEX_BUFFER_COUNT * sizeof(GLushort));
}
this->_renderNeedsDepthEqualsTest = false;
this->_currentPolyIndex = 0;
OGLRef.vtxPtrPosition = (GLvoid *)offsetof(VERT, coord);
@ -5516,7 +5513,7 @@ Render3DError OpenGLRenderer_2_0::EnableVertexAttributes()
glEnableVertexAttribArray(OGLVertexAttributeID_Color);
glVertexAttribPointer(OGLVertexAttributeID_Position, 4, GL_FLOAT, GL_FALSE, sizeof(VERT), OGLRef.vtxPtrPosition);
glVertexAttribPointer(OGLVertexAttributeID_TexCoord0, 2, GL_FLOAT, GL_FALSE, sizeof(VERT), OGLRef.vtxPtrTexCoord);
glVertexAttribPointer(OGLVertexAttributeID_Color, 3, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(VERT), OGLRef.vtxPtrColor);
glVertexAttribPointer(OGLVertexAttributeID_Color, 3, GL_UNSIGNED_BYTE, GL_FALSE, sizeof(VERT), OGLRef.vtxPtrColor);
}
return OGLERROR_NOERR;
@ -5547,40 +5544,35 @@ Render3DError OpenGLRenderer_2_0::BeginRender(const GFX3D &engine)
return OGLERROR_BEGINGL_FAILED;
}
// Setup render states
this->_geometryProgramFlags.EnableWDepth = (engine.renderState.wbuffer) ? 1 : 0;
this->_geometryProgramFlags.EnableAlphaTest = (engine.renderState.enableAlphaTest) ? 1 : 0;
this->_geometryProgramFlags.EnableTextureSampling = (this->_enableTextureSampling) ? 1 : 0;
this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0;
this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0;
this->_geometryProgramFlags.ToonShadingMode = (engine.renderState.shading) ? 1 : 0;
glUseProgram(OGLRef.programGeometryID[this->_geometryProgramFlags.value]);
glUniform1f(OGLRef.uniformStateAlphaTestRef[this->_geometryProgramFlags.value], divide5bitBy31_LUT[engine.renderState.alphaTestRef]);
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
glUniform1i(OGLRef.uniformPolyDrawShadow[this->_geometryProgramFlags.value], GL_FALSE);
glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboGeometryVtxID);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboGeometryIndexID);
this->_renderNeedsDepthEqualsTest = false;
size_t vertIndexCount = 0;
GLushort *indexPtr = (GLushort *)glMapBuffer(GL_ELEMENT_ARRAY_BUFFER, GL_WRITE_ONLY);
for (size_t i = 0; i < engine.polylist->count; i++)
{
const POLY *thePoly = &engine.polylist->list[engine.indexlist.list[i]];
const size_t polyType = thePoly->type;
const POLY &thePoly = engine.polylist->list[engine.indexlist.list[i]];
const size_t polyType = thePoly.type;
const VERT vert[4] = {
engine.vertList[thePoly.vertIndexes[0]],
engine.vertList[thePoly.vertIndexes[1]],
engine.vertList[thePoly.vertIndexes[2]],
engine.vertList[thePoly.vertIndexes[3]]
};
for (size_t j = 0; j < polyType; j++)
{
const GLushort vertIndex = thePoly->vertIndexes[j];
const GLushort vertIndex = thePoly.vertIndexes[j];
// While we're looping through our vertices, add each vertex index to
// a buffer. For GFX3D_QUADS and GFX3D_QUAD_STRIP, we also add additional
// vertices here to convert them to GL_TRIANGLES, which are much easier
// to work with and won't be deprecated in future OpenGL versions.
indexPtr[vertIndexCount++] = vertIndex;
if (thePoly->vtxFormat == GFX3D_QUADS || thePoly->vtxFormat == GFX3D_QUAD_STRIP)
if (thePoly.vtxFormat == GFX3D_QUADS || thePoly.vtxFormat == GFX3D_QUAD_STRIP)
{
if (j == 2)
{
@ -5588,17 +5580,46 @@ Render3DError OpenGLRenderer_2_0::BeginRender(const GFX3D &engine)
}
else if (j == 3)
{
indexPtr[vertIndexCount++] = thePoly->vertIndexes[0];
indexPtr[vertIndexCount++] = thePoly.vertIndexes[0];
}
}
}
this->_textureList[i] = this->GetLoadedTextureFromPolygon(*thePoly, this->_enableTextureSampling);
// Get this polygon's facing.
const size_t n = polyType - 1;
float facing = (vert[0].y + vert[n].y) * (vert[0].x - vert[n].x) +
(vert[1].y + vert[0].y) * (vert[1].x - vert[0].x) +
(vert[2].y + vert[1].y) * (vert[2].x - vert[1].x);
for (size_t j = 2; j < n; j++)
{
facing += (vert[j+1].y + vert[j].y) * (vert[j+1].x - vert[j].x);
}
this->_renderNeedsDepthEqualsTest = this->_renderNeedsDepthEqualsTest || (thePoly.attribute.DepthEqualTest_Enable != 0);
this->_isPolyFrontFacing[i] = (facing < 0);
// Get the texture that is to be attached to this polygon.
this->_textureList[i] = this->GetLoadedTextureFromPolygon(thePoly, this->_enableTextureSampling);
}
glUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER);
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(VERT) * engine.vertListCount, engine.vertList);
// Setup render states
this->_geometryProgramFlags.EnableWDepth = (engine.renderState.wbuffer) ? 1 : 0;
this->_geometryProgramFlags.EnableAlphaTest = (engine.renderState.enableAlphaTest) ? 1 : 0;
this->_geometryProgramFlags.EnableTextureSampling = (this->_enableTextureSampling) ? 1 : 0;
this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0;
this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0;
this->_geometryProgramFlags.ToonShadingMode = (engine.renderState.shading) ? 1 : 0;
this->_geometryProgramFlags.NeedsDepthEqualsTest = (this->_renderNeedsDepthEqualsTest) ? 1 : 0;
glUseProgram(OGLRef.programGeometryID[this->_geometryProgramFlags.value]);
glUniform1f(OGLRef.uniformStateAlphaTestRef[this->_geometryProgramFlags.value], divide5bitBy31_LUT[engine.renderState.alphaTestRef]);
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
glUniform1i(OGLRef.uniformPolyDrawShadow[this->_geometryProgramFlags.value], GL_FALSE);
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glDepthMask(GL_TRUE);

View File

@ -418,7 +418,8 @@ union OGLGeometryFlags
u8 EnableFog:1;
u8 EnableEdgeMark:1;
u8 ToonShadingMode:1;
u8 :3;
u8 NeedsDepthEqualsTest:1;
u8 :1;
};
};
typedef OGLGeometryFlags OGLGeometryFlags;
@ -661,6 +662,8 @@ protected:
bool isShaderSupported;
bool isVAOSupported;
bool isSampleShadingSupported;
bool isConservativeDepthSupported;
bool isConservativeDepthAMDSupported;
bool willFlipOnlyFramebufferOnGPU;
bool willFlipAndConvertFramebufferOnGPU;
bool willUsePerSampleZeroDstPass;
@ -674,6 +677,7 @@ protected:
FragmentColor *_workingTextureUnpackBuffer;
bool _pixelReadNeedsFinish;
bool _needsZeroDstAlphaPass;
bool _renderNeedsDepthEqualsTest;
size_t _currentPolyIndex;
OGLTextureUnitID _lastTextureDrawTarget;
OGLGeometryFlags _geometryProgramFlags;
@ -731,7 +735,6 @@ protected:
virtual void DestroyFramebufferOutput8888Program() = 0;
virtual Render3DError InitFinalRenderStates(const std::set<std::string> *oglExtensionSet) = 0;
virtual Render3DError InitTables() = 0;
virtual Render3DError InitPostprocessingPrograms(const char *edgeMarkVtxShader,
const char *edgeMarkFragShader,
const char *framebufferOutputVtxShader,
@ -807,7 +810,6 @@ protected:
virtual void DestroyFramebufferOutput8888Program();
virtual Render3DError InitFinalRenderStates(const std::set<std::string> *oglExtensionSet);
virtual Render3DError InitTables();
virtual Render3DError InitPostprocessingPrograms(const char *edgeMarkVtxShader,
const char *edgeMarkFragShader,
const char *framebufferOutputVtxShader,

View File

@ -147,7 +147,7 @@ void main() \n\
\n\
vtxPosition = inPosition; \n\
vtxTexCoord = texScaleMtx * inTexCoord0; \n\
vtxColor = vec4(inColor * 4.0, polyAlpha); \n\
vtxColor = vec4(inColor / 63.0, polyAlpha); \n\
\n\
gl_Position = vtxPosition; \n\
} \n\
@ -197,6 +197,9 @@ out vec4 outPolyID;\n\
#if ENABLE_FOG\n\
out vec4 outFogAttributes;\n\
#endif\n\
#if IS_CONSERVATIVE_DEPTH_SUPPORTED && (USE_NDS_DEPTH_CALCULATION || ENABLE_FOG) && !NEEDS_DEPTH_EQUALS_TEST && !ENABLE_W_DEPTH\n\
layout (depth_less) out float gl_FragDepth;\n\
#endif\n\
\n\
void main()\n\
{\n\
@ -206,18 +209,6 @@ void main()\n\
#endif\n\
#if ENABLE_FOG\n\
vec4 newFogAttributes = vec4(0.0, 0.0, 0.0, 0.0);\n\
#endif\n\
\n\
#if USE_NDS_DEPTH_CALCULATION || ENABLE_FOG\n\
float depthOffset = (polyDepthOffsetMode == 0) ? 0.0 : ((polyDepthOffsetMode == 1) ? -DEPTH_EQUALS_TEST_TOLERANCE : DEPTH_EQUALS_TEST_TOLERANCE);\n\
\n\
#if ENABLE_W_DEPTH\n\
float newFragDepthValue = clamp( ( (vtxPosition.w * 4096.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#else\n\
float vertW = (vtxPosition.w == 0.0) ? 0.00000001 : vtxPosition.w;\n\
// hack: when using z-depth, drop some LSBs so that the overworld map in Dragon Quest IV shows up correctly\n\
float newFragDepthValue = clamp( ( (floor(((vtxPosition.z/vertW) * 0.5 + 0.5) * 4194303.0) * 4.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#endif\n\
#endif\n\
\n\
if ((polyMode != 3u) || polyDrawShadow)\n\
@ -299,6 +290,27 @@ void main()\n\
outFogAttributes = newFogAttributes;\n\
#endif\n\
#if USE_NDS_DEPTH_CALCULATION || ENABLE_FOG\n\
// It is tempting to perform the NDS depth calculation in the vertex shader rather than in the fragment shader.\n\
// Resist this temptation! It is much more reliable to do the depth calculation in the fragment shader due to\n\
// subtle interpolation differences between various GPUs and/or drivers. If the depth calculation is not done\n\
// here, then it is very possible for the user to experience Z-fighting in certain rendering situations.\n\
\n\
#if NEEDS_DEPTH_EQUALS_TEST\n\
float depthOffset = (polyDepthOffsetMode == 0) ? 0.0 : ((polyDepthOffsetMode == 1) ? -DEPTH_EQUALS_TEST_TOLERANCE : DEPTH_EQUALS_TEST_TOLERANCE);\n\
#if ENABLE_W_DEPTH\n\
float newFragDepthValue = clamp( ( (vtxPosition.w * 4096.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#else\n\
float newFragDepthValue = clamp( ( (floor(gl_FragCoord.z * 4194303.0) * 4.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#endif\n\
#else\n\
#if ENABLE_W_DEPTH\n\
float newFragDepthValue = clamp( (vtxPosition.w * 4096.0) / 16777215.0, 0.0, 1.0 );\n\
#else\n\
// hack: when using z-depth, drop some LSBs so that the overworld map in Dragon Quest IV shows up correctly\n\
float newFragDepthValue = clamp( (floor(gl_FragCoord.z * 4194303.0) * 4.0) / 16777215.0, 0.0, 1.0 );\n\
#endif\n\
#endif\n\
\n\
gl_FragDepth = newFragDepthValue;\n\
#endif\n\
}\n\
@ -797,9 +809,6 @@ Render3DError OpenGLRenderer_3_2::InitExtensions()
this->_deviceInfo.isEdgeMarkSupported = true;
this->_deviceInfo.isFogSupported = true;
// Initialize OpenGL
this->InitTables();
glGenTextures(1, &OGLRef.texFinalColorID);
glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_FinalColor);
glBindTexture(GL_TEXTURE_2D, OGLRef.texFinalColorID);
@ -818,6 +827,8 @@ Render3DError OpenGLRenderer_3_2::InitExtensions()
this->willFlipAndConvertFramebufferOnGPU = true;
this->isSampleShadingSupported = this->IsExtensionPresent(&oglExtensionSet, "GL_ARB_sample_shading");
this->isConservativeDepthSupported = this->IsExtensionPresent(&oglExtensionSet, "GL_ARB_conservative_depth") && IsOpenGLDriverVersionSupported(4, 0, 0);
this->isConservativeDepthAMDSupported = this->IsExtensionPresent(&oglExtensionSet, "GL_AMD_conservative_depth") && IsOpenGLDriverVersionSupported(4, 0, 0);
this->_enableTextureSmoothing = CommonSettings.GFX3D_Renderer_TextureSmoothing;
this->_emulateShadowPolygon = CommonSettings.OpenGL_Emulation_ShadowPolygon;
@ -1259,7 +1270,7 @@ Render3DError OpenGLRenderer_3_2::CreateVAOs()
glEnableVertexAttribArray(OGLVertexAttributeID_Color);
glVertexAttribPointer(OGLVertexAttributeID_Position, 4, GL_FLOAT, GL_FALSE, sizeof(VERT), (const GLvoid *)offsetof(VERT, coord));
glVertexAttribPointer(OGLVertexAttributeID_TexCoord0, 2, GL_FLOAT, GL_FALSE, sizeof(VERT), (const GLvoid *)offsetof(VERT, texcoord));
glVertexAttribPointer(OGLVertexAttributeID_Color, 3, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(VERT), (const GLvoid *)offsetof(VERT, color));
glVertexAttribPointer(OGLVertexAttributeID_Color, 3, GL_UNSIGNED_BYTE, GL_FALSE, sizeof(VERT), (const GLvoid *)offsetof(VERT, color));
glBindVertexArray(0);
@ -1301,14 +1312,38 @@ Render3DError OpenGLRenderer_3_2::CreateGeometryPrograms()
OGLGeometryFlags programFlags;
programFlags.value = 0;
std::stringstream shaderHeader;
shaderHeader << "#version 150\n";
shaderHeader << "#define DEPTH_EQUALS_TEST_TOLERANCE " << DEPTH_EQUALS_TEST_TOLERANCE << ".0\n";
shaderHeader << "\n";
std::stringstream vtxShaderHeader;
if (this->isConservativeDepthSupported || this->isConservativeDepthAMDSupported)
{
vtxShaderHeader << "#version 400\n";
}
else
{
vtxShaderHeader << "#version 150\n";
}
vtxShaderHeader << "\n";
std::string vtxShaderCode = shaderHeader.str() + std::string(GeometryVtxShader_150);
std::string vtxShaderCode = vtxShaderHeader.str() + std::string(GeometryVtxShader_150);
for (size_t flagsValue = 0; flagsValue < 64; flagsValue++, programFlags.value++)
std::stringstream fragShaderHeader;
if (this->isConservativeDepthSupported || this->isConservativeDepthAMDSupported)
{
fragShaderHeader << "#version 400\n";
// Prioritize using GL_AMD_conservative_depth over GL_ARB_conservative_depth, since AMD drivers
// seem to have problems with GL_ARB_conservative_depth.
fragShaderHeader << ((this->isConservativeDepthAMDSupported) ? "#extension GL_AMD_conservative_depth : require\n" : "#extension GL_ARB_conservative_depth : require\n");
}
else
{
fragShaderHeader << "#version 150\n";
}
fragShaderHeader << "\n";
fragShaderHeader << "#define IS_CONSERVATIVE_DEPTH_SUPPORTED " << ((this->isConservativeDepthSupported || this->isConservativeDepthAMDSupported) ? 1 : 0) << "\n";
fragShaderHeader << "#define DEPTH_EQUALS_TEST_TOLERANCE " << DEPTH_EQUALS_TEST_TOLERANCE << ".0\n";
fragShaderHeader << "\n";
for (size_t flagsValue = 0; flagsValue < 128; flagsValue++, programFlags.value++)
{
std::stringstream shaderFlags;
shaderFlags << "#define USE_TEXTURE_SMOOTHING " << ((this->_enableTextureSmoothing) ? 1 : 0) << "\n";
@ -1320,9 +1355,10 @@ Render3DError OpenGLRenderer_3_2::CreateGeometryPrograms()
shaderFlags << "#define ENABLE_FOG " << ((programFlags.EnableFog) ? 1 : 0) << "\n";
shaderFlags << "#define ENABLE_EDGE_MARK " << ((programFlags.EnableEdgeMark) ? 1 : 0) << "\n";
shaderFlags << "#define TOON_SHADING_MODE " << ((programFlags.ToonShadingMode) ? 1 : 0) << "\n";
shaderFlags << "#define NEEDS_DEPTH_EQUALS_TEST " << ((programFlags.NeedsDepthEqualsTest) ? 1 : 0) << "\n";
shaderFlags << "\n";
std::string fragShaderCode = shaderHeader.str() + shaderFlags.str() + std::string(GeometryFragShader_150);
std::string fragShaderCode = fragShaderHeader.str() + shaderFlags.str() + std::string(GeometryFragShader_150);
error = this->ShaderProgramCreate(OGLRef.vertexGeometryShaderID,
OGLRef.fragmentGeometryShaderID[flagsValue],
@ -1426,7 +1462,7 @@ void OpenGLRenderer_3_2::DestroyGeometryPrograms()
OGLRef.uboRenderStatesID = 0;
OGLRef.tboPolyStatesID = 0;
for (size_t flagsValue = 0; flagsValue < 64; flagsValue++)
for (size_t flagsValue = 0; flagsValue < 128; flagsValue++)
{
if (OGLRef.programGeometryID[flagsValue] == 0)
{
@ -2151,6 +2187,8 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D &engine)
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboGeometryIndexID);
glBindBuffer(GL_TEXTURE_BUFFER, OGLRef.tboPolyStatesID);
this->_renderNeedsDepthEqualsTest = false;
size_t vertIndexCount = 0;
GLushort *indexPtr = (GLushort *)glMapBufferRange(GL_ELEMENT_ARRAY_BUFFER, 0, engine.polylist->count * 6 * sizeof(GLushort), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
OGLPolyStates *polyStates = (OGLPolyStates *)glMapBufferRange(GL_TEXTURE_BUFFER, 0, engine.polylist->count * sizeof(OGLPolyStates), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
@ -2190,15 +2228,16 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D &engine)
// Get the polygon's facing.
const size_t n = polyType - 1;
float facing = (vert[0].y + vert[n].y) * (vert[0].x - vert[n].x)
+ (vert[1].y + vert[0].y) * (vert[1].x - vert[0].x)
+ (vert[2].y + vert[1].y) * (vert[2].x - vert[1].x);
float facing = (vert[0].y + vert[n].y) * (vert[0].x - vert[n].x) +
(vert[1].y + vert[0].y) * (vert[1].x - vert[0].x) +
(vert[2].y + vert[1].y) * (vert[2].x - vert[1].x);
for (size_t j = 2; j < n; j++)
{
facing += (vert[j+1].y + vert[j].y) * (vert[j+1].x - vert[j].x);
}
this->_renderNeedsDepthEqualsTest = this->_renderNeedsDepthEqualsTest || (thePoly.attribute.DepthEqualTest_Enable != 0);
this->_isPolyFrontFacing[i] = (facing < 0);
// Get the texture that is to be attached to this polygon.
@ -2232,6 +2271,7 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D &engine)
this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0;
this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0;
this->_geometryProgramFlags.ToonShadingMode = (engine.renderState.shading) ? 1 : 0;
this->_geometryProgramFlags.NeedsDepthEqualsTest = (this->_renderNeedsDepthEqualsTest) ? 1 : 0;
glUseProgram(OGLRef.programGeometryID[this->_geometryProgramFlags.value]);
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);

View File

@ -296,6 +296,11 @@
AB564915186E6F67002740F4 /* Image_Piano.png in Resources */ = {isa = PBXBuildFile; fileRef = AB56490B186E6F67002740F4 /* Image_Piano.png */; };
AB5785FD17176AFC002C5FC7 /* OpenEmuBase.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB5785FC17176AFC002C5FC7 /* OpenEmuBase.framework */; };
AB58F32D1364F44B0074C376 /* cocoa_file.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB58F32C1364F44B0074C376 /* cocoa_file.mm */; };
AB5B1D4A21D1F31E00BF0E0F /* MetalRendererCommonShaders.metal in Sources */ = {isa = PBXBuildFile; fileRef = AB5B1D4921D1F31E00BF0E0F /* MetalRendererCommonShaders.metal */; };
AB5B1D4B21D1F31E00BF0E0F /* MetalRendererCommonShaders.metal in Sources */ = {isa = PBXBuildFile; fileRef = AB5B1D4921D1F31E00BF0E0F /* MetalRendererCommonShaders.metal */; };
AB5B1D4C21D1F31E00BF0E0F /* MetalRendererCommonShaders.metal in Sources */ = {isa = PBXBuildFile; fileRef = AB5B1D4921D1F31E00BF0E0F /* MetalRendererCommonShaders.metal */; };
AB5B1D4D21D1F31E00BF0E0F /* MetalRendererCommonShaders.metal in Sources */ = {isa = PBXBuildFile; fileRef = AB5B1D4921D1F31E00BF0E0F /* MetalRendererCommonShaders.metal */; };
AB5B1D4E21D1F31E00BF0E0F /* MetalRendererCommonShaders.metal in Sources */ = {isa = PBXBuildFile; fileRef = AB5B1D4921D1F31E00BF0E0F /* MetalRendererCommonShaders.metal */; };
AB5FDDAC1D62C89E0094617C /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
AB64987C13ECC73800EE7DD2 /* FileTypeInfo.plist in Resources */ = {isa = PBXBuildFile; fileRef = AB64987B13ECC73800EE7DD2 /* FileTypeInfo.plist */; };
AB68101B187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png in Resources */ = {isa = PBXBuildFile; fileRef = AB681013187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png */; };
@ -2486,6 +2491,8 @@
AB5785FC17176AFC002C5FC7 /* OpenEmuBase.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = OpenEmuBase.framework; path = openemu/OpenEmuBase.framework; sourceTree = "<group>"; };
AB58F32B1364F44B0074C376 /* cocoa_file.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cocoa_file.h; sourceTree = "<group>"; };
AB58F32C1364F44B0074C376 /* cocoa_file.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = cocoa_file.mm; sourceTree = "<group>"; };
AB5B1D4821D1F31D00BF0E0F /* MetalRendererCommonShaders.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MetalRendererCommonShaders.h; sourceTree = "<group>"; };
AB5B1D4921D1F31E00BF0E0F /* MetalRendererCommonShaders.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = MetalRendererCommonShaders.metal; sourceTree = "<group>"; };
AB64987B13ECC73800EE7DD2 /* FileTypeInfo.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = FileTypeInfo.plist; sourceTree = "<group>"; };
AB681013187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_GuitarGrip_Button_Blue_512x512.png; path = images/Icon_GuitarGrip_Button_Blue_512x512.png; sourceTree = "<group>"; };
AB681014187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Green_512x512.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_GuitarGrip_Button_Green_512x512.png; path = images/Icon_GuitarGrip_Button_Green_512x512.png; sourceTree = "<group>"; };
@ -3183,12 +3190,14 @@
ABE5DFE3143FB1DA00835AD8 /* cocoa_videofilter.h */,
AB1B9E611501A78000464647 /* coreaudiosound.h */,
AB28625520AE3E9E00EAED43 /* macOS_driver.h */,
AB5B1D4821D1F31D00BF0E0F /* MetalRendererCommonShaders.h */,
ABD10AE41715FCDD00B5729D /* mic_ext.h */,
ABB24F6C1A81EE92006C1108 /* OGLDisplayOutput_3_2.h */,
ABE6840E189E33D5007FD69C /* OGLDisplayOutput.h */,
AB1B9E621501A78000464647 /* ringbuffer.h */,
ABD104011346652500AF11D1 /* sndOSX.h */,
AB82445E1704AEC400B8EE20 /* utilities.h */,
AB5B1D4921D1F31E00BF0E0F /* MetalRendererCommonShaders.metal */,
ABA6574A14511EC90077E5E9 /* cocoa_cheat.mm */,
ABD104121346652500AF11D1 /* cocoa_core.mm */,
AB58F32C1364F44B0074C376 /* cocoa_file.mm */,
@ -5539,6 +5548,7 @@
ABD104281346653B00AF11D1 /* main.m in Sources */,
AB2ABA411C9F9CFA00173B15 /* rsemaphore.c in Sources */,
ABA6574B14511EC90077E5E9 /* cocoa_cheat.mm in Sources */,
AB5B1D4E21D1F31E00BF0E0F /* MetalRendererCommonShaders.metal in Sources */,
ABD1041D1346652500AF11D1 /* cocoa_core.mm in Sources */,
AB58F32D1364F44B0074C376 /* cocoa_file.mm in Sources */,
AB3BF43E1E26289E003E2B24 /* MacMetalDisplayView.mm in Sources */,
@ -5766,6 +5776,7 @@
AB7900C8215B84E50082AE82 /* ftinit.c in Sources */,
AB7900C9215B84E50082AE82 /* vfat.cpp in Sources */,
AB7900CA215B84E50082AE82 /* colorspacehandler.cpp in Sources */,
AB5B1D4C21D1F31E00BF0E0F /* MetalRendererCommonShaders.metal in Sources */,
AB7900CB215B84E50082AE82 /* videofilter.cpp in Sources */,
AB7900CC215B84E50082AE82 /* WavFile.cpp in Sources */,
AB7900CD215B84E50082AE82 /* wifi.cpp in Sources */,
@ -5878,6 +5889,7 @@
AB7901B3215B84F20082AE82 /* ClientExecutionControl.cpp in Sources */,
AB7901B4215B84F20082AE82 /* deposterize.cpp in Sources */,
AB7901B5215B84F20082AE82 /* ftgasp.c in Sources */,
AB5B1D4D21D1F31E00BF0E0F /* MetalRendererCommonShaders.metal in Sources */,
AB7901B6215B84F20082AE82 /* ftotval.c in Sources */,
AB7901B7215B84F20082AE82 /* ftdebug.c in Sources */,
AB7901B8215B84F20082AE82 /* ftstroke.c in Sources */,
@ -6221,6 +6233,7 @@
ABFEA82B1BB4EC1100B08C25 /* ftinit.c in Sources */,
AB796D4415CDCBA200C59155 /* vfat.cpp in Sources */,
AB5FDDAC1D62C89E0094617C /* colorspacehandler.cpp in Sources */,
AB5B1D4A21D1F31E00BF0E0F /* MetalRendererCommonShaders.metal in Sources */,
AB796D4515CDCBA200C59155 /* videofilter.cpp in Sources */,
AB796D4615CDCBA200C59155 /* WavFile.cpp in Sources */,
AB796D4715CDCBA200C59155 /* wifi.cpp in Sources */,
@ -6333,6 +6346,7 @@
ABB1C9491F5281AE0004844F /* ClientExecutionControl.cpp in Sources */,
AB301BE01D9C8BCD00246A93 /* deposterize.cpp in Sources */,
ABFEA8211BB4EC1000B08C25 /* ftgasp.c in Sources */,
AB5B1D4B21D1F31E00BF0E0F /* MetalRendererCommonShaders.metal in Sources */,
ABFEA83C1BB4EC1100B08C25 /* ftotval.c in Sources */,
ABFEA8181BB4EC1000B08C25 /* ftdebug.c in Sources */,
ABFEA8541BB4EC1100B08C25 /* ftstroke.c in Sources */,

View File

@ -0,0 +1,29 @@
/*
Copyright (C) 2018 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this software. If not, see <http://www.gnu.org/licenses/>.
*/
// Shared color packing/unpacking helpers for the Metal shaders.
// The definitions live in MetalRendererCommonShaders.metal.
#ifndef _METAL_RENDERER_COMMON_H_
#define _METAL_RENDERER_COMMON_H_
// Expands a 16-bit color into a float4: bits 0-4, 5-9 and 10-14 are each
// divided by 31 to give r, g and b; bit 15 becomes alpha (0.0 or 1.0).
float4 unpack_rgba5551_to_unorm8888(const ushort color16);
// Packs a float4 color into 16 bits: r, g and b are scaled by 31 and placed
// at bits 0-4, 5-9 and 10-14; bit 15 is set when the scaled alpha is nonzero.
// Presumably expects components in [0, 1] -- no clamping is performed.
ushort pack_unorm8888_to_rgba5551(const float4 inColor);
// Scales r, g and b by 63 and alpha by 31, adds 0.1, and converts to uchar4.
uchar4 pack_unorm8888_to_rgba6665(const float4 inColor);
// Scales all four components by 255, adds 0.1, and converts to uchar4.
uchar4 pack_unorm8888_to_rgba8888(const float4 inColor);
// Rescales r, g and b by 255/63 and forces alpha to 1.0; the input alpha
// is ignored.
float4 convert_unorm666X_to_unorm8888(const float4 inColor);
#endif // _METAL_RENDERER_COMMON_H_

View File

@ -0,0 +1,56 @@
/*
Copyright (C) 2018 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include <metal_stdlib>
using namespace metal;
#include "MetalRendererCommonShaders.h"
// Expands a packed 16-bit color into four floating point channels.
//
// color16: bits 0-4 hold red, bits 5-9 green, bits 10-14 blue, bit 15 alpha.
// Returns: float4(r, g, b, a) where each 5-bit color field is divided by 31
//          and alpha is the raw value of bit 15 (0.0 or 1.0).
float4 unpack_rgba5551_to_unorm8888(const ushort color16)
{
	const float red   = float((color16 >>  0) & 0x1F) / 31.0f;
	const float green = float((color16 >>  5) & 0x1F) / 31.0f;
	const float blue  = float((color16 >> 10) & 0x1F) / 31.0f;
	
	// Only the top bit is left after the shift, so no mask is needed.
	const float alpha = float(color16 >> 15);
	
	return float4(red, green, blue, alpha);
}
// Packs a floating point color into a single 16-bit value.
//
// inColor: color to pack; no clamping is done here, so components are
//          presumably expected to lie in [0, 1] -- confirm against callers.
// Returns: red in bits 0-4, green in bits 5-9, blue in bits 10-14, and
//          bit 15 set whenever the scaled alpha is nonzero.
ushort pack_unorm8888_to_rgba5551(const float4 inColor)
{
	// Scale every channel to the 5-bit range. The small 0.1 bias is applied
	// before the float-to-integer conversion.
	const ushort4 scaled = ushort4( (inColor * 31.0f) + 0.1f );
	
	const ushort redBits   = scaled.r;
	const ushort greenBits = ushort(scaled.g << 5);
	const ushort blueBits  = ushort(scaled.b << 10);
	
	// The alpha lane is already an integer at this point, so this comparison
	// is true only when the lane is exactly 0.
	// NOTE(review): confirm whether the threshold was meant for inColor.a.
	const ushort alphaBit  = (scaled.a < 0.0001) ? 0 : 0x8000;
	
	return (redBits | greenBits | blueBits | alphaBit);
}
// Converts a floating point color into 6-bit color channels and a 5-bit
// alpha channel, one byte per channel.
//
// inColor: color to convert; no clamping is done here.
// Returns: uchar4 built from (r*63, g*63, b*63, a*31) + 0.1.
uchar4 pack_unorm8888_to_rgba6665(const float4 inColor)
{
	// Color channels span 0..63, while alpha spans only 0..31.
	const float4 channelMax = float4(63.0f, 63.0f, 63.0f, 31.0f);
	
	return uchar4( (inColor * channelMax) + 0.1f );
}
// Converts a floating point color into four 8-bit channels.
//
// inColor: color to convert; no clamping is done here.
// Returns: uchar4 built from (inColor * 255) + 0.1.
uchar4 pack_unorm8888_to_rgba8888(const float4 inColor)
{
	const float4 scaledColor = (inColor * 255.0f) + 0.1f;
	return uchar4(scaledColor);
}
// Rescales the color channels of a 6-bit-per-channel color by 255/63 and
// returns the result with a fixed alpha.
//
// inColor: source color; its alpha component is ignored.
// Returns: float4(rgb * (255/63), 1.0).
float4 convert_unorm666X_to_unorm8888(const float4 inColor)
{
	const float rescale = (255.0f/63.0f);
	const float3 rgb = inColor.rgb * rescale;
	
	// The output alpha is always fully opaque.
	return float4(rgb, 1.0f);
}

View File

@ -129,8 +129,9 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
id<MTLTexture> texHQ4xLUT;
id<MTLTexture> texCurrentHQnxLUT;
MTLSize _fetchThreadsPerGroup;
MTLSize _fetchThreadsPerGroupNative;
MTLSize _fetchThreadGroupsPerGridNative;
MTLSize _fetchThreadsPerGroupCustom;
MTLSize _fetchThreadGroupsPerGridCustom;
MTLSize deposterizeThreadsPerGroup;
MTLSize deposterizeThreadGroupsPerGrid;

View File

@ -67,29 +67,59 @@
commandQueue = [device newCommandQueue];
_fetchCommandQueue = [device newCommandQueue];
defaultLibrary = [device newDefaultLibrary];
_fetch555Pipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch555"] error:nil] retain];
_fetch666Pipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch666"] error:nil] retain];
_fetch888Pipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch888"] error:nil] retain];
_fetch555ConvertOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch555ConvertOnly"] error:nil] retain];
_fetch666ConvertOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch666ConvertOnly"] error:nil] retain];
deposterizePipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"src_filter_deposterize"] error:nil] retain];
size_t tw = GetNearestPositivePOT((uint32_t)[_fetch555Pipeline threadExecutionWidth]);
while ( (tw > [_fetch555Pipeline threadExecutionWidth]) || (tw > GPU_FRAMEBUFFER_NATIVE_WIDTH) )
MTLComputePipelineDescriptor *computePipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
[computePipelineDesc setThreadGroupSizeIsMultipleOfThreadExecutionWidth:YES];
[computePipelineDesc setComputeFunction:[defaultLibrary newFunctionWithName:@"convert_texture_rgb555_to_unorm8888"]];
_fetch555ConvertOnlyPipeline = [[device newComputePipelineStateWithDescriptor:computePipelineDesc options:MTLPipelineOptionNone reflection:nil error:nil] retain];
[computePipelineDesc setComputeFunction:[defaultLibrary newFunctionWithName:@"convert_texture_unorm666X_to_unorm8888"]];
_fetch666ConvertOnlyPipeline = [[device newComputePipelineStateWithDescriptor:computePipelineDesc options:MTLPipelineOptionNone reflection:nil error:nil] retain];
[computePipelineDesc setComputeFunction:[defaultLibrary newFunctionWithName:@"src_filter_deposterize"]];
deposterizePipeline = [[device newComputePipelineStateWithDescriptor:computePipelineDesc options:MTLPipelineOptionNone reflection:nil error:nil] retain];
#if defined(MAC_OS_X_VERSION_10_13) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_13)
if (@available(macOS 10.13, *))
{
[[[computePipelineDesc buffers] objectAtIndexedSubscript:0] setMutability:MTLMutabilityImmutable];
[[[computePipelineDesc buffers] objectAtIndexedSubscript:1] setMutability:MTLMutabilityImmutable];
}
#endif
[computePipelineDesc setComputeFunction:[defaultLibrary newFunctionWithName:@"nds_fetch555"]];
_fetch555Pipeline = [[device newComputePipelineStateWithDescriptor:computePipelineDesc options:MTLPipelineOptionNone reflection:nil error:nil] retain];
[computePipelineDesc setComputeFunction:[defaultLibrary newFunctionWithName:@"nds_fetch666"]];
_fetch666Pipeline = [[device newComputePipelineStateWithDescriptor:computePipelineDesc options:MTLPipelineOptionNone reflection:nil error:nil] retain];
[computePipelineDesc setComputeFunction:[defaultLibrary newFunctionWithName:@"nds_fetch888"]];
_fetch888Pipeline = [[device newComputePipelineStateWithDescriptor:computePipelineDesc options:MTLPipelineOptionNone reflection:nil error:nil] retain];
[computePipelineDesc release];
NSUInteger tw = [_fetch555Pipeline threadExecutionWidth];
while ( ((GPU_FRAMEBUFFER_NATIVE_WIDTH % tw) != 0) || (tw > GPU_FRAMEBUFFER_NATIVE_WIDTH) )
{
tw >>= 1;
}
size_t th = [_fetch555Pipeline maxTotalThreadsPerThreadgroup] / tw;
NSUInteger th = [_fetch555Pipeline maxTotalThreadsPerThreadgroup] / tw;
while ( ((GPU_FRAMEBUFFER_NATIVE_HEIGHT % th) != 0) || (th > GPU_FRAMEBUFFER_NATIVE_HEIGHT) )
{
th >>= 1;
}
_fetchThreadsPerGroup = MTLSizeMake(tw, th, 1);
_fetchThreadsPerGroupNative = MTLSizeMake(tw, th, 1);
_fetchThreadGroupsPerGridNative = MTLSizeMake(GPU_FRAMEBUFFER_NATIVE_WIDTH / tw,
GPU_FRAMEBUFFER_NATIVE_HEIGHT / th,
1);
_fetchThreadsPerGroupCustom = _fetchThreadsPerGroupNative;
_fetchThreadGroupsPerGridCustom = _fetchThreadGroupsPerGridNative;
deposterizeThreadsPerGroup = _fetchThreadsPerGroup;
deposterizeThreadsPerGroup = _fetchThreadsPerGroupNative;
deposterizeThreadGroupsPerGrid = _fetchThreadGroupsPerGridNative;
MTLRenderPipelineDescriptor *hudPipelineDesc = [[MTLRenderPipelineDescriptor alloc] init];
@ -105,6 +135,18 @@
[hudPipelineDesc setVertexFunction:[defaultLibrary newFunctionWithName:@"hud_vertex"]];
[hudPipelineDesc setFragmentFunction:hudFragmentFunction];
#if defined(MAC_OS_X_VERSION_10_13) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_13)
if (@available(macOS 10.13, *))
{
[[[hudPipelineDesc vertexBuffers] objectAtIndexedSubscript:0] setMutability:MTLMutabilityImmutable];
[[[hudPipelineDesc vertexBuffers] objectAtIndexedSubscript:1] setMutability:MTLMutabilityImmutable];
[[[hudPipelineDesc vertexBuffers] objectAtIndexedSubscript:2] setMutability:MTLMutabilityImmutable];
[[[hudPipelineDesc vertexBuffers] objectAtIndexedSubscript:3] setMutability:MTLMutabilityImmutable];
[[[hudPipelineDesc vertexBuffers] objectAtIndexedSubscript:4] setMutability:MTLMutabilityImmutable];
[[[hudPipelineDesc vertexBuffers] objectAtIndexedSubscript:5] setMutability:MTLMutabilityImmutable];
}
#endif
[[[hudPipelineDesc colorAttachments] objectAtIndexedSubscript:0] setPixelFormat:MTLPixelFormatBGRA8Unorm];
hudPipeline = [[device newRenderPipelineStateWithDescriptor:hudPipelineDesc error:nil] retain];
@ -376,9 +418,22 @@
_fetchPixelBytes = dispInfo.pixelBytes;
const size_t tw = _fetchThreadsPerGroup.width;
const size_t th = _fetchThreadsPerGroup.height;
_fetchThreadGroupsPerGridCustom = MTLSizeMake((w + tw - 1) / tw, (h + th - 1) / th, 1);
NSUInteger tw = [_fetch555Pipeline threadExecutionWidth];
while ( ((w % tw) != 0) || (tw > w) )
{
tw >>= 1;
}
NSUInteger th = [_fetch555Pipeline maxTotalThreadsPerThreadgroup] / tw;
while ( ((h % th) != 0) || (th > h) )
{
th >>= 1;
}
_fetchThreadsPerGroupCustom = MTLSizeMake(tw, th, 1);
_fetchThreadGroupsPerGridCustom = MTLSizeMake(w / tw,
h / th,
1);
id<MTLCommandBuffer> cb = [_fetchCommandQueue commandBufferWithUnretainedReferences];
MetalTexturePair newTexPair = [self setFetchTextureBindingsAtIndex:dispInfo.bufferIndex commandBuffer:cb];
@ -461,7 +516,7 @@
[cce setTexture:_texDisplayFetchNative[NDSDisplayID_Main][index] atIndex:0];
[cce setTexture:_texDisplayPostprocessNative[NDSDisplayID_Main][index] atIndex:1];
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridNative
threadsPerThreadgroup:_fetchThreadsPerGroup];
threadsPerThreadgroup:_fetchThreadsPerGroupNative];
targetTexPair.main = _texDisplayPostprocessNative[NDSDisplayID_Main][index];
}
@ -470,7 +525,7 @@
[cce setTexture:_texDisplayFetchCustom[NDSDisplayID_Main][index] atIndex:0];
[cce setTexture:_texDisplayPostprocessCustom[NDSDisplayID_Main][index] atIndex:1];
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridCustom
threadsPerThreadgroup:_fetchThreadsPerGroup];
threadsPerThreadgroup:_fetchThreadsPerGroupCustom];
targetTexPair.main = _texDisplayPostprocessCustom[NDSDisplayID_Main][index];
}
@ -491,7 +546,7 @@
[cce setTexture:_texDisplayFetchNative[NDSDisplayID_Touch][index] atIndex:0];
[cce setTexture:_texDisplayPostprocessNative[NDSDisplayID_Touch][index] atIndex:1];
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridNative
threadsPerThreadgroup:_fetchThreadsPerGroup];
threadsPerThreadgroup:_fetchThreadsPerGroupNative];
targetTexPair.touch = _texDisplayPostprocessNative[NDSDisplayID_Touch][index];
}
@ -500,7 +555,7 @@
[cce setTexture:_texDisplayFetchCustom[NDSDisplayID_Touch][index] atIndex:0];
[cce setTexture:_texDisplayPostprocessCustom[NDSDisplayID_Touch][index] atIndex:1];
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridCustom
threadsPerThreadgroup:_fetchThreadsPerGroup];
threadsPerThreadgroup:_fetchThreadsPerGroupCustom];
targetTexPair.touch = _texDisplayPostprocessCustom[NDSDisplayID_Touch][index];
}
@ -535,7 +590,7 @@
[cce setTexture:_texDisplayFetchNative[NDSDisplayID_Main][index] atIndex:0];
[cce setTexture:_texDisplayPostprocessNative[NDSDisplayID_Main][index] atIndex:1];
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridNative
threadsPerThreadgroup:_fetchThreadsPerGroup];
threadsPerThreadgroup:_fetchThreadsPerGroupNative];
targetTexPair.main = _texDisplayPostprocessNative[NDSDisplayID_Main][index];
}
@ -544,7 +599,7 @@
[cce setTexture:_texDisplayFetchCustom[NDSDisplayID_Main][index] atIndex:0];
[cce setTexture:_texDisplayPostprocessCustom[NDSDisplayID_Main][index] atIndex:1];
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridCustom
threadsPerThreadgroup:_fetchThreadsPerGroup];
threadsPerThreadgroup:_fetchThreadsPerGroupCustom];
targetTexPair.main = _texDisplayPostprocessCustom[NDSDisplayID_Main][index];
}
@ -557,7 +612,7 @@
[cce setTexture:_texDisplayFetchNative[NDSDisplayID_Touch][index] atIndex:0];
[cce setTexture:_texDisplayPostprocessNative[NDSDisplayID_Touch][index] atIndex:1];
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridNative
threadsPerThreadgroup:_fetchThreadsPerGroup];
threadsPerThreadgroup:_fetchThreadsPerGroupNative];
targetTexPair.touch = _texDisplayPostprocessNative[NDSDisplayID_Touch][index];
}
@ -566,7 +621,7 @@
[cce setTexture:_texDisplayFetchCustom[NDSDisplayID_Touch][index] atIndex:0];
[cce setTexture:_texDisplayPostprocessCustom[NDSDisplayID_Touch][index] atIndex:1];
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridCustom
threadsPerThreadgroup:_fetchThreadsPerGroup];
threadsPerThreadgroup:_fetchThreadsPerGroupCustom];
targetTexPair.touch = _texDisplayPostprocessCustom[NDSDisplayID_Touch][index];
}
@ -854,104 +909,119 @@
{
id<MTLTexture> currentHQnxLUT = nil;
MTLComputePipelineDescriptor *computePipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
[computePipelineDesc setThreadGroupSizeIsMultipleOfThreadExecutionWidth:YES];
switch (filterID)
{
case VideoFilterTypeID_Nearest2X:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_nearest2x"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_nearest2x"]];
break;
case VideoFilterTypeID_Scanline:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_scanline"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_scanline"]];
break;
case VideoFilterTypeID_EPX:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_2xEPX"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_2xEPX"]];
break;
case VideoFilterTypeID_EPXPlus:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_2xEPXPlus"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_2xEPXPlus"]];
break;
case VideoFilterTypeID_2xSaI:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_2xSaI"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_2xSaI"]];
break;
case VideoFilterTypeID_Super2xSaI:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_Super2xSaI"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_Super2xSaI"]];
break;
case VideoFilterTypeID_SuperEagle:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_2xSuperEagle"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_2xSuperEagle"]];
break;
case VideoFilterTypeID_LQ2X:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_LQ2x"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_LQ2x"]];
currentHQnxLUT = [sharedData texLQ2xLUT];
break;
case VideoFilterTypeID_LQ2XS:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_LQ2xS"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_LQ2xS"]];
currentHQnxLUT = [sharedData texLQ2xLUT];
break;
case VideoFilterTypeID_HQ2X:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_HQ2x"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_HQ2x"]];
currentHQnxLUT = [sharedData texHQ2xLUT];
break;
case VideoFilterTypeID_HQ2XS:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_HQ2xS"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_HQ2xS"]];
currentHQnxLUT = [sharedData texHQ2xLUT];
break;
case VideoFilterTypeID_HQ3X:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_HQ3x"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_HQ3x"]];
currentHQnxLUT = [sharedData texHQ3xLUT];
break;
case VideoFilterTypeID_HQ3XS:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_HQ3xS"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_HQ3xS"]];
currentHQnxLUT = [sharedData texHQ3xLUT];
break;
case VideoFilterTypeID_HQ4X:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_HQ4x"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_HQ4x"]];
currentHQnxLUT = [sharedData texHQ4xLUT];
break;
case VideoFilterTypeID_HQ4XS:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_HQ4xS"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_HQ4xS"]];
currentHQnxLUT = [sharedData texHQ4xLUT];
break;
case VideoFilterTypeID_2xBRZ:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_2xBRZ"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_2xBRZ"]];
break;
case VideoFilterTypeID_3xBRZ:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_3xBRZ"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_3xBRZ"]];
break;
case VideoFilterTypeID_4xBRZ:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_4xBRZ"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_4xBRZ"]];
break;
case VideoFilterTypeID_5xBRZ:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_5xBRZ"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_5xBRZ"]];
break;
case VideoFilterTypeID_6xBRZ:
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_6xBRZ"] error:nil]];
[computePipelineDesc setComputeFunction:[[sharedData defaultLibrary] newFunctionWithName:@"pixel_scaler_6xBRZ"]];
break;
case VideoFilterTypeID_None:
default:
[self setPixelScalePipeline:nil];
[computePipelineDesc release];
computePipelineDesc = nil;
break;
}
[sharedData setTexCurrentHQnxLUT:currentHQnxLUT];
if (computePipelineDesc != nil)
{
[self setPixelScalePipeline:[[sharedData device] newComputePipelineStateWithDescriptor:computePipelineDesc options:MTLPipelineOptionNone reflection:nil error:nil]];
[computePipelineDesc release];
computePipelineDesc = nil;
}
else
{
[self setPixelScalePipeline:nil];
}
if ([self pixelScalePipeline] != nil)
{
const VideoFilterAttributes vfAttr = VideoFilter::GetAttributesByID(filterID);
@ -971,17 +1041,21 @@
_texDisplayPixelScaler[NDSDisplayID_Main] = [[sharedData device] newTextureWithDescriptor:texDisplayPixelScaleDesc];
_texDisplayPixelScaler[NDSDisplayID_Touch] = [[sharedData device] newTextureWithDescriptor:texDisplayPixelScaleDesc];
size_t tw = GetNearestPositivePOT((uint32_t)[[self pixelScalePipeline] threadExecutionWidth]);
while ( (tw > [[self pixelScalePipeline] threadExecutionWidth]) || (tw > GPU_FRAMEBUFFER_NATIVE_WIDTH) )
NSUInteger tw = [[self pixelScalePipeline] threadExecutionWidth];
while ( ((newScalerWidth % tw) != 0) || (tw > newScalerWidth) )
{
tw >>= 1;
}
const size_t th = [[self pixelScalePipeline] maxTotalThreadsPerThreadgroup] / tw;
NSUInteger th = [[self pixelScalePipeline] maxTotalThreadsPerThreadgroup] / tw;
while ( ((newScalerHeight % th) != 0) || (th > newScalerHeight) )
{
th >>= 1;
}
_pixelScalerThreadsPerGroup = MTLSizeMake(tw, th, 1);
_pixelScalerThreadGroupsPerGrid = MTLSizeMake(GPU_FRAMEBUFFER_NATIVE_WIDTH / tw,
GPU_FRAMEBUFFER_NATIVE_HEIGHT / th,
_pixelScalerThreadGroupsPerGrid = MTLSizeMake(newScalerWidth / tw,
newScalerHeight / th,
1);
}
else
@ -1049,6 +1123,17 @@
[self setOutputDrawablePipeline:[[sharedData device] newRenderPipelineStateWithDescriptor:outputPipelineDesc error:nil]];
}
#if defined(MAC_OS_X_VERSION_10_13) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_13)
if (@available(macOS 10.13, *))
{
[[[outputPipelineDesc vertexBuffers] objectAtIndexedSubscript:0] setMutability:MTLMutabilityImmutable];
[[[outputPipelineDesc vertexBuffers] objectAtIndexedSubscript:1] setMutability:MTLMutabilityImmutable];
[[[outputPipelineDesc vertexBuffers] objectAtIndexedSubscript:2] setMutability:MTLMutabilityImmutable];
[[[outputPipelineDesc vertexBuffers] objectAtIndexedSubscript:3] setMutability:MTLMutabilityImmutable];
[[[outputPipelineDesc fragmentBuffers] objectAtIndexedSubscript:0] setMutability:MTLMutabilityImmutable];
}
#endif
[outputPipelineDesc release];
}
@ -1064,6 +1149,17 @@
[outputPipelineDesc setVertexFunction:[[sharedData defaultLibrary] newFunctionWithName:@"display_output_vertex"]];
[outputPipelineDesc setFragmentFunction:[[sharedData defaultLibrary] newFunctionWithName:@"output_filter_bilinear"]];
#if defined(MAC_OS_X_VERSION_10_13) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_13)
if (@available(macOS 10.13, *))
{
[[[outputPipelineDesc vertexBuffers] objectAtIndexedSubscript:0] setMutability:MTLMutabilityImmutable];
[[[outputPipelineDesc vertexBuffers] objectAtIndexedSubscript:1] setMutability:MTLMutabilityImmutable];
[[[outputPipelineDesc vertexBuffers] objectAtIndexedSubscript:2] setMutability:MTLMutabilityImmutable];
[[[outputPipelineDesc vertexBuffers] objectAtIndexedSubscript:3] setMutability:MTLMutabilityImmutable];
[[[outputPipelineDesc fragmentBuffers] objectAtIndexedSubscript:0] setMutability:MTLMutabilityImmutable];
}
#endif
[[[outputPipelineDesc colorAttachments] objectAtIndexedSubscript:0] setPixelFormat:MTLPixelFormatRGBA8Unorm];
outputRGBAPipeline = [[[sharedData device] newRenderPipelineStateWithDescriptor:outputPipelineDesc error:nil] retain];

View File

@ -18,8 +18,11 @@
#include <metal_stdlib>
using namespace metal;
#include "../MetalRendererCommonShaders.h"
// Returns the magnitude of c, clamped so that it is never smaller than 1e-5.
// NOTE(review): presumably used to keep Lanczos weight denominators away from zero -- confirm at the use sites.
#define LANCZOS_FIX(c) max(abs(c), 1e-5)
struct HUDVtx
{
float4 position [[position]];
@ -44,7 +47,6 @@ struct DisplayViewShaderProperties
};
// Forward declarations of shader helper functions. Definitions for at least
// unpack_unorm1555_to_unorm8888() and color_interpolate_LTE() appear further down in this file.
float reduce(const float3 color);
float4 unpack_unorm1555_to_unorm8888(const ushort color16);
float3 color_interpolate_LTE(const float3 pixA, const float3 pixB, const float3 threshold);
float4 bicubic_weight_bspline(const float x);
float4 bicubic_weight_mitchell_netravali(const float x);
@ -89,14 +91,6 @@ bool InterpDiff(const float3 p1, const float3 p2)
return any( yuv > float3(192.0f/255.0f, 28.0f/255.0f, 48.0f/255.0f) );
}
// Unpacks a 16-bit 1-5-5-5 color word into a normalized float4.
//
// Bits 0-4, 5-9 and 10-14 each hold a 5-bit color channel; they are returned in
// x, y and z respectively, scaled to the range [0, 1] by dividing by 31. Bit 15
// is the single alpha bit and is returned in w as either 0.0 or 1.0.
float4 unpack_unorm1555_to_unorm8888(const ushort color16)
{
	// The alpha flag is the top bit of the 16-bit word, so it must be extracted with a
	// shift of 15. A shift of 16 on a 16-bit value can never isolate that bit.
	return float4((float)((color16 >>  0) & 0x1F) / 31.0f,
	              (float)((color16 >>  5) & 0x1F) / 31.0f,
	              (float)((color16 >> 10) & 0x1F) / 31.0f,
	              (float)((color16 >> 15) & 0x01));
}
float3 color_interpolate_LTE(const float3 pixA, const float3 pixB, const float3 threshold)
{
const float3 interpPix = mix(pixA, pixB, 0.5f);
@ -438,12 +432,7 @@ kernel void nds_fetch555(const uint2 position [[thread_position_in_grid]],
{
const uint h = inTexture.get_height();
if ( (position.x > inTexture.get_width() - 1) || (position.y > h - 1) )
{
return;
}
const float4 inColor = unpack_unorm1555_to_unorm8888( (ushort)inTexture.read(position).r );
const float4 inColor = unpack_rgba5551_to_unorm8888( (ushort)inTexture.read(position).r );
float3 outColor = inColor.rgb;
const uint line = uint( (float)position.y / ((float)h / 192.0f) );
@ -460,11 +449,6 @@ kernel void nds_fetch666(const uint2 position [[thread_position_in_grid]],
{
const uint h = inTexture.get_height();
if ( (position.x > inTexture.get_width() - 1) || (position.y > h - 1) )
{
return;
}
const float4 inColor = inTexture.read(position);
float3 outColor = inColor.rgb * float3(255.0f/63.0f);
@ -482,11 +466,6 @@ kernel void nds_fetch888(const uint2 position [[thread_position_in_grid]],
{
const uint h = inTexture.get_height();
if ( (position.x > inTexture.get_width() - 1) || (position.y > h - 1) )
{
return;
}
const float4 inColor = inTexture.read(position);
float3 outColor = inColor.rgb;
@ -496,30 +475,20 @@ kernel void nds_fetch888(const uint2 position [[thread_position_in_grid]],
outTexture.write(float4(outColor, 1.0f), position);
}
kernel void nds_fetch555ConvertOnly(const uint2 position [[thread_position_in_grid]],
const texture2d<ushort, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]])
kernel void convert_texture_rgb555_to_unorm8888(const uint2 position [[thread_position_in_grid]],
const texture2d<ushort, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]])
{
if ( (position.x > inTexture.get_width() - 1) || (position.y > inTexture.get_height() - 1) )
{
return;
}
const float4 outColor = unpack_unorm1555_to_unorm8888( (ushort)inTexture.read(position).r );
const float4 outColor = unpack_rgba5551_to_unorm8888( (ushort)inTexture.read(position).r );
outTexture.write(float4(outColor.rgb, 1.0f), position);
}
kernel void nds_fetch666ConvertOnly(const uint2 position [[thread_position_in_grid]],
const texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]])
kernel void convert_texture_unorm666X_to_unorm8888(const uint2 position [[thread_position_in_grid]],
const texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]])
{
if ( (position.x > inTexture.get_width() - 1) || (position.y > inTexture.get_height() - 1) )
{
return;
}
const float3 outColor = inTexture.read(position).rgb * float3(255.0f/63.0f);
outTexture.write(float4(outColor, 1.0f), position);
const float4 outColor = convert_unorm666X_to_unorm8888( inTexture.read(position) );
outTexture.write(outColor, position);
}
float3 nds_apply_master_brightness(const float3 inColor, const uchar mode, const float intensity)

View File

@ -244,6 +244,11 @@ Render3D::Render3D()
_textureDeposterizeSrcSurface.Height = _textureDeposterizeDstSurface.Height = 1;
_textureDeposterizeSrcSurface.Pitch = _textureDeposterizeDstSurface.Pitch = 1;
for (size_t i = 0; i < POLYLIST_SIZE; i++)
{
_textureList[i] = NULL;
}
Reset();
}