Start moving GPU feature compatibility decisions to startup

(and a single, cache-friendly flag field)
This commit is contained in:
Henrik Rydgard 2015-09-05 19:58:47 +02:00
parent 86a02fed61
commit a71be3f75f
8 changed files with 107 additions and 53 deletions

View File

@ -156,7 +156,7 @@ ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend) {
if (nonAlphaSrcFactors[gstate.getBlendFuncA()] && nonAlphaDestFactors[gstate.getBlendFuncB()]) {
return REPLACE_ALPHA_YES;
} else {
if (gl_extensions.ARB_blend_func_extended) {
if (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) {
return REPLACE_ALPHA_DUALSOURCE;
} else {
return REPLACE_ALPHA_NO;
@ -285,7 +285,7 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowShaderBlend) {
case GE_DSTBLEND_DOUBLEINVSRCALPHA:
// We can't technically do this correctly (due to clamping) without reading the dst color.
// Using a copy isn't accurate either, though, when there's overlap.
if (gl_extensions.ANY_shader_framebuffer_fetch)
if (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)
return !allowShaderBlend ? REPLACE_BLEND_PRE_SRC_2X_ALPHA : REPLACE_BLEND_COPY_FBO;
return REPLACE_BLEND_PRE_SRC_2X_ALPHA;
@ -344,14 +344,14 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowShaderBlend) {
case GE_DSTBLEND_DOUBLEINVSRCALPHA:
if (funcA == GE_SRCBLEND_SRCALPHA || funcA == GE_SRCBLEND_INVSRCALPHA) {
// Can't safely double alpha, will clamp. However, a copy may easily be worse due to overlap.
if (gl_extensions.ANY_shader_framebuffer_fetch)
if (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)
return !allowShaderBlend ? REPLACE_BLEND_PRE_SRC_2X_ALPHA : REPLACE_BLEND_COPY_FBO;
return REPLACE_BLEND_PRE_SRC_2X_ALPHA;
} else {
// This means dst alpha/color is used in the src factor.
// Unfortunately, copying here causes overlap problems in Silent Hill games (it seems?)
// We will just hope that doubling alpha for the dst factor will not clamp too badly.
if (gl_extensions.ANY_shader_framebuffer_fetch)
if (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)
return !allowShaderBlend ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_COPY_FBO;
return REPLACE_BLEND_2X_ALPHA;
}
@ -426,14 +426,14 @@ void ComputeFragmentShaderID(ShaderID *id) {
id0 |= (doTextureAlpha & 1) << 5; // rgb or rgba
id0 |= (gstate_c.flipTexture & 1) << 6;
if (gstate_c.needShaderTexClamp) {
bool textureAtOffset = gstate_c.curTextureXOffset != 0 || gstate_c.curTextureYOffset != 0;
// 3 bits total.
id0 |= 1 << 7;
id0 |= gstate.isTexCoordClampedS() << 8;
id0 |= gstate.isTexCoordClampedT() << 9;
id0 |= (textureAtOffset & 1) << 10;
}
if (gstate_c.needShaderTexClamp) {
bool textureAtOffset = gstate_c.curTextureXOffset != 0 || gstate_c.curTextureYOffset != 0;
// 3 bits total.
id0 |= 1 << 7;
id0 |= gstate.isTexCoordClampedS() << 8;
id0 |= gstate.isTexCoordClampedT() << 9;
id0 |= (textureAtOffset & 1) << 10;
}
}
id0 |= (lmode & 1) << 11;
@ -456,7 +456,7 @@ void ComputeFragmentShaderID(ShaderID *id) {
id0 |= (enableColorDoubling & 1) << 23;
// 2 bits
id0 |= (stencilToAlpha) << 24;
if (stencilToAlpha != REPLACE_ALPHA_NO) {
// 4 bits
id0 |= ReplaceAlphaWithStencilType() << 26;
@ -504,7 +504,7 @@ void GenerateFragmentShader(char *buffer) {
#if defined(USING_GLES2)
// Let's wait until we have a real use for this.
// ES doesn't support dual source alpha :(
if (gl_extensions.GLES3) {
if (gstate_c.featureFlags & GPU_SUPPORTS_GLSL_ES_300) {
WRITE(p, "#version 300 es\n"); // GLSL ES 3.0
fragColor0 = "fragColor0";
texture = "texture";
@ -525,16 +525,18 @@ void GenerateFragmentShader(char *buffer) {
highpFog = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? true : false;
highpTexcoord = highpFog;
if (gl_extensions.EXT_shader_framebuffer_fetch) {
WRITE(p, "#extension GL_EXT_shader_framebuffer_fetch : require\n");
lastFragData = "gl_LastFragData[0]";
} else if (gl_extensions.NV_shader_framebuffer_fetch) {
// GL_NV_shader_framebuffer_fetch is available on mobile platform and ES 2.0 only but not on desktop.
WRITE(p, "#extension GL_NV_shader_framebuffer_fetch : require\n");
lastFragData = "gl_LastFragData[0]";
} else if (gl_extensions.ARM_shader_framebuffer_fetch) {
WRITE(p, "#extension GL_ARM_shader_framebuffer_fetch : require\n");
lastFragData = "gl_LastFragColorARM";
if (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) {
if (gl_extensions.EXT_shader_framebuffer_fetch) {
WRITE(p, "#extension GL_EXT_shader_framebuffer_fetch : require\n");
lastFragData = "gl_LastFragData[0]";
} else if (gl_extensions.NV_shader_framebuffer_fetch) {
// GL_NV_shader_framebuffer_fetch is available on mobile platform and ES 2.0 only but not on desktop.
WRITE(p, "#extension GL_NV_shader_framebuffer_fetch : require\n");
lastFragData = "gl_LastFragData[0]";
} else if (gl_extensions.ARM_shader_framebuffer_fetch) {
WRITE(p, "#extension GL_ARM_shader_framebuffer_fetch : require\n");
lastFragData = "gl_LastFragColorARM";
}
}
WRITE(p, "precision lowp float;\n");
@ -611,7 +613,7 @@ void GenerateFragmentShader(char *buffer) {
if (doTexture)
WRITE(p, "uniform sampler2D tex;\n");
if (!gstate.isModeClear() && replaceBlend > REPLACE_BLEND_STANDARD) {
if (!gl_extensions.ANY_shader_framebuffer_fetch && replaceBlend == REPLACE_BLEND_COPY_FBO) {
if (!(gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) && replaceBlend == REPLACE_BLEND_COPY_FBO) {
if (!texelFetch) {
WRITE(p, "uniform vec2 u_fbotexSize;\n");
}
@ -962,7 +964,7 @@ void GenerateFragmentShader(char *buffer) {
if (replaceBlend == REPLACE_BLEND_COPY_FBO) {
// If we have NV_shader_framebuffer_fetch / EXT_shader_framebuffer_fetch, we skip the blit.
// We can just read the prev value more directly.
if (gl_extensions.ANY_shader_framebuffer_fetch) {
if (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) {
WRITE(p, " lowp vec4 destColor = %s;\n", lastFragData);
} else if (!texelFetch) {
WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", texture);

View File

@ -1128,24 +1128,6 @@ void FramebufferManager::CopyDisplayToOutput() {
}
}
// Decides whether framebuffer downloads should be converted on the CPU
// instead of letting the GPU produce the final format. vfb is not consulted.
inline bool FramebufferManager::ShouldDownloadUsingCPU(const VirtualFramebuffer *vfb) const {
#ifdef USING_GLES2
	// GLES2 builds unconditionally use the CPU path.
	return true;
#else
	// The explicit "read framebuffers to memory (CPU)" rendering mode always wins.
	if (g_Config.iRenderingMode == FB_READFBOMEMORY_CPU) {
		return true;
	}
	// Outside buffered mode there is nothing else that forces the CPU path.
	if (g_Config.iRenderingMode != FB_BUFFERED_MODE) {
		return false;
	}
	// We might get here if hackForce04154000Download_ is hit.
	// Some cards or drivers seem to always dither when downloading a framebuffer to 16-bit,
	// which causes glitches in games that expect the exact values. This has not been
	// experienced on NVIDIA cards, so only those (with GL 3+) keep the faster GPU path.
	const bool gpuPathTrusted = gl_extensions.gpuVendor == GPU_VENDOR_NVIDIA && gl_extensions.ver[0] >= 3;
	return !gpuPathTrusted;
#endif
}
void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) {
PROFILE_THIS_SCOPE("gpu-readback");
#ifndef USING_GLES2
@ -1212,7 +1194,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s
nvfb->colorDepth = FBO_8888;
break;
}
if (ShouldDownloadUsingCPU(vfb)) {
if (gstate_c.Supports(GPU_PREFER_CPU_DOWNLOAD)) {
nvfb->colorDepth = vfb->colorDepth;
}
@ -1309,12 +1291,12 @@ void FramebufferManager::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int
bool useNV = false;
#ifndef USING_GLES2
if (gl_extensions.FBO_ARB) {
if (gstate_c.Supports(GPU_SUPPORTS_FBO_ARB)) {
useNV = false;
useBlit = true;
}
#else
if (gl_extensions.GLES3 || gl_extensions.NV_framebuffer_blit) {
// Blit is usable on GLES3, or through the NV extension when its feature flag is set.
if (gl_extensions.GLES3 || gstate_c.Supports(GPU_SUPPORTS_NV_FRAMEBUFFER_BLIT)) {
useNV = !gl_extensions.GLES3;
useBlit = true;
}
@ -1497,7 +1479,7 @@ void FramebufferManager::PackFramebufferAsync_(VirtualFramebuffer *vfb) {
GLubyte *packed = 0;
bool unbind = false;
const u8 nextPBO = (currentPBO_ + 1) % MAX_PBO;
const bool useCPU = ShouldDownloadUsingCPU(vfb);
const bool useCPU = gstate_c.Supports(GPU_PREFER_CPU_DOWNLOAD);
// We'll prepare two PBOs to switch between readying and reading
if (!pixelBufObj_) {
@ -1541,7 +1523,7 @@ void FramebufferManager::PackFramebufferAsync_(VirtualFramebuffer *vfb) {
if (vfb) {
int pixelType, pixelSize, pixelFormat, align;
bool reverseOrder = (gl_extensions.gpuVendor == GPU_VENDOR_NVIDIA) || (gl_extensions.gpuVendor == GPU_VENDOR_AMD);
bool reverseOrder = gstate_c.Supports(GPU_PREFER_REVERSE_COLOR_ORDER);
switch (vfb->format) {
// GL_UNSIGNED_INT_8_8_8_8 returns A B G R (little-endian, tested in Nvidia card/x86 PC)
// GL_UNSIGNED_BYTE returns R G B A in consecutive bytes ("big-endian"/not treated as 32-bit value)

View File

@ -145,8 +145,6 @@ private:
void SetNumExtraFBOs(int num);
inline bool ShouldDownloadUsingCPU(const VirtualFramebuffer *vfb) const;
#ifndef USING_GLES2
void PackFramebufferAsync_(VirtualFramebuffer *vfb);
#endif

View File

@ -396,6 +396,7 @@ GLES_GPU::CommandInfo GLES_GPU::cmdInfo_[256];
GLES_GPU::GLES_GPU()
: resized_(false) {
UpdateVsyncInterval(true);
CheckGPUFeatures();
shaderManager_ = new ShaderManager();
transformDraw_.SetShaderManager(shaderManager_);
@ -463,6 +464,57 @@ GLES_GPU::~GLES_GPU() {
glstate.SetVSyncInterval(0);
}
// Take the raw GL extension and versioning data and turn into feature flags.
void GLES_GPU::CheckGPUFeatures() {
u32 features = 0;
if (gl_extensions.ARB_blend_func_extended /*|| gl_extensions.EXT_blend_func_extended*/)
features |= GPU_SUPPORTS_DUALSOURCE_BLEND;
#ifdef USING_GLES2
if (gl_extensions.GLES3)
features |= GPU_SUPPORTS_GLSL_ES_300;
#else
if (gl_extensions.VersionGEThan(3, 3, 0))
features |= GPU_SUPPORTS_GLSL_330;
#endif
// Framebuffer fetch appears to be buggy at least on Tegra 3 devices. So we blacklist it.
// Tales of Destiny 2 has been reported to display green.
if (gl_extensions.EXT_shader_framebuffer_fetch || gl_extensions.NV_shader_framebuffer_fetch || gl_extensions.ARM_shader_framebuffer_fetch) {
features |= GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH;
// Blacklist Tegra 3, doesn't work very well.
if (strstr(gl_extensions.model, "NVIDIA Tegra 3") != 0) {
features &= ~GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH;
}
}
if (gl_extensions.FBO_ARB || gl_extensions.IsGLES) {
features |= GPU_SUPPORTS_FBO_ARB;
}
#ifndef USING_GLES2
bool useCPU = g_Config.iRenderingMode == FB_READFBOMEMORY_CPU;
// We might get here if hackForce04154000Download_ is hit.
// Some cards or drivers seem to always dither when downloading a framebuffer to 16-bit.
// This causes glitches in games that expect the exact values.
// It has not been experienced on NVIDIA cards, so those are left using the GPU (which is faster.)
if (g_Config.iRenderingMode == FB_BUFFERED_MODE) {
if (gl_extensions.gpuVendor != GPU_VENDOR_NVIDIA || gl_extensions.ver[0] < 3) {
useCPU = true;
}
}
#else
useCPU = true;
#endif
if (useCPU) {
features |= GPU_PREFER_CPU_DOWNLOAD;
}
if ((gl_extensions.gpuVendor == GPU_VENDOR_NVIDIA) || (gl_extensions.gpuVendor == GPU_VENDOR_AMD))
features |= GPU_PREFER_REVERSE_COLOR_ORDER;
gstate_c.featureFlags = features;
}
// Let's avoid passing nulls into snprintf().
static const char *GetGLStringAlways(GLenum name) {
const GLubyte *value = glGetString(name);

View File

@ -36,6 +36,9 @@ class GLES_GPU : public GPUCommon {
public:
GLES_GPU();
~GLES_GPU();
void CheckGPUFeatures();
void InitClear() override;
void Reinitialize() override;
void PreExecuteOp(u32 op, u32 diff) override;

View File

@ -103,7 +103,7 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTrans
glBindAttribLocation(program, ATTR_COLOR1, "color1");
#ifndef USING_GLES2
if (gl_extensions.ARB_blend_func_extended) {
if (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) {
// Dual source alpha
glBindFragDataLocationIndexed(program, 0, 0, "fragColor0");
glBindFragDataLocationIndexed(program, 0, 1, "fragColor1");

View File

@ -168,7 +168,7 @@ static inline bool blendColorSimilar(const Vec3f &a, const Vec3f &b, float margi
}
bool TransformDrawEngine::ApplyShaderBlending() {
if (gl_extensions.ANY_shader_framebuffer_fetch) {
if (gstate_c.featureFlags & GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) {
return true;
}

View File

@ -442,7 +442,24 @@ enum TextureChangeReason {
TEXCHANGE_PARAMSONLY = 0x02,
};
// Builds a single-bit mask for bit index x.
// The argument is parenthesized so compound expressions expand as intended, and the
// shift operates on an unsigned 1 so that bit 31 is a positive value rather than a
// shift into the sign bit of an int.
#define FLAG_BIT(x) (1u << (x))

// Feature/preference bits, combined into the u32 GPUStateCache::featureFlags field.
// GPU_SUPPORTS_* describe capabilities; GPU_PREFER_* describe preferred code paths.
enum {
	GPU_SUPPORTS_DUALSOURCE_BLEND = FLAG_BIT(0),
	GPU_SUPPORTS_GLSL_ES_300 = FLAG_BIT(1),
	GPU_SUPPORTS_GLSL_330 = FLAG_BIT(2),
	GPU_SUPPORTS_NV_FRAMEBUFFER_BLIT = FLAG_BIT(10),
	GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH = FLAG_BIT(20),
	GPU_SUPPORTS_FBO_ARB = FLAG_BIT(25),
	GPU_PREFER_CPU_DOWNLOAD = FLAG_BIT(30),
	GPU_PREFER_REVERSE_COLOR_ORDER = FLAG_BIT(31),
};
struct GPUStateCache {
bool Supports(int flag) { return (featureFlags & flag) != 0; }
u32 featureFlags;
u32 vertexAddr;
u32 indexAddr;
u32 offsetAddr;