OGL: Re-implement async shader compiling

This commit is contained in:
Stenzek 2018-02-25 17:56:09 +10:00
parent dec0c3bce8
commit f9c829c7f7
16 changed files with 136 additions and 33 deletions

View File

@ -70,6 +70,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsDynamicSamplerIndexing = false;
g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsFramebufferFetch = false;
g_Config.backend_info.bSupportsBackgroundCompiling = true;
IDXGIFactory2* factory;
IDXGIAdapter* ad;

View File

@ -46,6 +46,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsST3CTextures = false;
g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsFramebufferFetch = false;
g_Config.backend_info.bSupportsBackgroundCompiling = false;
// aamodes: We only support 1 sample, so no MSAA
g_Config.backend_info.Adapters.clear();

View File

@ -46,10 +46,11 @@ OGLPipeline::~OGLPipeline()
std::unique_ptr<OGLPipeline> OGLPipeline::Create(const AbstractPipelineConfig& config)
{
const PipelineProgram* program =
ProgramShaderCache::GetPipelineProgram(static_cast<const OGLShader*>(config.vertex_shader),
static_cast<const OGLShader*>(config.geometry_shader),
static_cast<const OGLShader*>(config.pixel_shader));
const PipelineProgram* program = ProgramShaderCache::GetPipelineProgram(
static_cast<const GLVertexFormat*>(config.vertex_format),
static_cast<const OGLShader*>(config.vertex_shader),
static_cast<const OGLShader*>(config.geometry_shader),
static_cast<const OGLShader*>(config.pixel_shader));
if (!program)
return nullptr;

View File

@ -51,9 +51,10 @@ static std::unique_ptr<StreamBuffer> s_buffer;
static int num_failures = 0;
static GLuint CurrentProgram = 0;
ProgramShaderCache::PipelineProgramMap ProgramShaderCache::pipelineprograms;
std::mutex ProgramShaderCache::pipelineprogramlock;
ProgramShaderCache::PipelineProgramMap ProgramShaderCache::s_pipeline_programs;
std::mutex ProgramShaderCache::s_pipeline_program_lock;
static std::string s_glsl_header = "";
static thread_local bool s_is_shared_context = false;
static std::string GetGLSLVersionString()
{
@ -506,8 +507,8 @@ void ProgramShaderCache::Shutdown()
s_last_VAO = 0;
// All pipeline programs should have been released.
_dbg_assert_(VIDEO, pipelineprograms.empty());
pipelineprograms.clear();
_dbg_assert_(VIDEO, s_pipeline_programs.empty());
s_pipeline_programs.clear();
}
void ProgramShaderCache::CreateAttributelessVAO()
@ -548,21 +549,28 @@ void ProgramShaderCache::InvalidateLastProgram()
CurrentProgram = 0;
}
const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const OGLShader* vertex_shader,
const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const GLVertexFormat* vertex_format,
const OGLShader* vertex_shader,
const OGLShader* geometry_shader,
const OGLShader* pixel_shader)
{
PipelineProgramKey key = {vertex_shader, geometry_shader, pixel_shader};
{
std::lock_guard<std::mutex> guard(pipelineprogramlock);
auto iter = pipelineprograms.find(key);
if (iter != pipelineprograms.end())
std::lock_guard<std::mutex> guard(s_pipeline_program_lock);
auto iter = s_pipeline_programs.find(key);
if (iter != s_pipeline_programs.end())
{
iter->second->reference_count++;
return iter->second.get();
}
}
// We temporarily change the vertex array to the pipeline's vertex format.
// This can prevent the NVIDIA OpenGL driver from recompiling on first use.
GLuint vao = vertex_format ? vertex_format->VAO : s_attributeless_VAO;
if (s_is_shared_context || vao != s_last_VAO)
glBindVertexArray(vao);
std::unique_ptr<PipelineProgram> prog = std::make_unique<PipelineProgram>();
prog->key = key;
@ -581,6 +589,11 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const OGLShader* v
// Link program.
prog->shader.SetProgramBindings(false);
glLinkProgram(prog->shader.glprogid);
// Restore VAO binding after linking.
if (!s_is_shared_context && vao != s_last_VAO)
glBindVertexArray(s_last_VAO);
if (!ProgramShaderCache::CheckProgramLinkResult(prog->shader.glprogid, {}, {}, {}))
{
prog->shader.Destroy();
@ -588,9 +601,9 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const OGLShader* v
}
// Lock to insert. A duplicate program may have been created in the meantime.
std::lock_guard<std::mutex> guard(pipelineprogramlock);
auto iter = pipelineprograms.find(key);
if (iter != pipelineprograms.end())
std::lock_guard<std::mutex> guard(s_pipeline_program_lock);
auto iter = s_pipeline_programs.find(key);
if (iter != s_pipeline_programs.end())
{
// Destroy this program, and use the one which was created first.
prog->shader.Destroy();
@ -601,19 +614,25 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const OGLShader* v
// Set program variables on the shader which will be returned.
// This is only needed for drivers which don't support binding layout.
prog->shader.SetProgramVariables();
auto ip = pipelineprograms.emplace(key, std::move(prog));
// If this is a shared context, ensure we sync before we return the program to
// the main thread. If we don't do this, some drivers can lock up (e.g. AMD).
if (s_is_shared_context)
glFinish();
auto ip = s_pipeline_programs.emplace(key, std::move(prog));
return ip.first->second.get();
}
// Drops one reference to a pipeline program. The program is looked up in the map by
// prog->key; when its reference count reaches zero, its shader is destroyed and the map
// entry is erased.
// NOTE(review): the map is accessed here without taking s_pipeline_program_lock, whereas
// GetPipelineProgram takes that lock around its lookups - confirm this can never run
// concurrently with a GetPipelineProgram call on a shader compiler worker thread.
void ProgramShaderCache::ReleasePipelineProgram(const PipelineProgram* prog)
{
auto iter = pipelineprograms.find(prog->key);
_assert_(iter != pipelineprograms.end() && prog == iter->second.get());
auto iter = s_pipeline_programs.find(prog->key);
_assert_(iter != s_pipeline_programs.end() && prog == iter->second.get());
// Only the last reference tears the program down.
if (--iter->second->reference_count == 0)
{
iter->second->shader.Destroy();
pipelineprograms.erase(iter);
s_pipeline_programs.erase(iter);
}
}
@ -783,4 +802,55 @@ void ProgramShaderCache::CreateHeader()
v > GlslEs300 ? "precision highp sampler2DMS;" : "",
v >= GlslEs310 ? "precision highp image2DArray;" : "");
}
bool SharedContextAsyncShaderCompiler::WorkerThreadInitMainThread(void** param)
{
std::unique_ptr<cInterfaceBase> context = GLInterface->CreateSharedContext();
if (!context)
{
PanicAlert("Failed to create shared context for shader compiling.");
return false;
}
*param = context.release();
return true;
}
bool SharedContextAsyncShaderCompiler::WorkerThreadInitWorkerThread(void* param)
{
cInterfaceBase* context = static_cast<cInterfaceBase*>(param);
if (!context->MakeCurrent())
return false;
s_is_shared_context = true;
if (g_ActiveConfig.backend_info.bSupportsPrimitiveRestart)
{
if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3)
{
glEnable(GL_PRIMITIVE_RESTART_FIXED_INDEX);
}
else
{
if (GLExtensions::Version() >= 310)
{
glEnable(GL_PRIMITIVE_RESTART);
glPrimitiveRestartIndex(65535);
}
else
{
glEnableClientState(GL_PRIMITIVE_RESTART_NV);
glPrimitiveRestartIndexNV(65535);
}
}
}
return true;
}
void SharedContextAsyncShaderCompiler::WorkerThreadExit(void* param)
{
cInterfaceBase* context = static_cast<cInterfaceBase*>(param);
context->ClearCurrent();
delete context;
}
} // namespace OGL

View File

@ -11,6 +11,7 @@
#include <unordered_map>
#include "Common/GL/GLUtil.h"
#include "VideoCommon/AsyncShaderCompiler.h"
namespace OGL
{
@ -87,7 +88,8 @@ public:
static void Shutdown();
static void CreateHeader();
static const PipelineProgram* GetPipelineProgram(const OGLShader* vertex_shader,
static const PipelineProgram* GetPipelineProgram(const GLVertexFormat* vertex_format,
const OGLShader* vertex_shader,
const OGLShader* geometry_shader,
const OGLShader* pixel_shader);
static void ReleasePipelineProgram(const PipelineProgram* prog);
@ -99,8 +101,8 @@ private:
static void CreateAttributelessVAO();
static PipelineProgramMap pipelineprograms;
static std::mutex pipelineprogramlock;
static PipelineProgramMap s_pipeline_programs;
static std::mutex s_pipeline_program_lock;
static u32 s_ubo_buffer_size;
static s32 s_ubo_align;
@ -110,4 +112,12 @@ private:
static GLuint s_last_VAO;
};
// AsyncShaderCompiler specialisation for the OpenGL backend. It overrides the worker
// thread hooks so that a shared GL context is created, made current, and destroyed
// around the worker's lifetime. The context pointer travels between the hooks through
// the opaque `param` argument.
class SharedContextAsyncShaderCompiler : public VideoCommon::AsyncShaderCompiler
{
protected:
// Creates a shared context via GLInterface and stores it in *param.
// Returns false if the context could not be created.
bool WorkerThreadInitMainThread(void** param) override;
// Makes the context in `param` current and enables primitive restart when supported.
// Returns false if the context could not be made current.
bool WorkerThreadInitWorkerThread(void* param) override;
// Clears the current context and deletes the context in `param`.
void WorkerThreadExit(void* param) override;
};
} // namespace OGL

View File

@ -81,8 +81,8 @@ static bool s_efbCacheIsCleared = false;
static std::vector<u32>
s_efbCache[2][EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT]; // 2 for PeekZ and PeekColor
static void APIENTRY ErrorCallback(GLenum source, GLenum type, GLuint id, GLenum severity,
GLsizei length, const char* message, const void* userParam)
void APIENTRY ErrorCallback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length,
const char* message, const void* userParam)
{
const char* s_source;
const char* s_type;
@ -677,6 +677,10 @@ Renderer::Renderer()
g_Config.backend_info.bSupportsPaletteConversion &&
g_Config.backend_info.bSupportsComputeShaders && g_ogl_config.bSupportsImageLoadStore;
// Background compiling is supported only when shared contexts aren't broken.
g_Config.backend_info.bSupportsBackgroundCompiling =
!DriverDetails::HasBug(DriverDetails::BUG_SHARED_CONTEXT_SHADER_COMPILATION);
if (g_ogl_config.bSupportsDebug)
{
if (GLExtensions::Supports("GL_KHR_debug"))
@ -1695,4 +1699,9 @@ void Renderer::DispatchComputeShader(const AbstractShader* shader, const void* u
glDispatchCompute(groups_x, groups_y, groups_z);
ProgramShaderCache::InvalidateLastProgram();
}
// OpenGL override: returns a compiler of type SharedContextAsyncShaderCompiler.
std::unique_ptr<VideoCommon::AsyncShaderCompiler> Renderer::CreateAsyncShaderCompiler()
{
  std::unique_ptr<VideoCommon::AsyncShaderCompiler> compiler =
      std::make_unique<SharedContextAsyncShaderCompiler>();
  return compiler;
}
}

View File

@ -139,6 +139,8 @@ public:
void DispatchComputeShader(const AbstractShader* shader, const void* uniforms, u32 uniforms_size,
u32 groups_x, u32 groups_y, u32 groups_z) override;
std::unique_ptr<VideoCommon::AsyncShaderCompiler> CreateAsyncShaderCompiler() override;
private:
void UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc,
const TargetRectangle& targetPixelRc, const void* data);

View File

@ -73,6 +73,7 @@ void VideoSoftware::InitBackendInfo()
g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsCopyToVram = false;
g_Config.backend_info.bSupportsFramebufferFetch = false;
g_Config.backend_info.bSupportsBackgroundCompiling = false;
// aamodes
g_Config.backend_info.AAModes = {1};

View File

@ -235,6 +235,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
config->backend_info.bSupportsBitfield = true; // Assumed support.
config->backend_info.bSupportsDynamicSamplerIndexing = true; // Assumed support.
config->backend_info.bSupportsPostProcessing = true; // Assumed support.
config->backend_info.bSupportsBackgroundCompiling = true; // Assumed support.
config->backend_info.bSupportsDualSourceBlend = false; // Dependent on features.
config->backend_info.bSupportsGeometryShaders = false; // Dependent on features.
config->backend_info.bSupportsGSInstancing = false; // Dependent on features.

View File

@ -102,8 +102,6 @@ static BugInfo m_known_bugs[] = {
true},
{API_OPENGL, OS_LINUX, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN,
BUG_SHARED_CONTEXT_SHADER_COMPILATION, -1.0, -1.0, true},
{API_OPENGL, OS_LINUX, VENDOR_MESA, DRIVER_NOUVEAU, Family::UNKNOWN,
BUG_SHARED_CONTEXT_SHADER_COMPILATION, -1.0, -1.0, true},
{API_VULKAN, OS_ALL, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKEN_MSAA_CLEAR, -1.0,
-1.0, true},
{API_VULKAN, OS_ALL, VENDOR_IMGTEC, DRIVER_IMGTEC, Family::UNKNOWN,

View File

@ -252,8 +252,10 @@ enum Bug
// the negated value to a temporary variable then using that in the bitwise op.
BUG_BROKEN_BITWISE_OP_NEGATION,
// Bug: Shaders are recompiled on the main thread after being previously compiled on
// a worker thread on Mesa i965.
// BUG: The GPU shader code appears to be context-specific on Mesa/i965.
// This means that if we compiled the ubershaders asynchronously, they will be recompiled
// on the main thread the first time they are used, causing stutter. For now, disable
// asynchronous compilation on Mesa i965.
// Started version: -1
// Ended Version: -1
BUG_SHARED_CONTEXT_SHADER_COMPILATION,

View File

@ -1027,3 +1027,8 @@ bool Renderer::UseVertexDepthRange() const
// in the vertex shader.
return fabs(xfmem.viewport.zRange) > 16777215.0f || fabs(xfmem.viewport.farZ) > 16777215.0f;
}
// Default factory for the async shader compiler: returns the plain
// VideoCommon::AsyncShaderCompiler. Declared virtual so backends can substitute their
// own compiler type.
std::unique_ptr<VideoCommon::AsyncShaderCompiler> Renderer::CreateAsyncShaderCompiler()
{
  auto compiler = std::make_unique<VideoCommon::AsyncShaderCompiler>();
  return compiler;
}

View File

@ -28,6 +28,7 @@
#include "Common/Flag.h"
#include "Common/MathUtil.h"
#include "VideoCommon/AVIDump.h"
#include "VideoCommon/AsyncShaderCompiler.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/FPSCounter.h"
#include "VideoCommon/RenderState.h"
@ -189,6 +190,8 @@ public:
void ResizeSurface(int new_width, int new_height);
bool UseVertexDepthRange() const;
virtual std::unique_ptr<VideoCommon::AsyncShaderCompiler> CreateAsyncShaderCompiler();
virtual void Shutdown();
// Drawing utility shaders.

View File

@ -27,7 +27,7 @@ bool ShaderCache::Initialize()
m_efb_multisamples = g_ActiveConfig.iMultisamples;
// Create the async compiler, and start the worker threads.
m_async_shader_compiler = std::make_unique<VideoCommon::AsyncShaderCompiler>();
m_async_shader_compiler = g_renderer->CreateAsyncShaderCompiler();
m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderPrecompilerThreads());
// Load shader and UID caches.

View File

@ -187,8 +187,7 @@ static u32 GetNumAutoShaderCompilerThreads()
u32 VideoConfig::GetShaderCompilerThreads() const
{
// videocommon shader cache is currently broken on OGL, needs multiple contexts.
if (backend_info.api_type == APIType::OpenGL)
if (!backend_info.bSupportsBackgroundCompiling)
return 0;
if (iShaderCompilerThreads >= 0)
@ -199,8 +198,7 @@ u32 VideoConfig::GetShaderCompilerThreads() const
u32 VideoConfig::GetShaderPrecompilerThreads() const
{
// videocommon shader cache is currently broken on OGL, needs multiple contexts.
if (backend_info.api_type == APIType::OpenGL)
if (!backend_info.bSupportsBackgroundCompiling)
return 0;
if (iShaderPrecompilerThreads >= 0)

View File

@ -227,6 +227,7 @@ struct VideoConfig final
bool bSupportsDynamicSamplerIndexing; // Needed by UberShaders, so must stay in VideoCommon
bool bSupportsBPTCTextures;
bool bSupportsFramebufferFetch; // Used as an alternative to dual-source blend on GLES
bool bSupportsBackgroundCompiling;
} backend_info;
// Utility