MetalDevice: Support multisampling

This commit is contained in:
Stenzek 2023-09-10 16:26:58 +10:00
parent 6fbea12ed3
commit a2f19143da
6 changed files with 254 additions and 25 deletions

View File

@ -0,0 +1,47 @@
# Borrowed from PCSX2.
if(APPLE)
function(add_metal_sources target sources)
if(CMAKE_GENERATOR MATCHES "Xcode")
# If we're generating an xcode project, you can just add the shaders to the main pcsx2 target and xcode will deal with them properly
# This will make sure xcode supplies code completion, etc (if you use a custom command, it won't)
set_target_properties(${target} PROPERTIES
XCODE_ATTRIBUTE_MTL_ENABLE_DEBUG_INFO INCLUDE_SOURCE
)
foreach(shader IN LISTS sources)
target_sources(${target} PRIVATE ${shader})
set_source_files_properties(${shader} PROPERTIES LANGUAGE METAL)
endforeach()
else()
function(generateMetallib std triple outputName)
set(MetalShaderOut)
set(flags
-ffast-math
$<$<NOT:$<CONFIG:Release,MinSizeRel>>:-gline-tables-only>
$<$<NOT:$<CONFIG:Release,MinSizeRel>>:-MO>
)
foreach(shader IN LISTS sources)
file(RELATIVE_PATH relativeShader "${CMAKE_SOURCE_DIR}" "${shader}")
set(shaderOut ${CMAKE_CURRENT_BINARY_DIR}/${outputName}/${relativeShader}.air)
list(APPEND MetalShaderOut ${shaderOut})
get_filename_component(shaderDir ${shaderOut} DIRECTORY)
add_custom_command(OUTPUT ${shaderOut}
COMMAND ${CMAKE_COMMAND} -E make_directory ${shaderDir}
COMMAND xcrun metal ${flags} -std=${std} -target ${triple} -o ${shaderOut} -c ${shader}
DEPENDS ${shader}
)
set(metallib ${CMAKE_CURRENT_BINARY_DIR}/${outputName}.metallib)
endforeach()
add_custom_command(OUTPUT ${metallib}
COMMAND xcrun metallib -o ${metallib} ${MetalShaderOut}
DEPENDS ${MetalShaderOut}
)
target_sources(${target} PRIVATE ${metallib})
set_source_files_properties(${metallib} PROPERTIES MACOSX_PACKAGE_LOCATION Resources)
endfunction()
generateMetallib(macos-metal2.0 air64-apple-macos10.13 default)
generateMetallib(macos-metal2.2 air64-apple-macos10.15 Metal22)
generateMetallib(macos-metal2.3 air64-apple-macos11.0 Metal23)
endif()
endfunction()
endif()

View File

@ -565,12 +565,17 @@ bool GPU_HW::CreateBuffers()
const u32 texture_height = VRAM_HEIGHT * m_resolution_scale;
const u8 samples = static_cast<u8>(m_multisamples);
// Needed for Metal resolve.
const GPUTexture::Type read_texture_type = (g_gpu_device->GetRenderAPI() == RenderAPI::Metal && m_multisamples > 1) ?
GPUTexture::Type::RWTexture :
GPUTexture::Type::Texture;
if (!(m_vram_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, samples,
GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) ||
!(m_vram_depth_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, samples,
GPUTexture::Type::DepthStencil, VRAM_DS_FORMAT)) ||
!(m_vram_read_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, 1,
GPUTexture::Type::Texture, VRAM_RT_FORMAT)) ||
!(m_vram_read_texture =
g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, 1, read_texture_type, VRAM_RT_FORMAT)) ||
!(m_display_private_texture = g_gpu_device->CreateTexture(
((m_downsample_mode == GPUDownsampleMode::Adaptive) ? VRAM_WIDTH : GPU_MAX_DISPLAY_WIDTH) *
m_resolution_scale,

View File

@ -264,6 +264,8 @@ if(WIN32)
target_link_libraries(util PRIVATE WinPixEventRuntime::WinPixEventRuntime)
endif()
elseif(APPLE)
include(AddMetalSources)
set(MAC_SOURCES
cocoa_tools.h
metal_device.h
@ -272,6 +274,10 @@ elseif(APPLE)
metal_stream_buffer.mm
platform_misc_mac.mm
)
set(METAL_SOURCES
"${CMAKE_CURRENT_SOURCE_DIR}/metal_shaders.metal"
)
set_property(GLOBAL PROPERTY UTIL_METAL_SOURCES ${METAL_SOURCES})
target_sources(util PRIVATE ${MAC_SOURCES})
find_library(IOK_LIBRARY IOKit REQUIRED)
find_library(METAL_LIBRARY Metal)
@ -286,6 +292,9 @@ endif()
function(add_util_resources target)
if(APPLE)
get_property(UTIL_METAL_SOURCES GLOBAL PROPERTY UTIL_METAL_SOURCES)
add_metal_sources(${target} ${UTIL_METAL_SOURCES})
# Copy MoltenVK into the bundle
unset(MOLTENVK_PATH CACHE)
find_file(MOLTENVK_PATH NAMES

View File

@ -299,6 +299,10 @@ private:
ALWAYS_INLINE NSView* GetWindowView() const { return (__bridge NSView*)m_window_info.window_handle; }
void SetFeatures();
bool LoadShaders();
id<MTLFunction> GetFunctionFromLibrary(id<MTLLibrary> library, NSString* name);
id<MTLComputePipelineState> CreateComputePipeline(id<MTLFunction> function, NSString* name);
std::unique_ptr<GPUShader> CreateShaderFromMSL(GPUShaderStage stage, const std::string_view& source,
const std::string_view& entry_point);
@ -354,6 +358,10 @@ private:
MetalStreamBuffer m_uniform_buffer;
MetalStreamBuffer m_texture_upload_buffer;
id<MTLLibrary> m_shaders = nil;
std::vector<std::pair<std::pair<GPUTexture::Format, GPUTexture::Format>, id<MTLComputePipelineState>>>
m_resolve_pipelines;
id<MTLCommandBuffer> m_upload_cmdbuf = nil;
id<MTLBlitCommandEncoder> m_upload_encoder = nil;
id<MTLBlitCommandEncoder> m_inline_upload_encoder = nil;

View File

@ -174,6 +174,12 @@ bool MetalDevice::CreateDevice(const std::string_view& adapter, bool threaded_pr
CreateCommandBuffer();
RenderBlankFrame();
if (!LoadShaders())
{
Log_ErrorPrint("Failed to load shaders.");
return false;
}
if (!CreateBuffers())
{
Log_ErrorPrintf("Failed to create buffers.");
@ -198,7 +204,7 @@ void MetalDevice::SetFeatures()
}
m_max_multisamples = 0;
for (u32 multisamples = 1; multisamples < 16; multisamples++)
for (u32 multisamples = 1; multisamples < 16; multisamples *= 2)
{
if (![m_device supportsTextureSampleCount:multisamples])
break;
@ -211,11 +217,71 @@ void MetalDevice::SetFeatures()
m_features.supports_texture_buffers = true;
m_features.texture_buffers_emulated_with_ssbo = true;
m_features.geometry_shaders = false;
m_features.partial_msaa_resolve = true;
m_features.partial_msaa_resolve = false;
m_features.shader_cache = true;
m_features.pipeline_cache = false;
}
bool MetalDevice::LoadShaders()
{
@autoreleasepool
{
auto try_lib = [this](NSString* name) -> id<MTLLibrary> {
NSBundle* bundle = [NSBundle mainBundle];
NSString* path = [bundle pathForResource:name ofType:@"metallib"];
if (path == nil)
{
// Xcode places it alongside the binary.
path = [NSString stringWithFormat:@"%@/%@.metallib", [bundle bundlePath], name];
if (![[NSFileManager defaultManager] fileExistsAtPath:path])
return nil;
}
id<MTLLibrary> lib = [m_device newLibraryWithFile:path error:nil];
if (lib == nil)
return nil;
return [lib retain];
};
if (!(m_shaders = try_lib(@"Metal23")) && !(m_shaders = try_lib(@"Metal22")) &&
!(m_shaders = try_lib(@"Metal21")) && !(m_shaders = try_lib(@"default")))
{
return false;
}
return true;
}
}
id<MTLFunction> MetalDevice::GetFunctionFromLibrary(id<MTLLibrary> library, NSString* name)
{
id<MTLFunction> function = [library newFunctionWithName:name];
return function;
}
id<MTLComputePipelineState> MetalDevice::CreateComputePipeline(id<MTLFunction> function, NSString* name)
{
MTLComputePipelineDescriptor* desc = [MTLComputePipelineDescriptor new];
if (name != nil)
[desc setLabel:name];
[desc setComputeFunction:function];
NSError* err = nil;
id<MTLComputePipelineState> pipeline = [m_device newComputePipelineStateWithDescriptor:desc
options:MTLPipelineOptionNone
reflection:nil
error:&err];
[desc release];
if (pipeline == nil)
{
LogNSError(err, "Create compute pipeline failed:");
return nil;
}
return pipeline;
}
void MetalDevice::DestroyDevice()
{
WaitForPreviousCommandBuffers();
@ -243,6 +309,17 @@ void MetalDevice::DestroyDevice()
[it.second release];
m_cleanup_objects.clear();
for (auto& it : m_resolve_pipelines)
{
if (it.second != nil)
[it.second release];
}
m_resolve_pipelines.clear();
if (m_shaders != nil)
{
[m_shaders release];
m_shaders = nil;
}
if (m_queue != nil)
{
[m_queue release];
@ -736,7 +813,7 @@ std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::Grap
// General
const MTLPrimitiveType primitive = primitives[static_cast<u8>(config.primitive)];
desc.rasterSampleCount = config.per_sample_shading ? config.samples : 1;
desc.rasterSampleCount = config.samples;
// Metal-specific stuff
desc.vertexBuffers[0].mutability = MTLMutabilityImmutable;
@ -959,6 +1036,15 @@ std::unique_ptr<GPUTexture> MetalDevice::CreateTexture(u32 width, u32 height, u3
desc.depth = levels;
desc.pixelFormat = pixel_format;
desc.mipmapLevelCount = levels;
if (samples > 1)
{
desc.textureType = (layers > 1) ? MTLTextureType2DMultisampleArray : MTLTextureType2DMultisample;
desc.sampleCount = samples;
}
else if (layers > 1)
{
desc.textureType = MTLTextureType2DArray;
}
switch (type)
{
@ -1339,30 +1425,62 @@ void MetalDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 d
void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height)
{
#if 0
DebugAssert(src_level < src->GetLevels() && src_layer < src->GetLayers());
DebugAssert((src_x + width) <= src->GetMipWidth(src_level));
DebugAssert((src_y + height) <= src->GetMipHeight(src_level));
DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers());
DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level));
DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level));
DebugAssert(!dst->IsMultisampled() && src->IsMultisampled());
DebugAssert((src_x + width) <= src->GetWidth());
DebugAssert((src_y + height) <= src->GetHeight());
DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers());
DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level));
DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level));
DebugAssert(!dst->IsMultisampled() && src->IsMultisampled());
// DX11 can't resolve partial rects.
Assert(src_x == dst_x && src_y == dst_y);
// Only does first level for now..
DebugAssert(dst_level == 0 && dst_layer == 0);
MetalTexture* dst11 = static_cast<MetalTexture*>(dst);
MetalTexture* src11 = static_cast<MetalTexture*>(src);
const GPUTexture::Format src_format = dst->GetFormat();
const GPUTexture::Format dst_format = dst->GetFormat();
id<MTLComputePipelineState> resolve_pipeline = nil;
if (auto iter = std::find_if(m_resolve_pipelines.begin(), m_resolve_pipelines.end(),
[src_format, dst_format](const auto& it) {
return it.first.first == src_format && it.first.second == dst_format;
});
iter != m_resolve_pipelines.end())
{
resolve_pipeline = iter->second;
}
else
{
// Need to compile it.
@autoreleasepool
{
const bool is_depth = GPUTexture::IsDepthFormat(src_format);
id<MTLFunction> function =
[GetFunctionFromLibrary(m_shaders, is_depth ? @"depthResolveKernel" : @"colorResolveKernel") autorelease];
if (function == nil)
Panic("Failed to get resolve kernel");
src11->CommitClear(m_context.Get());
dst11->CommitClear(m_context.Get());
resolve_pipeline = [CreateComputePipeline(function, is_depth ? @"Depth Resolve" : @"Color Resolve") autorelease];
if (resolve_pipeline != nil)
[resolve_pipeline retain];
m_resolve_pipelines.emplace_back(std::make_pair(src_format, dst_format), resolve_pipeline);
}
}
if (resolve_pipeline == nil)
Panic("Failed to get resolve pipeline");
m_context->ResolveSubresource(dst11->GetD3DTexture(), MetalCalcSubresource(dst_level, dst_layer, dst->GetLevels()),
src11->GetD3DTexture(), MetalCalcSubresource(src_level, src_layer, src->GetLevels()),
dst11->GetDXGIFormat());
#else
Panic("Fixme");
#endif
if (InRenderPass())
EndRenderPass();
const u32 threadgroupHeight = resolve_pipeline.maxTotalThreadsPerThreadgroup / resolve_pipeline.threadExecutionWidth;
const MTLSize intrinsicThreadgroupSize = MTLSizeMake(resolve_pipeline.threadExecutionWidth, threadgroupHeight, 1);
const MTLSize threadgroupsInGrid =
MTLSizeMake((src->GetWidth() + intrinsicThreadgroupSize.width - 1) / intrinsicThreadgroupSize.width,
(src->GetHeight() + intrinsicThreadgroupSize.height - 1) / intrinsicThreadgroupSize.height, 1);
id<MTLComputeCommandEncoder> computeEncoder = [m_render_cmdbuf computeCommandEncoder];
[computeEncoder setComputePipelineState:resolve_pipeline];
[computeEncoder setTexture:static_cast<MetalTexture*>(src)->GetMTLTexture() atIndex:0];
[computeEncoder setTexture:static_cast<MetalTexture*>(dst)->GetMTLTexture() atIndex:1];
[computeEncoder dispatchThreadgroups:threadgroupsInGrid threadsPerThreadgroup:intrinsicThreadgroupSize];
[computeEncoder endEncoding];
}
void MetalDevice::ClearRenderTarget(GPUTexture* t, u32 c)

View File

@ -0,0 +1,42 @@
/// A custom resolve kernel that averages color at all sample points.
#include <metal_stdlib>
using namespace metal;
// https://developer.apple.com/documentation/metal/metal_sample_code_library/improving_edge-rendering_quality_with_multisample_antialiasing_msaa?language=objc
kernel void
colorResolveKernel(texture2d_ms<float, access::read> multisampledTexture [[texture(0)]],
texture2d<float, access::write> resolvedTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
const uint count = multisampledTexture.get_num_samples();
float4 resolved_color = 0;
for (uint i = 0; i < count; ++i)
{
resolved_color += multisampledTexture.read(gid, i);
}
resolved_color /= count;
resolvedTexture.write(resolved_color, gid);
}
kernel void
depthResolveKernel(texture2d_ms<float, access::read> multisampledTexture [[texture(0)]],
texture2d<float, access::write> resolvedTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
const uint count = multisampledTexture.get_num_samples();
float resolved_depth = 0;
for (uint i = 0; i < count; ++i)
{
resolved_depth += multisampledTexture.read(gid, i).r;
}
resolved_depth /= count;
resolvedTexture.write(float4(resolved_depth, 0, 0, 0), gid);
}