metal: avoid an extra buffer allocation and GPU data copy in RunCommandQueue; neither is needed. This improves overall performance.

2025-03-02 23:55:39 +00:00 · 2018-11-01 20:24:21 -03:00 · 2018-11-01 20:24:21 -03:00 · 457390fcf8
commit 457390fcf8
parent 4e86dfd8d1
1 changed files with 9 additions and 14 deletions
--- a/src/render/metal/SDL_render_metal.m
+++ b/src/render/metal/SDL_render_metal.m
@@ -1073,24 +1073,19 @@ METAL_RunCommandQueue(SDL_Renderer * renderer, SDL_RenderCommand *cmd, void *ver

    // !!! FIXME: have a ring of pre-made MTLBuffers we cycle through? How expensive is creation?
    if (vertsize > 0) {
-        id<MTLBuffer> mtlbufvertexstaging = [data.mtldevice newBufferWithLength:vertsize options:MTLResourceStorageModeShared];
-        #if !__has_feature(objc_arc)
-        [mtlbufvertexstaging autorelease];
-        #endif
-        mtlbufvertexstaging.label = @"SDL vertex staging data";
-        SDL_memcpy([mtlbufvertexstaging contents], vertices, vertsize);
-
-        // Move our new vertex buffer from system RAM to GPU memory so any draw calls can use it.
-        mtlbufvertex = [data.mtldevice newBufferWithLength:vertsize options:MTLResourceStorageModePrivate];
+        /* We can memcpy to a shared buffer from the CPU and read it from the GPU
+         * without any extra copying. It's a bit slower on macOS to read shared
+         * data from the GPU than to read managed/private data, but we avoid the
+         * cost of copying the data and the code's simpler. Apple's best
+         * practices guide recommends this approach for streamed vertex data.
+         * TODO: this buffer is also used for constants. Is performance still
+         * good for those, or should we have a managed buffer for them? */
+        mtlbufvertex = [data.mtldevice newBufferWithLength:vertsize options:MTLResourceStorageModeShared];
        #if !__has_feature(objc_arc)
        [mtlbufvertex autorelease];
        #endif
        mtlbufvertex.label = @"SDL vertex data";
-        id<MTLCommandBuffer> cmdbuffer = [data.mtlcmdqueue commandBuffer];
-        id<MTLBlitCommandEncoder> blitcmd = [cmdbuffer blitCommandEncoder];
-        [blitcmd copyFromBuffer:mtlbufvertexstaging sourceOffset:0 toBuffer:mtlbufvertex destinationOffset:0 size:vertsize];
-        [blitcmd endEncoding];
-        [cmdbuffer commit];
+        SDL_memcpy([mtlbufvertex contents], vertices, vertsize);
    }

    // If there's a command buffer here unexpectedly (app requested one?). Commit it so we can start fresh.