RealityKit blinking magenta artifacts in post-processor due to race condition

3 weeks ago 27
ARTICLE AD BOX

I am working on a "glowing effect" post processor. Basically, it has 4 passes:

render selected entities (that I want to apply glow effect) to a "mask texture"

blur the mask texture horizontally

blur the mask texture vertically

composite the double-blurred mask with the original texture

It worked well on my large screen phones (e.g. iPhone 15 Pro):

enter image description here

However, it shows a grid of magenta square artifacts on my iPhone 13 mini:

enter image description here

My post processor code looks like this:

public func processFrame(context: ARView.PostProcessContext) -> Bool { guard let commandBuffer = commandQueue.makeCommandBuffer() else { return false } // Pass 1: Render selected entities to mask texture let entityPassDescriptor: MTLRenderPassDescriptor = { let descriptor = MTLRenderPassDescriptor() descriptor.colorAttachments[0].texture = entityMaskTexture descriptor.colorAttachments[0].loadAction = .clear descriptor.colorAttachments[0].storeAction = .store descriptor.colorAttachments[0].clearColor = MTLClearColor(red: 0, green: 0, blue: 0, alpha: 1) return descriptor }() if let encoder = commandBuffer.makeRenderCommandEncoder(descriptor: entityPassDescriptor) { encoder.setRenderPipelineState(entityPipeline) encoder.setCullMode(.back) encoder.setFrontFacing(.counterClockwise) renderEntitiesToMask(encoder: encoder, context: context) encoder.endEncoding() } else { return false } // Pass 2: Blur horizontal let texelSize = SIMD2<Float>(1.0 / Float(width), 1.0 / Float(height)) let blurHPassDescriptor: MTLRenderPassDescriptor = { let descriptor = MTLRenderPassDescriptor() descriptor.colorAttachments[0].texture = blurTexture1 descriptor.colorAttachments[0].loadAction = .clear descriptor.colorAttachments[0].storeAction = .store return descriptor }() if let encoder = commandBuffer.makeRenderCommandEncoder(descriptor: blurHPassDescriptor) { encoder.setRenderPipelineState(blurHPipeline) encoder.setFragmentTexture(entityMaskTexture, index: 0) var blurParams = SIMD3<Float>(texelSize.x, texelSize.y, blurRadius) encoder.setFragmentBytes(&blurParams, length: MemoryLayout<SIMD3<Float>>.size, index: 0) encoder.drawPrimitives(type: .triangle, vertexStart: 0, vertexCount: 6) encoder.endEncoding() } else { return false } // Pass 3: Blur vertical let blurVPassDescriptor: MTLRenderPassDescriptor = { let descriptor = MTLRenderPassDescriptor() descriptor.colorAttachments[0].texture = blurTexture2 descriptor.colorAttachments[0].loadAction = .clear descriptor.colorAttachments[0].storeAction = .store return descriptor }() if let encoder = commandBuffer.makeRenderCommandEncoder(descriptor: blurVPassDescriptor) { encoder.setRenderPipelineState(blurVPipeline) encoder.setFragmentTexture(blurTexture1, index: 0) var blurParams = SIMD3<Float>(texelSize.x, texelSize.y, blurRadius) encoder.setFragmentBytes(&blurParams, length: MemoryLayout<SIMD3<Float>>.size, index: 0) encoder.drawPrimitives(type: .triangle, vertexStart: 0, vertexCount: 6) encoder.endEncoding() } else { return false } // Pass 4: Composite with glow color let targetPixelFormat = context.targetColorTexture.pixelFormat let compositePipeline = getOrCreateCompositePipeline(pixelFormat: targetPixelFormat) let compositePassDescriptor: MTLRenderPassDescriptor = { let descriptor = MTLRenderPassDescriptor() descriptor.colorAttachments[0].texture = context.targetColorTexture descriptor.colorAttachments[0].loadAction = .load descriptor.colorAttachments[0].storeAction = .store return descriptor }() if let encoder = commandBuffer.makeRenderCommandEncoder(descriptor: compositePassDescriptor) { encoder.setRenderPipelineState(compositePipeline) encoder.setFragmentTexture(context.sourceColorTexture, index: 0) encoder.setFragmentTexture(blurTexture2, index: 1) // Blurred mask encoder.setFragmentTexture(entityMaskTexture, index: 2) // Original unblurred entity mask encoder.setFragmentBytes(&glowColorVector, length: MemoryLayout<SIMD4<Float>>.size, index: 0) encoder.drawPrimitives(type: .triangle, vertexStart: 0, vertexCount: 6) encoder.endEncoding() } else { return false } // have to manually commit, since it's our custom commandBuffer commandBuffer.commit() return true }

Since it's related to screen size, I suspected it was because of race condition where RealityKit's post processor trying to access the final texture that's either not ready, or already be modified by the next frame . So I added this one line of code at the end:

// this fixes the issue commandBuffer.commit() commandBuffer.waitUntilCompleted() // <- added this return true

And it solves the problem. However, this obviously blocks the CPU while waiting for the GPU, which isn't ideal (Not the end of the world since it's render thread rather than main thread, but still i'd prefer not to block unless when it's absolutely necessary).

So I tried using semaphore and commandBuffer's completion handler:

// this doesn't work, possibly due to race between 2 command buffers frameSemaphore.wait() guard let commandBuffer = commandQueue.makeCommandBuffer() else { frameSemaphore.signal() return false } ... commandBuffer.addCompletedHandler { _ in self.frameSemaphore.signal() } commandBuffer.commit()

Surprisingly, this results in the same artifact. So I suspect there's a race between my custom commandBuffer and RealityKit's internal commandBuffer (aka context.commandBuffer).

However, when I use context.commandBuffer rather than my own custom command buffer, I got a crash:

Execution of the command buffer was aborted due to an error during execution. Caused GPU Timeout Error (00000002:kIOGPUCommandBufferCallbackErrorTimeout) assertion failure: '0' (execute:line 79) User enqueued/committed custom post processing command buffer.

With lots of back-and-forth with ChatGPT, it seems that we can't use context.commandBuffer, which only works for single-pass effects. For multi-pass pipeline, we have to use our own command buffer to control execution order and ensure each pass completes before the next begins (I am new to RealityKit/Metal so I am not sure whether this is true).

So I tried using my custom commandBuffer for the first 3 passes, and use context.commandBuffer for the 4th pass, and it worked magically without using waitUntilCompleted.

// this also works, but feels hacky with 2 buffers guard let intermediateBuffer = commandQueue.makeCommandBuffer() else { return false } // use intermediateBuffer for the first 3 passes // ... // then use finalBuffer for the 4th pass let finalBuffer = context.commandBuffer

While this works, it feels a bit hacky, and error prone since I have to remember the correct command buffer to use to avoid race condition. It is also hard for me to be concern that this avoided race condition completely (it could be just this particular iPhone 13 mini and this particular code setup, very hard to tell). I wonder if there's a better solution?

If you are interested in reproducing it, below is my metal file (I can upload the whole project too if you like, but just any strawman idea is also welcome)

#include <metal_stdlib> using namespace metal; struct GlowVertexOut { float4 position [[position]]; float2 uv; }; // Vertex shader for full-screen quad vertex GlowVertexOut glow_vertex_shader(uint vertexID [[vertex_id]]) { GlowVertexOut out; // Generate full-screen quad const float2 positions[6] = { float2(-1.0, -1.0), float2( 1.0, -1.0), float2(-1.0, 1.0), float2(-1.0, 1.0), float2( 1.0, -1.0), float2( 1.0, 1.0) }; float2 pos = positions[vertexID]; out.position = float4(pos, 0.0, 1.0); out.uv = pos * 0.5 + 0.5; out.uv.y = 1.0 - out.uv.y; // Flip Y return out; } // MARK: - Entity Mask Rendering struct GlowEntityVertexOut { float4 position [[position]]; }; // Vertex shader for rendering entities to mask // Takes vertex positions from buffer (extracted from MeshResource.contents) vertex GlowEntityVertexOut glow_entity_mask_vertex(uint vertexID [[vertex_id]], constant float4x4& mvpMatrix [[buffer(0)]], constant float3* vertices [[buffer(1)]]) { GlowEntityVertexOut out; float3 position = vertices[vertexID]; out.position = mvpMatrix * float4(position, 1.0); return out; } // Fragment shader for entity mask - outputs white for selected entities fragment float glow_entity_mask_fragment() { return 1.0; // White = selected entity } // MARK: - Gaussian Blur // Shared Gaussian blur weights (sum = 1.0 for proper normalization) // {0.2270270270, 0.1945945946, 0.1216216216, 0.0540540541, 0.0162162162}; // However, the gaussian blur weights doesn't look good (too transparent). constant float kGaussianWeights[5] = {0.8, 0.7, 0.5, 0.2, 0.0162162162}; // Base offsets multiplied by blur radius constant float kBaseOffsets[5] = {0.0, 1.0, 2.0, 3.0, 4.0}; // Gaussian blur horizontal pass // Takes single-channel mask (r8) and outputs blurred mask fragment float4 glow_blur_horizontal(GlowVertexOut in [[stage_in]], texture2d<float> inputTexture [[texture(0)]], constant float3& blurParams [[buffer(0)]]) { constexpr sampler blurSampler(mag_filter::linear, min_filter::linear, address::clamp_to_edge); float2 texelSize = blurParams.xy; float blurRadius = blurParams.z; // Sample center pixel float result = inputTexture.sample(blurSampler, in.uv).r * kGaussianWeights[0]; // Sample neighboring pixels (both directions) for (int i = 1; i < 5; i++) { float2 offset = float2(kBaseOffsets[i] * blurRadius * texelSize.x, 0.0); result += inputTexture.sample(blurSampler, in.uv + offset).r * kGaussianWeights[i]; result += inputTexture.sample(blurSampler, in.uv - offset).r * kGaussianWeights[i]; } // Return blurred result (only red channel used) return float4(result, 0.0, 0.0, 1.0); } // Gaussian blur vertical pass fragment float4 glow_blur_vertical(GlowVertexOut in [[stage_in]], texture2d<float> inputTexture [[texture(0)]], constant float3& blurParams [[buffer(0)]]) { constexpr sampler blurSampler(mag_filter::linear, min_filter::linear, address::clamp_to_edge); float2 texelSize = blurParams.xy; float blurRadius = blurParams.z; // Sample center pixel float result = inputTexture.sample(blurSampler, in.uv).r * kGaussianWeights[0]; // Sample neighboring pixels (both directions) for (int i = 1; i < 5; i++) { float2 offset = float2(0.0, kBaseOffsets[i] * blurRadius * texelSize.y); result += inputTexture.sample(blurSampler, in.uv + offset).r * kGaussianWeights[i]; result += inputTexture.sample(blurSampler, in.uv - offset).r * kGaussianWeights[i]; } // Return blurred result (only red channel used) return float4(result, 0.0, 0.0, 1.0); } // Final composite shader // Blends the blurred entity mask with the original scene fragment float4 glow_composite(GlowVertexOut in [[stage_in]], texture2d<float> colorTexture [[texture(0)]], texture2d<float> blurredMaskTexture [[texture(1)]], texture2d<float> entityMaskTexture [[texture(2)]], constant float4& glowColor [[buffer(0)]]) { constexpr sampler textureSampler(mag_filter::linear, min_filter::linear); constexpr sampler sharpSampler(mag_filter::nearest, min_filter::nearest); float4 originalColor = colorTexture.sample(textureSampler, in.uv); float4 blurredMask = blurredMaskTexture.sample(textureSampler, in.uv); float entityMask = entityMaskTexture.sample(sharpSampler, in.uv).r; float blurredAlpha = blurredMask.r; // If entityMask == 1, we're ON the object itself, don't apply glow if (entityMask == 1) { return originalColor; } // Apply glow only around edges, using alpha to control intensity float glowAmount = blurredAlpha * glowColor.a; // If there's a border, blend glow color if (glowAmount > 0.001) { // Blend glow color over original color based on glowAmount float3 finalRGB = mix(originalColor.rgb, glowColor.rgb, glowAmount); // Keep alpha at least as high as original (don't make opaque areas transparent) float finalAlpha = max(originalColor.a, glowAmount); return float4(finalRGB, finalAlpha); } return originalColor; }
Read Entire Article