diff options
author | SRSaunders <82544213+SRSaunders@users.noreply.github.com> | 2024-03-19 00:09:11 -0400 |
---|---|---|
committer | SRSaunders <82544213+SRSaunders@users.noreply.github.com> | 2024-03-19 00:09:11 -0400 |
commit | 10810f41d061362e87d8a4b9d0ab30e7bf97a390 (patch) | |
tree | fb42010b3e89cca71ff96e8013be218b36aab5da | |
parent | 08c1ad705a9224d8d8b2c272b512cc8684fc759e (diff) | |
download | moltenvk-10810f41d061362e87d8a4b9d0ab30e7bf97a390.tar.gz |
Add two new counters in MVKQueuePerformance for async queue submit wait times
-rw-r--r-- | MoltenVK/MoltenVK/API/mvk_private_api.h | 2 | ||||
-rw-r--r-- | MoltenVK/MoltenVK/GPUObjects/MVKQueue.h | 6 | ||||
-rw-r--r-- | MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 22 |
3 files changed, 21 insertions, 9 deletions
diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h index 5f63019f..a2183e56 100644 --- a/MoltenVK/MoltenVK/API/mvk_private_api.h +++ b/MoltenVK/MoltenVK/API/mvk_private_api.h @@ -408,9 +408,11 @@ typedef struct { typedef struct { MVKPerformanceTracker retrieveMTLCommandBuffer; /** Retrieve a MTLCommandBuffer from a MTLQueue, in milliseconds. */ MVKPerformanceTracker commandBufferEncoding; /** Encode a single VkCommandBuffer to a MTLCommandBuffer (excludes MTLCommandBuffer encoding from configured immediate prefilling), in milliseconds. */ + MVKPerformanceTracker waitSubmitCommandBuffers; /** Wait time from initial call to starting the submit and encoding of all VkCommandBuffers in an asynchronous vkQueueSubmit() operation, in milliseconds. */ MVKPerformanceTracker submitCommandBuffers; /** Submit and encode all VkCommandBuffers in a vkQueueSubmit() operation to MTLCommandBuffers (including both prefilled and deferred encoding), in milliseconds. */ MVKPerformanceTracker mtlCommandBufferExecution; /** Execute a MTLCommandBuffer on the GPU, from commit to completion callback, in milliseconds. */ MVKPerformanceTracker retrieveCAMetalDrawable; /** Retrieve next CAMetalDrawable from a CAMetalLayer, in milliseconds. */ + MVKPerformanceTracker waitPresentSwapchains; /** Wait time from initial call to starting presentation of the swapchains in an asynchronous vkQueuePresentKHR() operation, in milliseconds. */ MVKPerformanceTracker presentSwapchains; /** Present the swapchains in a vkQueuePresentKHR() on the GPU, from commit to presentation callback, in milliseconds. */ MVKPerformanceTracker frameInterval; /** Frame presentation interval (1000/FPS), in milliseconds. */ } MVKQueuePerformance; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h index 0adc557f..6459ebca 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h @@ -195,7 +195,7 @@ public: * * Upon completion of this function, no further calls should be made to this instance. */ - virtual VkResult execute() = 0; + virtual VkResult execute(uint64_t startTime) = 0; MVKQueueSubmission(MVKQueue* queue, uint32_t waitSemaphoreInfoCount, @@ -238,7 +238,7 @@ typedef struct MVKCommandBufferSubmitInfo { class MVKQueueCommandBufferSubmission : public MVKQueueSubmission { public: - VkResult execute() override; + VkResult execute(uint64_t startTime) override; MVKQueueCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo2* pSubmit, @@ -302,7 +302,7 @@ protected: class MVKQueuePresentSurfaceSubmission : public MVKQueueSubmission { public: - VkResult execute() override; + VkResult execute(uint64_t startTime) override; MVKQueuePresentSurfaceSubmission(MVKQueue* queue, const VkPresentInfoKHR* pPresentInfo); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index 40301695..e190e905 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -69,7 +69,7 @@ void MVKQueue::propagateDebugName() { setLabelIfNotNil(_mtlQueue, _debugName); } // Execute the queue submission under an autoreleasepool to ensure transient Metal objects are autoreleased. // This is critical for apps that don't use standard OS autoreleasing runloop threading. -static inline VkResult execute(MVKQueueSubmission* qSubmit) { @autoreleasepool { return qSubmit->execute(); } } +static inline VkResult execute(MVKQueueSubmission* qSubmit, uint64_t startTime) { @autoreleasepool { return qSubmit->execute(startTime); } } // Executes the submmission, either immediately, or by dispatching to an execution queue. // Submissions to the execution queue are wrapped in a dedicated autoreleasepool. @@ -83,11 +83,13 @@ VkResult MVKQueue::submit(MVKQueueSubmission* qSubmit) { // Extract result before submission to avoid race condition with early destruction // Submit regardless of config result, to ensure submission semaphores and fences are signalled. // The submissions will ensure a misconfiguration will be safe to execute. + MVKDevice* mvkDev = getDevice(); + uint64_t startTime = mvkDev->getPerformanceTimestamp(); VkResult rslt = qSubmit->getConfigurationResult(); if (_execQueue) { - dispatch_async(_execQueue, ^{ execute(qSubmit); } ); + dispatch_async(_execQueue, ^{ execute(qSubmit, startTime); } ); } else { - rslt = execute(qSubmit); + rslt = execute(qSubmit, startTime); } return rslt; } @@ -442,13 +444,17 @@ MVKQueueSubmission::~MVKQueueSubmission() { #pragma mark - #pragma mark MVKQueueCommandBufferSubmission -VkResult MVKQueueCommandBufferSubmission::execute() { +VkResult MVKQueueCommandBufferSubmission::execute(uint64_t startTime) { _queue->_submissionCaptureScope->beginScope(); // If using encoded semaphore waiting, do so now. for (auto& ws : _waitSemaphores) { ws.encodeWait(getActiveMTLCommandBuffer()); } + // Wait time from an async vkQueueSubmit() call to starting submit and encoding of the command buffers + MVKDevice* mvkDev = getDevice(); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitSubmitCommandBuffers, startTime); + // Submit each command buffer. submitCommandBuffers(); @@ -678,8 +684,8 @@ MVKQueueFullCommandBufferSubmission<N>::MVKQueueFullCommandBufferSubmission(MVKQ // If the semaphores are encodable, wait on them by encoding them on the MTLCommandBuffer before presenting. // If the semaphores are not encodable, wait on them inline after presenting. // The semaphores know what to do. -VkResult MVKQueuePresentSurfaceSubmission::execute() { - // MTLCommandBuffer retain references to avoid rare case where objects are destroyed too early. +VkResult MVKQueuePresentSurfaceSubmission::execute(uint64_t startTime) { + // MTLCommandBuffer retain references to avoid rare case where objects are destroyed too early. // Although testing could not determine which objects were being lost, queue present MTLCommandBuffers // are used only once per frame, and retain so few objects, that blanket retention is still performant. id<MTLCommandBuffer> mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent, true); @@ -689,6 +695,10 @@ VkResult MVKQueuePresentSurfaceSubmission::execute() { ws.encodeWait(nil); // Inline semaphore waits } + // Wait time from an async vkQueuePresentKHR() call to starting presentation of the swapchains + MVKDevice* mvkDev = getDevice(); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitPresentSwapchains, startTime); + for (int i = 0; i < _presentInfo.size(); i++ ) { setConfigurationResult(_presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i])); } |