Add two new counters in MVKQueuePerformance for async queue submit wait times

author: SRSaunders <82544213+SRSaunders@users.noreply.github.com> 2024-03-19 00:09:11 -0400
committer: SRSaunders <82544213+SRSaunders@users.noreply.github.com> 2024-03-19 00:09:11 -0400
commit: 10810f41d061362e87d8a4b9d0ab30e7bf97a390 (patch)
tree: fb42010b3e89cca71ff96e8013be218b36aab5da
parent: 08c1ad705a9224d8d8b2c272b512cc8684fc759e (diff)
download: moltenvk-10810f41d061362e87d8a4b9d0ab30e7bf97a390.tar.gz
3 files changed, 21 insertions, 9 deletions
diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h
index 5f63019f..a2183e56 100644
--- a/MoltenVK/MoltenVK/API/mvk_private_api.h
+++ b/MoltenVK/MoltenVK/API/mvk_private_api.h
@@ -408,9 +408,11 @@ typedef struct {
 typedef struct {
 	MVKPerformanceTracker retrieveMTLCommandBuffer;     /** Retrieve a MTLCommandBuffer from a MTLQueue, in milliseconds. */
 	MVKPerformanceTracker commandBufferEncoding;        /** Encode a single VkCommandBuffer to a MTLCommandBuffer (excludes MTLCommandBuffer encoding from configured immediate prefilling), in milliseconds. */
+	MVKPerformanceTracker waitSubmitCommandBuffers;		/** Wait time from the initial call until the submit and encoding of all VkCommandBuffers starts in an asynchronous vkQueueSubmit() operation, in milliseconds. */
 	MVKPerformanceTracker submitCommandBuffers;         /** Submit and encode all VkCommandBuffers in a vkQueueSubmit() operation to MTLCommandBuffers (including both prefilled and deferred encoding), in milliseconds. */
 	MVKPerformanceTracker mtlCommandBufferExecution;    /** Execute a MTLCommandBuffer on the GPU, from commit to completion callback, in milliseconds. */
 	MVKPerformanceTracker retrieveCAMetalDrawable;      /** Retrieve next CAMetalDrawable from a CAMetalLayer, in milliseconds. */
+	MVKPerformanceTracker waitPresentSwapchains;		/** Wait time from the initial call until presentation of the swapchains starts in an asynchronous vkQueuePresentKHR() operation, in milliseconds. */
 	MVKPerformanceTracker presentSwapchains;            /** Present the swapchains in a vkQueuePresentKHR() on the GPU, from commit to presentation callback, in milliseconds. */
 	MVKPerformanceTracker frameInterval;                /** Frame presentation interval (1000/FPS), in milliseconds. */
 } MVKQueuePerformance;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
index 0adc557f..6459ebca 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@@ -195,7 +195,7 @@ public:
 	 *
 	 * Upon completion of this function, no further calls should be made to this instance.
 	 */
-	virtual VkResult execute() = 0;
+	virtual VkResult execute(uint64_t startTime) = 0;
 
 	MVKQueueSubmission(MVKQueue* queue,
 					   uint32_t waitSemaphoreInfoCount,
@@ -238,7 +238,7 @@ typedef struct MVKCommandBufferSubmitInfo {
 class MVKQueueCommandBufferSubmission : public MVKQueueSubmission {
 
 public:
-	VkResult execute() override;
+	VkResult execute(uint64_t startTime) override;
 
 	MVKQueueCommandBufferSubmission(MVKQueue* queue, 
 									const VkSubmitInfo2* pSubmit,
@@ -302,7 +302,7 @@ protected:
 class MVKQueuePresentSurfaceSubmission : public MVKQueueSubmission {
 
 public:
-	VkResult execute() override;
+	VkResult execute(uint64_t startTime) override;
 
 	MVKQueuePresentSurfaceSubmission(MVKQueue* queue,
 									 const VkPresentInfoKHR* pPresentInfo);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
index 40301695..e190e905 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@@ -69,7 +69,7 @@ void MVKQueue::propagateDebugName() { setLabelIfNotNil(_mtlQueue, _debugName); }
 
 // Execute the queue submission under an autoreleasepool to ensure transient Metal objects are autoreleased.
 // This is critical for apps that don't use standard OS autoreleasing runloop threading.
-static inline VkResult execute(MVKQueueSubmission* qSubmit) { @autoreleasepool { return qSubmit->execute(); } }
+static inline VkResult execute(MVKQueueSubmission* qSubmit, uint64_t startTime) { @autoreleasepool { return qSubmit->execute(startTime); } }
 
 // Executes the submmission, either immediately, or by dispatching to an execution queue.
 // Submissions to the execution queue are wrapped in a dedicated autoreleasepool.
@@ -83,11 +83,13 @@ VkResult MVKQueue::submit(MVKQueueSubmission* qSubmit) {
 	// Extract result before submission to avoid race condition with early destruction
 	// Submit regardless of config result, to ensure submission semaphores and fences are signalled.
 	// The submissions will ensure a misconfiguration will be safe to execute.
+	MVKDevice* mvkDev = getDevice();
+	uint64_t startTime = mvkDev->getPerformanceTimestamp();
 	VkResult rslt = qSubmit->getConfigurationResult();
 	if (_execQueue) {
-		dispatch_async(_execQueue, ^{ execute(qSubmit); } );
+		dispatch_async(_execQueue, ^{ execute(qSubmit, startTime); } );
 	} else {
-		rslt = execute(qSubmit);
+		rslt = execute(qSubmit, startTime);
 	}
 	return rslt;
 }
@@ -442,13 +444,17 @@ MVKQueueSubmission::~MVKQueueSubmission() {
 #pragma mark -
 #pragma mark MVKQueueCommandBufferSubmission
 
-VkResult MVKQueueCommandBufferSubmission::execute() {
+VkResult MVKQueueCommandBufferSubmission::execute(uint64_t startTime) {
 
 	_queue->_submissionCaptureScope->beginScope();
 
 	// If using encoded semaphore waiting, do so now.
 	for (auto& ws : _waitSemaphores) { ws.encodeWait(getActiveMTLCommandBuffer()); }
 
+	// Wait time from an async vkQueueSubmit() call to starting submit and encoding of the command buffers
+	MVKDevice* mvkDev = getDevice();
+	mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitSubmitCommandBuffers, startTime);
+
 	// Submit each command buffer.
 	submitCommandBuffers();
 
@@ -678,8 +684,8 @@ MVKQueueFullCommandBufferSubmission<N>::MVKQueueFullCommandBufferSubmission(MVKQ
 // If the semaphores are encodable, wait on them by encoding them on the MTLCommandBuffer before presenting.
 // If the semaphores are not encodable, wait on them inline after presenting.
 // The semaphores know what to do.
-VkResult MVKQueuePresentSurfaceSubmission::execute() {
-	// MTLCommandBuffer retain references to avoid rare case where objects are destroyed too early. 
+VkResult MVKQueuePresentSurfaceSubmission::execute(uint64_t startTime) {
+	// MTLCommandBuffer retain references to avoid rare case where objects are destroyed too early.
 	// Although testing could not determine which objects were being lost, queue present MTLCommandBuffers
 	// are used only once per frame, and retain so few objects, that blanket retention is still performant.
 	id<MTLCommandBuffer> mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent, true);
@@ -689,6 +695,10 @@ VkResult MVKQueuePresentSurfaceSubmission::execute() {
 		ws.encodeWait(nil);			// Inline semaphore waits
 	}
 
+	// Wait time from an async vkQueuePresentKHR() call to starting presentation of the swapchains
+	MVKDevice* mvkDev = getDevice();
+	mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.waitPresentSwapchains, startTime);
+
 	for (int i = 0; i < _presentInfo.size(); i++ ) {
 		setConfigurationResult(_presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i]));
 	}
author	SRSaunders <82544213+SRSaunders@users.noreply.github.com>	2024-03-19 00:09:11 -0400
committer	SRSaunders <82544213+SRSaunders@users.noreply.github.com>	2024-03-19 00:09:11 -0400
commit	10810f41d061362e87d8a4b9d0ab30e7bf97a390 (patch)
tree	fb42010b3e89cca71ff96e8013be218b36aab5da
parent	08c1ad705a9224d8d8b2c272b512cc8684fc759e (diff)
download	moltenvk-10810f41d061362e87d8a4b9d0ab30e7bf97a390.tar.gz