diff options
Diffstat (limited to 'external/optick/optick_gpu.vulkan.cpp')
-rw-r--r-- | external/optick/optick_gpu.vulkan.cpp | 365 |
1 files changed, 365 insertions, 0 deletions
diff --git a/external/optick/optick_gpu.vulkan.cpp b/external/optick/optick_gpu.vulkan.cpp new file mode 100644 index 0000000..6d6f29d --- /dev/null +++ b/external/optick/optick_gpu.vulkan.cpp @@ -0,0 +1,365 @@ +#include "optick.config.h" + +#if USE_OPTICK +#if OPTICK_ENABLE_GPU_VULKAN +#include <vulkan/vulkan.h> + +#include "optick_core.h" +#include "optick_gpu.h" + +#define OPTICK_VK_CHECK(args) do { VkResult __hr = args; OPTICK_ASSERT(__hr == VK_SUCCESS, "Failed check"); (void)__hr; } while(false); + +namespace Optick +{ + class GPUProfilerVulkan : public GPUProfiler + { + protected: + struct Frame + { + VkCommandBuffer commandBuffer; + VkFence fence; + Frame() : commandBuffer(VK_NULL_HANDLE), fence(VK_NULL_HANDLE) {} + }; + + struct NodePayload + { + VkDevice device; + VkPhysicalDevice physicalDevice; + VkQueue queue; + VkQueryPool queryPool; + VkCommandPool commandPool; + + array<Frame, NUM_FRAMES_DELAY> frames; + + NodePayload() : device(VK_NULL_HANDLE), physicalDevice(VK_NULL_HANDLE), queue(VK_NULL_HANDLE), queryPool(VK_NULL_HANDLE), commandPool(VK_NULL_HANDLE) {} + ~NodePayload(); + }; + vector<NodePayload*> nodePayloads; + + void ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count); + void WaitForFrame(uint64_t frameNumber); + + public: + GPUProfilerVulkan(); + ~GPUProfilerVulkan(); + + void InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount); + void QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp); + + + // Interface implementation + ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) override; + + void QueryTimestamp(void* context, int64_t* outCpuTimestamp) override + { + QueryTimestamp((VkCommandBuffer)context, outCpuTimestamp); + } + + void Flip(void* swapChain) override; + }; + + void InitGpuVulkan(void* vkDevices, void* vkPhysicalDevices, void* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues) + { + GPUProfilerVulkan* gpuProfiler = Memory::New<GPUProfilerVulkan>(); + gpuProfiler->InitDevice((VkDevice*)vkDevices, (VkPhysicalDevice*)vkPhysicalDevices, (VkQueue*)vkQueues, cmdQueuesFamily, numQueues); + Core::Get().InitGPUProfiler(gpuProfiler); + } + + GPUProfilerVulkan::GPUProfilerVulkan() + { + } + + void GPUProfilerVulkan::InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount) + { + VkQueryPoolCreateInfo queryPoolCreateInfo; + queryPoolCreateInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + queryPoolCreateInfo.pNext = 0; + queryPoolCreateInfo.flags = 0; + queryPoolCreateInfo.queryType = VK_QUERY_TYPE_TIMESTAMP; + queryPoolCreateInfo.queryCount = MAX_QUERIES_COUNT + 1; + + VkCommandPoolCreateInfo commandPoolCreateInfo; + commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + commandPoolCreateInfo.pNext = 0; + commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + + nodes.resize(nodeCount); + nodePayloads.resize(nodeCount); + + VkResult r; + for (uint32_t i = 0; i < nodeCount; ++i) + { + VkPhysicalDeviceProperties properties = { 0 }; + vkGetPhysicalDeviceProperties(physicalDevices[i], &properties); + GPUProfiler::InitNode(properties.deviceName, i); + + NodePayload* nodePayload = Memory::New<NodePayload>(); + nodePayloads[i] = nodePayload; + nodePayload->device = devices[i]; + nodePayload->physicalDevice = physicalDevices[i]; + nodePayload->queue = cmdQueues[i]; + + r = vkCreateQueryPool(devices[i], &queryPoolCreateInfo, 0, &nodePayload->queryPool); + OPTICK_ASSERT(r == VK_SUCCESS, "Failed"); + + commandPoolCreateInfo.queueFamilyIndex = cmdQueuesFamily[i]; + r = vkCreateCommandPool(nodePayload->device, &commandPoolCreateInfo, 0, &nodePayload->commandPool); + OPTICK_ASSERT(r == VK_SUCCESS, "Failed"); + + for (uint32_t j = 0; j < nodePayload->frames.size(); ++j) + { + Frame& frame = nodePayload->frames[j]; + + VkCommandBufferAllocateInfo allocInfo; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.pNext = 0; + allocInfo.commandBufferCount = 1; + allocInfo.commandPool = nodePayload->commandPool; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + r = vkAllocateCommandBuffers(nodePayload->device, &allocInfo, &frame.commandBuffer); + OPTICK_ASSERT(r == VK_SUCCESS, "Failed"); + + VkFenceCreateInfo fenceCreateInfo; + fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fenceCreateInfo.pNext = 0; + fenceCreateInfo.flags = j == 0 ? 0 : VK_FENCE_CREATE_SIGNALED_BIT; + r = vkCreateFence(nodePayload->device, &fenceCreateInfo, 0, &frame.fence); + OPTICK_ASSERT(r == VK_SUCCESS, "Failed"); + if (j == 0) + { + VkCommandBufferBeginInfo commandBufferBeginInfo; + commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + commandBufferBeginInfo.pNext = 0; + commandBufferBeginInfo.pInheritanceInfo = 0; + commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + vkBeginCommandBuffer(frame.commandBuffer, &commandBufferBeginInfo); + vkCmdResetQueryPool(frame.commandBuffer, nodePayload->queryPool, 0, MAX_QUERIES_COUNT); + vkEndCommandBuffer(frame.commandBuffer); + + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.pNext = nullptr; + submitInfo.waitSemaphoreCount = 0; + submitInfo.pWaitSemaphores = nullptr; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &frame.commandBuffer; + submitInfo.signalSemaphoreCount = 0; + submitInfo.pSignalSemaphores = nullptr; + vkQueueSubmit(nodePayload->queue, 1, &submitInfo, frame.fence); + vkWaitForFences(nodePayload->device, 1, &frame.fence, 1, (uint64_t)-1); + vkResetCommandBuffer(frame.commandBuffer, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT); + } + } + } + } + + void GPUProfilerVulkan::QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp) + { + if (currentState == STATE_RUNNING) + { + uint32_t index = nodes[currentNode]->QueryTimestamp(outCpuTimestamp); + vkCmdWriteTimestamp(commandBuffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, nodePayloads[currentNode]->queryPool, index); + } + } + + void GPUProfilerVulkan::ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count) + { + if (count) + { + Node* node = nodes[currentNode]; + + NodePayload* payload = nodePayloads[currentNode]; + + OPTICK_VK_CHECK(vkGetQueryPoolResults(payload->device, payload->queryPool, startIndex, count, 8 * count, &nodes[currentNode]->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT)); + vkCmdResetQueryPool(commandBuffer, payload->queryPool, startIndex, count); + + // Convert GPU timestamps => CPU Timestamps + for (uint32_t index = startIndex; index < startIndex + count; ++index) + *node->queryCpuTimestamps[index] = node->clock.GetCPUTimestamp(node->queryGpuTimestamps[index]); + } + } + + void GPUProfilerVulkan::WaitForFrame(uint64_t frameNumberToWait) + { + OPTICK_EVENT(); + + int r = VK_SUCCESS; + do + { + NodePayload& payload = *nodePayloads[currentNode]; + r = vkWaitForFences(nodePayloads[currentNode]->device, 1, &payload.frames[frameNumberToWait % payload.frames.size()].fence, 1, 1000 * 30); + } while (r != VK_SUCCESS); + } + + void GPUProfilerVulkan::Flip(void* /*swapChain*/) + { + OPTICK_CATEGORY("GPUProfilerVulkan::Flip", Category::Debug); + + std::lock_guard<std::recursive_mutex> lock(updateLock); + + if (currentState == STATE_STARTING) + currentState = STATE_RUNNING; + + if (currentState == STATE_RUNNING) + { + Node& node = *nodes[currentNode]; + NodePayload& payload = *nodePayloads[currentNode]; + + uint32_t currentFrameIndex = frameNumber % NUM_FRAMES_DELAY; + uint32_t nextFrameIndex = (frameNumber + 1) % NUM_FRAMES_DELAY; + + QueryFrame& currentFrame = node.queryGpuframes[currentFrameIndex]; + QueryFrame& nextFrame = node.queryGpuframes[nextFrameIndex]; + + VkCommandBuffer commandBuffer = payload.frames[currentFrameIndex].commandBuffer; + VkFence fence = payload.frames[currentFrameIndex].fence; + VkDevice device = payload.device; + VkQueue queue = payload.queue; + + vkWaitForFences(device, 1, &fence, 1, (uint64_t)-1); + + VkCommandBufferBeginInfo commandBufferBeginInfo; + commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + commandBufferBeginInfo.pNext = 0; + commandBufferBeginInfo.pInheritanceInfo = 0; + commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + OPTICK_VK_CHECK(vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo)); + vkResetFences(device, 1, &fence); + + if (EventData* frameEvent = currentFrame.frameEvent) + QueryTimestamp(commandBuffer, &frameEvent->finish); + + // Generate GPU Frame event for the next frame + EventData& event = AddFrameEvent(); + QueryTimestamp(commandBuffer, &event.start); + QueryTimestamp(commandBuffer, &AddFrameTag().timestamp); + nextFrame.frameEvent = &event; + + OPTICK_VK_CHECK(vkEndCommandBuffer(commandBuffer)); + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.pNext = nullptr; + submitInfo.waitSemaphoreCount = 0; + submitInfo.pWaitSemaphores = nullptr; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &commandBuffer; + submitInfo.signalSemaphoreCount = 0; + submitInfo.pSignalSemaphores = nullptr; + OPTICK_VK_CHECK(vkQueueSubmit(queue, 1, &submitInfo, fence)); + + uint32_t queryBegin = currentFrame.queryIndexStart; + uint32_t queryEnd = node.queryIndex; + + if (queryBegin != (uint32_t)-1) + { + currentFrame.queryIndexCount = queryEnd - queryBegin; + } + + // Preparing Next Frame + // Try resolve timestamps for the current frame + if (nextFrame.queryIndexStart != (uint32_t)-1) + { + uint32_t startIndex = nextFrame.queryIndexStart % MAX_QUERIES_COUNT; + uint32_t finishIndex = (startIndex + nextFrame.queryIndexCount) % MAX_QUERIES_COUNT; + + if (startIndex < finishIndex) + { + ResolveTimestamps(commandBuffer, startIndex, finishIndex - startIndex); + } + else if (startIndex > finishIndex) + { + ResolveTimestamps(commandBuffer, startIndex, MAX_QUERIES_COUNT - startIndex); + ResolveTimestamps(commandBuffer, 0, finishIndex); + } + } + + nextFrame.queryIndexStart = queryEnd; + nextFrame.queryIndexCount = 0; + } + + ++frameNumber; + } + + GPUProfiler::ClockSynchronization GPUProfilerVulkan::GetClockSynchronization(uint32_t nodeIndex) + { + GPUProfiler::ClockSynchronization clock; + + NodePayload& node = *nodePayloads[nodeIndex]; + Frame& currentFrame = node.frames[frameNumber % NUM_FRAMES_DELAY]; + + VkCommandBufferBeginInfo commandBufferBeginInfo; + commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + commandBufferBeginInfo.pNext = 0; + commandBufferBeginInfo.pInheritanceInfo = 0; + commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + VkCommandBuffer CB = currentFrame.commandBuffer; + VkDevice Device = node.device; + VkFence Fence = currentFrame.fence; + + vkWaitForFences(Device, 1, &Fence, 1, (uint64_t)-1); + vkResetFences(Device, 1, &Fence); + vkResetCommandBuffer(CB, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT); + vkBeginCommandBuffer(CB, &commandBufferBeginInfo); + vkCmdResetQueryPool(CB, nodePayloads[nodeIndex]->queryPool, 0, 1); + vkCmdWriteTimestamp(CB, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, nodePayloads[nodeIndex]->queryPool, 0); + vkEndCommandBuffer(CB); + + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.pNext = nullptr; + submitInfo.waitSemaphoreCount = 0; + submitInfo.pWaitSemaphores = nullptr; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &CB; + submitInfo.signalSemaphoreCount = 0; + submitInfo.pSignalSemaphores = nullptr; + vkQueueSubmit(nodePayloads[nodeIndex]->queue, 1, &submitInfo, Fence); + vkWaitForFences(Device, 1, &Fence, 1, (uint64_t)-1); + + clock.timestampGPU = 0; + vkGetQueryPoolResults(Device, nodePayloads[nodeIndex]->queryPool, 0, 1, 8, &clock.timestampGPU, 8, VK_QUERY_RESULT_64_BIT); + clock.timestampCPU = GetHighPrecisionTime(); + clock.frequencyCPU = GetHighPrecisionFrequency(); + + VkPhysicalDeviceProperties Properties; + vkGetPhysicalDeviceProperties(nodePayloads[nodeIndex]->physicalDevice, &Properties); + clock.frequencyGPU = (uint64_t)(1000000000ll / Properties.limits.timestampPeriod); + + return clock; + } + + GPUProfilerVulkan::NodePayload::~NodePayload() + { + vkDestroyCommandPool(device, commandPool, nullptr); + vkDestroyQueryPool(device, queryPool, nullptr); + } + + GPUProfilerVulkan::~GPUProfilerVulkan() + { + WaitForFrame(frameNumber - 1); + + for (NodePayload* payload : nodePayloads) + { + for (Frame& frame : payload->frames) + { + vkDestroyFence(payload->device, frame.fence, nullptr); + vkFreeCommandBuffers(payload->device, payload->commandPool, 1, &frame.commandBuffer); + } + + Memory::Delete(payload); + } + + nodePayloads.clear(); + } +} +#else +#include "optick_common.h" +namespace Optick +{ + void InitGpuVulkan(void* /*devices*/, void* /*physicalDevices*/, void* /*cmdQueues*/, uint32_t* /*cmdQueuesFamily*/, uint32_t /*numQueues*/) + { + OPTICK_FAILED("OPTICK_ENABLE_GPU_VULKAN is disabled! Can't initialize GPU Profiler!"); + } +} +#endif //OPTICK_ENABLE_GPU_D3D12 +#endif //USE_OPTICK
\ No newline at end of file |