Add engine extensions for PerfSDK integration
https://github.com/FlaxEngine/PerfSDK
This commit is contained in:
@@ -6,6 +6,23 @@ using FlaxEditor.GUI;
|
||||
using FlaxEngine;
|
||||
using FlaxEngine.GUI;
|
||||
|
||||
namespace FlaxEngine
{
    partial class ProfilerGPU
    {
        /// <summary>
        /// Signature of the callback invoked for a GPU profiler event that was clicked in the Editor. Allows external profiling tools to integrate with the Editor profiler UI.
        /// </summary>
        /// <param name="e">The profiler event data (passed by reference).</param>
        public delegate void EventDelegate(ref Event e);

        /// <summary>
        /// Invoked when the user double-clicks a GPU profiler event in the Editor in order to open it. External profiling tools integrated into the Editor can assign a handler here; callers are expected to check it for null before invoking.
        /// </summary>
        public static EventDelegate OpenEventEditor;
    }
}
|
||||
|
||||
namespace FlaxEditor.Windows.Profiler
|
||||
{
|
||||
/// <summary>
|
||||
@@ -32,8 +49,8 @@ namespace FlaxEditor.Windows.Profiler
|
||||
Offsets = Margin.Zero,
|
||||
Parent = this,
|
||||
};
|
||||
|
||||
// Chart
|
||||
|
||||
// Charts
|
||||
_drawTimeCPU = new SingleChart
|
||||
{
|
||||
Title = "Draw (CPU)",
|
||||
@@ -44,7 +61,6 @@ namespace FlaxEditor.Windows.Profiler
|
||||
Parent = mainPanel,
|
||||
};
|
||||
_drawTimeCPU.SelectedSampleChanged += OnSelectedSampleChanged;
|
||||
|
||||
_drawTimeGPU = new SingleChart
|
||||
{
|
||||
Title = "Draw (GPU)",
|
||||
@@ -54,7 +70,7 @@ namespace FlaxEditor.Windows.Profiler
|
||||
Parent = mainPanel,
|
||||
};
|
||||
_drawTimeGPU.SelectedSampleChanged += OnSelectedSampleChanged;
|
||||
|
||||
|
||||
var panel = new Panel(ScrollBars.Vertical)
|
||||
{
|
||||
AnchorPreset = AnchorPresets.StretchAll,
|
||||
@@ -216,6 +232,8 @@ namespace FlaxEditor.Windows.Profiler
|
||||
}
|
||||
control.Bounds = new Rectangle(x, e.Depth * Timeline.Event.DefaultHeight, width, Timeline.Event.DefaultHeight - 1);
|
||||
control.Name = name;
|
||||
control.Tag = e;
|
||||
control.EventDoubleClick = OnEventDoubleClick;
|
||||
control.TooltipText = string.Format("{0}, {1} ms", name, ((int)(e.Time * 10000.0) / 10000.0f));
|
||||
control.Parent = parent;
|
||||
|
||||
@@ -354,10 +372,11 @@ namespace FlaxEditor.Windows.Profiler
|
||||
}
|
||||
else
|
||||
{
|
||||
row = new Row
|
||||
row = new ClickableRow
|
||||
{
|
||||
Values = new object[6],
|
||||
BackgroundColors = new Color[6],
|
||||
RowDoubleClick = OnRowDoubleClick,
|
||||
};
|
||||
for (int k = 0; k < row.BackgroundColors.Length; k++)
|
||||
row.BackgroundColors[k] = Color.Transparent;
|
||||
@@ -388,6 +407,25 @@ namespace FlaxEditor.Windows.Profiler
|
||||
row.Visible = e.Depth < 3;
|
||||
row.BackgroundColor = i % 2 == 1 ? rowColor2 : Color.Transparent;
|
||||
row.Parent = _table;
|
||||
row.Tag = e;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Handles a double-click on a timeline event control by forwarding its GPU event to the external tool hook.
/// </summary>
/// <param name="control">The timeline event control that was double-clicked.</param>
private void OnEventDoubleClick(Timeline.Event control)
{
    OpenEventInExternalEditor(control.Tag);
}

/// <summary>
/// Handles a double-click on a table row by forwarding its GPU event to the external tool hook.
/// </summary>
/// <param name="row">The table row that was double-clicked.</param>
private void OnRowDoubleClick(ClickableRow row)
{
    OpenEventInExternalEditor(row.Tag);
}

/// <summary>
/// Invokes <see cref="ProfilerGPU.OpenEventEditor"/> for the GPU event stored in a control tag. Does nothing when no handler is registered or the tag does not hold a GPU event.
/// </summary>
/// <param name="tag">The control tag expected to contain a boxed <see cref="ProfilerGPU.Event"/>.</param>
private static void OpenEventInExternalEditor(object tag)
{
    // Read the static delegate once so it cannot change between the null check and the call
    var handler = ProfilerGPU.OpenEventEditor;
    if (handler == null)
        return;

    // Guard the unboxing cast: a missing or foreign tag is ignored instead of throwing inside a UI callback
    if (!(tag is ProfilerGPU.Event))
        return;

    var e = (ProfilerGPU.Event)tag;
    handler(ref e);
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
// Copyright (c) Wojciech Figat. All rights reserved.
|
||||
|
||||
using System;
|
||||
using FlaxEngine;
|
||||
using FlaxEngine.GUI;
|
||||
|
||||
@@ -42,6 +43,38 @@ namespace FlaxEditor.Windows.Profiler
|
||||
private Color _color;
|
||||
private string _name;
|
||||
private float _nameLength = -1;
|
||||
private bool _leftClick;
|
||||
private bool _isRightDown;
|
||||
|
||||
/// <summary>
|
||||
/// The double click event.
|
||||
/// </summary>
|
||||
public Action DoubleClick;
|
||||
|
||||
/// <summary>
|
||||
/// The left mouse button click event.
|
||||
/// </summary>
|
||||
public Action LeftClick;
|
||||
|
||||
/// <summary>
|
||||
/// The right mouse button click event.
|
||||
/// </summary>
|
||||
public Action RightClick;
|
||||
|
||||
/// <summary>
|
||||
/// The double click event. Passes the clicked timeline event control as the argument.
|
||||
/// </summary>
|
||||
public Action<Event> EventDoubleClick;
|
||||
|
||||
/// <summary>
|
||||
/// The left mouse button click event. Passes the clicked timeline event control as the argument.
|
||||
/// </summary>
|
||||
public Action<Event> EventLeftClick;
|
||||
|
||||
/// <summary>
|
||||
/// The right mouse button click event. Passes the clicked timeline event control as the argument.
|
||||
/// </summary>
|
||||
public Action<Event> EventRightClick;
|
||||
|
||||
/// <summary>
|
||||
/// The default height of the event.
|
||||
@@ -94,6 +127,68 @@ namespace FlaxEditor.Windows.Profiler
|
||||
Render2D.PopClip();
|
||||
}
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public override bool OnMouseDown(Float2 location, MouseButton button)
{
    // Only presses that land inside the interactive area arm the click tracking and take focus
    if (InteractBounds.Contains(ref location))
    {
        switch (button)
        {
        case MouseButton.Left:
            _leftClick = true;
            break;
        case MouseButton.Right:
            _isRightDown = true;
            break;
        }
        Focus();
    }

    // The press is always passed on to the base control
    return base.OnMouseDown(location, button);
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public override bool OnMouseUp(Float2 location, MouseButton button)
{
    // Left button released after a tracked press, still over the interactive area: raise the left click events
    var leftPending = button == MouseButton.Left && _leftClick;
    if (leftPending && InteractBounds.Contains(ref location))
    {
        _leftClick = false;
        LeftClick?.Invoke();
        EventLeftClick?.Invoke(this);
        return true;
    }

    // Same handling for the right button
    var rightPending = button == MouseButton.Right && _isRightDown;
    if (rightPending && InteractBounds.Contains(ref location))
    {
        _isRightDown = false;
        RightClick?.Invoke();
        EventRightClick?.Invoke(this);
        return true;
    }

    // Anything else is left to the base control
    return base.OnMouseUp(location, button);
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public override bool OnMouseDoubleClick(Float2 location, MouseButton button)
{
    // Only a left-button double-click inside the interactive area is handled here
    if (button != MouseButton.Left || !InteractBounds.Contains(ref location))
        return base.OnMouseDoubleClick(location, button);

    // Drop any pending single-click state before raising the double-click events
    _leftClick = false;
    _isRightDown = false;
    DoubleClick?.Invoke();
    EventDoubleClick?.Invoke(this);
    return true;
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public override void OnMouseLeave()
{
    // A press that started here no longer counts as a click once the cursor has left the control
    _leftClick = _isRightDown = false;

    base.OnMouseLeave();
}
|
||||
|
||||
// Local-space area that reacts to mouse input; its width is clamped to at least 4 units while the height follows the control.
private Rectangle InteractBounds
{
    get
    {
        var width = Mathf.Max(Width, 4);
        return new Rectangle(Float2.Zero, width, Height);
    }
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -357,8 +357,8 @@ GPUDevice* GPUDevice::Instance = nullptr;
|
||||
|
||||
void GPUDevice::OnRequestingExit()
|
||||
{
|
||||
if (Engine::FatalError != FatalErrorType::GPUCrash &&
|
||||
Engine::FatalError != FatalErrorType::GPUHang &&
|
||||
if (Engine::FatalError != FatalErrorType::GPUCrash &&
|
||||
Engine::FatalError != FatalErrorType::GPUHang &&
|
||||
Engine::FatalError != FatalErrorType::GPUOutOfMemory)
|
||||
return;
|
||||
OnCrash();
|
||||
@@ -509,6 +509,7 @@ void GPUDevice::DumpResourcesToLog() const
|
||||
|
||||
const bool printTypes[(int32)GPUResourceType::MAX] =
|
||||
{
|
||||
// @formatter:off
|
||||
true, // RenderTarget
|
||||
true, // Texture
|
||||
true, // CubeTexture
|
||||
@@ -519,6 +520,7 @@ void GPUDevice::DumpResourcesToLog() const
|
||||
false, // Descriptor
|
||||
false, // Query
|
||||
false, // Sampler
|
||||
// @formatter:on
|
||||
};
|
||||
for (int32 typeIndex = 0; typeIndex < (int32)GPUResourceType::MAX; typeIndex++)
|
||||
{
|
||||
@@ -672,6 +674,12 @@ void GPUDevice::DrawBegin()
|
||||
|
||||
// Clear stats
|
||||
RenderTask::TasksDoneLastFrame = 0;
|
||||
|
||||
#if COMPILE_WITH_PROFILER
|
||||
// External profiler
|
||||
if (ProfilerGPU::FrameBegin)
|
||||
ProfilerGPU::FrameBegin();
|
||||
#endif
|
||||
}
|
||||
|
||||
void GPUDevice::DrawEnd()
|
||||
@@ -748,6 +756,12 @@ void GPUDevice::DrawEnd()
|
||||
_wasVSyncUsed = anyVSync;
|
||||
_isRendering = false;
|
||||
|
||||
#if COMPILE_WITH_PROFILER
|
||||
// External profiler
|
||||
if (ProfilerGPU::FrameEnd)
|
||||
ProfilerGPU::FrameEnd();
|
||||
#endif
|
||||
|
||||
RenderTargetPool::Flush();
|
||||
}
|
||||
|
||||
@@ -831,6 +845,11 @@ void GPUDevice::Dispose()
|
||||
VideoOutputModes.Resize(0);
|
||||
}
|
||||
|
||||
GPUDevice::QueueInfo GPUDevice::GetNativeQueue() const
{
    // Base implementation exposes no native queue: the result keeps its default member values (null queue, family index 0)
    QueueInfo info;
    return info;
}
|
||||
|
||||
uint64 GPUDevice::GetMemoryUsage() const
|
||||
{
|
||||
uint64 result = 0;
|
||||
|
||||
@@ -142,6 +142,17 @@ public:
|
||||
API_FIELD() int32 VideoOutputIndex;
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
/// Collection of internal graphics API command queue pointers. Depend on the platform.
|
||||
/// </summary>
|
||||
struct QueueInfo
|
||||
{
|
||||
// The main graphics command queue (ID3D12CommandQueue* or VkQueue as void*). Null if not applicable.
|
||||
void* MainQueue = nullptr;
|
||||
// Vulkan queue family index for MainQueue. Zero for non-Vulkan backends.
|
||||
uint32 MainQueueFamilyIndex = 0;
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
/// The singleton instance of the graphics device.
|
||||
/// </summary>
|
||||
@@ -303,6 +314,11 @@ public:
|
||||
/// </summary>
|
||||
API_PROPERTY() virtual void* GetNativePtr() const = 0;
|
||||
|
||||
/// <summary>
|
||||
/// Gets the native command queue info of the underlying graphics device. It contains a low-level platform-specific data.
|
||||
/// </summary>
|
||||
virtual QueueInfo GetNativeQueue() const;
|
||||
|
||||
/// <summary>
|
||||
/// Gets the amount of memory usage by all the GPU resources (in bytes). Returned value is estimated based on resources created by the engine and might not be accurate. Use GPUMemoryStats for more detailed memory budget usage.
|
||||
/// </summary>
|
||||
|
||||
@@ -586,6 +586,11 @@ GPUDeviceDX12::GPUDeviceDX12(IDXGIFactory4* dxgiFactory, GPUAdapterDX* adapter)
|
||||
{
|
||||
}
|
||||
|
||||
GPUDevice::QueueInfo GPUDeviceDX12::GetNativeQueue() const
{
    // Returns the D3D12 command queue as the main queue. When the queue wrapper is not available
    // the default (null) info is returned instead of dereferencing a null pointer; this matches
    // the guard used by the Vulkan backend implementation of this method.
    QueueInfo result;
    if (_commandQueue)
        result.MainQueue = _commandQueue->GetCommandQueue();
    return result;
}
|
||||
|
||||
GPUMemoryStats GPUDeviceDX12::GetMemoryStats()
|
||||
{
|
||||
GPUMemoryStats stats;
|
||||
|
||||
@@ -201,6 +201,7 @@ public:
|
||||
{
|
||||
return _device;
|
||||
}
|
||||
QueueInfo GetNativeQueue() const override;
|
||||
GPUMemoryStats GetMemoryStats() override;
|
||||
bool Init() override;
|
||||
void DrawBegin() override;
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
#define VULKAN_HASH_POOLS_WITH_LAYOUT_TYPES 1
|
||||
#define VULKAN_USE_DEBUG_LAYER GPU_ENABLE_DEBUG_LAYER
|
||||
#define VULKAN_USE_DEBUG_DATA (GPU_ENABLE_DEBUG_LAYER && COMPILE_WITH_DEV_ENV)
|
||||
#define VULKAN_USE_PERF_SDK (!BUILD_RELEASE && PLATFORM_WINDOWS)
|
||||
|
||||
#ifndef VULKAN_USE_PIPELINE_CACHE
|
||||
#define VULKAN_USE_PIPELINE_CACHE 1
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
// Copyright (c) Wojciech Figat. All rights reserved.
|
||||
|
||||
#include "GPUDeviceVulkan.h"
|
||||
#include "GPUAdapterVulkan.h"
|
||||
#include "RenderToolsVulkan.h"
|
||||
#include "Config.h"
|
||||
#include "Engine/Core/Log.h"
|
||||
#include "Engine/Core/Collections/ArrayExtensions.h"
|
||||
#include "Engine/Core/Collections/Sorting.h"
|
||||
#include "Engine/Engine/CommandLine.h"
|
||||
#if VULKAN_USE_PERF_SDK
|
||||
#include "Engine/Platform/FileSystem.h"
|
||||
#endif
|
||||
|
||||
#if GRAPHICS_API_VULKAN
|
||||
|
||||
@@ -216,6 +219,186 @@ static bool ListContains(const Array<StringAnsi>& list, const char* name)
|
||||
return false;
|
||||
}
|
||||
|
||||
#if VULKAN_USE_PERF_SDK
|
||||
|
||||
namespace
|
||||
{
|
||||
typedef int NVPA_Status;
|
||||
typedef unsigned char NVPA_Bool;
|
||||
|
||||
struct NVPW_VK_Profiler_GetRequiredInstanceExtensions_Params
|
||||
{
|
||||
size_t structSize;
|
||||
void* pPriv;
|
||||
const char* const* ppInstanceExtensionNames;
|
||||
size_t numInstanceExtensionNames;
|
||||
uint32 apiVersion;
|
||||
NVPA_Bool isOfficiallySupportedVersion;
|
||||
};
|
||||
|
||||
struct NVPW_VK_Profiler_GetRequiredDeviceExtensions_Params
|
||||
{
|
||||
size_t structSize;
|
||||
void* pPriv;
|
||||
const char* const* ppDeviceExtensionNames;
|
||||
size_t numDeviceExtensionNames;
|
||||
uint32 apiVersion;
|
||||
NVPA_Bool isOfficiallySupportedVersion;
|
||||
VkInstance instance;
|
||||
VkPhysicalDevice physicalDevice;
|
||||
void* pfnGetInstanceProcAddr;
|
||||
};
|
||||
|
||||
struct NVPW_InitializeHost_Params
|
||||
{
|
||||
size_t structSize;
|
||||
void* pPriv;
|
||||
};
|
||||
|
||||
typedef NVPA_Status(*PFN_NVPW_VK_Profiler_GetRequiredInstanceExtensions)(NVPW_VK_Profiler_GetRequiredInstanceExtensions_Params* pParams);
|
||||
typedef NVPA_Status(*PFN_NVPW_VK_Profiler_GetRequiredDeviceExtensions)(NVPW_VK_Profiler_GetRequiredDeviceExtensions_Params* pParams);
|
||||
typedef NVPA_Status(*PFN_NVPW_InitializeHost)(NVPW_InitializeHost_Params* pParams);
|
||||
|
||||
struct NvPerfHostApi
|
||||
{
|
||||
String Path;
|
||||
void* Module = nullptr;
|
||||
PFN_NVPW_VK_Profiler_GetRequiredInstanceExtensions GetRequiredInstanceExtensions = nullptr;
|
||||
PFN_NVPW_VK_Profiler_GetRequiredDeviceExtensions GetRequiredDeviceExtensions = nullptr;
|
||||
};
|
||||
|
||||
NvPerfHostApi PerfSDK;
|
||||
}
|
||||
|
||||
bool GPUDeviceVulkan::UsePerfSDK = false;
|
||||
|
||||
void GPUDeviceVulkan::PerfSDKInit()
|
||||
{
|
||||
// Check if PerfSDK path has been set and is correct
|
||||
if (Platform::GetEnvironmentVariable(TEXT("NVPERF_SDK_PATH"), PerfSDK.Path) || PerfSDK.Path.IsEmpty() || !FileSystem::DirectoryExists(PerfSDK.Path))
|
||||
return;
|
||||
|
||||
// The Nsight Perf SDK host library is named 'nvperf_grfx_host.dll' (see nvperf_host_impl.h). Older/other
|
||||
// packages used 'nvperf.dll'. Try the modern name first, then fall back, so the engine can query the
|
||||
// Range Profiler's REQUIRED Vulkan instance/device extensions at device creation. Without these the
|
||||
// VkDevice is created with only the mini-trace fallback extensions and NVPW_VK_Profiler_Queue_BeginSession
|
||||
// fails with NVPA_STATUS_INVALID_CONTEXT_STATE.
|
||||
const String binDir = PerfSDK.Path / TEXT("NvPerf") / TEXT("bin") / TEXT("x64");
|
||||
const Char* dllNames[] = { TEXT("nvperf_grfx_host.dll"), TEXT("nvperf.dll") };
|
||||
String loadedPath;
|
||||
for (const Char* dllName : dllNames)
|
||||
{
|
||||
const String dllPath = binDir / dllName;
|
||||
PerfSDK.Module = Platform::LoadLibrary(dllPath.Get());
|
||||
if (PerfSDK.Module)
|
||||
{
|
||||
loadedPath = dllPath;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!PerfSDK.Module)
|
||||
{
|
||||
LOG(Warning, "Nsight PerfSDK: failed to load NvPerf host library from '{0}' (tried nvperf_grfx_host.dll and nvperf.dll)", binDir);
|
||||
return;
|
||||
}
|
||||
|
||||
// Initialize host library
|
||||
auto initializeHost = (PFN_NVPW_InitializeHost)Platform::GetProcAddress(PerfSDK.Module, "NVPW_InitializeHost");
|
||||
if (initializeHost)
|
||||
{
|
||||
NVPW_InitializeHost_Params initParams = {};
|
||||
initParams.structSize = sizeof(NVPW_InitializeHost_Params);
|
||||
const NVPA_Status initStatus = initializeHost(&initParams);
|
||||
if (initStatus != 0)
|
||||
{
|
||||
LOG(Warning, "Nsight PerfSDK: NVPW_InitializeHost failed (status={0})", (int32)initStatus);
|
||||
return;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG(Warning, "Nsight PerfSDK: NVPW_InitializeHost not found in host library");
|
||||
return;
|
||||
}
|
||||
PerfSDK.GetRequiredInstanceExtensions = (PFN_NVPW_VK_Profiler_GetRequiredInstanceExtensions)Platform::GetProcAddress(PerfSDK.Module, "NVPW_VK_Profiler_GetRequiredInstanceExtensions");
|
||||
PerfSDK.GetRequiredDeviceExtensions = (PFN_NVPW_VK_Profiler_GetRequiredDeviceExtensions)Platform::GetProcAddress(PerfSDK.Module, "NVPW_VK_Profiler_GetRequiredDeviceExtensions");
|
||||
LOG(Info, "Nsight PerfSDK: loaded NvPerf host library '{0}' (GetRequiredInstanceExtensions={1}, GetRequiredDeviceExtensions={2}).", loadedPath, PerfSDK.GetRequiredInstanceExtensions ? 1 : 0, PerfSDK.GetRequiredDeviceExtensions ? 1 : 0);
|
||||
|
||||
// Inform PerfSDK plugin that the Vulkan backend has been setup to use it
|
||||
Platform::SetEnvironmentVariable(TEXT("VULKAN_USE_PERF_SDK"), TEXT("*"));
|
||||
UsePerfSDK = true;
|
||||
|
||||
// The Vulkan loader reads VK_LOADER_LAYERS_DISABLE / VK_LOADER_DRIVERS_DISABLE exactly once, when it
|
||||
// initializes its global config (the first call into vulkan-1.dll, i.e. volkInitialize). Setting them later
|
||||
// (e.g. during instance-extension enumeration) is silently ignored. NvPerf's Vulkan Range Profiler is not
|
||||
// compatible with object-wrapping layers, so the implicit capture/overlay/Optimus layers must be gone from
|
||||
// the device dispatch chain before the loader caches its config, otherwise BeginSession fails with
|
||||
// NVPA_STATUS_INVALID_CONTEXT_STATE. Likewise the non-NVIDIA ICD must be dropped so there is no multi-ICD
|
||||
// trampoline. Always force these values when PerfSDK is active: a partial user-provided
|
||||
// VK_LOADER_LAYERS_DISABLE (e.g. ~implicit~) still leaves validation/capture wrappers and breaks BeginSession.
|
||||
String existingLayersDisable;
|
||||
Platform::GetEnvironmentVariable(TEXT("VK_LOADER_LAYERS_DISABLE"), existingLayersDisable);
|
||||
if (!existingLayersDisable.IsEmpty() && existingLayersDisable != TEXT("*"))
|
||||
LOG(Warning, "Nsight PerfSDK: overriding VK_LOADER_LAYERS_DISABLE='{0}' with '*' for Range Profiler.", existingLayersDisable);
|
||||
Platform::SetEnvironmentVariable(TEXT("VK_LOADER_LAYERS_DISABLE"), TEXT("*"));
|
||||
#if PLATFORM_WINDOWS
|
||||
_wputenv_s(TEXT("VK_LOADER_LAYERS_DISABLE"), TEXT("*"));
|
||||
#endif
|
||||
LOG(Info, "Nsight PerfSDK: disabled all Vulkan layers before loader init (VK_LOADER_LAYERS_DISABLE=*).");
|
||||
|
||||
// Intel's ICD manifest is igvk64.json (does NOT contain "intel" in the filename), so *intel* alone is not enough.
|
||||
const Char* disablePatterns = TEXT("*igvk*,*igd*,*intel*,*Intel*,*amd*,*amdvlk*,*radeon*");
|
||||
String existingDriversDisable;
|
||||
Platform::GetEnvironmentVariable(TEXT("VK_LOADER_DRIVERS_DISABLE"), existingDriversDisable);
|
||||
if (!existingDriversDisable.IsEmpty() && existingDriversDisable != disablePatterns)
|
||||
LOG(Warning, "Nsight PerfSDK: overriding VK_LOADER_DRIVERS_DISABLE for Range Profiler.");
|
||||
Platform::SetEnvironmentVariable(TEXT("VK_LOADER_DRIVERS_DISABLE"), disablePatterns);
|
||||
#if PLATFORM_WINDOWS
|
||||
_wputenv_s(TEXT("VK_LOADER_DRIVERS_DISABLE"), disablePatterns);
|
||||
#endif
|
||||
LOG(Info, "Nsight PerfSDK: disabled non-NVIDIA Vulkan ICDs before loader init (VK_LOADER_DRIVERS_DISABLE={0}).", String(disablePatterns));
|
||||
}
|
||||
|
||||
void GPUDeviceVulkan::PerfSDKInitDeviceInfo(VkDeviceCreateInfo& deviceInfo, const Array<const char*>& deviceExtensions, VkPhysicalDeviceFeatures2* enabledFeatures2)
{
    // Chains required feature structs onto VkDeviceCreateInfo for enabled PerfSDK extensions.
    // deviceInfo: device creation info to modify (its pNext chain and pEnabledFeatures).
    // deviceExtensions: list of device extensions that will be enabled.
    // enabledFeatures2: optional features struct owned by the caller; when used it replaces deviceInfo.pEnabledFeatures.
    const uint32 deviceApiMajorMinor = VK_MAKE_VERSION(VK_VERSION_MAJOR(Adapter->GpuProps.apiVersion), VK_VERSION_MINOR(Adapter->GpuProps.apiVersion), 0);
    if (deviceApiMajorMinor >= VK_API_VERSION_1_2)
    {
        // Vulkan 1.2+: enable bufferDeviceAddress via the device's Vulkan 1.2 features struct and keep the existing chain behind it
        void* next = const_cast<void*>(deviceInfo.pNext);
        PhysicalDeviceFeatures12.bufferDeviceAddress = VK_TRUE;
        PhysicalDeviceFeatures12.pNext = next;

        if (enabledFeatures2)
        {
            // Core features travel through VkPhysicalDeviceFeatures2 in the chain, so the legacy pointer must be null
            enabledFeatures2->pNext = &PhysicalDeviceFeatures12;
            deviceInfo.pNext = enabledFeatures2;
            deviceInfo.pEnabledFeatures = nullptr;
        }
        else
        {
            deviceInfo.pNext = &PhysicalDeviceFeatures12;
        }

        LOG(Info, "Nsight PerfSDK: enabled Vulkan bufferDeviceAddress feature.");
        return;
    }

    // Below Vulkan 1.2 the features2 struct is never chained by this function. The caller may have cleared
    // pEnabledFeatures in favor of enabledFeatures2, so route the core features back through the legacy pointer;
    // otherwise the device would be created with every core feature disabled.
    if (enabledFeatures2 && deviceInfo.pEnabledFeatures == nullptr)
        deviceInfo.pEnabledFeatures = &enabledFeatures2->features;

    if (ListContains(deviceExtensions, "VK_EXT_buffer_device_address"))
    {
        // Static storage: the struct is referenced from deviceInfo.pNext after this function returns
        static VkPhysicalDeviceBufferDeviceAddressFeaturesEXT bufferDeviceAddressFeatures;
        void* next = const_cast<void*>(deviceInfo.pNext);
        bufferDeviceAddressFeatures = {};
        bufferDeviceAddressFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT;
        bufferDeviceAddressFeatures.bufferDeviceAddress = VK_TRUE;
        bufferDeviceAddressFeatures.pNext = next;
        deviceInfo.pNext = &bufferDeviceAddressFeatures;
        LOG(Info, "Nsight PerfSDK: enabled VK_EXT_buffer_device_address feature.");
    }
}
|
||||
|
||||
#endif
|
||||
|
||||
void GPUDeviceVulkan::GetInstanceLayersAndExtensions(Array<const char*>& outInstanceExtensions, Array<const char*>& outInstanceLayers, bool& outDebugUtils, bool useDebugLayer)
|
||||
{
|
||||
VkResult result;
|
||||
@@ -405,6 +588,29 @@ void GPUDeviceVulkan::GetInstanceLayersAndExtensions(Array<const char*>& outInst
|
||||
}
|
||||
}
|
||||
|
||||
#if VULKAN_USE_PERF_SDK
    // Ask the NvPerf host library which instance extensions the Range Profiler needs and enable the ones that are available
    if (UsePerfSDK && PerfSDK.GetRequiredInstanceExtensions)
    {
        NVPW_VK_Profiler_GetRequiredInstanceExtensions_Params params = {};
        params.structSize = sizeof(NVPW_VK_Profiler_GetRequiredInstanceExtensions_Params);
        params.apiVersion = VULKAN_API_VERSION;
        const NVPA_Status status = PerfSDK.GetRequiredInstanceExtensions(&params);
        if (status == 0)
        {
            for (size_t i = 0; i < params.numInstanceExtensionNames; i++)
            {
                // The name pointer comes from the library-provided list and is stored as-is
                const char* name = params.ppInstanceExtensionNames[i];
                const bool available = ListContains(foundUniqueExtensions, name);
                LOG(Info, "Nsight PerfSDK: Range Profiler requires instance extension '{0}' (available={1}).", String(name), available ? 1 : 0);
                if (available)
                    outInstanceExtensions.Add(name);
            }
        }
        else
            LOG(Warning, "Nsight PerfSDK: NVPW_VK_Profiler_GetRequiredInstanceExtensions failed (status={0}); instance will be missing Range Profiler extensions.", (int32)status);
    }
#endif
|
||||
|
||||
TrimDuplicates(outInstanceLayers);
|
||||
if (outInstanceLayers.HasItems())
|
||||
{
|
||||
@@ -524,6 +730,32 @@ void GPUDeviceVulkan::GetDeviceExtensions(VkPhysicalDevice gpu, Array<const char
|
||||
}
|
||||
}
|
||||
|
||||
#if VULKAN_USE_PERF_SDK
    // Ask the NvPerf host library which device extensions the Range Profiler needs (NVIDIA adapters only) and enable the ones that are available
    if (UsePerfSDK && PerfSDK.GetRequiredDeviceExtensions && Adapter->IsNVIDIA())
    {
        NVPW_VK_Profiler_GetRequiredDeviceExtensions_Params params = {};
        params.structSize = sizeof(NVPW_VK_Profiler_GetRequiredDeviceExtensions_Params);
        params.apiVersion = Adapter->GpuProps.apiVersion;
        params.instance = Instance;
        params.physicalDevice = gpu;
        params.pfnGetInstanceProcAddr = (void*)vkGetInstanceProcAddr;
        const NVPA_Status status = PerfSDK.GetRequiredDeviceExtensions(&params);
        if (status == 0)
        {
            for (size_t i = 0; i < params.numDeviceExtensionNames; i++)
            {
                // The name pointer comes from the library-provided list and is stored as-is
                const char* name = params.ppDeviceExtensionNames[i];
                const bool available = ListContains(availableExtensions, name);
                LOG(Info, "Nsight PerfSDK: Range Profiler requires device extension '{0}' (available={1}).", String(name), available ? 1 : 0);
                if (available)
                    outDeviceExtensions.Add(name);
            }
        }
        else
            LOG(Warning, "Nsight PerfSDK: NVPW_VK_Profiler_GetRequiredDeviceExtensions failed (status={0}); device will be missing Range Profiler extensions.", (int32)status);
    }
#endif
|
||||
|
||||
if (outDeviceExtensions.HasItems())
|
||||
{
|
||||
LOG(Info, "Using device extensions:");
|
||||
|
||||
@@ -1021,6 +1021,11 @@ GPUDevice* GPUDeviceVulkan::Create()
|
||||
|
||||
VkResult result;
|
||||
|
||||
#if VULKAN_USE_PERF_SDK
|
||||
// Configure the Vulkan loader for NVIDIA PerfSDK before the loader initializes its global config
|
||||
PerfSDKInit();
|
||||
#endif
|
||||
|
||||
#if !PLATFORM_SWITCH
|
||||
// Initialize bindings
|
||||
result = volkInitialize();
|
||||
@@ -1561,12 +1566,22 @@ GPUAdapter* GPUDeviceVulkan::GetAdapter() const
|
||||
|
||||
void* GPUDeviceVulkan::GetNativePtr() const
|
||||
{
|
||||
// Return both Instance and Device as pointer to void*[2]
|
||||
// Returns pointer to array of pointers with multiple device handles used by external tools and plugins:
|
||||
_nativePtr[0] = (void*)Instance;
|
||||
_nativePtr[1] = (void*)Device;
|
||||
_nativePtr[2] = (void*)vkGetInstanceProcAddr;
|
||||
_nativePtr[3] = (void*)vkGetDeviceProcAddr;
|
||||
return _nativePtr;
|
||||
}
|
||||
|
||||
GPUDevice::QueueInfo GPUDeviceVulkan::GetNativeQueue() const
{
    // Without a graphics queue there is nothing to expose: return the default (null) info
    if (!GraphicsQueue)
        return QueueInfo();

    // Expose the graphics queue handle together with its queue family index
    QueueInfo info;
    info.MainQueue = (void*)GraphicsQueue->GetHandle();
    info.MainQueueFamilyIndex = GraphicsQueue->GetFamilyIndex();
    return info;
}
|
||||
|
||||
GPUMemoryStats GPUDeviceVulkan::GetMemoryStats()
|
||||
{
|
||||
GPUMemoryStats stats;
|
||||
@@ -1732,6 +1747,18 @@ bool GPUDeviceVulkan::Init()
|
||||
VulkanPlatform::RestrictEnabledPhysicalDeviceFeatures(PhysicalDeviceFeatures, enabledFeatures);
|
||||
deviceInfo.pEnabledFeatures = &enabledFeatures;
|
||||
|
||||
#if VULKAN_USE_PERF_SDK
|
||||
VkPhysicalDeviceFeatures2 enabledFeatures2 = {};
|
||||
VkPhysicalDeviceFeatures2* enabledFeatures2Ptr = nullptr;
|
||||
if (Adapter && UsePerfSDK)
|
||||
{
|
||||
RenderToolsVulkan::ZeroStruct(enabledFeatures2, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2);
|
||||
enabledFeatures2.features = enabledFeatures;
|
||||
enabledFeatures2Ptr = &enabledFeatures2;
|
||||
deviceInfo.pEnabledFeatures = nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if VULKAN_USE_TRACY_GPU && VK_EXT_calibrated_timestamps && VK_EXT_host_query_reset
|
||||
VkPhysicalDeviceHostQueryResetFeatures resetFeatures;
|
||||
if (PhysicalDeviceFeatures12.hostQueryReset)
|
||||
@@ -1742,8 +1769,14 @@ bool GPUDeviceVulkan::Init()
|
||||
}
|
||||
#endif
|
||||
|
||||
#if VULKAN_USE_PERF_SDK
|
||||
if (UsePerfSDK)
|
||||
PerfSDKInitDeviceInfo(deviceInfo, deviceExtensions, enabledFeatures2Ptr);
|
||||
#endif
|
||||
|
||||
// Create the device
|
||||
VALIDATE_VULKAN_RESULT(vkCreateDevice(gpu, &deviceInfo, nullptr, &Device));
|
||||
PhysicalDeviceFeatures12.pNext = nullptr;
|
||||
|
||||
#if !PLATFORM_SWITCH
|
||||
// Optimize bindings
|
||||
|
||||
@@ -359,7 +359,7 @@ class GPUDeviceVulkan : public GPUDevice
|
||||
|
||||
private:
|
||||
CriticalSection _fenceLock;
|
||||
mutable void* _nativePtr[2];
|
||||
mutable void* _nativePtr[4];
|
||||
|
||||
Dictionary<RenderTargetLayoutVulkan, RenderPassVulkan*> _renderPasses;
|
||||
Dictionary<FramebufferVulkan::Key, FramebufferVulkan*> _framebuffers;
|
||||
@@ -531,6 +531,13 @@ public:
|
||||
PipelineLayoutVulkan* GetOrCreateLayout(DescriptorSetLayoutInfoVulkan& key);
|
||||
void OnImageViewDestroy(VkImageView imageView);
|
||||
|
||||
#if VULKAN_USE_PERF_SDK
|
||||
// NVIDIA Perf SDK integration for usage in plugin
|
||||
static bool UsePerfSDK;
|
||||
static void PerfSDKInit();
|
||||
void PerfSDKInitDeviceInfo(VkDeviceCreateInfo& deviceInfo, const Array<const char*>& deviceExtensions, VkPhysicalDeviceFeatures2* enabledFeatures2);
|
||||
#endif
|
||||
|
||||
public:
|
||||
/// <summary>
|
||||
/// Setups the present queue to be ready for the given window surface.
|
||||
@@ -569,6 +576,7 @@ public:
|
||||
GPUContext* GetMainContext() override;
|
||||
GPUAdapter* GetAdapter() const override;
|
||||
void* GetNativePtr() const override;
|
||||
QueueInfo GetNativeQueue() const override;
|
||||
GPUMemoryStats GetMemoryStats() override;
|
||||
bool Init() override;
|
||||
void DrawBegin() override;
|
||||
|
||||
@@ -384,7 +384,7 @@ bool GPUSwapChainVulkan::CreateSwapChain(int32 width, int32 height)
|
||||
if (Platform::UsesWayland())
|
||||
backbuffersCount = Math::Max<uint32_t>(backbuffersCount, 3);
|
||||
#endif
|
||||
|
||||
|
||||
ASSERT(surfProperties.minImageCount <= VULKAN_BACK_BUFFERS_COUNT_MAX);
|
||||
VkSwapchainCreateInfoKHR swapChainInfo;
|
||||
RenderToolsVulkan::ZeroStruct(swapChainInfo, VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR);
|
||||
@@ -420,11 +420,11 @@ bool GPUSwapChainVulkan::CreateSwapChain(int32 width, int32 height)
|
||||
VALIDATE_VULKAN_RESULT(vkGetPhysicalDeviceSurfaceSupportKHR(gpu, _device->PresentQueue->GetFamilyIndex(), _surface, &supportsPresent));
|
||||
ASSERT(supportsPresent);
|
||||
#if PLATFORM_IOS
|
||||
Function<void()> func = [this, &device, &swapChainInfo]()
|
||||
{
|
||||
Function<void()> func = [this, &device, &swapChainInfo]()
|
||||
{
|
||||
VALIDATE_VULKAN_RESULT(vkCreateSwapchainKHR(device, &swapChainInfo, nullptr, &_swapChain));
|
||||
};
|
||||
iOSPlatform::RunOnUIThread(func, true);
|
||||
};
|
||||
iOSPlatform::RunOnUIThread(func, true);
|
||||
#else
|
||||
VALIDATE_VULKAN_RESULT(vkCreateSwapchainKHR(device, &swapChainInfo, nullptr, &_swapChain));
|
||||
#endif
|
||||
@@ -483,6 +483,12 @@ GPUSwapChainVulkan::Status GPUSwapChainVulkan::Present(QueueVulkan* presentQueue
|
||||
presentInfo.pSwapchains = &_swapChain;
|
||||
presentInfo.pImageIndices = (uint32*)&_currentImageIndex;
|
||||
|
||||
#if VULKAN_USE_PERF_SDK
|
||||
// NvPerf Vulkan profiling workaround: drain the present queue before presenting so NvPerf's per-queue tracking stays in the state RangeProfiler BeginSession requires
|
||||
if (GPUDeviceVulkan::UsePerfSDK)
|
||||
vkQueueWaitIdle(presentQueue->GetHandle());
|
||||
#endif
|
||||
|
||||
const VkResult presentResult = vkQueuePresentKHR(presentQueue->GetHandle(), &presentInfo);
|
||||
if (presentResult == VK_ERROR_OUT_OF_DATE_KHR)
|
||||
{
|
||||
|
||||
@@ -14,7 +14,12 @@
|
||||
|
||||
RenderStatsData RenderStatsData::Counter;
|
||||
|
||||
int32 ProfilerGPU::_depth = 0;
|
||||
namespace
|
||||
{
|
||||
int32 Depth = 0;
|
||||
int32 ExternalCounter = 0;
|
||||
}
|
||||
|
||||
bool ProfilerGPU::Enabled = false;
|
||||
#if GPU_AUTO_PROFILE_EVENTS
|
||||
bool ProfilerGPU::EventsEnabled = true;
|
||||
@@ -23,6 +28,10 @@ bool ProfilerGPU::EventsEnabled = false;
|
||||
#endif
|
||||
int32 ProfilerGPU::CurrentBuffer = 0;
|
||||
ProfilerGPU::EventBuffer ProfilerGPU::Buffers[PROFILER_GPU_EVENTS_FRAMES];
|
||||
ProfilerGPU::EventBeginDelegate ProfilerGPU::EventBegin = nullptr;
|
||||
ProfilerGPU::EventEndDelegate ProfilerGPU::EventEnd = nullptr;
|
||||
ProfilerGPU::FrameDelegate ProfilerGPU::FrameBegin = nullptr;
|
||||
ProfilerGPU::FrameDelegate ProfilerGPU::FrameEnd = nullptr;
|
||||
|
||||
bool ProfilerGPU::EventBuffer::HasData() const
|
||||
{
|
||||
@@ -99,6 +108,15 @@ int32 ProfilerGPU::BeginEvent(const Char* name)
|
||||
if (EventsEnabled)
|
||||
context->EventBegin(name);
|
||||
#endif
|
||||
|
||||
const int32 depth = Depth++;
|
||||
|
||||
if (EventBegin)
|
||||
{
|
||||
// External event
|
||||
EventBegin(name, depth, ExternalCounter++, context);
|
||||
}
|
||||
|
||||
if (!Enabled)
|
||||
return -1;
|
||||
|
||||
@@ -106,7 +124,7 @@ int32 ProfilerGPU::BeginEvent(const Char* name)
|
||||
e.Name = name;
|
||||
e.Stats = RenderStatsData::Counter;
|
||||
e.Query = context->BeginQuery(GPUQueryType::Timer);
|
||||
e.Depth = _depth++;
|
||||
e.Depth = depth;
|
||||
e.QueryActive = true;
|
||||
|
||||
auto& buffer = Buffers[CurrentBuffer];
|
||||
@@ -121,9 +139,17 @@ void ProfilerGPU::EndEvent(int32 index)
|
||||
if (EventsEnabled)
|
||||
context->EventEnd();
|
||||
#endif
|
||||
|
||||
Depth--;
|
||||
|
||||
if (EventEnd)
|
||||
{
|
||||
// External event
|
||||
EventEnd(--ExternalCounter, context);
|
||||
}
|
||||
|
||||
if (index == -1)
|
||||
return;
|
||||
_depth--;
|
||||
|
||||
auto& buffer = Buffers[CurrentBuffer];
|
||||
auto e = buffer.Get(index);
|
||||
@@ -136,7 +162,8 @@ void ProfilerGPU::BeginFrame()
|
||||
{
|
||||
// Clear stats
|
||||
RenderStatsData::Counter = RenderStatsData();
|
||||
_depth = 0;
|
||||
Depth = 0;
|
||||
ExternalCounter = 0;
|
||||
auto& buffer = Buffers[CurrentBuffer];
|
||||
buffer.FrameIndex = Engine::FrameCount;
|
||||
buffer.PresentTime = 0.0f;
|
||||
@@ -163,7 +190,7 @@ void ProfilerGPU::OnPresentTime(float time)
|
||||
|
||||
void ProfilerGPU::EndFrame()
|
||||
{
|
||||
if (_depth)
|
||||
if (Depth != 0)
|
||||
{
|
||||
LOG(Warning, "GPU Profiler events start/end mismatch");
|
||||
}
|
||||
|
||||
@@ -123,9 +123,6 @@ public:
|
||||
void Clear();
|
||||
};
|
||||
|
||||
private:
|
||||
static int32 _depth;
|
||||
|
||||
public:
|
||||
/// <summary>
|
||||
/// True if GPU profiling is enabled, otherwise false to disable events collecting and GPU timer queries usage. Can be changed during rendering.
|
||||
@@ -147,6 +144,18 @@ public:
|
||||
/// </summary>
|
||||
static EventBuffer Buffers[PROFILER_GPU_EVENTS_FRAMES];
|
||||
|
||||
public:
|
||||
// Callbacks used by the profiler to notify about GPU events. These can be used to integrate with external profilers like NVIDIA Nsight PerfSDK.
|
||||
typedef void (*EventBeginDelegate)(const Char* name, int32 depth, int32 index, class GPUContext* context);
|
||||
typedef void (*EventEndDelegate)(int32 index, class GPUContext* context);
|
||||
static EventBeginDelegate EventBegin;
|
||||
static EventEndDelegate EventEnd;
|
||||
|
||||
// Callbacks used by the profiler to notify about GPU frame begin (before commands recording) and end (after submit/present). These can be used to integrate with external profilers like NVIDIA Nsight PerfSDK.
|
||||
typedef void (*FrameDelegate)();
|
||||
static FrameDelegate FrameBegin;
|
||||
static FrameDelegate FrameEnd;
|
||||
|
||||
public:
|
||||
/// <summary>
|
||||
/// Begins the event. Call EndEvent with index parameter equal to the returned value by BeginEvent function.
|
||||
@@ -174,7 +183,7 @@ public:
|
||||
/// Profiles next frame(s) rendering performance and dumps the results to the log (as a hierarchy structure). When using more than 1 frame, the results are averaged for more accurate profiling (especially for A/B testing).
|
||||
/// </summary>
|
||||
/// <param name="frames">Amount of frames to profile for more stable results (event durations are averaged). Value 0 uses default of 4 frames.</param>
|
||||
API_FUNCTION(Attributes = "DebugCommand") static void Dump(int32 frames = 4);
|
||||
API_FUNCTION(Attributes="DebugCommand") static void Dump(int32 frames = 4);
|
||||
|
||||
private:
|
||||
static void BeginFrame();
|
||||
|
||||
Reference in New Issue
Block a user