summaryrefslogtreecommitdiffstats
path: root/external
diff options
context:
space:
mode:
authorLaG1924 <12997935+LaG1924@users.noreply.github.com>2019-04-30 13:12:35 +0200
committerLaG1924 <12997935+LaG1924@users.noreply.github.com>2019-04-30 13:12:35 +0200
commit868ba6279a20e4d1412c2d576c67400167de6694 (patch)
treea7090b2da96987c4c532c5bedf812df20f604964 /external
parentSecond iteration of changing to single-threaded model (diff)
downloadAltCraft-868ba6279a20e4d1412c2d576c67400167de6694.tar
AltCraft-868ba6279a20e4d1412c2d576c67400167de6694.tar.gz
AltCraft-868ba6279a20e4d1412c2d576c67400167de6694.tar.bz2
AltCraft-868ba6279a20e4d1412c2d576c67400167de6694.tar.lz
AltCraft-868ba6279a20e4d1412c2d576c67400167de6694.tar.xz
AltCraft-868ba6279a20e4d1412c2d576c67400167de6694.tar.zst
AltCraft-868ba6279a20e4d1412c2d576c67400167de6694.zip
Diffstat (limited to 'external')
-rw-r--r--external/CMakeLists.txt5
-rw-r--r--external/optick/optick.config.h51
-rw-r--r--external/optick/optick.h872
-rw-r--r--external/optick/optick_common.h142
-rw-r--r--external/optick/optick_core.cpp1657
-rw-r--r--external/optick/optick_core.h568
-rw-r--r--external/optick/optick_core.linux.h410
-rw-r--r--external/optick/optick_core.macos.h289
-rw-r--r--external/optick/optick_core.platform.h92
-rw-r--r--external/optick/optick_core.win.h1664
-rw-r--r--external/optick/optick_gpu.cpp136
-rw-r--r--external/optick/optick_gpu.d3d12.cpp382
-rw-r--r--external/optick/optick_gpu.h129
-rw-r--r--external/optick/optick_gpu.vulkan.cpp365
-rw-r--r--external/optick/optick_memory.h419
-rw-r--r--external/optick/optick_message.cpp172
-rw-r--r--external/optick/optick_message.h130
-rw-r--r--external/optick/optick_serialization.cpp178
-rw-r--r--external/optick/optick_serialization.h120
-rw-r--r--external/optick/optick_server.cpp338
-rw-r--r--external/optick/optick_server.h42
21 files changed, 8160 insertions, 1 deletions
diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt
index d370f1a..b3cc884 100644
--- a/external/CMakeLists.txt
+++ b/external/CMakeLists.txt
@@ -7,4 +7,7 @@ file(GLOB_RECURSE HEADERS "./include/*")
include_directories(./include/)
-add_library(deps STATIC ${SOURCES} ${HEADERS}) \ No newline at end of file
+add_library(deps STATIC ${SOURCES} ${HEADERS})
+
+file(GLOB OPTICK_SRC "./optick/*.cpp")
+add_library(optick STATIC ${OPTICK_SRC}) \ No newline at end of file
diff --git a/external/optick/optick.config.h b/external/optick/optick.config.h
new file mode 100644
index 0000000..dcc6e98
--- /dev/null
+++ b/external/optick/optick.config.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#define OPTICK_ENABLE_GPU_D3D12 false
+#define OPTICK_ENABLE_GPU_VULKAN false
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// GLOBAL SETTINGS
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// [x] USE_OPTICK - (Master Switch)
+// [x] OPTICK_ENABLE_TRACING - (Enable Kernel-level tracing)
+// [ ] OPTICK_ENABLE_GPU_D3D12 - (GPU D3D12; forced off by the #define at the top of this file)
+// [ ] OPTICK_ENABLE_GPU_VULKAN - (GPU VULKAN)
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// MASTER SWITCH - use it for disabling profiler in final builds //
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#if !defined(USE_OPTICK)
+#define USE_OPTICK (1)
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Enable Low-level platform-specific tracing (Switch Contexts, Autosampling, etc.)
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#if !defined(OPTICK_ENABLE_TRACING)
+#define OPTICK_ENABLE_TRACING (USE_OPTICK /*&& 0*/)
+#endif //OPTICK_ENABLE_TRACING
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// GPU Counters
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#if !defined(OPTICK_ENABLE_GPU)
+#define OPTICK_ENABLE_GPU (USE_OPTICK /*&& 0*/)
+#endif //OPTICK_ENABLE_GPU
+
+// D3D12
+#if !defined(OPTICK_ENABLE_GPU_D3D12)
+#if defined(_MSC_VER)
+#define OPTICK_ENABLE_GPU_D3D12 (OPTICK_ENABLE_GPU /*&& 0*/)
+#else
+#define OPTICK_ENABLE_GPU_D3D12 (0)
+#endif
+#endif
+
+// VULKAN
+#if !defined(OPTICK_ENABLE_GPU_VULKAN)
+#define OPTICK_ENABLE_GPU_VULKAN (OPTICK_ENABLE_GPU && 0)
+#endif
+
diff --git a/external/optick/optick.h b/external/optick/optick.h
new file mode 100644
index 0000000..e3eb512
--- /dev/null
+++ b/external/optick/optick.h
@@ -0,0 +1,872 @@
+#pragma once
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Config
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#include "optick.config.h"
+
+#if USE_OPTICK
+#include <stdint.h>
+
+#if defined(__clang__) || defined(__GNUC__)
+# define OPTICK_GCC (1)
+# if defined(__APPLE_CC__)
+# define OPTICK_OSX (1)
+# elif defined(__linux__)
+# define OPTICK_LINUX (1)
+# elif defined(__ORBIS__)
+# define OPTICK_PS4 (1)
+# endif
+#elif defined(_MSC_VER)
+# define OPTICK_MSVC (1)
+# if defined(_DURANGO)
+# define OPTICK_XBOX (1)
+# else
+# define OPTICK_PC (1)
+#endif
+#else
+#error Compiler not supported
+#endif
+
+////////////////////////////////////////////////////////////////////////
+// Target Platform
+////////////////////////////////////////////////////////////////////////
+
+#if defined(OPTICK_GCC)
+#define OPTICK_FUNC __PRETTY_FUNCTION__
+#elif defined(OPTICK_MSVC)
+#define OPTICK_FUNC __FUNCSIG__
+#else
+#error Compiler not supported
+#endif
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// EXPORTS
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#ifdef OPTICK_EXPORTS
+#define OPTICK_API __declspec(dllexport)
+#else
+#define OPTICK_API //__declspec(dllimport)
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#define OPTICK_CONCAT_IMPL(x, y) x##y
+#define OPTICK_CONCAT(x, y) OPTICK_CONCAT_IMPL(x, y)
+
+#if defined(OPTICK_MSVC)
+#define OPTICK_INLINE __forceinline
+#elif defined(OPTICK_GCC)
+#define OPTICK_INLINE __attribute__((always_inline)) inline
+#else
+#error Compiler is not supported
+#endif
+
+
+// Vulkan Forward Declarations
+#define OPTICK_DEFINE_HANDLE(object) typedef struct object##_T* object;
+OPTICK_DEFINE_HANDLE(VkDevice);
+OPTICK_DEFINE_HANDLE(VkPhysicalDevice);
+OPTICK_DEFINE_HANDLE(VkQueue);
+OPTICK_DEFINE_HANDLE(VkCommandBuffer);
+
+// D3D12 Forward Declarations
+struct ID3D12CommandList;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace Optick
+{
+ // Source: http://msdn.microsoft.com/en-us/library/system.windows.media.colors(v=vs.110).aspx
+ // Image: http://i.msdn.microsoft.com/dynimg/IC24340.png
+ struct Color
+ {
+ enum
+ {
+ Null = 0x00000000,
+ AliceBlue = 0xFFF0F8FF,
+ AntiqueWhite = 0xFFFAEBD7,
+ Aqua = 0xFF00FFFF,
+ Aquamarine = 0xFF7FFFD4,
+ Azure = 0xFFF0FFFF,
+ Beige = 0xFFF5F5DC,
+ Bisque = 0xFFFFE4C4,
+ Black = 0xFF000000,
+ BlanchedAlmond = 0xFFFFEBCD,
+ Blue = 0xFF0000FF,
+ BlueViolet = 0xFF8A2BE2,
+ Brown = 0xFFA52A2A,
+ BurlyWood = 0xFFDEB887,
+ CadetBlue = 0xFF5F9EA0,
+ Chartreuse = 0xFF7FFF00,
+ Chocolate = 0xFFD2691E,
+ Coral = 0xFFFF7F50,
+ CornflowerBlue = 0xFF6495ED,
+ Cornsilk = 0xFFFFF8DC,
+ Crimson = 0xFFDC143C,
+ Cyan = 0xFF00FFFF,
+ DarkBlue = 0xFF00008B,
+ DarkCyan = 0xFF008B8B,
+ DarkGoldenRod = 0xFFB8860B,
+ DarkGray = 0xFFA9A9A9,
+ DarkGreen = 0xFF006400,
+ DarkKhaki = 0xFFBDB76B,
+ DarkMagenta = 0xFF8B008B,
+ DarkOliveGreen = 0xFF556B2F,
+ DarkOrange = 0xFFFF8C00,
+ DarkOrchid = 0xFF9932CC,
+ DarkRed = 0xFF8B0000,
+ DarkSalmon = 0xFFE9967A,
+ DarkSeaGreen = 0xFF8FBC8F,
+ DarkSlateBlue = 0xFF483D8B,
+ DarkSlateGray = 0xFF2F4F4F,
+ DarkTurquoise = 0xFF00CED1,
+ DarkViolet = 0xFF9400D3,
+ DeepPink = 0xFFFF1493,
+ DeepSkyBlue = 0xFF00BFFF,
+ DimGray = 0xFF696969,
+ DodgerBlue = 0xFF1E90FF,
+ FireBrick = 0xFFB22222,
+ FloralWhite = 0xFFFFFAF0,
+ ForestGreen = 0xFF228B22,
+ Fuchsia = 0xFFFF00FF,
+ Gainsboro = 0xFFDCDCDC,
+ GhostWhite = 0xFFF8F8FF,
+ Gold = 0xFFFFD700,
+ GoldenRod = 0xFFDAA520,
+ Gray = 0xFF808080,
+ Green = 0xFF008000,
+ GreenYellow = 0xFFADFF2F,
+ HoneyDew = 0xFFF0FFF0,
+ HotPink = 0xFFFF69B4,
+ IndianRed = 0xFFCD5C5C,
+ Indigo = 0xFF4B0082,
+ Ivory = 0xFFFFFFF0,
+ Khaki = 0xFFF0E68C,
+ Lavender = 0xFFE6E6FA,
+ LavenderBlush = 0xFFFFF0F5,
+ LawnGreen = 0xFF7CFC00,
+ LemonChiffon = 0xFFFFFACD,
+ LightBlue = 0xFFADD8E6,
+ LightCoral = 0xFFF08080,
+ LightCyan = 0xFFE0FFFF,
+ LightGoldenRodYellow = 0xFFFAFAD2,
+ LightGray = 0xFFD3D3D3,
+ LightGreen = 0xFF90EE90,
+ LightPink = 0xFFFFB6C1,
+ LightSalmon = 0xFFFFA07A,
+ LightSeaGreen = 0xFF20B2AA,
+ LightSkyBlue = 0xFF87CEFA,
+ LightSlateGray = 0xFF778899,
+ LightSteelBlue = 0xFFB0C4DE,
+ LightYellow = 0xFFFFFFE0,
+ Lime = 0xFF00FF00,
+ LimeGreen = 0xFF32CD32,
+ Linen = 0xFFFAF0E6,
+ Magenta = 0xFFFF00FF,
+ Maroon = 0xFF800000,
+ MediumAquaMarine = 0xFF66CDAA,
+ MediumBlue = 0xFF0000CD,
+ MediumOrchid = 0xFFBA55D3,
+ MediumPurple = 0xFF9370DB,
+ MediumSeaGreen = 0xFF3CB371,
+ MediumSlateBlue = 0xFF7B68EE,
+ MediumSpringGreen = 0xFF00FA9A,
+ MediumTurquoise = 0xFF48D1CC,
+ MediumVioletRed = 0xFFC71585,
+ MidnightBlue = 0xFF191970,
+ MintCream = 0xFFF5FFFA,
+ MistyRose = 0xFFFFE4E1,
+ Moccasin = 0xFFFFE4B5,
+ NavajoWhite = 0xFFFFDEAD,
+ Navy = 0xFF000080,
+ OldLace = 0xFFFDF5E6,
+ Olive = 0xFF808000,
+ OliveDrab = 0xFF6B8E23,
+ Orange = 0xFFFFA500,
+ OrangeRed = 0xFFFF4500,
+ Orchid = 0xFFDA70D6,
+ PaleGoldenRod = 0xFFEEE8AA,
+ PaleGreen = 0xFF98FB98,
+ PaleTurquoise = 0xFFAFEEEE,
+ PaleVioletRed = 0xFFDB7093,
+ PapayaWhip = 0xFFFFEFD5,
+ PeachPuff = 0xFFFFDAB9,
+ Peru = 0xFFCD853F,
+ Pink = 0xFFFFC0CB,
+ Plum = 0xFFDDA0DD,
+ PowderBlue = 0xFFB0E0E6,
+ Purple = 0xFF800080,
+ Red = 0xFFFF0000,
+ RosyBrown = 0xFFBC8F8F,
+ RoyalBlue = 0xFF4169E1,
+ SaddleBrown = 0xFF8B4513,
+ Salmon = 0xFFFA8072,
+ SandyBrown = 0xFFF4A460,
+ SeaGreen = 0xFF2E8B57,
+ SeaShell = 0xFFFFF5EE,
+ Sienna = 0xFFA0522D,
+ Silver = 0xFFC0C0C0,
+ SkyBlue = 0xFF87CEEB,
+ SlateBlue = 0xFF6A5ACD,
+ SlateGray = 0xFF708090,
+ Snow = 0xFFFFFAFA,
+ SpringGreen = 0xFF00FF7F,
+ SteelBlue = 0xFF4682B4,
+ Tan = 0xFFD2B48C,
+ Teal = 0xFF008080,
+ Thistle = 0xFFD8BFD8,
+ Tomato = 0xFFFF6347,
+ Turquoise = 0xFF40E0D0,
+ Violet = 0xFFEE82EE,
+ Wheat = 0xFFF5DEB3,
+ White = 0xFFFFFFFF,
+ WhiteSmoke = 0xFFF5F5F5,
+ Yellow = 0xFFFFFF00,
+ YellowGreen = 0xFF9ACD32,
+ };
+ };
+
+ struct Filter // Scope holder for the event filter ids; members are referenced as Filter::<Name>.
+ {
+ enum Type : uint32_t // Implicitly numbered from None = 0; OPTICK_MAKE_CATEGORY uses the id as a bit offset.
+ {
+ None,
+
+ // CPU-side filters
+ AI,
+ Animation,
+ Audio,
+ Debug,
+ Camera,
+ Cloth,
+ GameLogic,
+ Input,
+ Navigation,
+ Network,
+ Physics,
+ Rendering,
+ Scene,
+ Script,
+ Streaming,
+ UI,
+ VFX,
+ Visibility,
+ Wait,
+
+ // IO filter
+ IO,
+
+ // GPU-side filters
+ GPU_Cloth,
+ GPU_Lighting,
+ GPU_PostFX,
+ GPU_Reflections,
+ GPU_Scene,
+ GPU_Shadows,
+ GPU_UI,
+ GPU_VFX,
+ GPU_Water,
+
+ };
+ };
+
+ // Packs a category into a 64-bit value: bit (filter + 32) is set in the upper half
+ // and the color value is OR-ed into the low bits.
+ // Both arguments are parenthesized so compound expressions (e.g. a ?: or an
+ // arithmetic expression) expand with the intended precedence.
+ #define OPTICK_MAKE_CATEGORY(filter, color) (((uint64_t)(1ull) << ((filter) + 32)) | (uint64_t)(color))
+
+ struct Category // Predefined (filter, color) pairs packed into 64 bits by OPTICK_MAKE_CATEGORY.
+ {
+ enum Type : uint64_t // Upper 32 bits: filter bit; lower 32 bits: color (see GetMask / GetColor below).
+ {
+ // CPU categories
+ None = OPTICK_MAKE_CATEGORY(Filter::None, Color::Null),
+ AI = OPTICK_MAKE_CATEGORY(Filter::AI, Color::Purple),
+ Animation = OPTICK_MAKE_CATEGORY(Filter::Animation, Color::LightSkyBlue),
+ Audio = OPTICK_MAKE_CATEGORY(Filter::Audio, Color::HotPink),
+ Debug = OPTICK_MAKE_CATEGORY(Filter::Debug, Color::Black),
+ Camera = OPTICK_MAKE_CATEGORY(Filter::Camera, Color::Black),
+ Cloth = OPTICK_MAKE_CATEGORY(Filter::Cloth, Color::DarkGreen),
+ GameLogic = OPTICK_MAKE_CATEGORY(Filter::GameLogic, Color::RoyalBlue),
+ Input = OPTICK_MAKE_CATEGORY(Filter::Input, Color::Ivory),
+ Navigation = OPTICK_MAKE_CATEGORY(Filter::Navigation, Color::Magenta),
+ Network = OPTICK_MAKE_CATEGORY(Filter::Network, Color::Olive),
+ Physics = OPTICK_MAKE_CATEGORY(Filter::Physics, Color::LawnGreen),
+ Rendering = OPTICK_MAKE_CATEGORY(Filter::Rendering, Color::BurlyWood),
+ Scene = OPTICK_MAKE_CATEGORY(Filter::Scene, Color::RoyalBlue),
+ Script = OPTICK_MAKE_CATEGORY(Filter::Script, Color::Plum),
+ Streaming = OPTICK_MAKE_CATEGORY(Filter::Streaming, Color::Gold),
+ UI = OPTICK_MAKE_CATEGORY(Filter::UI, Color::PaleTurquoise),
+ VFX = OPTICK_MAKE_CATEGORY(Filter::VFX, Color::SaddleBrown),
+ Visibility = OPTICK_MAKE_CATEGORY(Filter::Visibility, Color::Snow),
+ Wait = OPTICK_MAKE_CATEGORY(Filter::Wait, Color::Tomato),
+ WaitEmpty = OPTICK_MAKE_CATEGORY(Filter::Wait, Color::White), // Same filter as Wait, different color
+ // IO category
+ IO = OPTICK_MAKE_CATEGORY(Filter::IO, Color::Khaki),
+ // GPU categories
+ GPU_Cloth = OPTICK_MAKE_CATEGORY(Filter::GPU_Cloth, Color::DarkGreen),
+ GPU_Lighting = OPTICK_MAKE_CATEGORY(Filter::GPU_Lighting, Color::Khaki),
+ GPU_PostFX = OPTICK_MAKE_CATEGORY(Filter::GPU_PostFX, Color::Maroon),
+ GPU_Reflections = OPTICK_MAKE_CATEGORY(Filter::GPU_Reflections, Color::CadetBlue),
+ GPU_Scene = OPTICK_MAKE_CATEGORY(Filter::GPU_Scene, Color::RoyalBlue),
+ GPU_Shadows = OPTICK_MAKE_CATEGORY(Filter::GPU_Shadows, Color::LightSlateGray),
+ GPU_UI = OPTICK_MAKE_CATEGORY(Filter::GPU_UI, Color::PaleTurquoise),
+ GPU_VFX = OPTICK_MAKE_CATEGORY(Filter::GPU_VFX, Color::SaddleBrown),
+ GPU_Water = OPTICK_MAKE_CATEGORY(Filter::GPU_Water, Color::SteelBlue),
+ };
+
+ static uint32_t GetMask(Type t) { return (uint32_t)(t >> 32); } // Returns the upper 32 bits (the filter bit mask)
+ static uint32_t GetColor(Type t) { return (uint32_t)(t); } // Returns the lower 32 bits (the color)
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+
+
+namespace Optick
+{
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct Mode // Scope holder for the capture mode flags accepted by IsActive().
+{
+ enum Type // Each named flag is a distinct bit, so flags can be combined with bitwise OR.
+ {
+ OFF = 0x0,
+ INSTRUMENTATION_CATEGORIES = (1 << 0),
+ INSTRUMENTATION_EVENTS = (1 << 1),
+ INSTRUMENTATION = (INSTRUMENTATION_CATEGORIES | INSTRUMENTATION_EVENTS), // Both instrumentation bits
+ SAMPLING = (1 << 2),
+ TAGS = (1 << 3),
+ AUTOSAMPLING = (1 << 4),
+ SWITCH_CONTEXT = (1 << 5),
+ IO = (1 << 6),
+ GPU = (1 << 7),
+ END_SCREENSHOT = (1 << 8),
+ RESERVED_0 = (1 << 9),
+ RESERVED_1 = (1 << 10),
+ HW_COUNTERS = (1 << 11),
+ LIVE = (1 << 12),
+ RESERVED_2 = (1 << 13),
+ RESERVED_3 = (1 << 14),
+ RESERVED_4 = (1 << 15),
+ SYS_CALLS = (1 << 16),
+ OTHER_PROCESSES = (1 << 17),
+
+ TRACER = AUTOSAMPLING | SWITCH_CONTEXT | SYS_CALLS, // Combined mask of the three listed bits
+ DEFAULT = INSTRUMENTATION | TAGS | AUTOSAMPLING | SWITCH_CONTEXT | IO | GPU | SYS_CALLS | OTHER_PROCESSES, // Combined mask; presumably the mode set used when none is specified - TODO confirm
+ };
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+OPTICK_API int64_t GetHighPrecisionTime();
+OPTICK_API int64_t GetHighPrecisionFrequency();
+OPTICK_API uint32_t NextFrame();
+OPTICK_API bool IsActive(Mode::Type mode = Mode::INSTRUMENTATION_EVENTS);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct EventStorage;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+OPTICK_API bool RegisterFiber(uint64_t fiberId, EventStorage** slot);
+OPTICK_API bool RegisterThread(const char* name);
+OPTICK_API bool RegisterThread(const wchar_t* name);
+OPTICK_API bool UnRegisterThread(bool keepAlive);
+OPTICK_API EventStorage** GetEventStorageSlotForCurrentThread();
+OPTICK_API bool IsFiberStorage(EventStorage* fiberStorage);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct ThreadMask // Scope holder for the thread/storage type flags taken by RegisterStorage().
+{
+ enum Type // One bit per flag
+ {
+ None = 0, // Default used by RegisterStorage()
+ Main = 1 << 0,
+ GPU = 1 << 1,
+ IO = 1 << 2,
+ Idle = 1 << 3,
+ };
+};
+
+OPTICK_API EventStorage* RegisterStorage(const char* name, uint64_t threadID = uint64_t(-1), ThreadMask::Type type = ThreadMask::None);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct State // Scope holder for the capture state values delivered to a StateCallback.
+{
+ enum Type
+ {
+ // Starting a new capture
+ START_CAPTURE,
+
+ // Stopping the current capture
+ STOP_CAPTURE,
+
+ // Dumping the capture to the GUI
+ // Useful for attaching a summary and a screenshot to the capture
+ DUMP_CAPTURE,
+
+ // Cancelling the current capture
+ CANCEL_CAPTURE,
+ };
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Sets a state change callback
+typedef bool (*StateCallback)(State::Type state);
+OPTICK_API bool SetStateChangedCallback(StateCallback cb);
+
+// Attaches a key-value pair to the capture's summary
+// Example: AttachSummary("Version", "v12.0.1");
+// AttachSummary("Platform", "Windows");
+// AttachSummary("Config", "Release_x64");
+// AttachSummary("Settings", "Ultra");
+// AttachSummary("Map", "Atlantida");
+// AttachSummary("Position", "123.0,120.0,41.1");
+// AttachSummary("CPU", "Intel(R) Xeon(R) CPU E5410@2.33GHz");
+// AttachSummary("GPU", "NVIDIA GeForce GTX 980 Ti");
+OPTICK_API bool AttachSummary(const char* key, const char* value);
+
+struct File // Scope holder for the attachment kinds passed as the first argument of AttachFile().
+{
+ enum Type
+ {
+ // Image attachment. Supported formats: PNG, JPEG, BMP, TIFF
+ OPTICK_IMAGE,
+
+ // Text file
+ OPTICK_TEXT,
+
+ // Any other type
+ OPTICK_OTHER,
+ };
+};
+// Attaches a file to the current capture
+OPTICK_API bool AttachFile(File::Type type, const char* name, const uint8_t* data, uint32_t size);
+OPTICK_API bool AttachFile(File::Type type, const char* name, const char* path);
+OPTICK_API bool AttachFile(File::Type type, const char* name, const wchar_t* path);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct EventDescription;
+struct Frame;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct EventTime // Start/finish timestamp pair; base of EventData, SyncData and FiberSyncData.
+{
+ static const int64_t INVALID_TIMESTAMP = (int64_t)-1; // Sentinel (-1); presumably marks a timestamp that was never set - TODO confirm
+
+ int64_t start; // Written by Start(); not initialized by this struct
+ int64_t finish; // Written by Stop(); not initialized by this struct
+
+ OPTICK_INLINE void Start() { start = Optick::GetHighPrecisionTime(); } // Stores the current high-precision time in start
+ OPTICK_INLINE void Stop() { finish = Optick::GetHighPrecisionTime(); } // Stores the current high-precision time in finish
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// A timed interval (inherited start/finish) tagged with the description of the event it belongs to.
+struct EventData : public EventTime
+{
+ const EventDescription* description;
+
+ // Ordering: earlier start first; for equal starts the interval that finishes later comes first.
+ bool operator<(const EventData& other) const
+ {
+ // Tie on start: reversed order for finish (parent interval before its children)
+ if (start == other.start)
+ {
+ return finish > other.finish;
+ }
+
+ return start < other.start;
+ }
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct OPTICK_API SyncData : public EventTime // Timed record; NOTE(review): field names suggest a thread context-switch sample - confirm against the tracer code
+{
+ uint64_t newThreadId; // presumably the thread switched in - TODO confirm
+ uint64_t oldThreadId; // presumably the thread switched out - TODO confirm
+ uint8_t core; // presumably the CPU core index - TODO confirm
+ int8_t reason; // meaning of the values is not visible in this header
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct OPTICK_API FiberSyncData : public EventTime // Timed record carrying a thread id, with attach/detach helpers for an EventStorage
+{
+ uint64_t threadId; // Thread id associated with this record
+
+ static void AttachToThread(EventStorage* storage, uint64_t threadId); // Defined outside this header; presumably marks storage as running on threadId - TODO confirm
+ static void DetachFromThread(EventStorage* storage); // Defined outside this header; counterpart of AttachToThread
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<class T> // T is the payload type stored in the tag
+struct TagData // A timestamped value of type T bound to an EventDescription.
+{
+ const EventDescription* description; // Description this tag belongs to
+ int64_t timestamp; // Time taken from GetHighPrecisionTime() by the two-argument constructor
+ T data; // Payload copy
+ TagData() {} // Leaves description and timestamp uninitialized; data is default-initialized
+ TagData(const EventDescription& desc, T d) : description(&desc), timestamp(Optick::GetHighPrecisionTime()), data(d) {} // Stores the address of desc, so desc must outlive this object
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct OPTICK_API EventDescription // Static metadata of an event: name, source location, color, filter and budget.
+{
+ // HOT \\
+ // Have to place "hot" variables at the beginning of the class (here will be some padding)
+ // COLD //
+
+ const char* name; // Event name
+ const char* file; // Source file name
+ uint32_t line; // Source line
+ uint32_t index; // NOTE(review): presumably the slot assigned by EventDescriptionBoard - confirm
+ uint32_t color; // Display color (see Color)
+ uint32_t filter; // Filter mask (see Category::GetMask)
+ float budget; // Units and meaning are not visible in this header
+
+ static EventDescription* Create(const char* eventName, const char* fileName, const unsigned long fileLine, const unsigned long eventColor = Color::Null, const unsigned long filter = 0); // Used by the STATIC-name macros (OPTICK_EVENT, OPTICK_TAG, OPTICK_PUSH)
+ static EventDescription* CreateShared(const char* eventName, const char* fileName = nullptr, const unsigned long fileLine = 0, const unsigned long eventColor = Color::Null, const unsigned long filter = 0); // Used by OPTICK_EVENT_DYNAMIC for names only known at run time
+
+ EventDescription();
+private:
+ friend class EventDescriptionBoard; // Gets access to the private assignment operator
+ EventDescription& operator=(const EventDescription&); // Private: copy-assignment is not available to outside code
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct OPTICK_API Event // Scoped CPU event: the constructor calls Start(), the destructor calls Stop().
+{
+ EventData* data; // Result of Start(); may be null, in which case the destructor does nothing
+
+ static EventData* Start(const EventDescription& description); // Defined outside this header; the destructor treats a null result as "nothing to stop"
+ static void Stop(EventData& data); // Defined outside this header; closes an event returned by Start()
+
+ static void Push(const char* name); // Used by OPTICK_PUSH_DYNAMIC
+ static void Push(const EventDescription& description); // Used by OPTICK_PUSH
+ static void Pop(); // Used by OPTICK_POP
+
+ static void Add(EventStorage* storage, const EventDescription* description, int64_t timestampStart, int64_t timestampFinish); // Used by OPTICK_STORAGE_EVENT
+ static void Push(EventStorage* storage, const EventDescription* description, int64_t timestampStart); // Used by OPTICK_STORAGE_PUSH
+ static void Pop(EventStorage* storage, int64_t timestampStart); // Storage counterpart of the Push overload above
+
+
+ Event(const EventDescription& description) // Opens the event for the lifetime of this object
+ {
+ data = Start(description);
+ }
+
+ ~Event() // Closes the event, but only if Start() returned a non-null pointer
+ {
+ if (data)
+ Stop(*data);
+ }
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Creates (via EventDescription::Create) the description used by the scoped event macros.
+// The event is labelled with eventName when one is given, otherwise with functionName;
+// the color and the filter mask are both extracted from category.
+OPTICK_INLINE Optick::EventDescription* CreateDescription(const char* functionName, const char* fileName, int fileLine, const char* eventName = nullptr, const ::Optick::Category::Type category = ::Optick::Category::None)
+{
+ const char* label = functionName;
+ if (eventName != nullptr)
+ {
+  label = eventName;
+ }
+
+ const uint32_t categoryColor = ::Optick::Category::GetColor(category);
+ const uint32_t categoryMask = ::Optick::Category::GetMask(category);
+ return ::Optick::EventDescription::Create(label, fileName, (unsigned long)fileLine, categoryColor, categoryMask);
+}
+// Overload without a custom event name: the event is labelled with functionName,
+// and the color and the filter mask are both extracted from category.
+OPTICK_INLINE Optick::EventDescription* CreateDescription(const char* functionName, const char* fileName, int fileLine, const ::Optick::Category::Type category)
+{
+ const uint32_t categoryColor = ::Optick::Category::GetColor(category);
+ const uint32_t categoryMask = ::Optick::Category::GetMask(category);
+ return ::Optick::EventDescription::Create(functionName, fileName, (unsigned long)fileLine, categoryColor, categoryMask);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct OPTICK_API GPUEvent // Scoped GPU event: the constructor calls Start(), the destructor calls Stop().
+{
+ EventData* data; // Result of Start(); may be null, in which case the destructor does nothing
+
+ static EventData* Start(const EventDescription& description); // Defined outside this header
+ static void Stop(EventData& data); // Defined outside this header
+
+ GPUEvent(const EventDescription& description) // Opens the event for the lifetime of this object
+ {
+ data = Start(description);
+ }
+
+ ~GPUEvent() // Closes the event, but only if Start() returned a non-null pointer
+ {
+ if (data)
+ Stop(*data);
+ }
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct OPTICK_API Tag // Static helpers that attach a typed value to an event description (used by OPTICK_TAG).
+{
+ static void Attach(const EventDescription& description, float val);
+ static void Attach(const EventDescription& description, int32_t val);
+ static void Attach(const EventDescription& description, uint32_t val);
+ static void Attach(const EventDescription& description, uint64_t val);
+ static void Attach(const EventDescription& description, float val[3]); // Three-component vector
+ static void Attach(const EventDescription& description, const char* val);
+
+ // Derived: convenience overload built on the float[3] version
+ static void Attach(const EventDescription& description, float x, float y, float z)
+ {
+ float p[3] = { x, y, z }; Attach(description, p); // Packs the components and forwards to Attach(description, float[3])
+ }
+
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Scoped thread registration: the constructor calls RegisterThread() and the
+// destructor calls UnRegisterThread() with the stored keepAlive flag.
+struct ThreadScope
+{
+ // Forwarded to UnRegisterThread() by the destructor.
+ bool keepAlive;
+
+ // name       - thread name passed to RegisterThread().
+ // bKeepAlive - value later handed to UnRegisterThread(); defaults to false.
+ ThreadScope(const char* name, bool bKeepAlive = false) : keepAlive(bKeepAlive)
+ {
+ RegisterThread(name);
+ }
+
+ // Wide-character overload. keepAlive is initialized here as well: previously it
+ // was left indeterminate and the destructor read an uninitialized bool.
+ // The new bKeepAlive parameter is optional, so existing one-argument callers are unaffected.
+ ThreadScope(const wchar_t* name, bool bKeepAlive = false) : keepAlive(bKeepAlive)
+ {
+ RegisterThread(name);
+ }
+
+ ~ThreadScope()
+ {
+ UnRegisterThread(keepAlive);
+ }
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+enum OPTICK_API GPUQueueType // Identifies a GPU queue; values are sequential starting at 0.
+{
+ GPU_QUEUE_GRAPHICS, // Default queue of GPUContext and GPUContextScope
+ GPU_QUEUE_COMPUTE,
+ GPU_QUEUE_TRANSFER,
+ GPU_QUEUE_VSYNC,
+
+ GPU_QUEUE_COUNT, // Number of queue types above (keep last)
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct OPTICK_API GPUContext // Value type bundling a command buffer pointer, a queue type and a node index.
+{
+ void* cmdBuffer; // Type-erased command list/buffer (GPUContextScope stores an ID3D12CommandList* or VkCommandBuffer here)
+ GPUQueueType queue; // Queue the command buffer belongs to
+ int node; // NOTE(review): presumably the GPU node/adapter index - confirm
+ GPUContext(void* c = nullptr, GPUQueueType q = GPU_QUEUE_GRAPHICS, int n = 0) : cmdBuffer(c), queue(q), node(n) {} // Defaults: no command buffer, graphics queue, node 0
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+OPTICK_API void InitGpuD3D12(void* device, void** cmdQueues, uint32_t numQueues);
+OPTICK_API void InitGpuVulkan(void* vkDevices, void* vkPhysicalDevices, void* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues);
+OPTICK_API void GpuFlip(void* swapChain);
+OPTICK_API GPUContext SetGpuContext(GPUContext context);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct OPTICK_API GPUContextScope // Scoped GPU context switch: installs a context on construction and restores the previous one on destruction.
+{
+ GPUContext prevContext; // Context returned by SetGpuContext() in the constructor; re-installed by the destructor
+
+ GPUContextScope(ID3D12CommandList* cmdList, GPUQueueType queue = GPU_QUEUE_GRAPHICS, int node = 0) // D3D12 overload
+ {
+ prevContext = SetGpuContext(GPUContext(cmdList, queue, node));
+ }
+
+ GPUContextScope(VkCommandBuffer cmdBuffer, GPUQueueType queue = GPU_QUEUE_GRAPHICS, int node = 0) // Vulkan overload
+ {
+ prevContext = SetGpuContext(GPUContext(cmdBuffer, queue, node));
+ }
+
+ ~GPUContextScope() // Puts back the context that was active before this scope
+ {
+ SetGpuContext(prevContext);
+ }
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct FrameType // Scope holder for the frame kinds accepted by GetFrameDescription().
+{
+ enum Type // Sequential values starting at 0
+ {
+ CPU, // Used by OPTICK_FRAME
+ GPU,
+ Render,
+ COUNT, // Number of frame types above (keep last)
+ };
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+OPTICK_API const EventDescription* GetFrameDescription(FrameType::Type frame);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+
+#define OPTICK_UNUSED(x) (void)(x)
+// Workaround for gcc compiler
+#define OPTICK_VA_ARGS(...) , ##__VA_ARGS__
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Scoped profiling event which automatically grabs current function name.
+// Use this macro 95% of the time.
+// Example A:
+// void Function()
+// {
+// OPTICK_EVENT();
+// ... code ...
+// }
+// or
+// void Function()
+// {
+// OPTICK_EVENT("CustomFunctionName");
+// ... code ...
+// }
+// Notes:
+// Optick captures full name of the function including name space and arguments.
+// Full name is usually shortened in the Optick GUI in order to highlight the most important bits.
+#define OPTICK_EVENT(...) static ::Optick::EventDescription* OPTICK_CONCAT(autogen_description_, __LINE__) = nullptr; \
+ if (OPTICK_CONCAT(autogen_description_, __LINE__) == nullptr) OPTICK_CONCAT(autogen_description_, __LINE__) = ::Optick::CreateDescription(OPTICK_FUNC, __FILE__, __LINE__ OPTICK_VA_ARGS(__VA_ARGS__)); \
+ ::Optick::Event OPTICK_CONCAT(autogen_event_, __LINE__)( *(OPTICK_CONCAT(autogen_description_, __LINE__)) );
+
+// Backward compatibility with previous versions of Optick
+//#if !defined(PROFILE)
+//#define PROFILE OPTICK_EVENT()
+//#endif
+
+// Scoped profiling macro with predefined color.
+// Use this macro for high-level function calls (e.g. AI, Physics, Audio, Render etc.).
+// Example:
+// void UpdateAI()
+// {
+// OPTICK_CATEGORY("UpdateAI", Optick::Category::AI);
+// ... code ...
+// }
+//
+// Macro could automatically capture current function name:
+// void UpdateAI()
+// {
+// OPTICK_CATEGORY(OPTICK_FUNC, Optick::Category::AI);
+// ... code ...
+// }
+#define OPTICK_CATEGORY(NAME, CATEGORY) OPTICK_EVENT(NAME, CATEGORY)
+
+// Profiling event for Main Loop update.
+// You need to invoke this macro at the beginning of each new frame.
+// Example:
+// while (true)
+// {
+// OPTICK_FRAME("MainThread");
+// ... code ...
+// }
+#define OPTICK_FRAME(FRAME_NAME) static ::Optick::ThreadScope mainThreadScope(FRAME_NAME); \
+ OPTICK_UNUSED(mainThreadScope); \
+ uint32_t frameNumber = ::Optick::NextFrame(); \
+ ::Optick::Event OPTICK_CONCAT(autogen_event_, __LINE__)(*::Optick::GetFrameDescription(::Optick::FrameType::CPU)); \
+ OPTICK_TAG("Frame", frameNumber);
+
+
+// Thread registration macro.
+// Example:
+// void WorkerThread(...)
+// {
+// OPTICK_THREAD("Worker");
+// while (isRunning)
+// {
+// ...
+// }
+// }
+#define OPTICK_THREAD(THREAD_NAME) ::Optick::ThreadScope brofilerThreadScope(THREAD_NAME); \
+ OPTICK_UNUSED(brofilerThreadScope); \
+
+
+// Thread registration macros.
+// Useful for integration with custom job-managers.
+#define OPTICK_START_THREAD(FRAME_NAME) ::Optick::RegisterThread(FRAME_NAME);
+#define OPTICK_STOP_THREAD() ::Optick::UnRegisterThread(false);
+
+// Attaches a custom data-tag.
+// Supported types: float, int32, uint32, uint64, vec3, string (cut to 32 characters)
+// Example:
+// OPTICK_TAG("PlayerName", name[index]);
+// OPTICK_TAG("Health", 100);
+// OPTICK_TAG("Score", 0x80000000u);
+// OPTICK_TAG("Height(cm)", 176.3f);
+// OPTICK_TAG("Address", (uint64)*this);
+// OPTICK_TAG("Position", 123.0f, 456.0f, 789.0f);
+#define OPTICK_TAG(NAME, ...) static ::Optick::EventDescription* OPTICK_CONCAT(autogen_tag_, __LINE__) = nullptr; \
+ if (OPTICK_CONCAT(autogen_tag_, __LINE__) == nullptr) OPTICK_CONCAT(autogen_tag_, __LINE__) = ::Optick::EventDescription::Create( NAME, __FILE__, __LINE__ ); \
+ ::Optick::Tag::Attach(*OPTICK_CONCAT(autogen_tag_, __LINE__), __VA_ARGS__); \
+
+// Scoped macro with DYNAMIC name.
+// Optick holds a copy of the provided name.
+// Each scope does a search in hashmap for the name.
+// Please use variations with STATIC names where it's possible.
+// Use this macro for quick prototyping or integration with other profiling systems (e.g. UE4)
+// Example:
+// const char* name = ... ;
+// OPTICK_EVENT_DYNAMIC(name);
+#define OPTICK_EVENT_DYNAMIC(NAME) OPTICK_CUSTOM_EVENT(::Optick::EventDescription::CreateShared(NAME, __FILE__, __LINE__));
+// Push\Pop profiling macro with DYNAMIC name.
+#define OPTICK_PUSH_DYNAMIC(NAME) ::Optick::Event::Push(NAME);
+
+// Push\Pop profiling macro with STATIC name.
+// Please avoid using Push\Pop approach in favor for scoped macros.
+// For backward compatibility with some engines.
+// Example:
+// OPTICK_PUSH("ScopeName");
+// ...
+// OPTICK_POP();
+#define OPTICK_PUSH(NAME) static ::Optick::EventDescription* OPTICK_CONCAT(autogen_description_, __LINE__) = nullptr; \
+ if (OPTICK_CONCAT(autogen_description_, __LINE__) == nullptr) OPTICK_CONCAT(autogen_description_, __LINE__) = ::Optick::EventDescription::Create( NAME, __FILE__, __LINE__ ); \
+ ::Optick::Event::Push(*OPTICK_CONCAT(autogen_description_, __LINE__));
+#define OPTICK_POP() ::Optick::Event::Pop();
+
+
+// Scoped macro with a predefined Optick::EventDescription.
+// Use these events instead of DYNAMIC macros to minimize overhead.
+// Common use-case: integrating Optick with internal script languages (e.g. Lua, Actionscript(Scaleform), etc.).
+// Example:
+// Generating EventDescription once during initialization:
+// Optick::EventDescription* description = Optick::EventDescription::CreateShared("FunctionName");
+//
+// Then we could just use a pointer to the cached description later for profiling:
+// OPTICK_CUSTOM_EVENT(description);
+// Notes:
+// DESCRIPTION must evaluate to a non-null Optick::EventDescription*; it is dereferenced unconditionally.
+// The argument is parenthesized so that compound expressions (e.g. cond ? a : b) are dereferenced as a whole.
+#define OPTICK_CUSTOM_EVENT(DESCRIPTION) ::Optick::Event OPTICK_CONCAT(autogen_event_, __LINE__)( *(DESCRIPTION) );
+
+// Registration of a custom EventStorage (e.g. GPU, IO, etc.)
+// Use it to present any extra information on the timeline.
+// Example:
+// Optick::EventStorage* IOStorage = Optick::RegisterStorage("I/O");
+// Notes:
+// Registration of a new storage is thread-safe.
+// The macro expands to the ::Optick::RegisterStorage call followed by its own ';'.
+#define OPTICK_STORAGE_REGISTER(STORAGE_NAME) ::Optick::RegisterStorage(STORAGE_NAME);
+
+// Adding events to the custom storage.
+// Helps to integrate Optick into already existing profiling systems (e.g. GPU Profiler, I/O profiler, etc.).
+// Example:
+// //Registering a storage - should be done once during initialization
+// static Optick::EventStorage* IOStorage = Optick::RegisterStorage("I/O");
+//
+// int64_t cpuTimestampStart = Optick::GetHighPrecisionTime();
+// ...
+// int64_t cpuTimestampFinish = Optick::GetHighPrecisionTime();
+//
+// //Creating a shared event-description
+// static Optick::EventDescription* IORead = Optick::EventDescription::CreateShared("IO Read");
+//
+// OPTICK_STORAGE_EVENT(IOStorage, IORead, cpuTimestampStart, cpuTimestampFinish);
+// Notes:
+// It's not thread-safe to add events to the same storage from multiple threads.
+// Please guarantee thread-safety on the higher level if access from multiple threads to the same storage is required.
+// Each macro expands to a braced `if (::Optick::IsActive())` statement, so nothing is recorded while Optick is inactive.
+// OPTICK_STORAGE_EVENT records a complete event; OPTICK_STORAGE_PUSH / OPTICK_STORAGE_POP open and close one.
+#define OPTICK_STORAGE_EVENT(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START, CPU_TIMESTAMP_FINISH) if (::Optick::IsActive()) { ::Optick::Event::Add(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START, CPU_TIMESTAMP_FINISH); }
+#define OPTICK_STORAGE_PUSH(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START) if (::Optick::IsActive()) { ::Optick::Event::Push(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START); }
+#define OPTICK_STORAGE_POP(STORAGE, CPU_TIMESTAMP_FINISH) if (::Optick::IsActive()) { ::Optick::Event::Pop(STORAGE, CPU_TIMESTAMP_FINISH); }
+
+
+// Registers a state-changed callback by forwarding CALLBACK to ::Optick::SetStateChangedCallback.
+// If the callback returns false, the call is repeated on the next frame.
+#define OPTICK_SET_STATE_CHANGED_CALLBACK(CALLBACK) ::Optick::SetStateChangedCallback(CALLBACK);
+
+
+// GPU events
+// Initialization wrappers: the arguments are forwarded unchanged to ::Optick::InitGpuD3D12 / ::Optick::InitGpuVulkan.
+#define OPTICK_GPU_INIT_D3D12(DEVICE, CMD_QUEUES, NUM_CMD_QUEUS) ::Optick::InitGpuD3D12(DEVICE, CMD_QUEUES, NUM_CMD_QUEUS);
+#define OPTICK_GPU_INIT_VULKAN(DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS) ::Optick::InitGpuVulkan(DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS);
+
+// Setup GPU context:
+// Params:
+// (CommandBuffer\CommandList, [Optional] Optick::GPUQueue queue, [Optional] int NodeIndex)
+// Examples:
+// OPTICK_GPU_CONTEXT(cmdBuffer); - all OPTICK_GPU_EVENT will use the same command buffer within the scope
+// OPTICK_GPU_CONTEXT(cmdBuffer, Optick::GPU_QUEUE_COMPUTE); - all events will use the same command buffer and queue for the scope
+// OPTICK_GPU_CONTEXT(cmdBuffer, Optick::GPU_QUEUE_COMPUTE, gpuIndex); - as above, additionally passing the node index
+// Declares a scoped ::Optick::GPUContextScope object; the (void) cast only silences unused-variable warnings.
+#define OPTICK_GPU_CONTEXT(...) ::Optick::GPUContextScope OPTICK_CONCAT(gpu_autogen_context_, __LINE__)(__VA_ARGS__); \
+ (void)OPTICK_CONCAT(gpu_autogen_context_, __LINE__);
+
+// Scoped GPU event: emits a regular OPTICK_EVENT(NAME) and, in addition, a scoped ::Optick::GPUEvent
+// built from a lazily created, per-line cached EventDescription with the same NAME.
+#define OPTICK_GPU_EVENT(NAME) OPTICK_EVENT(NAME); \
+ static ::Optick::EventDescription* OPTICK_CONCAT(gpu_autogen_description_, __LINE__) = nullptr; \
+ if (OPTICK_CONCAT(gpu_autogen_description_, __LINE__) == nullptr) OPTICK_CONCAT(gpu_autogen_description_, __LINE__) = ::Optick::EventDescription::Create( NAME, __FILE__, __LINE__ ); \
+ ::Optick::GPUEvent OPTICK_CONCAT(gpu_autogen_event_, __LINE__)( *(OPTICK_CONCAT(gpu_autogen_description_, __LINE__)) ); \
+
+// Forwards the swap chain to ::Optick::GpuFlip.
+#define OPTICK_GPU_FLIP(SWAP_CHAIN) ::Optick::GpuFlip(SWAP_CHAIN);
+
+#else
+// Profiling disabled: every macro expands to nothing.
+// Each stub must accept the same argument lists as its enabled counterpart, otherwise code that
+// compiles with profiling on stops compiling with profiling off.
+#define OPTICK_EVENT(...)
+#define OPTICK_CATEGORY(NAME, COLOR)
+#define OPTICK_FRAME(NAME)
+#define OPTICK_THREAD(FRAME_NAME)
+#define OPTICK_START_THREAD(FRAME_NAME)
+#define OPTICK_STOP_THREAD()
+// Variadic like the enabled version, so multi-value tags such as
+// OPTICK_TAG("Position", 123.0f, 456.0f, 789.0f) still compile.
+#define OPTICK_TAG(NAME, ...)
+#define OPTICK_EVENT_DYNAMIC(NAME)
+#define OPTICK_PUSH_DYNAMIC(NAME)
+#define OPTICK_PUSH(NAME)
+#define OPTICK_POP()
+#define OPTICK_CUSTOM_EVENT(DESCRIPTION)
+#define OPTICK_STORAGE_REGISTER(STORAGE_NAME)
+#define OPTICK_STORAGE_EVENT(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START, CPU_TIMESTAMP_FINISH)
+#define OPTICK_STORAGE_PUSH(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START)
+#define OPTICK_STORAGE_POP(STORAGE, CPU_TIMESTAMP_FINISH)
+#define OPTICK_SET_STATE_CHANGED_CALLBACK(CALLBACK)
+#define OPTICK_GPU_INIT_D3D12(DEVICE, CMD_QUEUES, NUM_CMD_QUEUS)
+#define OPTICK_GPU_INIT_VULKAN(DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS)
+#define OPTICK_GPU_CONTEXT(...)
+#define OPTICK_GPU_EVENT(NAME)
+#define OPTICK_GPU_FLIP(SWAP_CHAIN)
+#endif
diff --git a/external/optick/optick_common.h b/external/optick/optick_common.h
new file mode 100644
index 0000000..4468911
--- /dev/null
+++ b/external/optick/optick_common.h
@@ -0,0 +1,142 @@
+#pragma once
+
+#include "optick.config.h"
+
+#if USE_OPTICK
+
+#include "optick.h"
+
+#include <cstdio>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#if defined(OPTICK_MSVC)
+// Trim <windows.h> and keep it from defining the min/max macros.
+// Both switches are guarded so that a project which already defines them
+// (e.g. on the compiler command line) does not get a macro-redefinition warning.
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#endif
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Types
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Short aliases for fixed-width integer types; the static_asserts below verify every size at compile time.
+typedef signed char int8;
+typedef unsigned char uint8;
+typedef unsigned char byte;
+typedef short int16;
+typedef unsigned short uint16;
+typedef int int32;
+typedef unsigned int uint32;
+// 64-bit aliases: compiler intrinsics on MSVC, <stdint.h> types on GCC-compatible compilers.
+#if defined(OPTICK_MSVC)
+typedef __int64 int64;
+typedef unsigned __int64 uint64;
+#elif defined(OPTICK_GCC)
+typedef int64_t int64;
+typedef uint64_t uint64;
+#else
+#error Compiler is not supported
+#endif
+// Compile-time size checks for the aliases above.
+static_assert(sizeof(int8) == 1, "Invalid type size, int8");
+static_assert(sizeof(uint8) == 1, "Invalid type size, uint8");
+static_assert(sizeof(byte) == 1, "Invalid type size, byte");
+static_assert(sizeof(int16) == 2, "Invalid type size, int16");
+static_assert(sizeof(uint16) == 2, "Invalid type size, uint16");
+static_assert(sizeof(int32) == 4, "Invalid type size, int32");
+static_assert(sizeof(uint32) == 4, "Invalid type size, uint32");
+static_assert(sizeof(int64) == 8, "Invalid type size, int64");
+static_assert(sizeof(uint64) == 8, "Invalid type size, uint64");
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Identifier types for threads and processes.
+// The all-ones value of each type (-1 converted to the unsigned type) is reserved as the "invalid" id.
+typedef uint64 ThreadID;
+static const ThreadID INVALID_THREAD_ID = static_cast<ThreadID>(-1);
+typedef uint32 ProcessID;
+static const ProcessID INVALID_PROCESS_ID = static_cast<ProcessID>(-1);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Memory
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// OPTICK_ALIGN(N): compiler-specific spelling of an N-byte alignment requirement.
+#if defined(OPTICK_MSVC)
+#define OPTICK_ALIGN(N) __declspec( align( N ) )
+#elif defined(OPTICK_GCC)
+#define OPTICK_ALIGN(N) __attribute__((aligned(N)))
+#else
+#error Can not define OPTICK_ALIGN. Unknown platform.
+#endif
+// Cache-line size assumed by Optick (64 bytes) and the matching alignment shortcut.
+#define OPTICK_CACHE_LINE_SIZE 64
+#define OPTICK_ALIGN_CACHE OPTICK_ALIGN(OPTICK_CACHE_LINE_SIZE)
+// Element count of a C array, computed as total size / size of element 0. Passing a pointer gives a meaningless result.
+#define OPTICK_ARRAY_SIZE(ARR) (sizeof(ARR)/sizeof((ARR)[0]))
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// OPTICK_NOINLINE: compiler-specific attribute that forbids inlining of the annotated function.
+#if defined(OPTICK_MSVC)
+#define OPTICK_NOINLINE __declspec(noinline)
+#elif defined(OPTICK_GCC)
+#define OPTICK_NOINLINE __attribute__((__noinline__))
+#else
+#error Compiler is not supported
+#endif
+////////////////////////////////////////////////////////////////////////
+// OPTICK_THREAD_LOCAL
+// Compiler-specific thread-local storage specifier (__declspec(thread) on MSVC, __thread on GCC-compatible compilers).
+////////////////////////////////////////////////////////////////////////
+#if defined(OPTICK_MSVC)
+#define OPTICK_THREAD_LOCAL __declspec(thread)
+#elif defined(OPTICK_GCC)
+#define OPTICK_THREAD_LOCAL __thread
+#else
+#error Can not define OPTICK_THREAD_LOCAL. Unknown platform.
+#endif
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Asserts
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// OPTICK_DEBUG_BREAK: stops execution at the call site (__debugbreak on MSVC, __builtin_trap on GCC-compatible compilers).
+#if defined(OPTICK_MSVC)
+#define OPTICK_DEBUG_BREAK __debugbreak()
+#elif defined(OPTICK_GCC)
+#define OPTICK_DEBUG_BREAK __builtin_trap()
+#else
+ #error Can not define OPTICK_DEBUG_BREAK. Unknown platform.
+#endif
+// Marks a variable or parameter as intentionally unused.
+#define OPTICK_UNUSED(x) (void)(x)
+// OPTICK_ASSERT / OPTICK_FAILED break only when _DEBUG is defined; otherwise they expand to nothing,
+// so their arguments are not evaluated. The description argument is never used by the expansion.
+#ifdef _DEBUG
+ #define OPTICK_ASSERT(arg, description) if (!(arg)) { OPTICK_DEBUG_BREAK; }
+ #define OPTICK_FAILED(description) { OPTICK_DEBUG_BREAK; }
+#else
+ #define OPTICK_ASSERT(arg, description)
+ #define OPTICK_FAILED(description)
+#endif
+// OPTICK_VERIFY is active in every configuration: when arg is false it breaks and then runs `operation`
+// (typically an early return).
+#define OPTICK_VERIFY(arg, description, operation) if (!(arg)) { OPTICK_DEBUG_BREAK; operation; }
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Safe functions
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#if defined(OPTICK_LINUX) || defined(OPTICK_OSX)
+// Array-reference flavour of sprintf_s for platforms that lack it.
+// The destination size is deduced from the array type and handed to vsnprintf(), whose
+// result is returned unchanged.
+template<size_t sizeOfBuffer>
+inline int sprintf_s(char(&buffer)[sizeOfBuffer], const char* format, ...)
+{
+	va_list args;
+	va_start(args, format);
+	const int written = vsnprintf(buffer, sizeOfBuffer, format, args);
+	va_end(args);
+	return written;
+}
+#endif
+
+#if defined(OPTICK_GCC)
+// Array-reference flavour of wcstombs_s for GCC-compatible compilers, built on wcstombs().
+// Converts at most maxCount bytes of src into buffer and returns wcstombs()'s result
+// ((size_t)-1 converted to int when src contains an unconvertible character).
+// Unlike a bare wcstombs() call, the conversion never writes past the destination array and
+// the result is always NUL-terminated.
+template<size_t sizeOfBuffer>
+inline int wcstombs_s(char(&buffer)[sizeOfBuffer], const wchar_t* src, size_t maxCount)
+{
+	// Reserve one byte for the terminator and never trust maxCount beyond the array size.
+	const size_t limit = maxCount < sizeOfBuffer - 1 ? maxCount : sizeOfBuffer - 1;
+	const size_t written = wcstombs(buffer, src, limit);
+
+	if (written == (size_t)-1)
+		buffer[0] = '\0'; // conversion failed: leave an empty string rather than partial output
+	else
+		buffer[written < limit ? written : limit] = '\0'; // wcstombs() omits the terminator when truncating
+
+	return (int)written;
+}
+#endif
+
+#if defined(OPTICK_MSVC)
+// Convenience overload for MSVC: forwards to the CRT wcstombs_s and returns its status code.
+template<size_t sizeOfBuffer>
+inline int wcstombs_s(char(&buffer)[sizeOfBuffer], const wchar_t* src, size_t maxCount)
+{
+	size_t convertedCount = 0; // required by the CRT call; not reported to the caller
+	const int status = wcstombs_s(&convertedCount, buffer, src, maxCount);
+	return status;
+}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#endif //USE_OPTICK \ No newline at end of file
diff --git a/external/optick/optick_core.cpp b/external/optick/optick_core.cpp
new file mode 100644
index 0000000..1d533d0
--- /dev/null
+++ b/external/optick/optick_core.cpp
@@ -0,0 +1,1657 @@
+#include "optick.config.h"
+
+#if USE_OPTICK
+
+#include "optick_core.h"
+#include "optick_server.h"
+
+#include <algorithm>
+#include <fstream>
+
+//////////////////////////////////////////////////////////////////////////
+// Start of the Platform-specific stuff
+//////////////////////////////////////////////////////////////////////////
+#if defined(OPTICK_MSVC)
+#include "optick_core.win.h"
+#endif
+#if defined(OPTICK_LINUX)
+#include "optick_core.linux.h"
+#endif
+#if defined(OPTICK_OSX)
+#include "optick_core.macos.h"
+#endif
+#if defined(OPTICK_PS4)
+#include "optick_core.ps4.h"
+#endif
+//////////////////////////////////////////////////////////////////////////
+// End of the Platform-specific stuff
+//////////////////////////////////////////////////////////////////////////
+
+// C-linkage accessor: returns the next event slot of the storage referenced by
+// Optick::Core::storage, or nullptr when no storage is set.
+extern "C" Optick::EventData* NextEvent()
+{
+	Optick::EventStorage* const current = Optick::Core::storage;
+	return current ? &current->NextEvent() : nullptr;
+}
+
+namespace Optick
+{
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Allocation hooks, defaulting to the global operator new / operator delete.
+void* (*Memory::allocate)(size_t) = operator new;
+void (*Memory::deallocate)(void* p) = operator delete;
+// Allocation counter; presumably updated by the Memory helpers declared elsewhere - TODO confirm.
+std::atomic<uint64_t> Memory::memAllocated;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// MurmurHash64A: 64-bit hash of the len bytes starting at key, mixed with seed.
+// The input is consumed as 8-byte words (native byte order) followed by a 0-7 byte tail.
+uint64_t MurmurHash64A(const void * key, int len, uint64_t seed)
+{
+	const uint64_t m = 0xc6a4a7935bd1e995;
+	const int r = 47;
+
+	uint64_t h = seed ^ (len * m);
+
+	const unsigned char* data = (const unsigned char*)key;
+	const unsigned char* const end = data + (len / 8) * 8;
+
+	while (data != end)
+	{
+		// Load through memcpy instead of dereferencing a casted uint64_t*: key has no alignment
+		// guarantee (StringHash::CalcHash passes plain char strings), and the copy produces the
+		// same value as the direct read on platforms where that read worked.
+		uint64_t k;
+		memcpy(&k, data, sizeof(k));
+		data += sizeof(k);
+
+		k *= m;
+		k ^= k >> r;
+		k *= m;
+
+		h ^= k;
+		h *= m;
+	}
+
+	// Tail: every case deliberately falls through to fold in all remaining bytes.
+	switch (len & 7)
+	{
+	case 7: h ^= uint64_t(data[6]) << 48; // fall through
+	case 6: h ^= uint64_t(data[5]) << 40; // fall through
+	case 5: h ^= uint64_t(data[4]) << 32; // fall through
+	case 4: h ^= uint64_t(data[3]) << 24; // fall through
+	case 3: h ^= uint64_t(data[2]) << 16; // fall through
+	case 2: h ^= uint64_t(data[1]) << 8; // fall through
+	case 1: h ^= uint64_t(data[0]);
+		h *= m;
+	}
+
+	// Final avalanche.
+	h ^= h >> r;
+	h *= m;
+	h ^= h >> r;
+
+	return h;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Hashes a NUL-terminated string with MurmurHash64A using seed 0; the terminator itself is not hashed.
+uint64_t StringHash::CalcHash(const char* str)
+{
+	const int length = (int)strlen(str);
+	return MurmurHash64A(str, length, 0);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Base 64
+// https://renenyffenegger.ch/notes/development/Base64/Encoding-and-decoding-base-64-with-cpp
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// True when c is accepted as a base64 symbol here: '+', '/', or anything isalnum() accepts.
+static inline bool is_base64(unsigned char c) {
+	if (c == '+' || c == '/')
+		return true;
+	return isalnum(c) != 0;
+}
+// Decodes a base64 string. Decoding stops at the first '=' or at the first character that
+// is_base64() rejects; a trailing partial group of k symbols contributes k - 1 bytes.
+string base64_decode(string const& encoded_string) {
+	static string base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+	unsigned char quad[4], triple[3];
+	string decoded;
+	int filled = 0;
+
+	const int total = (int)encoded_string.size();
+	for (int pos = 0; pos < total; ++pos) {
+		if (encoded_string[pos] == '=' || !is_base64(encoded_string[pos]))
+			break;
+
+		quad[filled++] = encoded_string[pos];
+		if (filled < 4)
+			continue;
+
+		// A full group of four symbols: map symbols to 6-bit values, then repack into three bytes.
+		for (int k = 0; k < 4; k++)
+			quad[k] = (unsigned char)base64_chars.find(quad[k]);
+
+		triple[0] = (quad[0] << 2) + ((quad[1] & 0x30) >> 4);
+		triple[1] = ((quad[1] & 0xf) << 4) + ((quad[2] & 0x3c) >> 2);
+		triple[2] = ((quad[2] & 0x3) << 6) + quad[3];
+
+		for (int k = 0; k < 3; k++)
+			decoded += triple[k];
+		filled = 0;
+	}
+
+	if (filled != 0) {
+		// Partial trailing group: zero the unused slots before the lookup, exactly as the
+		// full-group path would see them, then emit only the bytes that are fully defined.
+		for (int k = filled; k < 4; k++)
+			quad[k] = 0;
+
+		for (int k = 0; k < 4; k++)
+			quad[k] = (unsigned char)base64_chars.find(quad[k]);
+
+		triple[0] = (quad[0] << 2) + ((quad[1] & 0x30) >> 4);
+		triple[1] = ((quad[1] & 0xf) << 4) + ((quad[2] & 0x3c) >> 2);
+		triple[2] = ((quad[2] & 0x3) << 6) + quad[3];
+
+		for (int k = 0; k < filled - 1; k++)
+			decoded += triple[k];
+	}
+
+	return decoded;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Get current time in milliseconds
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Current Platform::GetTime() value converted from ticks to milliseconds.
+int64 GetTimeMilliSeconds()
+{
+	const int64 ticks = Platform::GetTime();
+	const int64 frequency = Platform::GetFrequency();
+	// Convert whole seconds and the sub-second remainder separately: "ticks * 1000" can exceed
+	// the int64 range for large tick counts, while the remainder is always below frequency.
+	return (ticks / frequency) * 1000 + (ticks % frequency) * 1000 / frequency;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Converts a tick count (in Platform::GetFrequency() units per second) to milliseconds, truncating.
+int64 TicksToMs(int64 ticks)
+{
+	const int64 frequency = Platform::GetFrequency();
+	// Split into whole seconds and remainder so that the multiplication cannot overflow int64
+	// for large tick counts; the result equals ticks * 1000 / frequency whenever that fits.
+	return (ticks / frequency) * 1000 + (ticks % frequency) * 1000 / frequency;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Converts a tick count (in Platform::GetFrequency() units per second) to microseconds, truncating.
+int64 TicksToUs(int64 ticks)
+{
+	const int64 frequency = Platform::GetFrequency();
+	// "ticks * 1000000" overflows int64 once ticks exceeds roughly 9.2e12, so convert the whole
+	// seconds and the sub-second remainder separately; the result is otherwise identical.
+	return (ticks / frequency) * 1000000 + (ticks % frequency) * 1000000 / frequency;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a tag as: timestamp, index of its event description, payload.
+template<class T>
+OutputDataStream& operator<<(OutputDataStream& stream, const TagData<T>& ob)
+{
+	stream << ob.timestamp << ob.description->index;
+	return stream << ob.data;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a symbol as: address, function, file, line.
+// A null pointer trips OPTICK_VERIFY and returns the stream without writing anything.
+OutputDataStream& operator<<(OutputDataStream& os, const Symbol * const symbol)
+{
+	OPTICK_VERIFY(symbol, "Can't serialize NULL symbol!", return os);
+
+	os << symbol->address << symbol->function;
+	return os << symbol->file << symbol->line;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a module as: path, address and size (both widened to uint64).
+OutputDataStream& operator<<(OutputDataStream& os, const Module& module)
+{
+	const uint64 baseAddress = (uint64)module.address;
+	const uint64 byteSize = (uint64)module.size;
+	return os << module.path << baseAddress << byteSize;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// VS TODO: Replace with random access iterator for MemoryPool
+// Sorts the items of a MemoryPool with std::sort (operator<): the items are copied into a
+// temporary array, sorted there, and added back to the cleared pool in order.
+template<class T, uint32 SIZE>
+void SortMemoryPool(MemoryPool<T, SIZE>& memoryPool)
+{
+	const size_t itemCount = memoryPool.Size();
+	if (itemCount > 0)
+	{
+		vector<T> sortedItems;
+		sortedItems.resize(itemCount);
+		memoryPool.ToArray(sortedItems.data());
+
+		std::sort(sortedItems.begin(), sortedItems.end());
+
+		memoryPool.Clear(true);
+
+		for (typename vector<T>::const_iterator it = sortedItems.begin(); it != sortedItems.end(); ++it)
+			memoryPool.Add(*it);
+	}
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Creates a new description on the global EventDescriptionBoard; every call adds a new entry.
+EventDescription* EventDescription::Create(const char* eventName, const char* fileName, const unsigned long fileLine, const unsigned long eventColor /*= Color::Null*/, const unsigned long filter /*= 0*/)
+{
+	EventDescriptionBoard& board = EventDescriptionBoard::Get();
+	return board.CreateDescription(eventName, fileName, fileLine, eventColor, filter);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Returns the shared description registered for eventName on the global EventDescriptionBoard,
+// creating it on first use.
+EventDescription* EventDescription::CreateShared(const char* eventName, const char* fileName, const unsigned long fileLine, const unsigned long eventColor /*= Color::Null*/, const unsigned long filter /*= 0*/)
+{
+	EventDescriptionBoard& board = EventDescriptionBoard::Get();
+	return board.CreateSharedDescription(eventName, fileName, fileLine, eventColor, filter);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Default state: empty name and file strings, line 0, color 0.
+// No other member is initialized here.
+EventDescription::EventDescription()
+	: name("")
+	, file("")
+	, line(0)
+	, color(0)
+{
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copy-assignment is not meaningful for descriptions: it copies nothing, reports the misuse
+// through OPTICK_FAILED, and returns the untouched object.
+EventDescription& EventDescription::operator=(const EventDescription&)
+{
+	OPTICK_FAILED("It is pointless to copy EventDescription. Please, check you logic!");
+	return *this;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Opens an event for the given description in Core::storage.
+// Returns the started event record, or nullptr when no storage is set.
+EventData* Event::Start(const EventDescription& description)
+{
+	EventStorage* const storage = Core::storage;
+	if (!storage)
+		return nullptr;
+
+	EventData& slot = storage->NextEvent();
+	slot.description = &description;
+	slot.Start();
+	return &slot;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Closes an event record; does nothing when Core::storage is not set.
+void Event::Stop(EventData& data)
+{
+	EventStorage* const storage = Core::storage;
+	if (!storage)
+		return;
+
+	data.Stop();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Opens an event in pStorage with an explicit start timestamp and remembers it on the
+// storage's push/pop stack so that PopEvent can close it later. A null storage is ignored.
+void OPTICK_INLINE PushEvent(EventStorage* pStorage, const EventDescription* description, int64_t timestampStart)
+{
+	if (!pStorage)
+		return;
+
+	EventData& opened = pStorage->NextEvent();
+	opened.description = description;
+	opened.start = timestampStart;
+	opened.finish = EventTime::INVALID_TIMESTAMP;
+
+	// NOTE(review): the stack index is not range-checked here - confirm nesting depth is bounded by callers.
+	pStorage->pushPopEventStack[pStorage->pushPopEventStackIndex] = &opened;
+	++pStorage->pushPopEventStackIndex;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Closes the most recently pushed event of pStorage by writing its finish timestamp.
+// A null storage or an empty push/pop stack is ignored.
+void OPTICK_INLINE PopEvent(EventStorage* pStorage, int64_t timestampFinish)
+{
+	if (pStorage && pStorage->pushPopEventStackIndex > 0)
+	{
+		--pStorage->pushPopEventStackIndex;
+		pStorage->pushPopEventStack[pStorage->pushPopEventStackIndex]->finish = timestampFinish;
+	}
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Push with a dynamic name: resolves (or creates) the shared description for name and pushes it.
+// Skipped entirely - including the description lookup - when Core::storage is not set.
+void Event::Push(const char* name)
+{
+	EventStorage* const storage = Core::storage;
+	if (!storage)
+		return;
+
+	Push(*EventDescription::CreateShared(name));
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Opens an event for description in Core::storage, stamped with the current high-precision time.
+void Event::Push(const EventDescription& description)
+{
+	const int64_t now = GetHighPrecisionTime();
+	PushEvent(Core::storage, &description, now);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Closes the most recently pushed event of Core::storage at the current high-precision time.
+void Event::Pop()
+{
+	const int64_t now = GetHighPrecisionTime();
+	PopEvent(Core::storage, now);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Appends a complete event (start and finish already known) to the storage's event buffer.
+// storage is dereferenced without a null check.
+void Event::Add(EventStorage* storage, const EventDescription* description, int64_t timestampStart, int64_t timestampFinish)
+{
+	EventData& record = storage->eventBuffer.Add();
+	record.start = timestampStart;
+	record.finish = timestampFinish;
+	record.description = description;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Opens an event in an explicit storage with a caller-supplied start timestamp (forwards to PushEvent).
+void Event::Push(EventStorage* storage, const EventDescription* description, int64_t timestampStart)
+{
+ PushEvent(storage, description, timestampStart);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Closes the last pushed event of an explicit storage with a caller-supplied finish timestamp (forwards to PopEvent).
+void Event::Pop(EventStorage* storage, int64_t timestampFinish)
+{
+ PopEvent(storage, timestampFinish);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Starts a GPU event through the gpuStorage of Core::storage; returns nullptr when no storage is set.
+EventData* GPUEvent::Start(const EventDescription& description)
+{
+	EventStorage* const storage = Core::storage;
+	if (!storage)
+		return nullptr;
+
+	return storage->gpuStorage.Start(description);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Stops a GPU event through the gpuStorage of Core::storage; does nothing when no storage is set.
+void GPUEvent::Stop(EventData& data)
+{
+	EventStorage* const storage = Core::storage;
+	if (!storage)
+		return;
+
+	storage->gpuStorage.Stop(data);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Records that the fiber owning storage starts running on threadId: appends a started sync
+// record whose finish timestamp stays invalid until DetachFromThread. Null storage is ignored.
+void FiberSyncData::AttachToThread(EventStorage* storage, uint64_t threadId)
+{
+	if (!storage)
+		return;
+
+	FiberSyncData& entry = storage->fiberSyncBuffer.Add();
+	entry.Start();
+	entry.finish = EventTime::INVALID_TIMESTAMP;
+	entry.threadId = threadId;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Stops the most recent fiber sync record of storage, if there is one. Null storage is ignored.
+void FiberSyncData::DetachFromThread(EventStorage* storage)
+{
+	if (!storage)
+		return;
+
+	FiberSyncData* const latest = storage->fiberSyncBuffer.Back();
+	if (latest)
+		latest->Stop();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Records a float tag in Core::storage; skipped when no storage is set or Mode::TAGS is off.
+void Tag::Attach(const EventDescription& description, float val)
+{
+	EventStorage* const storage = Core::storage;
+	if (!storage || !(storage->currentMode & Mode::TAGS))
+		return;
+
+	storage->tagFloatBuffer.Add(TagFloat(description, val));
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Records a signed 32-bit tag in Core::storage; skipped when no storage is set or Mode::TAGS is off.
+void Tag::Attach(const EventDescription& description, int32_t val)
+{
+	EventStorage* const storage = Core::storage;
+	if (!storage || !(storage->currentMode & Mode::TAGS))
+		return;
+
+	storage->tagS32Buffer.Add(TagS32(description, val));
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Records an unsigned 32-bit tag in Core::storage; skipped when no storage is set or Mode::TAGS is off.
+void Tag::Attach(const EventDescription& description, uint32_t val)
+{
+	EventStorage* const storage = Core::storage;
+	if (!storage || !(storage->currentMode & Mode::TAGS))
+		return;
+
+	storage->tagU32Buffer.Add(TagU32(description, val));
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Records an unsigned 64-bit tag in Core::storage; skipped when no storage is set or Mode::TAGS is off.
+void Tag::Attach(const EventDescription& description, uint64_t val)
+{
+	EventStorage* const storage = Core::storage;
+	if (!storage || !(storage->currentMode & Mode::TAGS))
+		return;
+
+	storage->tagU64Buffer.Add(TagU64(description, val));
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Records a point tag (array of three floats) in Core::storage; skipped when no storage is set or Mode::TAGS is off.
+void Tag::Attach(const EventDescription& description, float val[3])
+{
+	EventStorage* const storage = Core::storage;
+	if (!storage || !(storage->currentMode & Mode::TAGS))
+		return;
+
+	storage->tagPointBuffer.Add(TagPoint(description, val));
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Records a string tag in Core::storage; skipped when no storage is set or Mode::TAGS is off.
+void Tag::Attach(const EventDescription& description, const char* val)
+{
+	EventStorage* const storage = Core::storage;
+	if (!storage || !(storage->currentMode & Mode::TAGS))
+		return;
+
+	storage->tagStringBuffer.Add(TagString(description, val));
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a description as: name, file, line, filter, color, a float that is always 0
+// and a flags byte that is always 0.
+OutputDataStream & operator<<(OutputDataStream &stream, const EventDescription &ob)
+{
+	byte flags = 0;
+
+	stream << ob.name << ob.file << ob.line;
+	stream << ob.filter << ob.color;
+	return stream << (float)0.0f << flags;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a time range as: start, finish.
+OutputDataStream& operator<<(OutputDataStream& stream, const EventTime& ob)
+{
+	stream << ob.start;
+	return stream << ob.finish;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes an event as its time range followed by the index of its description;
+// (uint32)-1 is written when the event has no description.
+OutputDataStream& operator<<(OutputDataStream& stream, const EventData& ob)
+{
+	const uint32 descriptionIndex = ob.description ? ob.description->index : (uint32)-1;
+	return stream << (EventTime)(ob) << descriptionIndex;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a sync record as: time range, core, reason, newThreadId.
+OutputDataStream& operator<<(OutputDataStream& stream, const SyncData& ob)
+{
+	stream << (EventTime)(ob);
+	stream << ob.core << ob.reason;
+	return stream << ob.newThreadId;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a fiber sync record as: time range, threadId.
+OutputDataStream& operator<<(OutputDataStream& stream, const FiberSyncData& ob)
+{
+	stream << (EventTime)(ob);
+	return stream << ob.threadId;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Mutex guarding the description board.
+// Kept as a function-local static so it is constructed on first use, which avoids
+// static-initialization-order problems.
+static std::mutex& GetBoardLock()
+{
+	static std::mutex boardMutex;
+	return boardMutex;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Accessor for the single board instance, constructed on first call.
+EventDescriptionBoard& EventDescriptionBoard::Get()
+{
+	static EventDescriptionBoard board;
+	return board;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Read-only access to the list of registered descriptions. No lock is taken here.
+const EventDescriptionList& EventDescriptionBoard::GetEvents() const
+{
+ return boardDescriptions;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Appends a new description to the board under the board lock.
+// The description's index is the board size before the insertion; name and file pointers are
+// stored as given (not copied).
+EventDescription* EventDescriptionBoard::CreateDescription(const char* name, const char* file /*= nullptr*/, uint32_t line /*= 0*/, uint32_t color /*= Color::Null*/, uint32_t filter /*= 0*/)
+{
+	std::lock_guard<std::mutex> guard(GetBoardLock());
+
+	const size_t nextIndex = boardDescriptions.Size();
+
+	EventDescription& created = boardDescriptions.Add();
+	created.name = name;
+	created.file = file;
+	created.line = line;
+	created.color = color;
+	created.filter = filter;
+	created.index = (uint32)nextIndex;
+
+	return &created;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Returns the description shared by all callers using the same name.
+// Lookup is keyed by the StringHash of name. On first use the name is copied into sharedNames and
+// a description is created from that copy; on later calls the cached description is returned and
+// the file/line/color/filter arguments are ignored.
+EventDescription* EventDescriptionBoard::CreateSharedDescription(const char* name, const char* file /*= nullptr*/, uint32_t line /*= 0*/, uint32_t color /*= Color::Null*/, uint32_t filter /*= 0*/)
+{
+	StringHash nameHash(name);
+
+	std::lock_guard<std::mutex> guard(sharedLock);
+
+	std::pair<DescriptionMap::iterator, bool> slot = sharedDescriptions.insert({ nameHash, nullptr });
+	if (!slot.second)
+		return slot.first->second;
+
+	// Newly inserted entry: keep our own copy of the name, including the terminator.
+	const char* ownedName = sharedNames.Add(name, strlen(name) + 1, false);
+	slot.first->second = CreateDescription(ownedName, file, line, color, filter);
+	return slot.first->second;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes every description registered on the board while holding the board lock.
+OutputDataStream& operator << (OutputDataStream& stream, const EventDescriptionBoard& ob)
+{
+	std::lock_guard<std::mutex> guard(GetBoardLock());
+
+	const EventDescriptionList& descriptions = ob.GetEvents();
+	stream << descriptions;
+
+	return stream;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Stores the process name, process id and unique key as given.
+ProcessDescription::ProcessDescription(const char* processName, ProcessID pid, uint64 key)
+	: name(processName)
+	, processID(pid)
+	, uniqueKey(key)
+{
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Stores the thread name, thread/process ids, maximum depth, priority and mask as given.
+ThreadDescription::ThreadDescription(const char* threadName, ThreadID tid, ProcessID pid, int32 _maxDepth /*= 1*/, int32 _priority /*= 0*/, uint32 _mask /*= 0*/)
+	: name(threadName)
+	, threadID(tid)
+	, processID(pid)
+	, maxDepth(_maxDepth)
+	, priority(_priority)
+	, mask(_mask)
+{
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Current timestamp in platform ticks (Platform::GetTime()); see GetHighPrecisionFrequency() for ticks per second.
+int64_t GetHighPrecisionTime()
+{
+ return Platform::GetTime();
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Frequency reported by Platform::GetFrequency(); this file divides tick counts by it to obtain seconds.
+int64_t GetHighPrecisionFrequency()
+{
+ return Platform::GetFrequency();
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a syscall record as its EventData part followed by threadID and id.
+OutputDataStream & operator<<(OutputDataStream &stream, const SysCallData &ob)
+{
+	stream << (const EventData&)ob;
+	return stream << ob.threadID << ob.id;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Appends a new record to the syscall pool and returns it for the caller to fill in.
+SysCallData& SysCallCollector::Add()
+{
+ return syscallPool.Add();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Discards all collected syscall records (pool cleared with the `false` flag).
+void SysCallCollector::Clear()
+{
+ syscallPool.Clear(false);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Writes the syscall pool to the stream (even when it is empty) and then clears it.
+// Returns true when the pool held at least one record, false otherwise.
+bool SysCallCollector::Serialize(OutputDataStream& stream)
+{
+	stream << syscallPool;
+
+	const bool hadRecords = !syscallPool.IsEmpty();
+	if (hadRecords)
+		syscallPool.Clear(false);
+
+	return hadRecords;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Appends one callstack to the pool as a flat run of uint64 values:
+//   [threadID, timestamp, count, frame(count-1), ..., frame(0)]
+// i.e. the frames of `desc.callstack` are stored in reverse order.
+// A single block is requested via TryAdd first; if that is refused, the same
+// values are appended one element at a time.
+void CallstackCollector::Add(const CallstackDesc& desc)
+{
+    uint64* block = callstacksPool.TryAdd(desc.count + 3);
+    if (block != nullptr)
+    {
+        block[0] = desc.threadID;
+        block[1] = desc.timestamp;
+        block[2] = desc.count;
+
+        for (uint64 frame = 0; frame < desc.count; ++frame)
+            block[3 + frame] = desc.callstack[desc.count - frame - 1];
+
+        return;
+    }
+
+    // Element-by-element path: reserve the three header slots first, then fill them.
+    uint64& threadSlot = callstacksPool.Add();
+    uint64& timestampSlot = callstacksPool.Add();
+    uint64& countSlot = callstacksPool.Add();
+
+    threadSlot = desc.threadID;
+    timestampSlot = desc.timestamp;
+    countSlot = desc.count;
+
+    for (uint64 frame = 0; frame < desc.count; ++frame)
+    {
+        uint64& frameSlot = callstacksPool.Add();
+        frameSlot = desc.callstack[desc.count - frame - 1];
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Drops all collected callstacks (the pool is cleared with the `false` flag).
+void CallstackCollector::Clear()
+{
+    const bool clearFlag = false;
+    callstacksPool.Clear(clearFlag);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Writes the module list of the Core's symbol engine to `stream`.
+// When no symbol engine is present, a single int 0 is written instead and false is returned.
+bool CallstackCollector::SerializeModules(OutputDataStream& stream)
+{
+    SymbolEngine* engine = Core::Get().symbolEngine;
+    if (engine == nullptr)
+    {
+        stream << (int)0;
+        return false;
+    }
+
+    stream << engine->GetModules();
+    return true;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Collects the distinct addresses referenced by the stored callstacks, resolves them
+// through the Core's symbol engine (if one exists) and writes the resolved symbols to `stream`.
+//
+// The pool is read as consecutive records of [threadID, timestamp, count, address * count];
+// only the low 8 bits of the count field are used. Within one record, a zero address
+// makes that address and every following address of the same record be ignored.
+//
+// Always returns true; when there is no symbol engine an empty symbol list is written.
+bool CallstackCollector::SerializeSymbols(OutputDataStream& stream)
+{
+    unordered_set<uint64> symbolSet;
+
+    for (CallstacksPool::const_iterator it = callstacksPool.begin(); it != callstacksPool.end();)
+    {
+        ++it; // Skip ThreadID
+        ++it; // Skip Timestamp
+        const uint64 count = (*it & 0xFF);
+        ++it; // Skip Count
+
+        bool isBadAddrFound = false;
+
+        for (uint64 i = 0; i < count; ++i)
+        {
+            const uint64 address = *it;
+            ++it;
+
+            if (address == 0)
+                isBadAddrFound = true;
+
+            if (!isBadAddrFound)
+                symbolSet.insert(address);
+        }
+    }
+
+    SymbolEngine* symEngine = Core::Get().symbolEngine;
+
+    vector<const Symbol*> symbols;
+    symbols.reserve(symbolSet.size());
+
+    Core::Get().DumpProgress("Resolving addresses... ");
+
+    if (symEngine)
+    {
+        for (const uint64 address : symbolSet)
+        {
+            // Addresses the engine cannot resolve are silently left out.
+            if (const Symbol* symbol = symEngine->GetSymbol(address))
+                symbols.push_back(symbol);
+        }
+    }
+
+    stream << symbols;
+    return true;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Writes the raw callstack pool to `stream` (even when it is empty) and then clears it.
+// Returns true when the pool held data, false otherwise.
+bool CallstackCollector::SerializeCallstacks(OutputDataStream& stream)
+{
+    stream << callstacksPool;
+
+    if (callstacksPool.IsEmpty())
+        return false;
+
+    callstacksPool.Clear(false);
+    return true;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// True when no callstack data is currently stored.
+bool CallstackCollector::IsEmpty() const
+{
+    const bool empty = callstacksPool.IsEmpty();
+    return empty;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a context switch: timestamp, old thread id, new thread id, cpu id, reason.
+OutputDataStream & operator<<(OutputDataStream &stream, const SwitchContextDesc &ob)
+{
+    stream << ob.timestamp;
+    stream << ob.oldThreadId;
+    stream << ob.newThreadId;
+    stream << ob.cpuId;
+    stream << ob.reason;
+    return stream;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Appends a copy of `desc` to the context switch pool.
+void SwitchContextCollector::Add(const SwitchContextDesc& desc)
+{
+    SwitchContextDesc& record = switchContextPool.Add();
+    record = desc;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Drops all collected context switches (the pool is cleared with the `false` flag).
+void SwitchContextCollector::Clear()
+{
+    const bool clearFlag = false;
+    switchContextPool.Clear(clearFlag);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Writes the context switch pool to `stream` (even when it is empty) and then clears it.
+// Returns true when the pool held at least one record, false otherwise.
+bool SwitchContextCollector::Serialize(OutputDataStream& stream)
+{
+    stream << switchContextPool;
+
+    if (switchContextPool.IsEmpty())
+        return false;
+
+    switchContextPool.Clear(false);
+    return true;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#if defined(OPTICK_MSVC) // CPUID(INFO, ID): fill the 4-int array INFO with the result of cpuid leaf ID
+#define CPUID(INFO, ID) __cpuid(INFO, ID)
+#include <intrin.h>
+#elif defined(OPTICK_GCC) // same CPUID(INFO, ID) interface, mapped onto the <cpuid.h> form that takes the leaf first and four separate outputs
+#include <cpuid.h>
+#define CPUID(INFO, ID) __cpuid(ID, INFO[0], INFO[1], INFO[2], INFO[3])
+#else
+#error Platform is not supported!
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Returns the processor brand string assembled from cpuid leaves 0x80000002..0x80000004.
+//
+// Leaf 0x80000000 is queried first for the highest supported extended leaf; a brand
+// leaf is read only when it does not exceed that maximum. Each leaf contributes
+// 16 bytes (four registers). Leaves that are not supported leave their part of the
+// buffer zeroed, so the result is an empty or shortened string in that case.
+string GetCPUName()
+{
+    int cpuInfo[4] = { -1 };
+    char cpuBrandString[0x40] = { 0 }; // 48 bytes of brand data + zero padding => always terminated
+
+    CPUID(cpuInfo, 0x80000000);
+    const unsigned maxExtendedLeaf = cpuInfo[0];
+
+    const unsigned firstBrandLeaf = 0x80000002;
+    const unsigned lastBrandLeaf = 0x80000004;
+
+    // Only the three brand-string leaves are queried. Walking every leaf up to
+    // maxExtendedLeaf is unnecessary and could not terminate if the reported
+    // maximum were 0xFFFFFFFF.
+    for (unsigned leaf = firstBrandLeaf; leaf <= lastBrandLeaf && leaf <= maxExtendedLeaf; ++leaf)
+    {
+        CPUID(cpuInfo, leaf);
+        memcpy(cpuBrandString + sizeof(cpuInfo) * (leaf - firstBrandLeaf), cpuInfo, sizeof(cpuInfo));
+    }
+
+    return string(cpuBrandString);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Accessor for the single Core object, held as a function-local static that is
+// constructed the first time this function runs.
+Core& Core::Get()
+{
+    static Core core;
+    return core;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Requests a transition to START_CAPTURE. Only records the request in pendingState;
+// the transition itself is performed by UpdateState().
+void Core::StartCapture()
+{
+    this->pendingState = State::START_CAPTURE;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Requests a transition to STOP_CAPTURE. Only records the request in pendingState;
+// the transition itself is performed by UpdateState().
+void Core::StopCapture()
+{
+    this->pendingState = State::STOP_CAPTURE;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Requests a transition to CANCEL_CAPTURE. Only records the request in pendingState;
+// the transition itself is performed by UpdateState().
+void Core::CancelCapture()
+{
+    this->pendingState = State::CANCEL_CAPTURE;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Requests a transition to DUMP_CAPTURE. Only records the request in pendingState;
+// the transition itself is performed by UpdateState().
+void Core::DumpCapture()
+{
+    this->pendingState = State::DUMP_CAPTURE;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Sends `message` to the server as a ReportProgress packet and remembers the
+// current time as the moment progress was last reported.
+void Core::DumpProgress(const char* message)
+{
+    progressReportedLastTimestampMS = GetTimeMilliSeconds();
+
+    OutputDataStream packet;
+    packet << message;
+    Server::Get().Send(DataResponse::ReportProgress, packet);
+}
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Groups the events of `entry` into root scopes and sends them through `scope`.
+//
+// Only events with finish >= start that lie completely inside `timeSlice` are used.
+// The first such event becomes the root. A later event that finishes after the
+// current root causes the collected scope to be sent and becomes the new root;
+// any other event is added to the current scope. The last scope is sent after the
+// walk and the event buffer is cleared. Nothing happens for an empty buffer.
+void Core::DumpEvents(EventStorage& entry, const EventTime& timeSlice, ScopeData& scope)
+{
+    if (entry.eventBuffer.IsEmpty())
+        return;
+
+    const EventData* currentRoot = nullptr;
+
+    entry.eventBuffer.ForEach([&](const EventData& data)
+    {
+        const bool isWellFormed = data.finish >= data.start;
+        const bool isInsideSlice = data.start >= timeSlice.start && timeSlice.finish >= data.finish;
+        if (!isWellFormed || !isInsideSlice)
+            return;
+
+        const bool hasRoot = (currentRoot != nullptr);
+        if (hasRoot && !(currentRoot->finish < data.finish))
+        {
+            // Finishes no later than the current root => belongs to the current scope.
+            scope.AddEvent(data);
+            return;
+        }
+
+        // Flush the previous scope (if any) before starting a new one.
+        if (hasRoot)
+            scope.Send();
+
+        currentRoot = &data;
+        scope.InitRootEvent(*currentRoot);
+    });
+
+    scope.Send();
+
+    entry.eventBuffer.Clear(false);
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Sends all tag buffers of `entry` as one TagsPack packet and clears them afterwards.
+// Nothing is sent when every tag buffer is empty.
+//
+// Packet layout: board number, thread number, a zero uint32, then the float, U32,
+// S32, U64 and point buffers, two zero uint32 values, and finally the string buffer.
+void Core::DumpTags(EventStorage& entry, ScopeData& scope)
+{
+    const bool allTagBuffersEmpty =
+        entry.tagFloatBuffer.IsEmpty() &&
+        entry.tagS32Buffer.IsEmpty() &&
+        entry.tagU32Buffer.IsEmpty() &&
+        entry.tagU64Buffer.IsEmpty() &&
+        entry.tagPointBuffer.IsEmpty() &&
+        entry.tagStringBuffer.IsEmpty();
+
+    if (allTagBuffersEmpty)
+        return;
+
+    OutputDataStream tagStream;
+    tagStream << scope.header.boardNumber;
+    tagStream << scope.header.threadNumber;
+    tagStream << (uint32)0;
+    tagStream << entry.tagFloatBuffer;
+    tagStream << entry.tagU32Buffer;
+    tagStream << entry.tagS32Buffer;
+    tagStream << entry.tagU64Buffer;
+    tagStream << entry.tagPointBuffer;
+    tagStream << (uint32)0;
+    tagStream << (uint32)0;
+    tagStream << entry.tagStringBuffer;
+    Server::Get().Send(DataResponse::TagsPack, tagStream);
+
+    entry.ClearTags(false);
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Sends the events and tags recorded for one thread entry.
+// Entries whose thread id is INVALID_THREAD_ID (custom thread storages) are sorted first.
+// Asserts that the entry holds no fiber switch events.
+void Core::DumpThread(ThreadEntry& entry, const EventTime& timeSlice, ScopeData& scope)
+{
+    const bool isCustomStorage = (entry.description.threadID == INVALID_THREAD_ID);
+    if (isCustomStorage)
+    {
+        entry.Sort();
+    }
+
+    EventStorage& storage = entry.storage;
+    DumpEvents(storage, timeSlice, scope);
+    DumpTags(storage, scope);
+
+    OPTICK_ASSERT(entry.storage.fiberSyncBuffer.IsEmpty(), "Fiber switch events in native threads?");
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Sends the events recorded for one fiber entry and, when the entry has fiber
+// synchronization records, a FiberSynchronizationData packet containing the board
+// number, the fiber number and those records.
+void Core::DumpFiber(FiberEntry& entry, const EventTime& timeSlice, ScopeData& scope)
+{
+    DumpEvents(entry.storage, timeSlice, scope);
+
+    if (entry.storage.fiberSyncBuffer.IsEmpty())
+        return;
+
+    OutputDataStream syncStream;
+    syncStream << scope.header.boardNumber;
+    syncStream << scope.header.fiberNumber;
+    syncStream << entry.storage.fiberSyncBuffer;
+    Server::Get().Send(DataResponse::FiberSynchronizationData, syncStream);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Returns the smallest time range covering every event of `entry` whose description
+// is `rootDescription`. When no event matches, the result stays at
+// { INT64_MAX, INT64_MIN }, i.e. start > finish.
+EventTime CalculateRange(const ThreadEntry& entry, const EventDescription* rootDescription)
+{
+    EventTime range = { INT64_MAX, INT64_MIN };
+
+    entry.storage.eventBuffer.ForEach([&](const EventData& data)
+    {
+        if (data.description != rootDescription)
+            return;
+
+        if (data.start < range.start)
+            range.start = data.start;
+
+        if (range.finish < data.finish)
+            range.finish = data.finish;
+    });
+
+    return range;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+void Core::DumpFrames(uint32 mode) // Sends the collected capture to the Server packet by packet, then resets the collected data
+{
+ std::lock_guard<std::recursive_mutex> lock(threadsLock); // held until the function returns
+ // Nothing to send without at least one frame and one registered thread.
+ if (frames.empty() || threads.empty())
+ return;
+ // Every dump gets its own board number; all packets below carry it.
+ ++boardNumber;
+
+ DumpProgress("Generating summary...");
+
+ GenerateCommonSummary();
+ DumpSummary();
+
+ DumpProgress("Collecting Frame Events...");
+
+ // The thread calling DumpFrames is treated as the main thread; the index stays 0 if it is not registered.
+ ThreadID mainThreadID = Platform::GetThreadID();
+ uint32 mainThreadIndex = 0;
+ for (size_t i = 0; i < threads.size(); ++i)
+ if (threads[i]->description.threadID == mainThreadID)
+ mainThreadIndex = (uint32)i;
+ // Time slice = range of the CPU frame events on that thread, or first frame start .. last frame finish when that range is empty.
+ EventTime timeSlice = CalculateRange(*threads[mainThreadIndex], GetFrameDescription(FrameType::CPU));
+ if (timeSlice.start >= timeSlice.finish)
+ {
+ timeSlice.start = frames.front().start;
+ timeSlice.finish = frames.back().finish;
+ }
+
+ DumpBoard(mode, timeSlice, mainThreadIndex);
+ // Scope reused for every thread; fiberNumber is -1 for thread scopes.
+ ScopeData threadScope;
+ threadScope.header.boardNumber = boardNumber;
+ threadScope.header.fiberNumber = -1;
+
+ if (gpuProfiler)
+ gpuProfiler->Dump(mode);
+
+ for (size_t i = 0; i < threads.size(); ++i)
+ {
+ threadScope.header.threadNumber = (uint32)i;
+ DumpThread(*threads[i], timeSlice, threadScope);
+ }
+ // Scope reused for every fiber; threadNumber is -1 for fiber scopes.
+ ScopeData fiberScope;
+ fiberScope.header.boardNumber = (uint32)boardNumber;
+ fiberScope.header.threadNumber = -1;
+ for (size_t i = 0; i < fibers.size(); ++i)
+ {
+ fiberScope.header.fiberNumber = (uint32)i;
+ DumpFiber(*fibers[i], timeSlice, fiberScope);
+ }
+ // Per-frame data has been sent: forget the frames and drop entries of threads that are no longer alive.
+ frames.clear();
+ CleanupThreadsAndFibers();
+
+ {
+ DumpProgress("Serializing SwitchContexts");
+ OutputDataStream switchContextsStream;
+ switchContextsStream << boardNumber;
+ switchContextCollector.Serialize(switchContextsStream);
+ Server::Get().Send(DataResponse::SynchronizationData, switchContextsStream); // sent even when the collector was empty
+ }
+
+ {
+ DumpProgress("Serializing SysCalls");
+ OutputDataStream callstacksStream; // despite the name, this stream carries the syscall records
+ callstacksStream << boardNumber;
+ syscallCollector.Serialize(callstacksStream);
+ Server::Get().Send(DataResponse::SyscallPack, callstacksStream); // sent even when the collector was empty
+ }
+ // Callstack packets are only produced when callstacks were collected.
+ if (!callstackCollector.IsEmpty())
+ {
+ DumpProgress("Resolving callstacks");
+ OutputDataStream symbolsStream;
+ symbolsStream << boardNumber;
+ callstackCollector.SerializeModules(symbolsStream);
+ callstackCollector.SerializeSymbols(symbolsStream); // must run before SerializeCallstacks, which clears the pool
+ Server::Get().Send(DataResponse::CallstackDescriptionBoard, symbolsStream);
+
+ DumpProgress("Serializing callstacks");
+ OutputDataStream callstacksStream;
+ callstacksStream << boardNumber;
+ callstackCollector.SerializeCallstacks(callstacksStream);
+ Server::Get().Send(DataResponse::CallstackPack, callstacksStream);
+ }
+ // NOTE(review): NullFrame is the last packet sent; presumably it marks the end of the dump - confirm against the receiver.
+ Server::Get().Send(DataResponse::NullFrame, OutputDataStream::Empty);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Builds and sends the SummaryPack packet:
+//   board number,
+//   frame count followed by each frame duration in milliseconds (as float),
+//   summary pair count followed by each key/value pair,
+//   attachment count followed by each attachment's type, name and data.
+// The summary pairs and attachments are cleared as soon as they have been written.
+void Core::DumpSummary()
+{
+    OutputDataStream packet;
+
+    // Board Number
+    packet << boardNumber;
+
+    // Frames
+    const double ticksPerSecond = (double)Platform::GetFrequency();
+    packet << (uint32_t)frames.size();
+    for (const EventTime& frame : frames)
+    {
+        const double durationMs = 1000.0 * (frame.finish - frame.start) / ticksPerSecond;
+        packet << (float)durationMs;
+    }
+
+    // Summary
+    packet << (uint32_t)summary.size();
+    for (const auto& item : summary)
+    {
+        packet << item.first;
+        packet << item.second;
+    }
+    summary.clear();
+
+    // Attachments
+    packet << (uint32_t)attachments.size();
+    for (const Attachment& attachment : attachments)
+    {
+        packet << (uint32_t)attachment.type;
+        packet << attachment.name;
+        packet << attachment.data;
+    }
+    attachments.clear();
+
+    // Send
+    Server::Get().Send(DataResponse::SummaryPack, packet);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Deletes and unregisters every thread entry that is no longer alive.
+// Note: despite the name, only the thread list is processed here.
+void Core::CleanupThreadsAndFibers()
+{
+    std::lock_guard<std::recursive_mutex> lock(threadsLock);
+
+    ThreadList::iterator cursor = threads.begin();
+    while (cursor != threads.end())
+    {
+        if ((*cursor)->isAlive)
+        {
+            ++cursor;
+            continue;
+        }
+
+        Memory::Delete(*cursor);
+        cursor = threads.erase(cursor);
+    }
+}
+
+// Builds and sends the FrameDescriptionBoard packet describing this capture: board
+// number, timer frequency, time slice, thread and fiber lists, main thread index,
+// the event description board, capture mode, the registered process/thread
+// descriptions, the current process id and the hardware concurrency. Several fields
+// are written as constant zeros (see the inline labels).
+// The registered process and thread descriptions are cleared afterwards.
+void Core::DumpBoard(uint32 mode, EventTime timeSlice, uint32 mainThreadIndex)
+{
+    OutputDataStream board;
+
+    board << boardNumber;
+    board << Platform::GetFrequency();
+    board << (uint64)0;                     // Origin
+    board << (uint32)0;                     // Precision
+    board << timeSlice;
+    board << threads;
+    board << fibers;
+    board << mainThreadIndex;
+    board << EventDescriptionBoard::Get();
+    board << (uint32)0;                     // Tags
+    board << (uint32)0;                     // Run
+    board << (uint32)0;                     // Filters
+    board << (uint32)0;                     // ThreadDescs
+    board << mode;                          // Mode
+    board << processDescs;
+    board << threadDescs;
+    board << (uint32)Platform::GetProcessID();
+    board << (uint32)std::thread::hardware_concurrency();
+
+    Server::Get().Send(DataResponse::FrameDescriptionBoard, board);
+
+    // Cleanup
+    processDescs.clear();
+    threadDescs.clear();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Adds the "Platform" and "CPU" summary entries, plus "GPU" when a GPU profiler is set.
+void Core::GenerateCommonSummary()
+{
+    AttachSummary("Platform", Platform::GetName());
+
+    const string cpuName = GetCPUName();
+    AttachSummary("CPU", cpuName.c_str());
+
+    if (gpuProfiler != nullptr)
+    {
+        const auto& gpuName = gpuProfiler->GetName();
+        AttachSummary("GPU", gpuName.c_str());
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Starts with counters at zero, no callback, both states set to DUMP_CAPTURE, mode OFF
+// and no symbol engine, tracer or GPU profiler. When OPTICK_ENABLE_TRACING is set, the
+// tracer and symbol engine are obtained from the Platform. Finally the three built-in
+// frame event descriptions (CPU, GPU, Render) are created.
+Core::Core() :
+    progressReportedLastTimestampMS(0),
+    boardNumber(0),
+    frameNumber(0),
+    stateCallback(nullptr),
+    currentState(State::DUMP_CAPTURE),
+    pendingState(State::DUMP_CAPTURE),
+    currentMode(Mode::OFF),
+    symbolEngine(nullptr),
+    tracer(nullptr),
+    gpuProfiler(nullptr)
+{
+#if OPTICK_ENABLE_TRACING
+    tracer = Platform::GetTrace();
+    symbolEngine = Platform::GetSymbolEngine();
+#endif
+
+    frameDescriptions[FrameType::CPU] = EventDescription::Create("CPU Frame", __FILE__, __LINE__);
+    frameDescriptions[FrameType::GPU] = EventDescription::Create("GPU Frame", __FILE__, __LINE__);
+    frameDescriptions[FrameType::Render] = EventDescription::Create("Render Frame", __FILE__, __LINE__);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Performs at most one state transition towards pendingState.
+//
+// A pending DUMP_CAPTURE while the current state is START_CAPTURE is first turned into
+// STOP_CAPTURE, so the dump itself happens on a following call. The state callback,
+// when set, may veto the transition by returning false.
+// Returns true when a transition was performed, false when there was nothing to do
+// or the callback refused.
+bool Core::UpdateState()
+{
+    if (currentState == pendingState)
+        return false;
+
+    State::Type nextState = pendingState;
+    if (pendingState == State::DUMP_CAPTURE && currentState == State::START_CAPTURE)
+        nextState = State::STOP_CAPTURE;
+
+    if (stateCallback != nullptr)
+    {
+        if (!stateCallback(nextState))
+            return false;
+    }
+
+    if (nextState == State::START_CAPTURE)
+    {
+        Activate((Mode::Type)settings.mode);
+    }
+    else if (nextState == State::STOP_CAPTURE || nextState == State::CANCEL_CAPTURE)
+    {
+        Activate(Mode::OFF);
+    }
+    else if (nextState == State::DUMP_CAPTURE)
+    {
+        DumpFrames();
+    }
+
+    currentState = nextState;
+    return true;
+}
+
+
+// Per-frame update, executed under coreLock.
+//
+// While a capture is active: closes the current frame, requests a dump when the frame
+// count, total time or single-frame (spike) limit from the settings is reached, and
+// reports capturing progress when it is due. Then lets the server process its events,
+// applies pending state transitions until none is left, and - if a capture is (still or
+// newly) active - opens the next frame.
+//
+// Returns the incremented frame number.
+uint32_t Core::Update()
+{
+    std::lock_guard<std::recursive_mutex> lock(coreLock);
+
+    if (currentMode != Mode::OFF)
+    {
+        // All limit checks look at frames.front()/back(), so they are only valid
+        // when at least one frame exists.
+        if (!frames.empty())
+        {
+            frames.back().Stop();
+
+            if (settings.frameLimit > 0 && frames.size() >= settings.frameLimit)
+                DumpCapture();
+
+            if (settings.timeLimitUs > 0)
+            {
+                if (TicksToUs(frames.back().finish - frames.front().start) >= settings.timeLimitUs)
+                    DumpCapture();
+            }
+
+            if (settings.spikeLimitUs > 0)
+            {
+                if (TicksToUs(frames.back().finish - frames.back().start) >= settings.spikeLimitUs)
+                    DumpCapture();
+            }
+        }
+
+        if (IsTimeToReportProgress())
+            DumpCapturingProgress();
+    }
+
+    UpdateEvents();
+
+    // Apply transitions one at a time until the pending state is reached or vetoed.
+    while (UpdateState()) {}
+
+    if (currentMode != Mode::OFF)
+    {
+        frames.push_back(EventTime());
+        frames.back().Start();
+    }
+
+    return ++frameNumber;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Gives the server a chance to run its own update step.
+void Core::UpdateEvents()
+{
+    Server& server = Server::Get();
+    server.Update();
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Records one context switch in the switch context collector. Always returns true.
+bool Core::ReportSwitchContext(const SwitchContextDesc& desc)
+{
+    switchContextCollector.Add(desc);
+
+    const bool accepted = true;
+    return accepted;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Records one callstack in the callstack collector. Always returns true.
+bool Core::ReportStackWalk(const CallstackDesc& desc)
+{
+    callstackCollector.Add(desc);
+
+    const bool accepted = true;
+    return accepted;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Switches the profiler to `mode`; does nothing when that mode is already current.
+//
+// Every registered thread entry is activated with the new mode first.
+// Turning a capture on: starts the tracer when one exists and Mode::TRACER is requested
+// (retrying once without Mode::AUTOSAMPLING if the first start fails), starts the GPU
+// profiler when one exists and Mode::GPU is requested, and sends a handshake response
+// with the resulting status. The status stays ERR_TRACER_FAILED when the tracer was not
+// started at all.
+// Turning a capture off: stops the tracer and stops the GPU profiler with the previous mode.
+void Core::Activate(Mode::Type mode)
+{
+    if (mode == currentMode)
+        return;
+
+    Mode::Type prevMode = currentMode;
+    currentMode = mode;
+
+    {
+        std::lock_guard<std::recursive_mutex> lock(threadsLock);
+        for (ThreadEntry* entry : threads)
+            entry->Activate(mode);
+    }
+
+    if (mode == Mode::OFF)
+    {
+        if (tracer)
+            tracer->Stop();
+
+        if (gpuProfiler)
+            gpuProfiler->Stop(prevMode);
+
+        return;
+    }
+
+    CaptureStatus::Type status = CaptureStatus::ERR_TRACER_FAILED;
+
+    if (tracer && (mode & Mode::TRACER))
+    {
+        std::lock_guard<std::recursive_mutex> lock(threadsLock);
+        tracer->SetPassword(settings.password.c_str());
+        status = tracer->Start(mode, settings.samplingFrequency, threads);
+
+        // Let's retry with more narrow setup
+        if (status != CaptureStatus::OK && (mode & Mode::AUTOSAMPLING))
+        {
+            const Mode::Type narrowedMode = (Mode::Type)(mode & ~Mode::AUTOSAMPLING);
+            status = tracer->Start(narrowedMode, settings.samplingFrequency, threads);
+        }
+    }
+
+    if (gpuProfiler && (mode & Mode::GPU))
+        gpuProfiler->Start(mode);
+
+    SendHandshakeResponse(status);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Reports the number of captured frames and the profiler's memory usage (in Mb) as a
+// progress message. When no capture is active an empty message is reported.
+void Core::DumpCapturingProgress()
+{
+    stringstream text;
+
+    if (currentMode != Mode::OFF)
+    {
+        const size_t usedKb = Memory::GetAllocatedSize() >> 10;
+        const float usedMb = usedKb / 1024.0f;
+        // VS TODO: Format to 3 digits
+        text << "Capturing Frame " << (uint32)frames.size() << "..." << std::endl;
+        text << "Memory Used: " << usedMb << " Mb";
+    }
+
+    DumpProgress(text.str().c_str());
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// True when more than 200 ms have passed since progress was last reported.
+bool Core::IsTimeToReportProgress() const
+{
+    const auto nextReportAtMs = progressReportedLastTimestampMS + 200;
+    return GetTimeMilliSeconds() > nextReportAtMs;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Sends a Handshake packet containing the capture status, the platform name and the
+// server's host name.
+void Core::SendHandshakeResponse(CaptureStatus::Type status)
+{
+    Server& server = Server::Get();
+
+    OutputDataStream handshake;
+    handshake << (uint32)status;
+    handshake << Platform::GetName();
+    handshake << server.GetHostName();
+
+    server.Send(DataResponse::Handshake, handshake);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// True when some registered thread entry has the given thread id
+// (regardless of whether that entry is still alive).
+bool Core::IsRegistredThread(ThreadID id)
+{
+    std::lock_guard<std::recursive_mutex> lock(threadsLock);
+
+    for (const ThreadEntry* entry : threads)
+    {
+        if (entry->description.threadID == id)
+            return true;
+    }
+
+    return false;
+}
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Creates a thread entry for `description`, adds it to the thread list and returns it.
+// `slot` may be null. When it is not null and a capture is currently active, `*slot`
+// is pointed at the new entry's storage right away.
+ThreadEntry* Core::RegisterThread(const ThreadDescription& description, EventStorage** slot)
+{
+    std::lock_guard<std::recursive_mutex> lock(threadsLock);
+
+    ThreadEntry* created = Memory::New<ThreadEntry>(description, slot);
+    threads.push_back(created);
+
+    const bool isCapturing = (currentMode != Mode::OFF);
+    if (isCapturing && slot != nullptr)
+    {
+        *slot = &created->storage;
+    }
+
+    return created;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Unregisters the first alive thread entry with the given thread id.
+// The entry is deleted immediately only when no capture is active and `keepAlive` is
+// false; otherwise it is just marked as not alive and stays in the list.
+// Returns false when no alive entry with that id exists.
+bool Core::UnRegisterThread(ThreadID threadID, bool keepAlive)
+{
+    std::lock_guard<std::recursive_mutex> lock(threadsLock);
+
+    for (ThreadList::iterator it = threads.begin(); it != threads.end(); ++it)
+    {
+        ThreadEntry* entry = *it;
+
+        const bool isMatch = (entry->description.threadID == threadID) && entry->isAlive;
+        if (!isMatch)
+            continue;
+
+        const bool canDestroyNow = (currentMode == Mode::OFF) && !keepAlive;
+        if (canDestroyNow)
+        {
+            Memory::Delete(entry);
+            threads.erase(it);
+        }
+        else
+        {
+            entry->isAlive = false;
+        }
+
+        return true;
+    }
+
+    return false;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Creates a fiber entry for `description`, adds it to the fiber list and marks its
+// storage as fiber storage. When `slot` is not null, `*slot` is pointed at that storage.
+// A null `slot` is tolerated (as in RegisterThread) instead of being dereferenced.
+// Always returns true.
+bool Core::RegisterFiber(const FiberDescription& description, EventStorage** slot)
+{
+    std::lock_guard<std::recursive_mutex> lock(coreLock);
+
+    FiberEntry* entry = Memory::New<FiberEntry>(description);
+    fibers.push_back(entry);
+    entry->storage.isFiberStorage = true;
+
+    if (slot != nullptr)
+        *slot = &entry->storage;
+
+    return true;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Stores a copy of `description` for the next board dump.
+// Note: the return value is always false.
+bool Core::RegisterProcessDescription(const ProcessDescription& description)
+{
+    processDescs.push_back(description);
+
+    const bool result = false;
+    return result;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Stores a copy of `description` for the next board dump.
+// Note: the return value is always false.
+bool Core::RegisterThreadDescription(const ThreadDescription& description)
+{
+    threadDescs.push_back(description);
+
+    const bool result = false;
+    return result;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Installs (or, with a null value, removes) the state change callback.
+// Returns true when a non-null callback is installed afterwards.
+bool Core::SetStateChangedCallback(StateCallback cb)
+{
+    stateCallback = cb;
+
+    const bool hasCallback = (stateCallback != nullptr);
+    return hasCallback;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Adds a key/value pair to the summary that is sent with the next dump.
+// Both strings are copied. Always returns true.
+bool Core::AttachSummary(const char* key, const char* value)
+{
+    const string keyCopy(key);
+    const string valueCopy(value);
+    summary.push_back(make_pair(keyCopy, valueCopy));
+    return true;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Adds an attachment named `name` holding a copy of the `size` bytes at `data`.
+// Returns false, without adding anything, when `size` is zero.
+bool Core::AttachFile(File::Type type, const char* name, const uint8_t* data, uint32_t size)
+{
+    if (size == 0)
+        return false;
+
+    attachments.push_back(Attachment(type, name));
+
+    Attachment& added = attachments.back();
+    added.data.resize(size);
+    memcpy(&added.data[0], data, size);
+
+    return true;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Adds an attachment named `name` with the content of `stream`, read from its current
+// position to its end.
+//
+// Returns false, without attaching anything, when the remaining length cannot be
+// determined (failed tellg/seekg, e.g. a file that could not be opened), when nothing
+// remains to be read, or when no bytes could be read. If the stream delivers fewer
+// bytes than expected, only the bytes actually read are attached.
+bool Core::AttachFile(File::Type type, const char* name, std::istream& stream)
+{
+    const std::streampos beg = stream.tellg();
+    stream.seekg(0, std::ios::end);
+    const std::streampos end = stream.tellg();
+    stream.seekg(beg, std::ios::beg);
+
+    // tellg() reports -1 on failure, so a broken stream yields a length <= 0 here
+    // instead of a huge unsigned size.
+    const std::streamoff length = end - beg;
+    if (length <= 0)
+        return false;
+
+    const size_t size = (size_t)length;
+    void* buffer = Memory::Alloc(size);
+
+    stream.read((char*)buffer, (std::streamsize)size);
+    // Attach only what was really read; the rest of the buffer is uninitialised.
+    const size_t bytesRead = (size_t)stream.gcount();
+    bool result = AttachFile(type, name, (uint8*)buffer, (uint32_t)bytesRead);
+
+    Memory::Free(buffer);
+    return result;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Opens the file at `path` in binary mode and attaches its content under `name`.
+bool Core::AttachFile(File::Type type, const char* name, const char* path)
+{
+    std::ifstream file(path, std::ios::binary);
+    const bool attached = AttachFile(type, name, file);
+    return attached;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Opens the file at the wide-character `path` in binary mode and attaches its content
+// under `name`.
+//
+// With OPTICK_MSVC the wide path is passed to std::ifstream directly. Otherwise it is
+// converted to a multibyte string first; returns false when that conversion fails.
+// The converted path is limited to 255 bytes and is silently truncated beyond that.
+bool Core::AttachFile(File::Type type, const char* name, const wchar_t* path)
+{
+#if defined(OPTICK_MSVC)
+    std::ifstream stream(path, std::ios::binary);
+    return AttachFile(type, name, stream);
+#else
+    char p[256] = { 0 };
+    // Convert at most sizeof(p) - 1 bytes: wcstombs does not write a terminator when
+    // the output fills the given size, so the last (zeroed) byte is kept in reserve.
+    const size_t converted = wcstombs(p, path, sizeof(p) - 1);
+    if (converted == (size_t)-1)
+        return false; // path contains a character that cannot be converted
+
+    std::ifstream stream(p, std::ios::binary);
+    return AttachFile(type, name, stream);
+#endif
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Installs `profiler` as the GPU profiler. Asserts that none is installed yet
+// (re-initialization is not supported); whatever was stored before is deleted.
+void Core::InitGPUProfiler(GPUProfiler* profiler)
+{
+    OPTICK_ASSERT(gpuProfiler == nullptr, "Can't reinitialize GPU profiler! Not supported yet!");
+
+    Memory::Delete<GPUProfiler>(gpuProfiler);
+
+    gpuProfiler = profiler;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Replaces the capture settings with a copy of `captureSettings`.
+// Note: the return value is always false. The settings' password is not forwarded to
+// the tracer here (that code is disabled); Activate() passes it on when the tracer
+// is started.
+bool Core::SetSettings(const CaptureSettings& captureSettings)
+{
+    settings = captureSettings;
+
+    //if (tracer)
+    //{
+    //    string decoded = base64_decode(encodedPassword);
+    //    tracer->SetPassword(decoded.c_str());
+    //    return true;
+    //}
+
+    const bool result = false;
+    return result;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Returns the event description registered for the given frame type.
+// `frame` is used directly as an index; no range check is performed.
+const EventDescription* Core::GetFrameDescription(FrameType::Type frame) const
+{
+    const EventDescription* description = frameDescriptions[frame];
+    return description;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Deletes every remaining thread entry and fiber entry and empties both lists.
+Core::~Core()
+{
+    std::lock_guard<std::recursive_mutex> lock(threadsLock);
+
+    for (ThreadEntry* threadEntry : threads)
+        Memory::Delete(threadEntry);
+    threads.clear();
+
+    for (FiberEntry* fiberEntry : fibers)
+        Memory::Delete(fiberEntry);
+    fibers.clear();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Read-only access to the list of registered thread entries.
+// Note: no lock is taken here.
+const vector<ThreadEntry*>& Core::GetThreads() const
+{
+    const vector<ThreadEntry*>& registered = threads;
+    return registered;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+OPTICK_THREAD_LOCAL EventStorage* Core::storage = nullptr; // event storage slot handed out by GetEventStorageSlotForCurrentThread(); starts out null
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Zero-initializes the board, thread and fiber numbers.
+// fiberNumber is included because the header is serialized as a whole (see the
+// ScopeHeader stream operator), so it must not be left indeterminate for callers
+// that never assign it.
+ScopeHeader::ScopeHeader() : boardNumber(0), threadNumber(0), fiberNumber(0)
+{
+
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a scope header: board number, thread number, fiber number, event.
+OutputDataStream& operator<<(OutputDataStream& stream, const ScopeHeader& header)
+{
+    stream << header.boardNumber;
+    stream << header.threadNumber;
+    stream << header.fiberNumber;
+    stream << header.event;
+    return stream;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a scope: its header, then its categories, then its events.
+OutputDataStream& operator<<(OutputDataStream& stream, const ScopeData& ob)
+{
+    stream << ob.header;
+    stream << ob.categories;
+    stream << ob.events;
+    return stream;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a thread description: thread id, process id, name, max depth, priority, mask.
+OutputDataStream& operator<<(OutputDataStream& stream, const ThreadDescription& description)
+{
+    stream << description.threadID;
+    stream << description.processID;
+    stream << description.name;
+    stream << description.maxDepth;
+    stream << description.priority;
+    stream << description.mask;
+    return stream;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a thread entry by writing only its description. `entry` must not be null.
+OutputDataStream& operator<<(OutputDataStream& stream, const ThreadEntry* entry)
+{
+    stream << entry->description;
+    return stream;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a fiber description by writing its id.
+OutputDataStream& operator<<(OutputDataStream& stream, const FiberDescription& description)
+{
+    stream << description.id;
+    return stream;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a fiber entry by writing only its description. `entry` must not be null.
+OutputDataStream& operator<<(OutputDataStream& stream, const FiberEntry* entry)
+{
+    stream << entry->description;
+    return stream;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a process description: process id, name, unique key.
+OutputDataStream& operator<<(OutputDataStream& stream, const ProcessDescription& description)
+{
+    stream << description.processID;
+    stream << description.name;
+    stream << description.uniqueKey;
+    return stream;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Public API: forwards to Core::SetStateChangedCallback on the Core singleton.
+OPTICK_API bool SetStateChangedCallback(StateCallback cb)
+{
+    Core& core = Core::Get();
+    return core.SetStateChangedCallback(cb);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Public API: forwards to Core::AttachSummary on the Core singleton.
+OPTICK_API bool AttachSummary(const char* key, const char* value)
+{
+    Core& core = Core::Get();
+    return core.AttachSummary(key, value);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Public API: attaches a copy of an in-memory buffer; forwards to Core::AttachFile.
+OPTICK_API bool AttachFile(File::Type type, const char* name, const uint8_t* data, uint32_t size)
+{
+    Core& core = Core::Get();
+    return core.AttachFile(type, name, data, size);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Public API: attaches the file at `path`; forwards to Core::AttachFile.
+OPTICK_API bool AttachFile(File::Type type, const char* name, const char* path)
+{
+    Core& core = Core::Get();
+    return core.AttachFile(type, name, path);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Public API: attaches the file at the wide-character `path`; forwards to Core::AttachFile.
+OPTICK_API bool AttachFile(File::Type type, const char* name, const wchar_t* path)
+{
+    Core& core = Core::Get();
+    return core.AttachFile(type, name, path);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a point as its x, y and z components, in that order.
+OutputDataStream& operator<<(OutputDataStream& stream, const Point& ob)
+{
+    stream << ob.x;
+    stream << ob.y;
+    stream << ob.z;
+    return stream;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Public entry point: runs Core::NextFrame() and returns the value it reports.
+OPTICK_API uint32_t NextFrame()
+{
+    const uint32_t result = Core::NextFrame();
+    return result;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Returns true when any bit of `mode` is set in the Core's current mode.
+OPTICK_API bool IsActive(Mode::Type mode /*= Mode::INSTRUMENTATION_EVENTS*/)
+{
+    if ((Core::Get().currentMode & mode) != 0)
+        return true;
+
+    return false;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Returns the address of the thread-local Core::storage pointer for the calling thread.
+OPTICK_API EventStorage** GetEventStorageSlotForCurrentThread()
+{
+    EventStorage** slot = &Core::Get().storage;
+    return slot;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Reports the isFiberStorage flag of the given storage. The pointer is dereferenced unconditionally.
+OPTICK_API bool IsFiberStorage(EventStorage* fiberStorage)
+{
+    const bool isFiber = fiberStorage->isFiberStorage;
+    return isFiber;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Registers the calling thread under `name`, using Core::storage as its slot.
+// Returns true when the Core hands back a thread entry.
+OPTICK_API bool RegisterThread(const char* name)
+{
+    const ThreadDescription description(name, Platform::GetThreadID(), Platform::GetProcessID());
+    ThreadEntry* entry = Core::Get().RegisterThread(description, &Core::storage);
+    return entry != nullptr;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Registers the calling thread under a wide-character name.
+// The name is converted to a multibyte string of at most THREAD_NAME_LENGTH - 1 bytes;
+// longer names are truncated. Returns true when the Core hands back a thread entry.
+OPTICK_API bool RegisterThread(const wchar_t* name)
+{
+    const int THREAD_NAME_LENGTH = 128;
+
+    // Zero-fill the buffer and reserve the last byte, so the result is always
+    // null-terminated even when the conversion truncates or fails.
+    char mbName[THREAD_NAME_LENGTH] = { 0 };
+    wcstombs_s(mbName, name, THREAD_NAME_LENGTH - 1);
+
+    return Core::Get().RegisterThread(ThreadDescription(mbName, Platform::GetThreadID(), Platform::GetProcessID()), &Core::storage) != nullptr;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Unregisters the calling thread (identified by Platform::GetThreadID()) from the Core.
+OPTICK_API bool UnRegisterThread(bool keepAlive)
+{
+    Core& core = Core::Get();
+    return core.UnRegisterThread(Platform::GetThreadID(), keepAlive);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Wraps the fiber id in a FiberDescription and registers it with the Core, together with the slot.
+OPTICK_API bool RegisterFiber(uint64 fiberId, EventStorage** slot)
+{
+    const FiberDescription description(fiberId);
+    return Core::Get().RegisterFiber(description, slot);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Registers a thread entry for an explicit thread id (no TLS slot is passed) and
+// returns the entry's storage, or nullptr when no entry was created.
+OPTICK_API EventStorage* RegisterStorage(const char* name, uint64_t threadID, ThreadMask::Type type)
+{
+    const ThreadDescription description(name, threadID, Platform::GetProcessID(), 1, 0, type);
+    ThreadEntry* entry = Core::Get().RegisterThread(description, nullptr);
+
+    if (entry == nullptr)
+        return nullptr;
+
+    return &entry->storage;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Forwards the flip of `swapChain` to the GPU profiler; does nothing when no profiler is installed.
+OPTICK_API void GpuFlip(void* swapChain)
+{
+    GPUProfiler* gpuProfiler = Core::Get().gpuProfiler;
+    if (gpuProfiler == nullptr)
+        return;
+
+    gpuProfiler->Flip(swapChain);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Installs `context` in the current thread's GPU storage and returns the previous context.
+// When the thread has no storage, nothing is changed and a default GPUContext is returned.
+OPTICK_API GPUContext SetGpuContext(GPUContext context)
+{
+    EventStorage* storage = Core::storage;
+    if (storage == nullptr)
+        return GPUContext();
+
+    GPUContext prevContext = storage->gpuStorage.context;
+    storage->gpuStorage.context = context;
+    return prevContext;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Public entry point: returns the Core's event description for the given frame type.
+OPTICK_API const EventDescription* GetFrameDescription(FrameType::Type frame)
+{
+    const Core& core = Core::Get();
+    return core.GetFrameDescription(frame);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Starts with profiling off, an empty push/pop stack and the fiber flag cleared.
+EventStorage::EventStorage()
+    : currentMode(Mode::OFF)
+    , pushPopEventStackIndex(0)
+    , isFiberStorage(false)
+{
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Switches this thread entry to `mode`.
+// Dead entries are ignored. When enabling, the storage is cleared first. If a TLS slot is
+// attached, the storage mode is set and the slot points at the storage (or nullptr for OFF).
+void ThreadEntry::Activate(Mode::Type mode)
+{
+    if (!isAlive)
+        return;
+
+    const bool enabling = (mode != Mode::OFF);
+    if (enabling)
+        storage.Clear(true);
+
+    if (threadTLS == nullptr)
+        return;
+
+    storage.currentMode = mode;
+    *threadTLS = enabling ? &storage : nullptr;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Sorts this entry's event buffer via SortMemoryPool.
+void ThreadEntry::Sort()
+{
+    EventBuffer& buffer = storage.eventBuffer;
+    SortMemoryPool(buffer);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Returns true when every event in the scope has a White-coloured description
+// (an empty event list also yields true). Categories are deliberately not inspected:
+// the original check for them is disabled.
+bool IsSleepOnlyScope(const ScopeData& scope)
+{
+    for (const EventData& data : scope.events)
+    {
+        if (data.description->color != Color::White)
+            return false;
+    }
+
+    return true;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes this scope and sends it as an EventFrame response, unless it is empty or
+// consists of sleep-only events. The scope is cleared afterwards in every case.
+void ScopeData::Send()
+{
+    const bool hasContent = !events.empty() || !categories.empty();
+
+    if (hasContent && !IsSleepOnlyScope(*this))
+    {
+        OutputDataStream frameStream;
+        frameStream << *this;
+        Server::Get().Send(DataResponse::EventFrame, frameStream);
+    }
+
+    Clear();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Empties both the event list and the category list; the header is left untouched.
+void ScopeData::Clear()
+{
+    events.clear();
+
+    categories.clear();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Clears the event buffer of every (node, queue) pair, passing `preserveMemory` through.
+void EventStorage::GPUStorage::Clear(bool preserveMemory)
+{
+    for (auto& nodeQueues : gpuBuffer)
+    {
+        for (EventBuffer& queueBuffer : nodeQueues)
+            queueBuffer.Clear(preserveMemory);
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Opens a GPU event for `desc` in the buffer selected by the current context (node, queue).
+// Both timestamps start as INVALID_TIMESTAMP; a timestamp query for the start is issued on the
+// context's command buffer. Returns nullptr when no GPU profiler is installed.
+EventData* EventStorage::GPUStorage::Start(const EventDescription &desc)
+{
+    GPUProfiler* gpuProfiler = Core::Get().gpuProfiler;
+    if (gpuProfiler == nullptr)
+        return nullptr;
+
+    EventData& result = gpuBuffer[context.node][context.queue].Add();
+    result.description = &desc;
+    result.start = EventTime::INVALID_TIMESTAMP;
+    result.finish = EventTime::INVALID_TIMESTAMP;
+
+    gpuProfiler->QueryTimestamp(context.cmdBuffer, &result.start);
+    return &result;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Issues a timestamp query for the event's finish time on the context's command buffer.
+// Does nothing when no GPU profiler is installed.
+void EventStorage::GPUStorage::Stop(EventData& data)
+{
+    GPUProfiler* gpuProfiler = Core::Get().gpuProfiler;
+    if (gpuProfiler == nullptr)
+        return;
+
+    gpuProfiler->QueryTimestamp(context.cmdBuffer, &data.finish);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+
+#endif //USE_OPTICK \ No newline at end of file
diff --git a/external/optick/optick_core.h b/external/optick/optick_core.h
new file mode 100644
index 0000000..9ddf46b
--- /dev/null
+++ b/external/optick/optick_core.h
@@ -0,0 +1,568 @@
+#pragma once
+#include "optick.config.h"
+
+#if USE_OPTICK
+
+#include <mutex>
+#include <thread>
+
+#include "optick_common.h"
+
+#include "optick_memory.h"
+#include "optick_message.h"
+#include "optick_serialization.h"
+
+#include "optick_gpu.h"
+
+#include <atomic>
+
+// We expect to have 1k unique strings going through Optick at once
+// The chances to hit a collision are 1 in 10 trillion (odds of a meteor landing on your house)
+// We should be quite safe here :)
+// https://preshing.com/20110504/hash-collision-probabilities/
+// Feel free to add a seed and wait for another strike if armageddon starts
+namespace Optick
+{
+    // Thin wrapper around a 64-bit hash value; usable as a key in ordered and unordered containers.
+    struct StringHash
+    {
+        uint64 hash;
+
+        // Wraps an already computed hash value.
+        StringHash(size_t h)
+            : hash(h)
+        {
+        }
+
+        // Hashes the given string with CalcHash.
+        StringHash(const char* str)
+            : hash(CalcHash(str))
+        {
+        }
+
+        // Comparisons look at the hash value only.
+        bool operator==(const StringHash& other) const
+        {
+            return hash == other.hash;
+        }
+
+        bool operator<(const StringHash& other) const
+        {
+            return hash < other.hash;
+        }
+
+        static uint64 CalcHash(const char* str);
+    };
+}
+
+// Overriding default hash function to return hash value directly
+namespace std
+{
+    // StringHash already carries a hash, so std::hash simply returns it (narrowed to size_t).
+    template<>
+    struct hash<Optick::StringHash>
+    {
+        size_t operator()(const Optick::StringHash& x) const
+        {
+            return static_cast<size_t>(x.hash);
+        }
+    };
+}
+
+namespace Optick
+{
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct Trace;
+struct SymbolEngine;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct ScopeHeader // Header stored in ScopeData::header; serialized through the operator<< declared after this struct
+{
+ EventTime event; // assigned from the root event by ScopeData::InitRootEvent
+ uint32 boardNumber;
+ int32 threadNumber;
+ int32 fiberNumber;
+
+ ScopeHeader(); // defined out of line
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+OutputDataStream& operator << ( OutputDataStream& stream, const ScopeHeader& ob);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// A header plus the events and categories collected for it.
+struct ScopeData
+{
+    ScopeHeader header;
+    vector<EventData> categories;
+    vector<EventData> events;
+
+    // Appends the event; events whose description colour is not Color::Null
+    // are additionally recorded as categories.
+    void AddEvent(const EventData& data)
+    {
+        events.push_back(data);
+
+        const bool isCategory = (data.description->color != Color::Null);
+        if (isCategory)
+            categories.push_back(data);
+    }
+
+    // Uses the event as the header's time range and adds it to the scope.
+    void InitRootEvent(const EventData& data)
+    {
+        header.event = data;
+        AddEvent(data);
+    }
+
+    void Send();
+    void Clear();
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#if defined(OPTICK_MSVC)
+#pragma warning( push )
+#pragma warning( disable : 4996 )
+#endif //OPTICK_MSVC
+// Fixed-capacity, always null-terminated character buffer of N bytes (N must be at least 1).
+// Assigned text is truncated to N - 1 characters; a null pointer is stored as the text "null".
+template<int N>
+struct OptickString
+{
+    char data[N];
+
+    // Starts as an empty string, so that reading `data` (e.g. strnlen during
+    // serialization) is well defined before the first assignment.
+    OptickString() { data[0] = 0; }
+
+    // Copies at most N - 1 characters and forces termination (strncpy does not terminate on truncation).
+    OptickString<N>& operator=(const char* text) { strncpy(data, text ? text : "null", N - 1); data[N - 1] = 0; return *this; }
+
+    OptickString(const char* text) { *this = text; }
+};
+#if defined(OPTICK_MSVC)
+#pragma warning( pop )
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Three-component float coordinate.
+struct Point
+{
+    float x, y, z;
+
+    // Leaves the coordinates uninitialized.
+    Point() {}
+
+    Point(float _x, float _y, float _z) : x(_x), y(_y), z(_z) {}
+
+    // Reads x, y, z from pos[0..2]. Takes a pointer to const so that both
+    // mutable and read-only coordinate arrays are accepted.
+    Point(const float pos[3]) : x(pos[0]), y(pos[1]), z(pos[2]) {}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes an OptickString as a uint32 length followed by that many characters.
+// The length is measured with strnlen, so it never exceeds N.
+template<int N>
+OutputDataStream& operator<<(OutputDataStream &stream, const OptickString<N>& ob)
+{
+    const size_t length = strnlen(ob.data, N);
+
+    stream << (uint32)length;
+    return stream.Write(ob.data, length);
+}
+OutputDataStream& operator<<(OutputDataStream& stream, const Point& ob);
+OutputDataStream& operator<<(OutputDataStream& stream, const ScopeData& ob);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+typedef MemoryPool<EventData, 1024> EventBuffer; // pool of EventData (used by EventStorage for CPU and GPU events)
+typedef MemoryPool<const EventData*, 32> CategoryBuffer; // pool of pointers to EventData
+typedef MemoryPool<SyncData, 1024> SynchronizationBuffer; // pool of SyncData
+typedef MemoryPool<FiberSyncData, 1024> FiberSyncBuffer; // pool of FiberSyncData (used by EventStorage)
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+typedef OptickString<32> ShortString; // 32-byte fixed string, i.e. at most 31 characters plus terminator
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+typedef TagData<float> TagFloat; // one TagData alias per supported tag value type
+typedef TagData<int32> TagS32;
+typedef TagData<uint32> TagU32;
+typedef TagData<uint64> TagU64;
+typedef TagData<Point> TagPoint;
+typedef TagData<ShortString> TagString;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+typedef MemoryPool<TagFloat, 1024> TagFloatBuffer; // one pool per tag type; NOTE(review): second template argument is presumably the chunk size - confirm in optick_memory.h
+typedef MemoryPool<TagS32, 1024> TagS32Buffer;
+typedef MemoryPool<TagU32, 1024> TagU32Buffer;
+typedef MemoryPool<TagU64, 1024> TagU64Buffer;
+typedef MemoryPool<TagPoint, 64> TagPointBuffer;
+typedef MemoryPool<TagString, 1024> TagStringBuffer;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Base64
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+string base64_decode(string const& encoded_string);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Board
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+typedef MemoryPool<EventDescription, 4096> EventDescriptionList;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+class EventDescriptionBoard // Owns the stored event descriptions; obtained through the static Get()
+{
+ // List of stored Event Descriptions
+ EventDescriptionList boardDescriptions;
+
+ // Shared Descriptions
+ typedef unordered_map<StringHash, EventDescription*> DescriptionMap; // keyed by StringHash
+ DescriptionMap sharedDescriptions;
+ MemoryBuffer<64 * 1024> sharedNames; // NOTE(review): presumably backs the names of shared descriptions - confirm in the .cpp
+ std::mutex sharedLock; // NOTE(review): presumably guards the shared-description members above - confirm in the .cpp
+
+public:
+ EventDescription* CreateDescription(const char* name, const char* file = nullptr, uint32_t line = 0, uint32_t color = Color::Null, uint32_t filter = 0); // file/line/color/filter are optional
+ EventDescription* CreateSharedDescription(const char* name, const char* file = nullptr, uint32_t line = 0, uint32_t color = Color::Null, uint32_t filter = 0); // same parameters as CreateDescription
+
+ static EventDescriptionBoard& Get(); // accessor for the board instance
+
+ const EventDescriptionList& GetEvents() const; // read-only access to the description list
+
+ friend OutputDataStream& operator << (OutputDataStream& stream, const EventDescriptionBoard& ob); // serializer needs the private members
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct EventStorage // Per-thread / per-fiber buffers for events, fiber sync records, tags and GPU events
+{
+ Mode::Type currentMode; // reset to Mode::OFF by Clear()
+ EventBuffer eventBuffer;
+ FiberSyncBuffer fiberSyncBuffer;
+
+ TagFloatBuffer tagFloatBuffer;
+ TagS32Buffer tagS32Buffer;
+ TagU32Buffer tagU32Buffer;
+ TagU64Buffer tagU64Buffer;
+ TagPointBuffer tagPointBuffer;
+ TagStringBuffer tagStringBuffer;
+
+ struct GPUStorage // GPU event buffers, one per (node, queue) pair, plus the active GPU context
+ {
+ static const int MAX_GPU_NODES = 2;
+ array<array<EventBuffer, GPU_QUEUE_COUNT>, MAX_GPU_NODES> gpuBuffer; // indexed as gpuBuffer[node][queue]
+ GPUContext context;
+
+ void Clear(bool preserveMemory);
+
+ EventData* Start(const EventDescription& desc);
+ void Stop(EventData& data);
+ };
+ GPUStorage gpuStorage;
+
+ uint32 pushPopEventStackIndex; // number of entries currently in pushPopEventStack
+ array<EventData*, 32> pushPopEventStack;
+
+ bool isFiberStorage;
+
+ EventStorage();
+
+ OPTICK_INLINE EventData& NextEvent() // appends a new slot to eventBuffer and returns it
+ {
+ return eventBuffer.Add();
+ }
+
+ // Free all temporary memory
+ void Clear(bool preserveContent) // NOTE(review): the flag is forwarded to GPUStorage::Clear, where the parameter is named preserveMemory
+ {
+ currentMode = Mode::OFF;
+ eventBuffer.Clear(preserveContent);
+ fiberSyncBuffer.Clear(preserveContent);
+ gpuStorage.Clear(preserveContent);
+ ClearTags(preserveContent);
+
+ while (pushPopEventStackIndex) // pop every entry, nulling each slot on the way down
+ {
+ pushPopEventStack[--pushPopEventStackIndex] = nullptr;
+ }
+ }
+
+ void ClearTags(bool preserveContent) // clears all six tag buffers with the same flag
+ {
+ tagFloatBuffer.Clear(preserveContent);
+ tagS32Buffer.Clear(preserveContent);
+ tagU32Buffer.Clear(preserveContent);
+ tagU64Buffer.Clear(preserveContent);
+ tagPointBuffer.Clear(preserveContent);
+ tagStringBuffer.Clear(preserveContent);
+ }
+
+ void Reset() // shorthand for Clear(true)
+ {
+ Clear(true);
+ }
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct ProcessDescription // Name, process id and unique key of a process (registered via Core::RegisterProcessDescription)
+{
+ string name;
+ ProcessID processID;
+ uint64 uniqueKey;
+ ProcessDescription(const char* processName, ProcessID pid, uint64 key); // defined out of line
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct ThreadDescription // Identity and settings of a thread, passed to Core::RegisterThread
+{
+ string name;
+ ThreadID threadID;
+ ProcessID processID;
+ int32 maxDepth;
+ int32 priority;
+ uint32 mask; // RegisterStorage passes a ThreadMask::Type value here
+
+ ThreadDescription(const char* threadName, ThreadID tid, ProcessID pid, int32 maxDepth = 1, int32 priority = 0, uint32 mask = 0); // defined out of line
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct FiberDescription // Identifies a fiber by a 64-bit id
+{
+ uint64 id;
+
+ FiberDescription(uint64 _id) // not explicit: a uint64 converts implicitly
+ : id(_id)
+ {}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct ThreadEntry // A registered thread: its description, its event storage and its TLS slot
+{
+ ThreadDescription description;
+ EventStorage storage;
+ EventStorage** threadTLS; // may be nullptr (RegisterStorage registers entries without a slot)
+
+ bool isAlive; // Activate() ignores entries that are not alive
+
+ ThreadEntry(const ThreadDescription& desc, EventStorage** tls) : description(desc), threadTLS(tls), isAlive(true) {}
+ void Activate(Mode::Type mode);
+ void Sort();
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct FiberEntry // A registered fiber: its description and its event storage
+{
+ FiberDescription description;
+ EventStorage storage;
+
+ FiberEntry(const FiberDescription& desc) : description(desc) {}
+};
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+typedef vector<ThreadEntry*> ThreadList;
+typedef vector<FiberEntry*> FiberList;
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct SysCallData : EventData // EventData extended with a syscall id and a thread id
+{
+ uint64 id;
+ uint64 threadID;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+OutputDataStream &operator << (OutputDataStream &stream, const SysCallData &ob);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+class SysCallCollector // Collects SysCallData records in a memory pool
+{
+ typedef MemoryPool<SysCallData, 1024 * 32> SysCallPool;
+public:
+ SysCallPool syscallPool; // public, unlike the pools of the other collectors in this header
+
+ SysCallData& Add();
+ void Clear();
+
+ bool Serialize(OutputDataStream& stream);
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct CallstackDesc // One captured callstack, as passed to CallstackCollector::Add / Core::ReportStackWalk
+{
+ uint64 threadID;
+ uint64 timestamp;
+ uint64* callstack; // presumably `count` entries - TODO confirm against CallstackCollector::Add
+ uint8 count; // 8-bit, so at most 255
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+class CallstackCollector // Accumulates callstacks in a packed uint64 pool and serializes them
+{
+ // Packed callstack list: {ThreadID, Timestamp, Count, Callstack[Count]}
+ typedef MemoryPool<uint64, 1024 * 32> CallstacksPool;
+ CallstacksPool callstacksPool;
+public:
+ void Add(const CallstackDesc& desc);
+ void Clear();
+
+ bool SerializeModules(OutputDataStream& stream); // three separate serialization steps: modules, symbols, callstacks
+ bool SerializeSymbols(OutputDataStream& stream);
+ bool SerializeCallstacks(OutputDataStream& stream);
+
+ bool IsEmpty() const;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct SwitchContextDesc // One context switch record, as passed to Core::ReportSwitchContext
+{
+ int64_t timestamp;
+ uint64 oldThreadId;
+ uint64 newThreadId;
+ uint8 cpuId;
+ uint8 reason;
+};
+//////////////////////////////////////////////////////////////////////////
+OutputDataStream &operator << (OutputDataStream &stream, const SwitchContextDesc &ob);
+//////////////////////////////////////////////////////////////////////////
+class SwitchContextCollector // Collects SwitchContextDesc records in a memory pool
+{
+ typedef MemoryPool<SwitchContextDesc, 1024 * 32> SwitchContextPool;
+ SwitchContextPool switchContextPool;
+public:
+ void Add(const SwitchContextDesc& desc);
+ void Clear();
+ bool Serialize(OutputDataStream& stream);
+};
+//////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct CaptureStatus // Scope for the status codes returned by Trace::Start and passed to Core::SendHandshakeResponse
+{
+ enum Type
+ {
+ OK = 0,
+ ERR_TRACER_ALREADY_EXISTS = 1,
+ ERR_TRACER_ACCESS_DENIED = 2,
+ ERR_TRACER_FAILED = 3,
+ ERR_TRACER_INVALID_PASSWORD = 4,
+ };
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+class Core // Central profiler state; accessed through the static Get() (see the thread-safety note there)
+{
+ std::recursive_mutex coreLock;
+ std::recursive_mutex threadsLock;
+
+ ThreadList threads;
+ FiberList fibers;
+
+ int64 progressReportedLastTimestampMS;
+
+ vector<EventTime> frames;
+ uint32 boardNumber;
+
+ CallstackCollector callstackCollector;
+ SwitchContextCollector switchContextCollector;
+
+ vector<std::pair<string, string>> summary; // key/value pairs (see AttachSummary)
+
+ std::atomic<uint32_t> frameNumber; // returned by GetCurrentFrame()
+
+ struct Attachment // a named, typed blob of bytes
+ {
+ string name;
+ vector<uint8_t> data;
+ File::Type type;
+ Attachment(File::Type t, const char* n) : name(n), type(t) {} // data starts empty
+ };
+ list<Attachment> attachments;
+
+ StateCallback stateCallback;
+
+ vector<ProcessDescription> processDescs;
+ vector<ThreadDescription> threadDescs;
+
+ array<const EventDescription*, FrameType::COUNT> frameDescriptions; // one description per frame type
+
+ State::Type currentState;
+ State::Type pendingState;
+
+ CaptureSettings settings;
+
+ void UpdateEvents();
+ uint32_t Update(); // called by the static NextFrame()
+ bool UpdateState();
+
+ Core(); // private: construction is restricted to the class itself
+ ~Core();
+
+ static Core notThreadSafeInstance;
+
+ void DumpCapturingProgress();
+ void SendHandshakeResponse(CaptureStatus::Type status);
+
+
+ void DumpEvents(EventStorage& entry, const EventTime& timeSlice, ScopeData& scope);
+ void DumpTags(EventStorage& entry, ScopeData& scope);
+ void DumpThread(ThreadEntry& entry, const EventTime& timeSlice, ScopeData& scope);
+ void DumpFiber(FiberEntry& entry, const EventTime& timeSlice, ScopeData& scope);
+
+ void CleanupThreadsAndFibers();
+
+ void DumpBoard(uint32 mode, EventTime timeSlice, uint32 mainThreadIndex);
+
+ void GenerateCommonSummary();
+public:
+ void Activate(Mode::Type mode);
+ volatile Mode::Type currentMode; // tested bitwise against a mode mask by IsActive()
+
+ // Active Frame (is used as buffer)
+ static OPTICK_THREAD_LOCAL EventStorage* storage;
+
+ // Resolves symbols
+ SymbolEngine* symbolEngine;
+
+ // Controls GPU activity
+ // Graphics graphics;
+
+ // System scheduler trace
+ Trace* tracer;
+
+ // SysCall Collector
+ SysCallCollector syscallCollector;
+
+ // GPU Profiler
+ GPUProfiler* gpuProfiler;
+
+ // Returns thread collection
+ const vector<ThreadEntry*>& GetThreads() const;
+
+ // Request to start a new capture
+ void StartCapture();
+
+ // Request to stop an active capture
+ void StopCapture();
+
+ // Request to cancel an active capture
+ void CancelCapture();
+
+ // Requests to dump current capture
+ void DumpCapture();
+
+ // Report switch context event
+ bool ReportSwitchContext(const SwitchContextDesc& desc);
+
+ // Report stack walk (callstack) event
+ bool ReportStackWalk(const CallstackDesc& desc);
+
+ // Serialize and send current profiling progress
+ void DumpProgress(const char* message = "");
+
+ // Too much time from last report
+ bool IsTimeToReportProgress() const;
+
+ // Serialize and send frames
+ void DumpFrames(uint32 mode = Mode::DEFAULT);
+
+ // Dumps the capture summary (NOTE(review): presumably serialized and sent like DumpFrames - confirm in the .cpp)
+ void DumpSummary();
+
+ // Registers thread and create EventStorage
+ ThreadEntry* RegisterThread(const ThreadDescription& description, EventStorage** slot);
+
+ // UnRegisters thread
+ bool UnRegisterThread(ThreadID threadId, bool keepAlive = false);
+
+ // Checks whether the thread is registered
+ bool IsRegistredThread(ThreadID id);
+
+ // Registers fiber and create EventStorage
+ bool RegisterFiber(const FiberDescription& description, EventStorage** slot);
+
+ // Registers ProcessDescription
+ bool RegisterProcessDescription(const ProcessDescription& description);
+
+ // Registers ThreadDescription (used for threads from other processes)
+ bool RegisterThreadDescription(const ThreadDescription& description);
+
+ // Sets state change callback
+ bool SetStateChangedCallback(StateCallback cb);
+
+ // Attaches a key-value pair to the next capture
+ bool AttachSummary(const char* key, const char* value);
+
+ // Attaches a file of the given type (from memory, a stream, or a narrow/wide path) to the current capture
+ bool AttachFile(File::Type type, const char* name, const uint8_t* data, uint32_t size);
+ bool AttachFile(File::Type type, const char* name, std::istream& stream);
+ bool AttachFile(File::Type type, const char* name, const char* path);
+ bool AttachFile(File::Type type, const char* name, const wchar_t* path);
+
+ // Initializes GPU profiler
+ void InitGPUProfiler(GPUProfiler* profiler);
+
+ // Applies capture settings (NOTE(review): the original comment mentioned the root password for the device - confirm what CaptureSettings carries)
+ bool SetSettings(const CaptureSettings& settings);
+
+ // Current Frame Number (since the game started)
+ uint32_t GetCurrentFrame() const { return frameNumber; }
+
+ // Returns Frame Description
+ const EventDescription* GetFrameDescription(FrameType::Type frame) const;
+
+ // NOT Thread Safe singleton (performance)
+ static Core& Get();
+
+ // Main Update Function
+ static uint32_t NextFrame() { return Get().Update(); }
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+
+#endif //USE_OPTICK \ No newline at end of file
diff --git a/external/optick/optick_core.linux.h b/external/optick/optick_core.linux.h
new file mode 100644
index 0000000..e0f4b49
--- /dev/null
+++ b/external/optick/optick_core.linux.h
@@ -0,0 +1,410 @@
+#pragma once
+#if defined(__linux__)
+
+#include "optick.config.h"
+#if USE_OPTICK
+
+#include "optick_core.platform.h"
+
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+namespace Optick
+{
+    // Name of the platform this header implements.
+    const char* Platform::GetName()
+    {
+        return "Linux";
+    }
+
+    // Kernel thread id of the caller, obtained with the gettid system call.
+    ThreadID Platform::GetThreadID()
+    {
+        const long tid = syscall(SYS_gettid);
+        return tid;
+    }
+
+    // Process id of the caller.
+    ProcessID Platform::GetProcessID()
+    {
+        const pid_t pid = getpid();
+        return (ProcessID)pid;
+    }
+
+    // Ticks per second of GetTime(): the clock is reported in nanoseconds.
+    int64 Platform::GetFrequency()
+    {
+        const int64 ticksPerSecond = 1000000000;
+        return ticksPerSecond;
+    }
+
+    // Current CLOCK_MONOTONIC time, in nanoseconds.
+    int64 Platform::GetTime()
+    {
+        struct timespec now;
+        clock_gettime(CLOCK_MONOTONIC, &now);
+
+        const long long wholeSecondsInNs = now.tv_sec * 1000000000LL;
+        return wholeSecondsInNs + now.tv_nsec;
+    }
+}
+
+#if OPTICK_ENABLE_TRACING
+
+#include "optick_memory.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+namespace ft // Plain structures describing parsed ftrace events (filled in by FTrace::Parse)
+{
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct base_event // Fields common to every event
+ {
+ int64_t timestamp; // -1 until set
+ short common_type; // numeric event type supplied by the derived event<TYPE>
+ uint8_t cpu_id; // uint8_t(-1), i.e. 255, until set
+ base_event(short type) : timestamp(-1), common_type(type), cpu_id(uint8_t(-1)) {}
+};
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ template<short TYPE>
+ struct event : public base_event // Binds a compile-time type id to base_event
+ {
+ static const short type = TYPE;
+ event() : base_event(TYPE) {}
+ };
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct process_state // Scope for the task state enum; the letters below are the codes FTrace::Parse maps from
+ {
+ enum type
+ {
+ Unknown,
+ //D Uninterruptible sleep(usually IO)
+ UninterruptibleSleep,
+ //R Running or runnable(on run queue)
+ Running,
+ //S Interruptible sleep(waiting for an event to complete)
+ InterruptibleSleep,
+ //T Stopped, either by a job control signal or because it is being traced.
+ Stopped,
+ //X dead(should never be seen)
+ Dead,
+ //Z Defunct(“zombie”) process, terminated but not reaped by its parent.
+ Zombie,
+ };
+ };
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct sched_switch : public event<305> // One sched_switch record; NOTE(review): 305 is presumably the ftrace event id - confirm
+ {
+ char prev_comm[16]; // task name of the thread being switched out
+ pid_t prev_pid;
+ int prev_prio;
+ process_state::type prev_state;
+ char next_comm[16]; // task name of the thread being switched in
+ pid_t next_pid;
+ int next_prio;
+ };
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+} // namespace ft
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace Optick
+{
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+static const char* KERNEL_TRACING_PATH = "/sys/kernel/debug/tracing"; // directory the FTRACE_* names below are relative to (FTrace::Stop joins them with '/')
+static const char* FTRACE_TRACE = "trace"; // trace output; FTrace writes "" to it to discard old data
+static const char* FTRACE_TRACING_ON = "tracing_on"; // on/off switch for tracing
+static const char* FTRACE_TRACE_CLOCK = "trace_clock"; // clock selection; FTrace::Start sets it to "mono"
+static const char* FTRACE_OPTIONS_IRQ_INFO = "options/irq-info"; // disabled by FTrace::Start
+static const char* FTRACE_SCHED_SWITCH = "events/sched/sched_switch/enable"; // enables sched_switch events
+static const uint8_t PROCESS_STATE_REASON_START = 38; // NOTE(review): not used in the visible code; presumably an offset for encoding process_state as a switch reason - confirm
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+class FTrace : public Trace // Trace implementation driven by the kernel ftrace files under KERNEL_TRACING_PATH
+{
+ bool isActive; // true between a successful Start() and the next Stop()
+ string password; // inserted into the sudo command line built by Stop()
+
+ bool Parse(const char* line); // parses one line of trace output
+ bool ProcessEvent(const ft::base_event& ev);
+
+ void Set(const char* name, bool value); // sets the tracing file `name` to a boolean value
+ void Set(const char* name, const char* value); // sets the tracing file `name` to a text value
+ void Exec(const char* cmd);
+public:
+
+ FTrace();
+ ~FTrace();
+
+ virtual void SetPassword(const char* pwd) override { password = pwd; }
+ virtual CaptureStatus::Type Start(Mode::Type mode, int frequency, const ThreadList& threads) override;
+ virtual bool Stop() override;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+FTrace g_FTrace;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Minimal forward-only cursor over a null-terminated line of text.
+// The parser does not own the text; it must outlive the parser.
+struct Parser
+{
+    const char* cursor; // current read position
+    const char* finish; // points at the terminating '\0'
+    size_t length;      // length of the whole input, in characters
+
+    Parser(const char* b) : cursor(b), finish(b + strlen(b)), length((size_t)(finish - cursor)) {}
+
+    // Advances by `count` characters. Succeeds only when strictly more than
+    // `count` characters remain, so the cursor never lands on the terminator.
+    bool Skip(size_t count)
+    {
+        if ((size_t)(finish - cursor) > count)
+        {
+            cursor += count;
+            return true;
+        }
+        return false;
+    }
+
+    // Moves the cursor just past the next occurrence of `text`; returns false
+    // (leaving the cursor untouched) when `text` is not found.
+    // When `output` is given, the characters skipped before `text` are copied
+    // into it, truncated to size - 1 and null-terminated. A zero `size` means
+    // there is no room even for the terminator, so nothing is written.
+    bool Skip(const char* text, char* output = nullptr, size_t size = 0)
+    {
+        if (const char* ptr = strstr(cursor, text))
+        {
+            if (output != nullptr && size > 0)
+            {
+                size_t count = std::min(size - 1, (size_t)(ptr - cursor));
+                strncpy(output, cursor, count);
+                output[count] = '\0';
+            }
+            cursor = ptr + strlen(text);
+            return true;
+        }
+        return false;
+    }
+
+    // Advances past spaces, tabs and newlines.
+    void SkipSpaces()
+    {
+        while (cursor != finish && (*cursor == ' ' || *cursor == '\t' || *cursor == '\n'))
+            ++cursor;
+    }
+
+    // True when the remaining text begins with `text`.
+    bool Starts(const char* text) const
+    {
+        return strncmp(cursor, text, strlen(text)) == 0;
+    }
+
+    // Integer at the cursor, as parsed by atoi; the cursor is not moved.
+    int GetInt() const
+    {
+        return atoi(cursor);
+    }
+
+    // Character at the cursor; the cursor is not moved.
+    char GetChar() const
+    {
+        return *cursor;
+    }
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Configures ftrace and switches tracing on. Calling it while already active changes nothing.
+// The frequency and thread list are ignored. Always reports CaptureStatus::OK.
+CaptureStatus::Type FTrace::Start(Mode::Type mode, int /*frequency*/, const ThreadList& /*threads*/)
+{
+    if (isActive)
+        return CaptureStatus::OK;
+
+    // Make sure tracing is off while it is being reconfigured
+    Set(FTRACE_TRACING_ON, false);
+
+    // Drop whatever an earlier session left in the trace buffer
+    Set(FTRACE_TRACE, "");
+
+    // Select the clock used for timestamps
+    Set(FTRACE_TRACE_CLOCK, "mono");
+
+    // Turn off the irq-info option
+    Set(FTRACE_OPTIONS_IRQ_INFO, false);
+
+    // Context switch events are recorded only when the mode asks for them
+    const bool wantSwitchContext = (mode & Mode::SWITCH_CONTEXT) != 0;
+    Set(FTRACE_SCHED_SWITCH, wantSwitchContext);
+
+    // Everything is configured: start tracing
+    Set(FTRACE_TRACING_ON, true);
+    isActive = true;
+
+    return CaptureStatus::OK;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Stops tracing, reads the collected trace through `sudo cat` and feeds every line to Parse().
+// Returns false when tracing was not active, true otherwise.
+bool FTrace::Stop()
+{
+    if (!isActive)
+    {
+        return false;
+    }
+
+    // Reset variables
+    Set(FTRACE_TRACING_ON, false);
+    Set(FTRACE_SCHED_SWITCH, false);
+
+    // Parsing the output
+    // NOTE(review): the password is pasted into a shell command line unescaped. A password
+    // containing a single quote breaks the command or injects shell syntax, and a long one
+    // may not fit the 256-byte buffer. Left unchanged here; needs a dedicated fix.
+    char buffer[256] = { 0 };
+    sprintf_s(buffer, "echo \'%s\' | sudo -S sh -c \'cat %s/%s\'", password.c_str(), KERNEL_TRACING_PATH, FTRACE_TRACE);
+    if (FILE* pipe = popen(buffer, "r"))
+    {
+        char* line = NULL;
+        size_t len = 0;
+        while ((getline(&line, &len, pipe)) != -1)
+            Parse(line);
+
+        // getline allocates (and grows) the line buffer; the caller must release it
+        free(line);
+
+        // A stream opened with popen must be closed with pclose, which also waits for the child process
+        pclose(pipe);
+    }
+
+    // Cleanup data
+    Set(FTRACE_TRACE, "");
+
+    isActive = false;
+
+    return true;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Parses one line of the ftrace text output.
+// Comment lines ('#') and events other than sched_switch are accepted and ignored (true).
+// A sched_switch line is decoded and handed to ProcessEvent(), whose result is returned.
+// Returns false as soon as an expected token is missing.
+bool FTrace::Parse(const char * line)
+{
+    // sched_switch:
+    // ConsoleApp-8687 [000] 181944.352057: sched_switch: prev_comm=ConsoleApp prev_pid=8687 prev_prio=120 prev_state=S ==> next_comm=ConsoleApp next_pid=8686 next_prio=120
+
+    Parser p(line);
+    if (p.Starts("#"))
+        return true;
+
+    // presumably skips the leading task-pid column - confirm against Parser::Skip(int)
+    if (!p.Skip(16))
+        return false;
+
+    if (!p.Skip("["))
+        return false;
+
+    int cpu = p.GetInt();
+    if (!p.Skip("]"))
+        return false;
+
+    int64 timestampInt = p.GetInt();
+    if (!p.Skip("."))
+        return false;
+
+    int64 timestampFrac = p.GetInt();
+    if (!p.Skip(": "))
+        return false;
+
+    // The fractional part is combined as millionths of the integer part (six digits
+    // in the sample above), and the sum is then scaled by 1000.
+    int64 timestamp = ((timestampInt * 1000000) + timestampFrac) * 1000;
+
+    if (p.Starts("sched_switch:"))
+    {
+        ft::sched_switch ev;
+        ev.cpu_id = cpu;
+        ev.timestamp = timestamp;
+
+        if (!p.Skip("prev_comm="))
+            return false;
+
+        if (!p.Skip(" prev_pid=", ev.prev_comm, OPTICK_ARRAY_SIZE(ev.prev_comm)))
+            return false;
+
+        ev.prev_pid = p.GetInt();
+
+        if (!p.Skip(" prev_prio="))
+            return false;
+
+        ev.prev_prio = p.GetInt();
+
+        if (!p.Skip(" prev_state="))
+            return false;
+
+        // Translate the one-letter state code of the outgoing task.
+        switch (p.GetChar())
+        {
+        case 'D':
+            ev.prev_state = ft::process_state::UninterruptibleSleep;
+            break;
+
+        case 'R':
+            ev.prev_state = ft::process_state::Running;
+            break;
+
+        case 'S':
+            ev.prev_state = ft::process_state::InterruptibleSleep;
+            break;
+
+        case 'T':
+            ev.prev_state = ft::process_state::Stopped;
+            break;
+
+        case 'X':
+            ev.prev_state = ft::process_state::Dead;
+            break;
+
+        case 'Z':
+            ev.prev_state = ft::process_state::Zombie;
+            break;
+
+        default:
+            ev.prev_state = ft::process_state::Unknown;
+            break;
+        }
+
+        if (!p.Skip("==> next_comm="))
+            return false;
+
+        // The destination is next_comm, so its own capacity bounds the copy.
+        if (!p.Skip(" next_pid=", ev.next_comm, OPTICK_ARRAY_SIZE(ev.next_comm)))
+            return false;
+
+        ev.next_pid = p.GetInt();
+
+        if (!p.Skip(" next_prio="))
+            return false;
+
+        ev.next_prio = p.GetInt();
+
+        return ProcessEvent(ev);
+    }
+    return true;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Forwards a parsed ftrace event to the profiler core.
+// Only sched_switch events are handled; any other event type yields false.
+bool FTrace::ProcessEvent(const ft::base_event& ev)
+{
+    if (ev.common_type != ft::sched_switch::type)
+        return false;
+
+    const ft::sched_switch& switchEv = (const ft::sched_switch&)ev;
+
+    SwitchContextDesc desc;
+    desc.timestamp = switchEv.timestamp;
+    desc.cpuId = switchEv.cpu_id;
+    desc.oldThreadId = (uint64)switchEv.prev_pid;
+    desc.newThreadId = (uint64)switchEv.next_pid;
+    // The previous task's state is offset by PROCESS_STATE_REASON_START to form the reason code.
+    desc.reason = switchEv.prev_state + PROCESS_STATE_REASON_START;
+
+    Core::Get().ReportSwitchContext(desc);
+    return true;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Boolean convenience overload: writes "1" or "0" through the string overload.
+void FTrace::Set(const char * name, bool value)
+{
+    const char* text = "0";
+    if (value)
+        text = "1";
+    Set(name, text);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Builds `echo <value> > KERNEL_TRACING_PATH/<name>` and runs it through Exec().
+void FTrace::Set(const char* name, const char* value)
+{
+    char command[256] = { 0 };
+    sprintf_s(command, "echo %s > %s/%s", value, KERNEL_TRACING_PATH, name);
+    Exec(command);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Runs `cmd` in a root shell: the stored password is piped into `sudo -S sh -c '<cmd>'`.
+// The exit status of the command is not inspected.
+// NOTE(review): password and cmd are inserted into the shell line unescaped and the
+// result is limited to the 256-byte buffer - confirm both are acceptable.
+void FTrace::Exec(const char* cmd)
+{
+    char shellLine[256] = { 0 };
+    sprintf_s(shellLine, "echo \'%s\' | sudo -S sh -c \'%s\'", password.c_str(), cmd);
+    std::system(shellLine);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Creates a tracer with no capture in progress.
+FTrace::FTrace()
+    : isActive(false)
+{
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Ends any capture that is still active before the tracer goes away; the result of Stop() is ignored.
+FTrace::~FTrace()
+{
+    (void)Stop();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Hands out the global ftrace-based tracer.
+Trace* Platform::GetTrace()
+{
+    Trace* tracer = &g_FTrace;
+    return tracer;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// No symbol engine is provided here; callers always receive nullptr.
+SymbolEngine* Platform::GetSymbolEngine()
+{
+    SymbolEngine* engine = nullptr;
+    return engine;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+#endif //OPTICK_ENABLE_TRACING
+#endif //USE_OPTICK
+#endif //__linux__ \ No newline at end of file
diff --git a/external/optick/optick_core.macos.h b/external/optick/optick_core.macos.h
new file mode 100644
index 0000000..3d19bfd
--- /dev/null
+++ b/external/optick/optick_core.macos.h
@@ -0,0 +1,289 @@
+#pragma once
+#if defined(__APPLE_CC__)
+
+#include "optick.config.h"
+#if USE_OPTICK
+
+#include "optick_core.platform.h"
+
+#include <mach/mach_time.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <pthread.h>
+#include <unistd.h>
+
+namespace Optick
+{
+ // Name of the platform this header implements.
+ const char* Platform::GetName()
+ {
+     const char* const platformName = "MacOS";
+     return platformName;
+ }
+
+ // Id of the calling thread as reported by pthread_threadid_np().
+ ThreadID Platform::GetThreadID()
+ {
+     uint64_t threadId;
+     const pthread_t self = pthread_self();
+     pthread_threadid_np(self, &threadId);
+     return threadId;
+ }
+
+ // Id of the current process, from getpid().
+ ProcessID Platform::GetProcessID()
+ {
+     const pid_t pid = getpid();
+     return (ProcessID)pid;
+ }
+
+ // Ticks per second of GetTime(): that function reports nanoseconds, so 10^9.
+ int64 Platform::GetFrequency()
+ {
+     const int64 ticksPerSecond = 1000000000;
+     return ticksPerSecond;
+ }
+
+ // Current CLOCK_REALTIME reading expressed in nanoseconds.
+ int64 Platform::GetTime()
+ {
+     struct timespec now;
+     clock_gettime(CLOCK_REALTIME, &now);
+     const long long wholeSeconds = now.tv_sec * 1000000000LL;
+     return wholeSeconds + now.tv_nsec;
+ }
+}
+
+#if OPTICK_ENABLE_TRACING
+
+#include "optick_core.h"
+
+namespace Optick
+{
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Context-switch tracer that runs `dtrace` under sudo on a worker thread and
+// turns its output into switch-context reports.
+class DTrace : public Trace
+{
+    // When true, stderr of the spawned commands is redirected to /dev/null.
+    static const bool isSilent = true;
+
+    // Worker that reads and parses the dtrace output (see AsyncProcess / Process).
+    std::thread processThread;
+    // Password piped to `sudo -S`.
+    string password;
+
+    enum State
+    {
+        STATE_IDLE,
+        STATE_RUNNING,
+        STATE_ABORT,
+    };
+
+    // Both fields are written by Start()/Stop() and read by the worker thread.
+    // NOTE(review): `volatile` does not provide inter-thread synchronization - consider std::atomic.
+    volatile State state;
+    volatile int64 timeout;
+
+    // Last thread seen on one CPU core.
+    struct CoreState
+    {
+        ProcessID pid;
+        ThreadID tid;
+        int prio;
+        bool IsValid() const { return tid != INVALID_THREAD_ID; }
+        CoreState() : pid(INVALID_PROCESS_ID), tid(INVALID_THREAD_ID), prio(0) {}
+    };
+    static const int MAX_CPU_CORES = 256;
+    array<CoreState, MAX_CPU_CORES> cores;
+
+    // Thread entry point; simply calls trace->Process().
+    static void AsyncProcess(DTrace* trace);
+    void Process();
+
+    bool CheckRootAccess();
+
+    enum ParseResult
+    {
+        PARSE_OK,
+        PARSE_TIMEOUT,
+        PARSE_FAILED,
+    };
+    ParseResult Parse(const char* line);
+public:
+
+    DTrace();
+    // Destroying a joinable std::thread terminates the program, so make sure a
+    // capture that is still running is stopped (and its worker joined) first.
+    ~DTrace() { Stop(); }
+
+    virtual void SetPassword(const char* pwd) override { password = pwd; }
+    virtual CaptureStatus::Type Start(Mode::Type mode, int frequency, const ThreadList& threads) override;
+    virtual bool Stop() override;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+DTrace g_DTrace; // Single global tracer instance; Platform::GetTrace() returns its address.
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Starts out idle with a zero timeout.
+DTrace::DTrace()
+    : state(STATE_IDLE)
+    , timeout(0)
+{
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Checks the stored password by running a trivial `sudo -S echo`;
+// true when the command exits with status 0.
+bool DTrace::CheckRootAccess()
+{
+    const char* redirect = isSilent ? "2> /dev/null" : "";
+
+    char command[256] = { 0 };
+    sprintf_s(command, "echo \'%s\' | sudo -S echo %s", password.c_str(), redirect);
+
+    const int exitStatus = system(command);
+    return exitStatus == 0;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Launches the dtrace worker when the tracer is idle and switch-context capture was requested.
+// Returns ERR_TRACER_INVALID_PASSWORD when the sudo check fails, OK in every other case
+// (including when nothing had to be started).
+CaptureStatus::Type DTrace::Start(Mode::Type mode, int /*frequency*/, const ThreadList& /*threads*/)
+{
+    const bool wantsSwitches = (mode & Mode::SWITCH_CONTEXT) != 0;
+    if (state != STATE_IDLE || !wantsSwitches)
+        return CaptureStatus::OK;
+
+    if (!CheckRootAccess())
+        return CaptureStatus::ERR_TRACER_INVALID_PASSWORD;
+
+    state = STATE_RUNNING;
+    timeout = INT64_MAX;            // no deadline until Stop() sets one
+    cores.fill(CoreState());        // forget the per-core history of earlier captures
+    processThread = std::thread(AsyncProcess, this);
+
+    return CaptureStatus::OK;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Ends a running capture: sets the deadline to "now", waits for the worker to finish
+// and returns to the idle state. Returns false when no capture was running.
+bool DTrace::Stop()
+{
+    if (state == STATE_RUNNING)
+    {
+        // The worker quits once it sees an event newer than this time.
+        timeout = Platform::GetTime();
+        processThread.join();
+        state = STATE_IDLE;
+        return true;
+    }
+
+    return false;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// popen()-style helper that also reports the pid of the spawned shell.
+//   program - command line executed via `/bin/sh -c`
+//   type    - "r" to read the child's stdout, "w" to write to its stdin
+//   outPid  - optional; receives the child's pid once the fork succeeded
+// Returns the stream connected to the child, or nullptr on failure
+// (errno is EINVAL for a bad `type`). The caller closes the stream and waits for the pid.
+FILE* popen2(const char *program, const char *type, pid_t* outPid)
+{
+    if ((*type != 'r' && *type != 'w') || type[1] != '\0') {
+        errno = EINVAL;
+        return nullptr;
+    }
+    const bool reading = (*type == 'r');
+
+    int pdes[2];
+    if (pipe(pdes) < 0) {
+        return nullptr;
+    }
+
+    // pdes[0] is the read end, pdes[1] the write end.
+    const int parentEnd = reading ? pdes[0] : pdes[1];
+    const int childEnd = reading ? pdes[1] : pdes[0];
+
+    const pid_t pid = fork();
+    if (pid == -1) {
+        /* Error: keep fork's errno intact while cleaning up. */
+        const int savedErrno = errno;
+        (void)close(pdes[0]);
+        (void)close(pdes[1]);
+        errno = savedErrno;
+        return nullptr;
+    }
+
+    if (pid == 0) {
+        /* Child: wire the pipe to stdout/stdin and run the command. */
+        const int target = reading ? STDOUT_FILENO : STDIN_FILENO;
+        (void)close(parentEnd);
+        if (childEnd != target) {
+            (void)dup2(childEnd, target);
+            (void)close(childEnd);
+        }
+        // The variadic terminator must be a null *pointer*, hence the cast.
+        execl("/bin/sh", "sh", "-c", program, (char*)NULL);
+        perror("execl");
+        // _exit, not exit: the forked child must not run the parent's atexit
+        // handlers or flush its copied stdio buffers.
+        _exit(1);
+    }
+
+    /* Parent. */
+    (void)close(childEnd);
+
+    if (outPid)
+        *outPid = pid;
+
+    FILE* iop = fdopen(parentEnd, type);
+    if (iop == nullptr) {
+        // Do not leak the descriptor when no stream could be created.
+        (void)close(parentEnd);
+    }
+
+    return iop;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Worker body: runs dtrace under sudo, parses its output line by line until the
+// state leaves STATE_RUNNING, the stream ends or Parse() reports a timeout,
+// then closes the pipe and waits for the child process.
+void DTrace::Process()
+{
+    const char* command = "dtrace -n fbt::thread_dispatch:return'\\''{printf(\"@%d %d %d %d\", pid, tid, curthread->sched_pri, walltimestamp)}'\\''";
+
+    char buffer[256] = { 0 };
+    sprintf_s(buffer, "echo \'%s\' | sudo -S sh -c \'%s\' %s", password.c_str(), command, isSilent ? "2> /dev/null" : "");
+    pid_t pid;
+    if (FILE* pipe = popen2(buffer, "r", &pid))
+    {
+        char* line = nullptr;
+        size_t len = 0;
+        while (state == STATE_RUNNING && (getline(&line, &len, pipe)) != -1)
+        {
+            if (Parse(line) == PARSE_TIMEOUT)
+                break;
+        }
+        // getline() owns no memory after the loop; the buffer it allocated is ours to free.
+        free(line);
+        fclose(pipe);
+
+        int internal_stat;
+        waitpid(pid, &internal_stat, 0);
+    }
+    else
+    {
+        OPTICK_FAILED("Failed to open communication pipe!");
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Parses one line of dtrace output. The CPU number is read from the start of the
+// line and the record "@<pid> <tid> <prio> <timestamp>" from the '@' marker.
+// Returns PARSE_TIMEOUT when the event is newer than `timeout`,
+//         PARSE_OK      when the record was consumed,
+//         PARSE_FAILED  when the line has no record or the record is malformed.
+// A switch is reported only when a previous thread is known for that CPU.
+DTrace::ParseResult DTrace::Parse(const char* line)
+{
+    const char* cmd = strchr(line, '@');
+    if (cmd == nullptr)
+        return PARSE_FAILED;
+
+    // Reject CPU numbers that would index outside `cores`.
+    const int cpu = atoi(line);
+    if (cpu < 0 || cpu >= MAX_CPU_CORES)
+        return PARSE_FAILED;
+
+    // Steps to the character after the next space; nullptr when the field is missing.
+    const auto nextField = [](const char* text) -> const char*
+    {
+        const char* space = strchr(text, ' ');
+        return space != nullptr ? space + 1 : nullptr;
+    };
+
+    CoreState currState;
+
+    currState.pid = atoi(cmd + 1);
+    cmd = nextField(cmd);
+    if (cmd == nullptr)
+        return PARSE_FAILED;
+
+    currState.tid = atoi(cmd);
+    cmd = nextField(cmd);
+    if (cmd == nullptr)
+        return PARSE_FAILED;
+
+    currState.prio = atoi(cmd);
+    cmd = nextField(cmd);
+    if (cmd == nullptr)
+        return PARSE_FAILED;
+
+    int64_t timestamp = (int64_t)atoll(cmd);
+
+    if (timestamp > timeout)
+        return PARSE_TIMEOUT;
+
+    const CoreState& prevState = cores[cpu];
+
+    if (prevState.IsValid())
+    {
+        SwitchContextDesc desc;
+        desc.reason = 0;
+        desc.cpuId = cpu;
+        desc.oldThreadId = prevState.tid;
+        desc.newThreadId = currState.tid;
+        desc.timestamp = timestamp;
+        Core::Get().ReportSwitchContext(desc);
+    }
+
+    // Remember who runs on this core for the next switch.
+    cores[cpu] = currState;
+
+    return PARSE_OK;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Thread entry point: runs the processing loop of the given tracer.
+void DTrace::AsyncProcess(DTrace *trace)
+{
+    DTrace& self = *trace;
+    self.Process();
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Hands out the global dtrace-based tracer.
+Trace* Platform::GetTrace()
+{
+    Trace* tracer = &g_DTrace;
+    return tracer;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// No symbol engine is provided here; callers always receive nullptr.
+SymbolEngine* Platform::GetSymbolEngine()
+{
+    SymbolEngine* engine = nullptr;
+    return engine;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+#endif //OPTICK_ENABLE_TRACING
+#endif //USE_OPTICK
+#endif //__APPLE_CC__ \ No newline at end of file
diff --git a/external/optick/optick_core.platform.h b/external/optick/optick_core.platform.h
new file mode 100644
index 0000000..683376d
--- /dev/null
+++ b/external/optick/optick_core.platform.h
@@ -0,0 +1,92 @@
+#pragma once
+#include "optick.config.h"
+
+#if USE_OPTICK
+
+#include "optick_common.h"
+#include "optick_memory.h"
+
+//////////////////////////////////////////////////////////////////////////
+// Platform-specific stuff
+//////////////////////////////////////////////////////////////////////////
+namespace Optick
+{
+ struct Trace;
+ struct Module;
+ struct Symbol;
+ struct SymbolEngine;
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // Platform API
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // Static entry points that every platform header implements.
+ struct Platform
+ {
+     // Name of the platform
+     static OPTICK_INLINE const char* GetName();
+
+     // System id of the calling thread
+     static OPTICK_INLINE ThreadID GetThreadID();
+
+     // Id of the current process
+     static OPTICK_INLINE ProcessID GetProcessID();
+
+     // Frequency paired with GetTime()
+     static OPTICK_INLINE int64 GetFrequency();
+
+     // Current time in ticks
+     static OPTICK_INLINE int64 GetTime();
+
+     // System tracer of this platform
+     static OPTICK_INLINE Trace* GetTrace();
+
+     // Symbol resolver of this platform
+     static OPTICK_INLINE SymbolEngine* GetSymbolEngine();
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // Tracing API
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // Interface of a system tracer.
+ struct Trace
+ {
+     // Optional hook for tracers that need a password; the default ignores it.
+     virtual void SetPassword(const char* /*pwd*/) {}
+
+     // Begins a capture.
+     virtual CaptureStatus::Type Start(Mode::Type mode, int frequency, const ThreadList& threads) = 0;
+
+     // Ends a capture.
+     virtual bool Stop() = 0;
+
+     virtual ~Trace() {}
+ };
+
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // Symbol API
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // A loaded module: its path, address and size.
+ struct Module
+ {
+     string path;
+     void* address;
+     size_t size;
+
+     Module(const char* p, void* a, size_t s)
+         : path(p)
+         , address(a)
+         , size(s)
+     {
+     }
+ };
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // A resolved symbol; the numeric fields start at zero and the strings start empty.
+ struct Symbol
+ {
+     uint64 address;
+     uint64 offset;
+     wstring file;
+     wstring function;
+     uint32 line;
+
+     Symbol() : address(0), offset(0), line(0) {}
+ };
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // Interface of a symbol resolver.
+ struct SymbolEngine
+ {
+     // List of loaded modules
+     virtual const vector<Module>& GetModules() = 0;
+
+     // Symbol for the given address
+     virtual const Symbol* GetSymbol(uint64 dwAddress) = 0;
+
+     virtual ~SymbolEngine() {}
+ };
+}
+//////////////////////////////////////////////////////////////////////////
+
+#endif //USE_OPTICK \ No newline at end of file
diff --git a/external/optick/optick_core.win.h b/external/optick/optick_core.win.h
new file mode 100644
index 0000000..0d8a11a
--- /dev/null
+++ b/external/optick/optick_core.win.h
@@ -0,0 +1,1664 @@
+#pragma once
+#if defined(_MSC_VER)
+
+#include "optick.config.h"
+
+#if USE_OPTICK
+
+#include "optick_core.platform.h"
+
+namespace Optick
+{
+ // Platform name: "Windows" when OPTICK_PC is defined, "XBox" otherwise.
+ const char* Platform::GetName()
+ {
+ #if defined(OPTICK_PC)
+     const char* const platformName = "Windows";
+ #else
+     const char* const platformName = "XBox";
+ #endif
+     return platformName;
+ }
+
+ // Id of the calling thread, from GetCurrentThreadId().
+ ThreadID Platform::GetThreadID()
+ {
+     const DWORD threadId = GetCurrentThreadId();
+     return threadId;
+ }
+
+ // Id of the current process, from GetCurrentProcessId().
+ ProcessID Platform::GetProcessID()
+ {
+     const DWORD processId = GetCurrentProcessId();
+     return processId;
+ }
+
+ // Frequency of the performance counter read by GetTime().
+ int64 Platform::GetFrequency()
+ {
+     LARGE_INTEGER ticksPerSecond;
+     QueryPerformanceFrequency(&ticksPerSecond);
+     return ticksPerSecond.QuadPart;
+ }
+
+ // Current value of the performance counter, in ticks.
+ int64 Platform::GetTime()
+ {
+     LARGE_INTEGER counter;
+     QueryPerformanceCounter(&counter);
+     return counter.QuadPart;
+ }
+}
+
+#if OPTICK_ENABLE_TRACING
+#include <psapi.h>
+#include "optick_core.h"
+
+/*
+Event Tracing Functions - API
+https://msdn.microsoft.com/en-us/library/windows/desktop/aa363795(v=vs.85).aspx
+*/
+
+#define DECLARE_ETW (!OPTICK_PC)
+
+#if DECLARE_ETW
+// Copied from Windows SDK
+#ifndef WMIAPI
+#ifndef MIDL_PASS
+#ifdef _WMI_SOURCE_
+#define WMIAPI __stdcall
+#else
+#define WMIAPI DECLSPEC_IMPORT __stdcall
+#endif // _WMI_SOURCE
+#endif // MIDL_PASS
+#endif // WMIAPI
+#define INITGUID
+#include <guiddef.h>
+#if defined(_NTDDK_) || defined(_NTIFS_) || defined(_WMIKM_)
+#define _EVNTRACE_KERNEL_MODE
+#endif
+#if !defined(_EVNTRACE_KERNEL_MODE)
+#include <wmistr.h>
+#endif
+
+#if _MSC_VER <= 1600
+#define EVENT_DESCRIPTOR_DEF
+#define EVENT_HEADER_DEF
+#define EVENT_HEADER_EXTENDED_DATA_ITEM_DEF
+#define EVENT_RECORD_DEF
+#endif
+
+#ifndef _TRACEHANDLE_DEFINED
+#define _TRACEHANDLE_DEFINED
+typedef ULONG64 TRACEHANDLE, *PTRACEHANDLE;
+#endif
+
+//
+// EventTraceGuid is used to identify a event tracing session
+//
+DEFINE_GUID( /* 68fdd900-4a3e-11d1-84f4-0000f80464e3 */
+ EventTraceGuid,
+ 0x68fdd900,
+ 0x4a3e,
+ 0x11d1,
+ 0x84, 0xf4, 0x00, 0x00, 0xf8, 0x04, 0x64, 0xe3
+);
+
+//
+// SystemTraceControlGuid. Used to specify event tracing for kernel
+//
+DEFINE_GUID( /* 9e814aad-3204-11d2-9a82-006008a86939 */
+ SystemTraceControlGuid,
+ 0x9e814aad,
+ 0x3204,
+ 0x11d2,
+ 0x9a, 0x82, 0x00, 0x60, 0x08, 0xa8, 0x69, 0x39
+);
+
+//
+// EventTraceConfigGuid. Used to report system configuration records
+//
+DEFINE_GUID( /* 01853a65-418f-4f36-aefc-dc0f1d2fd235 */
+ EventTraceConfigGuid,
+ 0x01853a65,
+ 0x418f,
+ 0x4f36,
+ 0xae, 0xfc, 0xdc, 0x0f, 0x1d, 0x2f, 0xd2, 0x35
+);
+
+//
+// DefaultTraceSecurityGuid. Specifies the default event tracing security
+//
+DEFINE_GUID( /* 0811c1af-7a07-4a06-82ed-869455cdf713 */
+ DefaultTraceSecurityGuid,
+ 0x0811c1af,
+ 0x7a07,
+ 0x4a06,
+ 0x82, 0xed, 0x86, 0x94, 0x55, 0xcd, 0xf7, 0x13
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+#define PROCESS_TRACE_MODE_REAL_TIME 0x00000100
+#define PROCESS_TRACE_MODE_RAW_TIMESTAMP 0x00001000
+#define PROCESS_TRACE_MODE_EVENT_RECORD 0x10000000
+///////////////////////////////////////////////////////////////////////////////
+#define EVENT_HEADER_FLAG_EXTENDED_INFO 0x0001
+#define EVENT_HEADER_FLAG_PRIVATE_SESSION 0x0002
+#define EVENT_HEADER_FLAG_STRING_ONLY 0x0004
+#define EVENT_HEADER_FLAG_TRACE_MESSAGE 0x0008
+#define EVENT_HEADER_FLAG_NO_CPUTIME 0x0010
+#define EVENT_HEADER_FLAG_32_BIT_HEADER 0x0020
+#define EVENT_HEADER_FLAG_64_BIT_HEADER 0x0040
+#define EVENT_HEADER_FLAG_CLASSIC_HEADER 0x0100
+#define EVENT_HEADER_FLAG_PROCESSOR_INDEX 0x0200
+///////////////////////////////////////////////////////////////////////////////
+#define KERNEL_LOGGER_NAMEW L"NT Kernel Logger"
+///////////////////////////////////////////////////////////////////////////////
+#define EVENT_TRACE_REAL_TIME_MODE 0x00000100 // Real time mode on
+///////////////////////////////////////////////////////////////////////////////
+#define EVENT_TRACE_CONTROL_STOP 1
+///////////////////////////////////////////////////////////////////////////////
+
+//
+// Enable flags for Kernel Events
+//
+#define EVENT_TRACE_FLAG_PROCESS 0x00000001 // process start & end
+#define EVENT_TRACE_FLAG_THREAD 0x00000002 // thread start & end
+#define EVENT_TRACE_FLAG_IMAGE_LOAD 0x00000004 // image load
+
+#define EVENT_TRACE_FLAG_DISK_IO 0x00000100 // physical disk IO
+#define EVENT_TRACE_FLAG_DISK_FILE_IO 0x00000200 // requires disk IO
+
+#define EVENT_TRACE_FLAG_MEMORY_PAGE_FAULTS 0x00001000 // all page faults
+#define EVENT_TRACE_FLAG_MEMORY_HARD_FAULTS 0x00002000 // hard faults only
+
+#define EVENT_TRACE_FLAG_NETWORK_TCPIP 0x00010000 // tcpip send & receive
+
+#define EVENT_TRACE_FLAG_REGISTRY 0x00020000 // registry calls
+#define EVENT_TRACE_FLAG_DBGPRINT 0x00040000 // DbgPrint(ex) Calls
+
+//
+// Enable flags for Kernel Events on Vista and above
+//
+#define EVENT_TRACE_FLAG_PROCESS_COUNTERS 0x00000008 // process perf counters
+#define EVENT_TRACE_FLAG_CSWITCH 0x00000010 // context switches
+#define EVENT_TRACE_FLAG_DPC 0x00000020 // deferred procedure calls
+#define EVENT_TRACE_FLAG_INTERRUPT 0x00000040 // interrupts
+#define EVENT_TRACE_FLAG_SYSTEMCALL 0x00000080 // system calls
+
+#define EVENT_TRACE_FLAG_DISK_IO_INIT 0x00000400 // physical disk IO initiation
+#define EVENT_TRACE_FLAG_ALPC 0x00100000 // ALPC traces
+#define EVENT_TRACE_FLAG_SPLIT_IO 0x00200000 // split io traces (VolumeManager)
+
+#define EVENT_TRACE_FLAG_DRIVER 0x00800000 // driver delays
+#define EVENT_TRACE_FLAG_PROFILE 0x01000000 // sample based profiling
+#define EVENT_TRACE_FLAG_FILE_IO 0x02000000 // file IO
+#define EVENT_TRACE_FLAG_FILE_IO_INIT 0x04000000 // file IO initiation
+
+#define EVENT_TRACE_FLAG_PMC_PROFILE 0x80000000 // sample based profiling (PMC) - NOT CONFIRMED!
+
+//
+// Enable flags for Kernel Events on Win7 and above
+//
+#define EVENT_TRACE_FLAG_DISPATCHER 0x00000800 // scheduler (ReadyThread)
+#define EVENT_TRACE_FLAG_VIRTUAL_ALLOC 0x00004000 // VM operations
+
+//
+// Enable flags for Kernel Events on Win8 and above
+//
+#define EVENT_TRACE_FLAG_VAMAP 0x00008000 // map/unmap (excluding images)
+#define EVENT_TRACE_FLAG_NO_SYSCONFIG 0x10000000 // Do not do sys config rundown
+
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma warning(push)
+#pragma warning (disable:4201)
+
+#ifndef EVENT_DESCRIPTOR_DEF
+#define EVENT_DESCRIPTOR_DEF
+typedef struct _EVENT_DESCRIPTOR { // Local copy of the Windows SDK type (only compiled when DECLARE_ETW is set); embedded in EVENT_HEADER as EventDescriptor.
+
+ USHORT Id;
+ UCHAR Version;
+ UCHAR Channel;
+ UCHAR Level;
+ UCHAR Opcode;
+ USHORT Task;
+ ULONGLONG Keyword;
+
+} EVENT_DESCRIPTOR, *PEVENT_DESCRIPTOR;
+typedef const EVENT_DESCRIPTOR *PCEVENT_DESCRIPTOR;
+#endif
+///////////////////////////////////////////////////////////////////////////////
+#ifndef EVENT_HEADER_DEF
+#define EVENT_HEADER_DEF
+typedef struct _EVENT_HEADER { // Local copy of the Windows SDK type (only compiled when DECLARE_ETW is set); first member of EVENT_RECORD.
+
+ USHORT Size;
+ USHORT HeaderType;
+ USHORT Flags; // presumably a combination of the EVENT_HEADER_FLAG_* values defined above - confirm
+ USHORT EventProperty;
+ ULONG ThreadId;
+ ULONG ProcessId;
+ LARGE_INTEGER TimeStamp;
+ GUID ProviderId;
+ EVENT_DESCRIPTOR EventDescriptor;
+ union {
+ struct {
+ ULONG KernelTime;
+ ULONG UserTime;
+ } DUMMYSTRUCTNAME;
+ ULONG64 ProcessorTime;
+
+ } DUMMYUNIONNAME;
+ GUID ActivityId;
+
+} EVENT_HEADER, *PEVENT_HEADER;
+#endif
+///////////////////////////////////////////////////////////////////////////////
+#ifndef EVENT_HEADER_EXTENDED_DATA_ITEM_DEF
+#define EVENT_HEADER_EXTENDED_DATA_ITEM_DEF
+typedef struct _EVENT_HEADER_EXTENDED_DATA_ITEM { // Local copy of the Windows SDK type (only compiled when DECLARE_ETW is set); EVENT_RECORD::ExtendedData points to items of this type.
+
+ USHORT Reserved1; // Reserved for internal use
+ USHORT ExtType; // Extended info type
+ struct {
+ USHORT Linkage : 1; // Indicates additional extended
+ // data item
+ USHORT Reserved2 : 15;
+ };
+ USHORT DataSize; // Size of extended info data
+ ULONGLONG DataPtr; // Pointer to extended info data
+
+} EVENT_HEADER_EXTENDED_DATA_ITEM, *PEVENT_HEADER_EXTENDED_DATA_ITEM;
+#endif
+///////////////////////////////////////////////////////////////////////////////
+#ifndef ETW_BUFFER_CONTEXT_DEF
+#define ETW_BUFFER_CONTEXT_DEF
+typedef struct _ETW_BUFFER_CONTEXT { // Local copy of the Windows SDK type (only compiled when DECLARE_ETW is set); used as a member of EVENT_RECORD and EVENT_TRACE.
+ union {
+ struct {
+ UCHAR ProcessorNumber;
+ UCHAR Alignment;
+ } DUMMYSTRUCTNAME;
+ USHORT ProcessorIndex; // shares storage with ProcessorNumber/Alignment
+ } DUMMYUNIONNAME;
+ USHORT LoggerId;
+} ETW_BUFFER_CONTEXT, *PETW_BUFFER_CONTEXT;
+#endif
+///////////////////////////////////////////////////////////////////////////////
+#ifndef EVENT_RECORD_DEF
+#define EVENT_RECORD_DEF
+typedef struct _EVENT_RECORD { // Local copy of the Windows SDK type (only compiled when DECLARE_ETW is set); argument type of PEVENT_RECORD_CALLBACK.
+ EVENT_HEADER EventHeader;
+ ETW_BUFFER_CONTEXT BufferContext;
+ USHORT ExtendedDataCount;
+
+ USHORT UserDataLength;
+ PEVENT_HEADER_EXTENDED_DATA_ITEM ExtendedData;
+ PVOID UserData;
+ PVOID UserContext;
+} EVENT_RECORD, *PEVENT_RECORD;
+#endif
+///////////////////////////////////////////////////////////////////////////////
+typedef struct _EVENT_TRACE_PROPERTIES { // Local copy of the Windows SDK type (only compiled when DECLARE_ETW is set); passed to StartTraceW and ControlTraceW.
+ WNODE_HEADER Wnode;
+ //
+ // data provided by caller
+ ULONG BufferSize; // buffer size for logging (kbytes)
+ ULONG MinimumBuffers; // minimum to preallocate
+ ULONG MaximumBuffers; // maximum buffers allowed
+ ULONG MaximumFileSize; // maximum logfile size (in MBytes)
+ ULONG LogFileMode; // sequential, circular
+ ULONG FlushTimer; // buffer flush timer, in seconds
+ ULONG EnableFlags; // trace enable flags
+ union {
+ LONG AgeLimit; // unused
+ LONG FlushThreshold; // Number of buffers to fill before flushing
+ } DUMMYUNIONNAME;
+
+ // data returned to caller
+ ULONG NumberOfBuffers; // no of buffers in use
+ ULONG FreeBuffers; // no of buffers free
+ ULONG EventsLost; // event records lost
+ ULONG BuffersWritten; // no of buffers written to file
+ ULONG LogBuffersLost; // no of logfile write failures
+ ULONG RealTimeBuffersLost; // no of rt delivery failures
+ HANDLE LoggerThreadId; // thread id of Logger
+ ULONG LogFileNameOffset; // Offset to LogFileName
+ ULONG LoggerNameOffset; // Offset to LoggerName
+} EVENT_TRACE_PROPERTIES, *PEVENT_TRACE_PROPERTIES;
+
+typedef struct _EVENT_TRACE_HEADER { // overlays WNODE_HEADER; local copy of the Windows SDK type, used as EVENT_TRACE::Header
+ USHORT Size; // Size of entire record
+ union {
+ USHORT FieldTypeFlags; // Indicates valid fields
+ struct {
+ UCHAR HeaderType; // Header type - internal use only
+ UCHAR MarkerFlags; // Marker - internal use only
+ } DUMMYSTRUCTNAME;
+ } DUMMYUNIONNAME;
+ union {
+ ULONG Version;
+ struct {
+ UCHAR Type; // event type
+ UCHAR Level; // trace instrumentation level
+ USHORT Version; // version of trace record
+ } Class;
+ } DUMMYUNIONNAME2;
+ ULONG ThreadId; // Thread Id
+ ULONG ProcessId; // Process Id
+ LARGE_INTEGER TimeStamp; // time when event happens
+ union {
+ GUID Guid; // Guid that identifies event
+ ULONGLONG GuidPtr; // use with WNODE_FLAG_USE_GUID_PTR
+ } DUMMYUNIONNAME3;
+ union {
+ struct {
+ ULONG KernelTime; // Kernel Mode CPU ticks
+ ULONG UserTime; // User mode CPU ticks
+ } DUMMYSTRUCTNAME;
+ ULONG64 ProcessorTime; // Processor Clock
+ struct {
+ ULONG ClientContext; // Reserved
+ ULONG Flags; // Event Flags
+ } DUMMYSTRUCTNAME2;
+ } DUMMYUNIONNAME4;
+} EVENT_TRACE_HEADER, *PEVENT_TRACE_HEADER;
+
+typedef struct _EVENT_TRACE { // Local copy of the Windows SDK type; argument type of PEVENT_CALLBACK and type of EVENT_TRACE_LOGFILEW::CurrentEvent.
+ EVENT_TRACE_HEADER Header; // Event trace header
+ ULONG InstanceId; // Instance Id of this event
+ ULONG ParentInstanceId; // Parent Instance Id.
+ GUID ParentGuid; // Parent Guid;
+ PVOID MofData; // Pointer to Variable Data
+ ULONG MofLength; // Variable Datablock Length
+ union {
+ ULONG ClientContext;
+ ETW_BUFFER_CONTEXT BufferContext;
+ } DUMMYUNIONNAME;
+} EVENT_TRACE, *PEVENT_TRACE;
+
+typedef struct _TRACE_LOGFILE_HEADER { // Local copy of the Windows SDK type; type of EVENT_TRACE_LOGFILEW::LogfileHeader.
+ ULONG BufferSize; // Logger buffer size in Kbytes
+ union {
+ ULONG Version; // Logger version
+ struct {
+ UCHAR MajorVersion;
+ UCHAR MinorVersion;
+ UCHAR SubVersion;
+ UCHAR SubMinorVersion;
+ } VersionDetail;
+ } DUMMYUNIONNAME;
+ ULONG ProviderVersion; // defaults to NT version
+ ULONG NumberOfProcessors; // Number of Processors
+ LARGE_INTEGER EndTime; // Time when logger stops
+ ULONG TimerResolution; // assumes timer is constant!!!
+ ULONG MaximumFileSize; // Maximum in Mbytes
+ ULONG LogFileMode; // specify logfile mode
+ ULONG BuffersWritten; // used to file start of Circular File
+ union {
+ GUID LogInstanceGuid; // For RealTime Buffer Delivery
+ struct {
+ ULONG StartBuffers; // Count of buffers written at start.
+ ULONG PointerSize; // Size of pointer type in bits
+ ULONG EventsLost; // Events lost during log session
+ ULONG CpuSpeedInMHz; // Cpu Speed in MHz
+ } DUMMYSTRUCTNAME;
+ } DUMMYUNIONNAME2;
+#if defined(_WMIKM_)
+ PWCHAR LoggerName;
+ PWCHAR LogFileName;
+ RTL_TIME_ZONE_INFORMATION TimeZone;
+#else
+ LPWSTR LoggerName;
+ LPWSTR LogFileName;
+ TIME_ZONE_INFORMATION TimeZone;
+#endif
+ LARGE_INTEGER BootTime;
+ LARGE_INTEGER PerfFreq; // Reserved
+ LARGE_INTEGER StartTime; // Reserved
+ ULONG ReservedFlags; // ClockType
+ ULONG BuffersLost;
+} TRACE_LOGFILE_HEADER, *PTRACE_LOGFILE_HEADER;
+
+typedef enum _TRACE_QUERY_INFO_CLASS { // Local copy of the Windows SDK enum; type of the InformationClass argument of TraceSetInformation / TraceQueryInformation. Enumerator order fixes the values.
+ TraceGuidQueryList,
+ TraceGuidQueryInfo,
+ TraceGuidQueryProcess,
+ TraceStackTracingInfo, // Win7
+ TraceSystemTraceEnableFlagsInfo,
+ TraceSampledProfileIntervalInfo,
+ TraceProfileSourceConfigInfo,
+ TraceProfileSourceListInfo,
+ TracePmcEventListInfo,
+ TracePmcCounterListInfo,
+ MaxTraceSetInfoClass
+} TRACE_QUERY_INFO_CLASS, TRACE_INFO_CLASS;
+
+typedef struct _CLASSIC_EVENT_ID { // Local copy of the Windows SDK type (only compiled when DECLARE_ETW is set): an event GUID plus a one-byte type.
+ GUID EventGuid;
+ UCHAR Type;
+ UCHAR Reserved[7];
+} CLASSIC_EVENT_ID, *PCLASSIC_EVENT_ID;
+
+typedef struct _TRACE_PROFILE_INTERVAL { // Local copy of the Windows SDK type (only compiled when DECLARE_ETW is set): a profile source and its interval.
+ ULONG Source;
+ ULONG Interval;
+} TRACE_PROFILE_INTERVAL, *PTRACE_PROFILE_INTERVAL;
+
+typedef struct _EVENT_TRACE_LOGFILEW
+EVENT_TRACE_LOGFILEW, *PEVENT_TRACE_LOGFILEW;
+
+typedef ULONG(WINAPI * PEVENT_TRACE_BUFFER_CALLBACKW)
+(PEVENT_TRACE_LOGFILEW Logfile);
+
+typedef VOID(WINAPI *PEVENT_CALLBACK)(PEVENT_TRACE pEvent);
+
+typedef struct _EVENT_RECORD
+EVENT_RECORD, *PEVENT_RECORD;
+
+typedef VOID(WINAPI *PEVENT_RECORD_CALLBACK) (PEVENT_RECORD EventRecord);
+
+struct _EVENT_TRACE_LOGFILEW { // Local copy of the Windows SDK type (only compiled when DECLARE_ETW is set); argument type of OpenTraceW.
+ LPWSTR LogFileName; // Logfile Name
+ LPWSTR LoggerName; // LoggerName
+ LONGLONG CurrentTime; // timestamp of last event
+ ULONG BuffersRead; // buffers read to date
+ union {
+ // Mode of the logfile
+ ULONG LogFileMode;
+ // Processing flags used on Vista and above
+ ULONG ProcessTraceMode;
+ } DUMMYUNIONNAME;
+ EVENT_TRACE CurrentEvent; // Current Event from this stream.
+ TRACE_LOGFILE_HEADER LogfileHeader; // logfile header structure
+ PEVENT_TRACE_BUFFER_CALLBACKW // callback before each buffer
+ BufferCallback; // is read
+ //
+ // following variables are filled for BufferCallback.
+ //
+ ULONG BufferSize;
+ ULONG Filled;
+ ULONG EventsLost;
+ //
+ // following needs to be propagated to each buffer
+ //
+ union {
+ // Callback with EVENT_TRACE
+ PEVENT_CALLBACK EventCallback;
+ // Callback with EVENT_RECORD on Vista and above
+ PEVENT_RECORD_CALLBACK EventRecordCallback;
+ } DUMMYUNIONNAME2;
+
+ ULONG IsKernelTrace; // TRUE for kernel logfile
+
+ PVOID Context; // reserved for internal use
+};
+
+#pragma warning(pop)
+
+#define PEVENT_TRACE_BUFFER_CALLBACK PEVENT_TRACE_BUFFER_CALLBACKW
+#define EVENT_TRACE_LOGFILE EVENT_TRACE_LOGFILEW
+#define PEVENT_TRACE_LOGFILE PEVENT_TRACE_LOGFILEW
+#define KERNEL_LOGGER_NAME KERNEL_LOGGER_NAMEW
+#define GLOBAL_LOGGER_NAME GLOBAL_LOGGER_NAMEW
+#define EVENT_LOGGER_NAME EVENT_LOGGER_NAMEW
+
+EXTERN_C
+ULONG
+WMIAPI
+ProcessTrace(
+ _In_reads_(HandleCount) PTRACEHANDLE HandleArray,
+ _In_ ULONG HandleCount,
+ _In_opt_ LPFILETIME StartTime,
+ _In_opt_ LPFILETIME EndTime
+);
+
+EXTERN_C
+ULONG
+WMIAPI
+StartTraceW(
+ _Out_ PTRACEHANDLE TraceHandle,
+ _In_ LPCWSTR InstanceName,
+ _Inout_ PEVENT_TRACE_PROPERTIES Properties
+);
+
+EXTERN_C
+ULONG
+WMIAPI
+ControlTraceW(
+ _In_ TRACEHANDLE TraceHandle,
+ _In_opt_ LPCWSTR InstanceName,
+ _Inout_ PEVENT_TRACE_PROPERTIES Properties,
+ _In_ ULONG ControlCode
+);
+
+EXTERN_C
+TRACEHANDLE
+WMIAPI
+OpenTraceW(
+ _Inout_ PEVENT_TRACE_LOGFILEW Logfile
+);
+
+EXTERN_C
+ULONG
+WMIAPI
+CloseTrace(
+ _In_ TRACEHANDLE TraceHandle
+);
+
+EXTERN_C
+ULONG
+WMIAPI
+TraceSetInformation(
+ _In_ TRACEHANDLE SessionHandle,
+ _In_ TRACE_INFO_CLASS InformationClass,
+ _In_reads_bytes_(InformationLength) PVOID TraceInformation,
+ _In_ ULONG InformationLength
+);
+
+EXTERN_C
+ULONG
+WMIAPI
+TraceQueryInformation(
+ _In_ TRACEHANDLE SessionHandle,
+ _In_ TRACE_INFO_CLASS InformationClass,
+ _Out_writes_bytes_(InformationLength) PVOID TraceInformation,
+ _In_ ULONG InformationLength,
+ _Out_opt_ PULONG ReturnLength
+);
+
+//////////////////////////////////////////////////////////////////////////
+#define RegisterTraceGuids RegisterTraceGuidsW
+#define StartTrace StartTraceW
+#define ControlTrace ControlTraceW
+#define StopTrace StopTraceW
+#define QueryTrace QueryTraceW
+#define UpdateTrace UpdateTraceW
+#define FlushTrace FlushTraceW
+#define QueryAllTraces QueryAllTracesW
+#define OpenTrace OpenTraceW
+//////////////////////////////////////////////////////////////////////////
+#else
+#define INITGUID // Causes definition of SystemTraceControlGuid in evntrace.h.
+#include <wmistr.h>
+#include <evntrace.h>
+#include <strsafe.h>
+#include <evntcons.h>
+#endif //DECLARE_ETW
+
+namespace Optick
+{
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Windows implementation of the Trace interface that drives a real-time ETW session
+// on the kernel logger (see Start/Stop below in this file).
+// NOTE: members are initialized in the order they are declared here.
+class ETW : public Trace
+{
+ // NOTE(review): this value is assigned to EVENT_TRACE_PROPERTIES::BufferSize in Start();
+ // that field is documented in kilobytes, so 1024 << 10 may be clamped by ETW - confirm.
+ static const int ETW_BUFFER_SIZE = 1024 << 10; // 1Mb
+ static const int ETW_BUFFER_COUNT = 32;
+ // Number of WCHARs reserved for the logger name that follows EVENT_TRACE_PROPERTIES.
+ static const int ETW_MAXIMUM_SESSION_NAME = 1024;
+
+ // Session properties buffer; allocated lazily in Start() and released in the destructor.
+ EVENT_TRACE_PROPERTIES *traceProperties;
+ // Consumer description passed to OpenTrace().
+ EVENT_TRACE_LOGFILE logFile;
+ // Handle written by StartTrace().
+ TRACEHANDLE traceSessionHandle;
+ // Handle returned by OpenTrace() and consumed by ProcessTrace().
+ TRACEHANDLE openedHandle;
+
+ // Thread created in Start() that runs RunProcessTraceThreadFunction.
+ HANDLE processThreadHandle;
+ // Process id captured in the constructor.
+ DWORD currentProcessId;
+
+ // True between a successful Start() and the following Stop().
+ bool isActive;
+
+ static DWORD WINAPI RunProcessTraceThreadFunction(LPVOID parameter);
+ static void AdjustPrivileges();
+
+ // Cache used by ResolveSysCalls(): syscall address -> shared event description.
+ unordered_map<uint64_t, const EventDescription*> syscallDescriptions;
+
+ void ResolveSysCalls();
+public:
+
+ // Thread ids passed to Start(); read by OnRecordEvent to decide which cores are tracked.
+ unordered_set<uint64> activeThreadsIDs;
+
+ ETW();
+ ~ETW();
+
+ virtual CaptureStatus::Type Start(Mode::Type mode, int frequency, const ThreadList& threads) override;
+ virtual bool Stop() override;
+
+ DWORD GetProcessID() const { return currentProcessId; }
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Payload of the context-switch event (opcode 36).
+// OnRecordEvent casts EVENT_RECORD::UserData to this struct only when
+// UserDataLength == sizeof(CSwitch), so field order and sizes must not change.
+struct CSwitch
+{
+ // New thread ID after the switch.
+ uint32 NewThreadId;
+
+ // Previous thread ID.
+ uint32 OldThreadId;
+
+ // Thread priority of the new thread.
+ int8 NewThreadPriority;
+
+ // Thread priority of the previous thread.
+ int8 OldThreadPriority;
+
+ //The index of the C-state that was last used by the processor. A value of 0 represents the lightest idle state with higher values representing deeper C-states.
+ uint8 PreviousCState;
+
+ // Not used.
+ int8 SpareByte;
+
+ // Wait reason for the previous thread. The following are the possible values:
+ // 0 Executive
+ // 1 FreePage
+ // 2 PageIn
+ // 3 PoolAllocation
+ // 4 DelayExecution
+ // 5 Suspended
+ // 6 UserRequest
+ // 7 WrExecutive
+ // 8 WrFreePage
+ // 9 WrPageIn
+ // 10 WrPoolAllocation
+ // 11 WrDelayExecution
+ // 12 WrSuspended
+ // 13 WrUserRequest
+ // 14 WrEventPair
+ // 15 WrQueue
+ // 16 WrLpcReceive
+ // 17 WrLpcReply
+ // 18 WrVirtualMemory
+ // 19 WrPageOut
+ // 20 WrRendezvous
+ // 21 WrKeyedEvent
+ // 22 WrTerminated
+ // 23 WrProcessInSwap
+ // 24 WrCpuRateControl
+ // 25 WrCalloutStack
+ // 26 WrKernel
+ // 27 WrResource
+ // 28 WrPushLock
+ // 29 WrMutex
+ // 30 WrQuantumEnd
+ // 31 WrDispatchInt
+ // 32 WrPreempted
+ // 33 WrYieldExecution
+ // 34 WrFastMutex
+ // 35 WrGuardedMutex
+ // 36 WrRundown
+ // 37 MaximumWaitReason
+ int8 OldThreadWaitReason;
+
+ // Wait mode for the previous thread. The following are the possible values:
+ // 0 KernelMode
+ // 1 UserMode
+ int8 OldThreadWaitMode;
+
+ // State of the previous thread. The following are the possible state values:
+ // 0 Initialized
+ // 1 Ready
+ // 2 Running
+ // 3 Standby
+ // 4 Terminated
+ // 5 Waiting
+ // 6 Transition
+ // 7 DeferredReady (added for Windows Server 2003)
+ int8 OldThreadState;
+
+ // Ideal wait time of the previous thread.
+ // NOTE(review): the field name suggests an ideal *processor* rather than a time - confirm against the ETW CSwitch reference.
+ int8 OldThreadWaitIdealProcessor;
+
+ // Wait time for the new thread.
+ uint32 NewThreadWaitTime;
+
+ // Reserved.
+ uint32 Reserved;
+
+ // Event opcode that OnRecordEvent matches against EventDescriptor.Opcode.
+ static const byte OPCODE = 36;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Fixed header of the stack-walk event (opcode 32).
+// OnRecordEvent treats Stack0 as the first element of a contiguous array of
+// 64-bit return addresses whose length is derived from UserDataLength.
+struct StackWalk_Event
+{
+ // Original event time stamp from the event header
+ uint64 EventTimeStamp;
+
+ // The process identifier of the original event
+ uint32 StackProcess;
+
+ // The thread identifier of the original event
+ uint32 StackThread;
+
+ // Callstack head
+ uint64 Stack0;
+
+ // Event opcode that OnRecordEvent matches against EventDescriptor.Opcode.
+ static const byte OPCODE = 32;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Payload of Thread provider events (ThreadGuid). OnRecordEvent casts UserData to this
+// struct when UserDataLength >= sizeof(Thread_TypeGroup1) and reads only TThreadId,
+// ProcessId and BasePriority. Field order and sizes mirror the event layout - do not reorder.
+struct Thread_TypeGroup1
+{
+ // Process identifier of the thread involved in the event.
+ uint32 ProcessId;
+ // Thread identifier of the thread involved in the event.
+ uint32 TThreadId;
+ // Base address of the thread's stack.
+ uint64 StackBase;
+ // Limit of the thread's stack.
+ uint64 StackLimit;
+ // Base address of the thread's user-mode stack.
+ uint64 UserStackBase;
+ // Limit of the thread's user-mode stack.
+ uint64 UserStackLimit;
+ // The set of processors on which the thread is allowed to run.
+ // NOTE(review): a uint32 followed by a uint64 leaves 4 bytes of compiler padding here;
+ // presumably the on-wire field is pointer-sized on 64-bit kernels - confirm.
+ uint32 Affinity;
+ // Starting address of the function to be executed by this thread.
+ uint64 Win32StartAddr;
+ // Thread environment block base address.
+ uint64 TebBase;
+ // Identifies the service if the thread is owned by a service; otherwise, zero.
+ uint32 SubProcessTag;
+ // The scheduler priority of the thread
+ uint8 BasePriority;
+ // A memory page priority hint for memory pages accessed by the thread.
+ uint8 PagePriority;
+ // An IO priority hint for scheduling IOs generated by the thread.
+ uint8 IoPriority;
+ // Not used.
+ uint8 ThreadFlags;
+
+ // Opcodes of the thread events that carry this payload.
+ enum struct Opcode : uint8
+ {
+ Start = 1,
+ End = 2,
+ DCStart = 3,
+ DCEnd = 4,
+ };
+};
+
+// Returns the number of bytes occupied by the user-SID section that starts at `ptr`
+// inside a process event payload.
+//  - If the leading 32-bit value is zero, only that 4-byte marker is present.
+//  - Otherwise a 16-byte block precedes the SID, and the SID itself takes
+//    8 fixed bytes plus 4 bytes per sub-authority.
+size_t GetSIDSize(uint8* ptr)
+{
+    const int leadingValue = *((int*)ptr);
+    if (leadingValue == 0)
+        return 4;
+
+    const size_t tokenSize = 16;
+    const SID* sid = (const SID*)(ptr + tokenSize);
+    return tokenSize + (8 + (4 * sid->SubAuthorityCount));
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// https://github.com/Microsoft/perfview/blob/688a8564062d51321bbab53cd71d9e174a77d2ce/src/TraceEvent/TraceEvent.cs
+// Fixed-size head of a Process provider event payload (ProcessGuid).
+// The variable-length tail (user SID, image file name, command line) follows the
+// head and is reached through GetSIDOffset()/GetProcessName().
+struct Process_TypeGroup1
+{
+    // The address of the process object in the kernel.
+    uint64 UniqueProcessKey;
+    // Global process identifier that you can use to identify a process.
+    uint32 ProcessId;
+    // Unique identifier of the process that creates this process.
+    uint32 ParentId;
+    // Unique identifier that an operating system generates when it creates a new session.
+    uint32 SessionId;
+    // Exit status of the stopped process.
+    int32 ExitStatus;
+    // The physical address of the page table of the process.
+    uint64 DirectoryTableBase;
+    // (?) uint8 Flags;
+    // object UserSID;
+    // string ImageFileName;
+    // wstring CommandLine;
+
+    // Byte offset of the user SID from the start of the payload; depends on the event version.
+    static size_t GetSIDOffset(PEVENT_RECORD pEvent)
+    {
+        const auto version = pEvent->EventHeader.EventDescriptor.Version;
+        return (version >= 4) ? 36
+             : (version == 3) ? 32
+             : 24;
+    }
+
+    // Returns a pointer to the image file name, which starts right after the user SID.
+    // The pointer refers into the event payload and is only valid while the payload is.
+    const char* GetProcessName(PEVENT_RECORD pEvent) const
+    {
+        OPTICK_ASSERT((pEvent->EventHeader.Flags & EVENT_HEADER_FLAG_64_BIT_HEADER) != 0, "32-bit is not supported! Disable OPTICK_ENABLE_TRACING on 32-bit platform if needed!");
+        uint8* payload = (uint8*)this;
+        const size_t sidOffset = GetSIDOffset(pEvent);
+        const size_t nameOffset = sidOffset + GetSIDSize(payload + sidOffset);
+        return (char*)payload + nameOffset;
+    }
+
+    // Opcodes of the process events that carry this payload.
+    enum struct Opcode
+    {
+        Start = 1,
+        End = 2,
+        DCStart = 3,
+        DCEnd = 4,
+        Defunct = 39,
+    };
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Payload of the sampled-profile event (opcode 46). OnRecordEvent currently only casts
+// to this type and ignores the contents.
+// NOTE(review): InstructionPointer is presumably pointer-sized on 64-bit kernels, in which
+// case this layout would be wrong there - confirm before reading any field.
+struct SampledProfile
+{
+ uint32 InstructionPointer;
+ uint32 ThreadId;
+ uint32 Count;
+
+ // Event opcode that OnRecordEvent matches against EventDescriptor.Opcode.
+ static const byte OPCODE = 46;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Payload of the syscall-enter event (opcode 51).
+struct SysCallEnter
+{
+ // Address of the system call; stored by OnRecordEvent as SysCallData::id.
+ uintptr_t SysCallAddress;
+
+ // Event opcode that OnRecordEvent matches against EventDescriptor.Opcode.
+ static const byte OPCODE = 51;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Payload of the syscall-exit event (opcode 52). OnRecordEvent only checks its size;
+// the status value itself is not read there.
+struct SysCallExit
+{
+ uint32 SysCallNtStatus;
+
+ // Event opcode that OnRecordEvent matches against EventDescriptor.Opcode.
+ static const byte OPCODE = 52;
+};
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// ce1dbfb4-137e-4da6-87b0-3f59aa102cbc
+DEFINE_GUID(SampledProfileGuid, 0xce1dbfb4, 0x137e, 0x4da6, 0x87, 0xb0, 0x3f, 0x59, 0xaa, 0x10, 0x2c, 0xbc);
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// 3d6fa8d1-fe05-11d0-9dda-00c04fd7ba7c
+// https://docs.microsoft.com/en-us/windows/desktop/etw/thread
+DEFINE_GUID(ThreadGuid, 0x3d6fa8d1, 0xfe05, 0x11d0, 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c);
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// 3d6fa8d0-fe05-11d0-9dda-00c04fd7ba7c
+// https://docs.microsoft.com/en-us/windows/desktop/etw/process
+DEFINE_GUID(ProcessGuid, 0x3d6fa8d0, 0xfe05, 0x11d0, 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c);
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+const int MAX_CPU_CORES = 256;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Mutable bookkeeping shared by the ETW event callback between Start() and Stop().
+struct ETWRuntime
+{
+    // For every CPU index: the tracked thread currently running there, or INVALID_THREAD_ID.
+    array<ThreadID, MAX_CPU_CORES> activeCores;
+    // Syscalls that were entered but not yet exited, as (cpu index, record) pairs.
+    vector<std::pair<uint8_t, SysCallData*>> activeSyscalls;
+
+    ETWRuntime()
+    {
+        Reset();
+    }
+
+    // Forgets every tracked core and every pending syscall.
+    void Reset()
+    {
+        activeCores.fill(INVALID_THREAD_ID);
+        activeSyscalls.clear();
+    }
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ETWRuntime g_ETWRuntime;
+ETW g_ETW;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// ETW consumer callback (installed as EVENT_TRACE_LOGFILE::EventRecordCallback in ETW::Start).
+// Dispatches on the event opcode:
+//  - CSwitch         -> reports the context switch and tracks which core runs a tracked thread
+//  - StackWalk_Event -> reports call stacks that belong to the current process
+//  - SampledProfile  -> ignored
+//  - SysCallEnter    -> opens a SysCallData record for the thread running on that core
+//  - SysCallExit     -> closes the most recent open record for that core
+//  - anything else   -> Thread / Process provider events register thread / process descriptions
+void WINAPI OnRecordEvent(PEVENT_RECORD eventRecord)
+{
+    const byte opcode = eventRecord->EventHeader.EventDescriptor.Opcode;
+
+    if (opcode == CSwitch::OPCODE)
+    {
+        if (sizeof(CSwitch) == eventRecord->UserDataLength)
+        {
+            CSwitch* pSwitchEvent = (CSwitch*)eventRecord->UserData;
+
+            SwitchContextDesc desc;
+            desc.reason = pSwitchEvent->OldThreadWaitReason;
+            desc.cpuId = eventRecord->BufferContext.ProcessorNumber;
+            desc.oldThreadId = (uint64)pSwitchEvent->OldThreadId;
+            desc.newThreadId = (uint64)pSwitchEvent->NewThreadId;
+            desc.timestamp = eventRecord->EventHeader.TimeStamp.QuadPart;
+            Core::Get().ReportSwitchContext(desc);
+
+            // Assign ThreadID to the cores
+            if (g_ETW.activeThreadsIDs.find(desc.newThreadId) != g_ETW.activeThreadsIDs.end())
+            {
+                g_ETWRuntime.activeCores[desc.cpuId] = desc.newThreadId;
+            }
+            else if (g_ETW.activeThreadsIDs.find(desc.oldThreadId) != g_ETW.activeThreadsIDs.end())
+            {
+                // A tracked thread was switched out in favour of an untracked one.
+                g_ETWRuntime.activeCores[desc.cpuId] = INVALID_THREAD_ID;
+            }
+        }
+    }
+    else if (opcode == StackWalk_Event::OPCODE)
+    {
+        if (eventRecord->UserData && eventRecord->UserDataLength >= sizeof(StackWalk_Event))
+        {
+            //TODO: Support x86 windows kernels
+            const size_t osKernelPtrSize = sizeof(uint64);
+            static_assert(osKernelPtrSize == sizeof(uint64), "Incompatible types!");
+
+            StackWalk_Event* pStackWalkEvent = (StackWalk_Event*)eventRecord->UserData;
+
+            // Stack0 is part of the header, hence the leading 1; the result is always >= 1.
+            uint32 count = 1 + (eventRecord->UserDataLength - sizeof(StackWalk_Event)) / osKernelPtrSize;
+
+            // The count is handed over through a uint8 cast; clamp first so that an
+            // oversized stack is truncated instead of wrapping around (256 -> 0).
+            const uint32 maxReportedFrames = 0xFF;
+            if (count > maxReportedFrames)
+                count = maxReportedFrames;
+
+            if (pStackWalkEvent->StackThread != 0 && pStackWalkEvent->StackProcess == g_ETW.GetProcessID())
+            {
+                CallstackDesc desc;
+                desc.threadID = pStackWalkEvent->StackThread;
+                desc.timestamp = pStackWalkEvent->EventTimeStamp;
+                desc.callstack = &pStackWalkEvent->Stack0;
+                desc.count = (uint8)count;
+                Core::Get().ReportStackWalk(desc);
+            }
+        }
+    }
+    else if (opcode == SampledProfile::OPCODE)
+    {
+        SampledProfile* pEvent = (SampledProfile*)eventRecord->UserData;
+        OPTICK_UNUSED(pEvent);
+    }
+    else if (opcode == SysCallEnter::OPCODE)
+    {
+        if (eventRecord->UserDataLength >= sizeof(SysCallEnter))
+        {
+            uint8_t cpuId = eventRecord->BufferContext.ProcessorNumber;
+            uint64_t threadId = g_ETWRuntime.activeCores[cpuId];
+
+            // Only syscalls issued by a tracked thread are recorded.
+            if (threadId != INVALID_THREAD_ID)
+            {
+                SysCallEnter* pEventEnter = (SysCallEnter*)eventRecord->UserData;
+
+                SysCallData& sysCall = Core::Get().syscallCollector.Add();
+                sysCall.start = eventRecord->EventHeader.TimeStamp.QuadPart;
+                sysCall.finish = EventTime::INVALID_TIMESTAMP;
+                sysCall.threadID = threadId;
+                sysCall.id = pEventEnter->SysCallAddress;
+                sysCall.description = nullptr;
+
+                g_ETWRuntime.activeSyscalls.push_back(std::make_pair(cpuId, &sysCall));
+            }
+        }
+    }
+    else if (opcode == SysCallExit::OPCODE)
+    {
+        if (eventRecord->UserDataLength >= sizeof(SysCallExit))
+        {
+            uint8_t cpuId = eventRecord->BufferContext.ProcessorNumber;
+            if (g_ETWRuntime.activeCores[cpuId] != INVALID_THREAD_ID)
+            {
+                // Close the most recently opened syscall that belongs to this core.
+                for (int i = (int)g_ETWRuntime.activeSyscalls.size() - 1; i >= 0; --i)
+                {
+                    if (g_ETWRuntime.activeSyscalls[i].first == cpuId)
+                    {
+                        g_ETWRuntime.activeSyscalls[i].second->finish = eventRecord->EventHeader.TimeStamp.QuadPart;
+                        g_ETWRuntime.activeSyscalls.erase(g_ETWRuntime.activeSyscalls.begin() + i);
+                        break;
+                    }
+                }
+            }
+        }
+    }
+    else
+    {
+        // VS TODO: We might have a situation where a thread was deleted and the new thread was created with the same threadID
+        // Ignoring for now - profiling sessions are quite short - not critical
+        if (IsEqualGUID(eventRecord->EventHeader.ProviderId, ThreadGuid))
+        {
+            if (eventRecord->UserDataLength >= sizeof(Thread_TypeGroup1))
+            {
+                const Thread_TypeGroup1* pThreadEvent = (const Thread_TypeGroup1*)eventRecord->UserData;
+                Core::Get().RegisterThreadDescription(ThreadDescription("", pThreadEvent->TThreadId, pThreadEvent->ProcessId, 1, pThreadEvent->BasePriority));
+            }
+        }
+        else if (IsEqualGUID(eventRecord->EventHeader.ProviderId, ProcessGuid))
+        {
+            if (eventRecord->UserDataLength >= sizeof(Process_TypeGroup1))
+            {
+                const Process_TypeGroup1* pProcessEvent = (const Process_TypeGroup1*)eventRecord->UserData;
+                Core::Get().RegisterProcessDescription(ProcessDescription(pProcessEvent->GetProcessName(eventRecord), pProcessEvent->ProcessId, pProcessEvent->UniqueProcessKey));
+            }
+        }
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Per-buffer ETW callback (installed as EVENT_TRACE_LOGFILE::BufferCallback in ETW::Start).
+// The buffer is not inspected; the callback always returns a non-zero value.
+static ULONG WINAPI OnBufferRecord(_In_ PEVENT_TRACE_LOGFILE Buffer)
+{
+    OPTICK_UNUSED(Buffer);
+    const ULONG keepProcessing = true;
+    return keepProcessing;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+const TRACEHANDLE INVALID_TRACEHANDLE = (TRACEHANDLE)-1;
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Entry point of the thread created in ETW::Start.
+// `parameter` is the ETW instance. The thread registers itself under the name
+// "[Optick] ETW" and then calls ProcessTrace() on the handle opened by Start().
+// Always returns 0; the ProcessTrace status is discarded.
+DWORD WINAPI ETW::RunProcessTraceThreadFunction(LPVOID parameter)
+{
+    Core::Get().RegisterThreadDescription(ThreadDescription("[Optick] ETW", GetCurrentThreadId(), GetCurrentProcessId()));
+
+    ETW* self = (ETW*)parameter;
+    const ULONG processStatus = ProcessTrace(&self->openedHandle, 1, 0, 0);
+    OPTICK_UNUSED(processStatus);
+
+    return 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Best-effort attempt to enable SE_SYSTEM_PROFILE_NAME on the current process token.
+// Every failure is ignored; the function does nothing unless OPTICK_PC is set.
+void ETW::AdjustPrivileges()
+{
+#if OPTICK_PC
+    HANDLE processToken = 0;
+    if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &processToken))
+        return;
+
+    TOKEN_PRIVILEGES request;
+    memset(&request, 0, sizeof(request));
+    request.PrivilegeCount = 1;
+    request.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+    LookupPrivilegeValue(NULL, SE_SYSTEM_PROFILE_NAME, &request.Privileges[0].Luid);
+
+    AdjustTokenPrivileges(processToken, FALSE, &request, 0, (PTOKEN_PRIVILEGES)NULL, 0);
+    CloseHandle(processToken);
+#endif
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Attaches an EventDescription to every collected syscall record.
+// Descriptions are cached per syscall address in syscallDescriptions; on a cache miss the
+// symbol engine is asked for the symbol and, if one is found, a shared description named
+// after the function is created and cached. Records whose symbol cannot be resolved are
+// left untouched. Does nothing when no symbol engine is available.
+void ETW::ResolveSysCalls()
+{
+    SymbolEngine* symEngine = Platform::GetSymbolEngine();
+    if (!symEngine)
+        return;
+
+    Core::Get().syscallCollector.syscallPool.ForEach([this, symEngine](SysCallData& data)
+    {
+        auto cached = syscallDescriptions.find(data.id);
+        if (cached != syscallDescriptions.end())
+        {
+            data.description = cached->second;
+            return;
+        }
+
+        const Symbol* symbol = symEngine->GetSymbol(data.id);
+        if (symbol == nullptr)
+            return;
+
+        string name(symbol->function.begin(), symbol->function.end());
+        data.description = EventDescription::CreateShared(name.c_str(), "SysCall", (long)data.id);
+        syscallDescriptions.insert(std::pair<const uint64_t, const EventDescription *>(data.id, data.description));
+    });
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Puts the tracer into its idle state: no session, no consumer handle, no worker thread.
+// The initializers are listed in member declaration order (the order in which they
+// actually run), and logFile is value-initialized instead of being left indeterminate
+// until Start() fills it in.
+ETW::ETW()
+    : traceProperties(nullptr)
+    , logFile()
+    , traceSessionHandle(INVALID_TRACEHANDLE)
+    , openedHandle(INVALID_TRACEHANDLE)
+    , processThreadHandle(INVALID_HANDLE_VALUE)
+    , currentProcessId(GetCurrentProcessId())
+    , isActive(false)
+{
+}
+
+// Starts a real-time kernel-logger session and a worker thread that consumes it.
+//
+// mode      - bit mask of Mode flags selecting the kernel providers
+//             (SWITCH_CONTEXT, AUTOSAMPLING, SYS_CALLS, OTHER_PROCESSES).
+// frequency - sampling frequency in Hz, used only with AUTOSAMPLING. A non-positive
+//             value leaves the system sampling interval untouched.
+// threads   - threads of the profiled application; the ids of the live ones are remembered
+//             in activeThreadsIDs for the event callback.
+//
+// Returns CaptureStatus::OK on success or when the tracer is already active,
+// ERR_TRACER_ACCESS_DENIED when StartTrace reports ERROR_ACCESS_DENIED, and
+// ERR_TRACER_FAILED otherwise. Once StartTrace has succeeded, every failure path stops
+// the session again so the kernel logger is not left running.
+CaptureStatus::Type ETW::Start(Mode::Type mode, int frequency, const ThreadList& threads)
+{
+    if (isActive)
+        return CaptureStatus::OK;
+
+    AdjustPrivileges();
+
+    g_ETWRuntime.Reset();
+
+    activeThreadsIDs.clear();
+    for (auto it = threads.begin(); it != threads.end(); ++it)
+    {
+        ThreadEntry* entry = *it;
+        if (entry->isAlive)
+        {
+            activeThreadsIDs.insert(entry->description.threadID);
+        }
+    }
+
+    // EVENT_TRACE_PROPERTIES is followed by room for the logger name and the log file name.
+    ULONG bufferSize = sizeof(EVENT_TRACE_PROPERTIES) + (ETW_MAXIMUM_SESSION_NAME + MAX_PATH) * sizeof(WCHAR);
+    if (traceProperties == nullptr)
+        traceProperties = (EVENT_TRACE_PROPERTIES*)Memory::Alloc(bufferSize);
+    if (traceProperties == nullptr)
+        return CaptureStatus::ERR_TRACER_FAILED;
+
+    ZeroMemory(traceProperties, bufferSize);
+    traceProperties->Wnode.BufferSize = bufferSize;
+    traceProperties->LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES);
+    StringCchCopyW((LPWSTR)((PCHAR)traceProperties + traceProperties->LoggerNameOffset), ETW_MAXIMUM_SESSION_NAME, KERNEL_LOGGER_NAMEW);
+    traceProperties->EnableFlags = 0;
+
+    traceProperties->BufferSize = ETW_BUFFER_SIZE;
+    traceProperties->MinimumBuffers = ETW_BUFFER_COUNT;
+
+    if (mode & Mode::SWITCH_CONTEXT)
+    {
+        traceProperties->EnableFlags |= EVENT_TRACE_FLAG_CSWITCH;
+    }
+
+    if (mode & Mode::AUTOSAMPLING)
+    {
+        traceProperties->EnableFlags |= EVENT_TRACE_FLAG_PROFILE;
+    }
+
+    if (mode & Mode::SYS_CALLS)
+    {
+        traceProperties->EnableFlags |= EVENT_TRACE_FLAG_SYSTEMCALL;
+    }
+
+    if (mode & Mode::OTHER_PROCESSES)
+    {
+        traceProperties->EnableFlags |= EVENT_TRACE_FLAG_PROCESS;
+        traceProperties->EnableFlags |= EVENT_TRACE_FLAG_THREAD;
+    }
+
+    traceProperties->LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
+    traceProperties->Wnode.Flags = WNODE_FLAG_TRACED_GUID;
+    //
+    // https://msdn.microsoft.com/en-us/library/windows/desktop/aa364160(v=vs.85).aspx
+    // Clock resolution = QPC
+    traceProperties->Wnode.ClientContext = 1;
+    traceProperties->Wnode.Guid = SystemTraceControlGuid;
+
+    // ERROR_BAD_LENGTH(24): The Wnode.BufferSize member of Properties specifies an incorrect size. Properties does not have sufficient space allocated to hold a copy of SessionName.
+    // ERROR_ALREADY_EXISTS(183): A session with the same name or GUID is already running.
+    // ERROR_ACCESS_DENIED(5): Only users with administrative privileges, users in the Performance Log Users group, and services running as LocalSystem, LocalService, NetworkService can control event tracing sessions.
+    // ERROR_INVALID_PARAMETER(87)
+    // ERROR_BAD_PATHNAME(161)
+    // ERROR_DISK_FULL(112)
+    // ERROR_NO_SUCH_PRIVILEGE(1313)
+    int retryCount = 4;
+    ULONG status = ERROR_SUCCESS;
+
+    while (--retryCount >= 0)
+    {
+        status = StartTrace(&traceSessionHandle, KERNEL_LOGGER_NAME, traceProperties);
+
+        switch (status)
+        {
+        case ERROR_NO_SUCH_PRIVILEGE:
+            AdjustPrivileges();
+            break;
+
+        case ERROR_ALREADY_EXISTS:
+            // Stop the stale session and retry.
+            ControlTrace(0, KERNEL_LOGGER_NAME, traceProperties, EVENT_TRACE_CONTROL_STOP);
+            break;
+
+        case ERROR_ACCESS_DENIED:
+            return CaptureStatus::ERR_TRACER_ACCESS_DENIED;
+
+        case ERROR_SUCCESS:
+            retryCount = 0;
+            break;
+
+        default:
+            return CaptureStatus::ERR_TRACER_FAILED;
+        }
+    }
+
+    if (status != ERROR_SUCCESS)
+    {
+        return CaptureStatus::ERR_TRACER_FAILED;
+    }
+
+    // From here on the kernel session is running: stop it again on every failure path.
+    auto abortSession = [this]()
+    {
+        ControlTrace(traceSessionHandle, KERNEL_LOGGER_NAME, traceProperties, EVENT_TRACE_CONTROL_STOP);
+        traceSessionHandle = INVALID_TRACEHANDLE;
+    };
+
+    CLASSIC_EVENT_ID callstackSamples[4];
+    int callstackCountSamplesCount = 0;
+
+    if (mode & Mode::AUTOSAMPLING)
+    {
+        callstackSamples[callstackCountSamplesCount].EventGuid = SampledProfileGuid;
+        callstackSamples[callstackCountSamplesCount].Type = SampledProfile::OPCODE;
+        ++callstackCountSamplesCount;
+    }
+
+    if (mode & Mode::SYS_CALLS)
+    {
+        callstackSamples[callstackCountSamplesCount].EventGuid = SampledProfileGuid;
+        callstackSamples[callstackCountSamplesCount].Type = SysCallEnter::OPCODE;
+        ++callstackCountSamplesCount;
+    }
+
+    /*
+    callstackSamples[callstackCountSamplesCount].EventGuid = CSwitchProfileGuid;
+    callstackSamples[callstackCountSamplesCount].Type = CSwitch::OPCODE;
+    ++callstackCountSamplesCount;
+    */
+
+    /*
+    https://msdn.microsoft.com/en-us/library/windows/desktop/dd392328%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396
+    Typically, on 64-bit computers, you cannot capture the kernel stack in certain contexts when page faults are not allowed. To enable walking the kernel stack on x64, set
+    the DisablePagingExecutive Memory Management registry value to 1. The DisablePagingExecutive registry value is located under the following registry key:
+    HKEY_LOCAL_MACHINE\System\CurrentControlSet\Control\Session Manager\Memory Management
+    */
+    if (callstackCountSamplesCount > 0)
+    {
+        status = TraceSetInformation(traceSessionHandle, TraceStackTracingInfo, &callstackSamples[0], sizeof(CLASSIC_EVENT_ID) * callstackCountSamplesCount);
+        if (status != ERROR_SUCCESS)
+        {
+            abortSession();
+            OPTICK_FAILED("TraceSetInformation - failed");
+            return CaptureStatus::ERR_TRACER_FAILED;
+        }
+    }
+
+    // A non-positive frequency would divide by zero or produce a negative interval,
+    // so the sampling interval is only reprogrammed for a valid frequency.
+    if ((mode & Mode::AUTOSAMPLING) && frequency > 0)
+    {
+        TRACE_PROFILE_INTERVAL interval;
+        memset(&interval, 0, sizeof(TRACE_PROFILE_INTERVAL));
+        int step = 10000 * 1000 / frequency; // 1ms = 10000 steps
+        interval.Interval = step; // std::max(1221, std::min(step, 10000));
+        // The SessionHandle is irrelevant for this information class and must be zero, else the function returns ERROR_INVALID_PARAMETER.
+        status = TraceSetInformation(0, TraceSampledProfileIntervalInfo, &interval, sizeof(TRACE_PROFILE_INTERVAL));
+        OPTICK_ASSERT(status == ERROR_SUCCESS, "TraceSetInformation - failed");
+    }
+
+    ZeroMemory(&logFile, sizeof(EVENT_TRACE_LOGFILE));
+    logFile.LoggerName = KERNEL_LOGGER_NAME;
+    logFile.ProcessTraceMode = (PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_EVENT_RECORD | PROCESS_TRACE_MODE_RAW_TIMESTAMP);
+    logFile.EventRecordCallback = OnRecordEvent;
+    logFile.BufferCallback = OnBufferRecord;
+    openedHandle = OpenTrace(&logFile);
+    if (openedHandle == INVALID_TRACEHANDLE)
+    {
+        abortSession();
+        OPTICK_FAILED("OpenTrace - failed");
+        return CaptureStatus::ERR_TRACER_FAILED;
+    }
+
+    DWORD threadID = 0;
+    processThreadHandle = CreateThread(0, 0, RunProcessTraceThreadFunction, this, 0, &threadID);
+    if (processThreadHandle == NULL)
+    {
+        // Without a consumer thread the session is useless: undo everything done so far.
+        processThreadHandle = INVALID_HANDLE_VALUE;
+        CloseTrace(openedHandle);
+        openedHandle = INVALID_TRACEHANDLE;
+        abortSession();
+        OPTICK_FAILED("CreateThread - failed");
+        return CaptureStatus::ERR_TRACER_FAILED;
+    }
+
+    isActive = true;
+
+    return CaptureStatus::OK;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Stops the kernel-logger session started by Start(), closes the consumer handle and
+// joins the worker thread.
+//
+// Returns false when the tracer was not active. Otherwise returns true only if the
+// session was stopped, the consumer handle was closed (ERROR_CTX_CLOSE_PENDING counts
+// as success) and the thread handle was released.
+bool ETW::Stop()
+{
+    if (!isActive)
+    {
+        return false;
+    }
+
+    // Use the session handle written by StartTrace, not the consumer handle from OpenTrace.
+    ULONG controlTraceResult = ControlTrace(traceSessionHandle, KERNEL_LOGGER_NAME, traceProperties, EVENT_TRACE_CONTROL_STOP);
+
+    // ERROR_CTX_CLOSE_PENDING(7007L): The call was successful. The ProcessTrace function will stop after it has processed all real-time events in its buffers (it will not receive any new events).
+    // ERROR_BUSY(170L): Prior to Windows Vista, you cannot close the trace until the ProcessTrace function completes.
+    // ERROR_INVALID_HANDLE(6L): One of the following is true: TraceHandle is NULL. TraceHandle is INVALID_HANDLE_VALUE.
+    ULONG closeTraceStatus = CloseTrace(openedHandle);
+    const bool wasTraceClosed = (closeTraceStatus == ERROR_SUCCESS) || (closeTraceStatus == ERROR_CTX_CLOSE_PENDING);
+
+    // Wait for ProcessThread to finish
+    WaitForSingleObject(processThreadHandle, INFINITE);
+    BOOL wasThreadClosed = CloseHandle(processThreadHandle);
+
+    // Drop the stale handles so they are never reused after the session is gone.
+    processThreadHandle = INVALID_HANDLE_VALUE;
+    openedHandle = INVALID_TRACEHANDLE;
+    traceSessionHandle = INVALID_TRACEHANDLE;
+
+    isActive = false;
+
+    //VS TODO: Disabling resolving of the syscalls - we can't use then as EventDescriptions at the moment
+    //ResolveSysCalls();
+
+    activeThreadsIDs.clear();
+
+    return wasThreadClosed && wasTraceClosed && (controlTraceResult == ERROR_SUCCESS);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Stops a running session first (Stop() still passes traceProperties to ControlTrace)
+// and only then releases the properties buffer allocated in Start().
+ETW::~ETW()
+{
+ Stop();
+ Memory::Free(traceProperties);
+ traceProperties = nullptr;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Returns the process-wide ETW tracer instance (the g_ETW global defined in this file).
+Trace* Platform::GetTrace()
+{
+ return &g_ETW;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Symbol Resolving
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#define USE_DBG_HELP (OPTICK_PC)
+
+#if USE_DBG_HELP
+#include <DbgHelp.h>
+#pragma comment( lib, "DbgHelp.Lib" )
+#endif
+
+#include "optick_serialization.h"
+
+#if OPTICK_PC
+#include <psapi.h>
+#else
+// Forward declare kernel functions
+#pragma pack(push,8)
+typedef struct _MODULEINFO {
+ LPVOID lpBaseOfDll;
+ DWORD SizeOfImage;
+ LPVOID EntryPoint;
+} MODULEINFO, *LPMODULEINFO;
+#pragma pack(pop)
+#ifndef EnumProcessModulesEx
+#define EnumProcessModulesEx K32EnumProcessModulesEx
+EXTERN_C DWORD WINAPI K32EnumProcessModulesEx(HANDLE hProcess, HMODULE *lphModule, DWORD cb, LPDWORD lpcbNeeded, DWORD dwFilterFlag);
+#endif
+#ifndef GetModuleInformation
+#define GetModuleInformation K32GetModuleInformation
+EXTERN_C DWORD WINAPI K32GetModuleInformation(HANDLE hProcess, HMODULE hModule, LPMODULEINFO lpmodinfo, DWORD cb);
+#endif
+
+#ifndef GetModuleFileNameExA
+#define GetModuleFileNameExA K32GetModuleFileNameExA
+EXTERN_C DWORD WINAPI K32GetModuleFileNameExA(HANDLE hProcess, HMODULE hModule, LPSTR lpFilename, DWORD nSize);
+#endif
+#endif
+
+namespace Optick
+{
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//void ReportLastError()
+//{
+// LPVOID lpMsgBuf;
+// DWORD dw = GetLastError();
+//
+// FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
+// NULL, dw, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+// (LPTSTR)&lpMsgBuf, 0, NULL);
+//
+// MessageBox(NULL, (LPCTSTR)lpMsgBuf, TEXT("Error"), MB_OK);
+// LocalFree(lpMsgBuf);
+//}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+typedef array<uintptr_t, 512> CallStackBuffer;
+typedef unordered_map<uint64, Symbol> SymbolCache;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// SymbolEngine implementation for Windows. When USE_DBG_HELP is enabled it resolves
+// addresses through DbgHelp; results are cached per address.
+// NOTE: members are initialized in the order they are declared here.
+class WinSymbolEngine : public SymbolEngine
+{
+ // Process handle handed to the Sym* functions (set from GetCurrentProcess() in the constructor).
+ HANDLE hProcess;
+
+ // Set by Init() on success, cleared by Close().
+ bool isInitialized;
+
+ // Set by Init() when the first SymInitialize() call fails; Close() then restores
+ // the options and search path captured below.
+ bool needRestorePreviousSettings;
+ uint32 previousOptions;
+ static const size_t MAX_SEARCH_PATH_LENGTH = 2048;
+ char previousSearchPath[MAX_SEARCH_PATH_LENGTH];
+
+ // Resolved symbols keyed by address.
+ SymbolCache cache;
+ // Filled lazily by GetModules(); cleared by Close().
+ vector<Module> modules;
+
+ void InitSystemModules();
+ void InitApplicationModules();
+public:
+ WinSymbolEngine();
+ ~WinSymbolEngine();
+
+ void Init();
+ void Close();
+
+ // Get Symbol from PDB file
+ virtual const Symbol * GetSymbol(uint64 dwAddress) override;
+ virtual const vector<Module>& GetModules() override;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Creates an uninitialized engine bound to the current process; DbgHelp is set up lazily
+// by Init(). Initializers follow member declaration order, and the saved search path
+// buffer is zeroed so it never holds indeterminate bytes.
+WinSymbolEngine::WinSymbolEngine()
+    : hProcess(GetCurrentProcess())
+    , isInitialized(false)
+    , needRestorePreviousSettings(false)
+    , previousOptions(0)
+{
+    memset(previousSearchPath, 0, sizeof(previousSearchPath));
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Releases the symbol handler (and restores saved settings, if any) via Close().
+WinSymbolEngine::~WinSymbolEngine()
+{
+ Close();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Resolves `address` to a Symbol (file, line, function name, offset).
+//
+// Returns nullptr for address 0 or when the engine could not be initialized and the
+// address is not cached yet. Otherwise returns a pointer to the cached entry, which is
+// created on first use; fields DbgHelp cannot resolve keep their default values.
+const Symbol* WinSymbolEngine::GetSymbol(uint64 address)
+{
+    if (address == 0)
+        return nullptr;
+
+    Init();
+
+    // Look up first, so that a failed initialization does not fill the cache with empty entries.
+    auto cached = cache.find(address);
+    if (cached != cache.end() && cached->second.address != 0)
+        return &cached->second;
+
+    if (!isInitialized)
+        return nullptr;
+
+    Symbol& symbol = cache[address];
+    symbol.address = address;
+
+#if USE_DBG_HELP
+    DWORD64 dwAddress = static_cast<DWORD64>(address);
+
+    // FileName and Line
+    IMAGEHLP_LINEW64 lineInfo;
+    memset(&lineInfo, 0, sizeof(IMAGEHLP_LINEW64));
+    lineInfo.SizeOfStruct = sizeof(lineInfo);
+    DWORD dwDisp;
+    if (SymGetLineFromAddrW64(hProcess, dwAddress, &dwDisp, &lineInfo))
+    {
+        symbol.file = lineInfo.FileName;
+        symbol.line = lineInfo.LineNumber;
+    }
+
+    // SYMBOL_INFOW is followed by the name characters; size the buffer in ULONG64 units
+    // so it is suitably aligned.
+    const size_t length = (sizeof(SYMBOL_INFOW) + MAX_SYM_NAME * sizeof(WCHAR) + sizeof(ULONG64) - 1) / sizeof(ULONG64) + 1;
+
+    // Function Name
+    ULONG64 buffer[length];
+    PSYMBOL_INFOW dbgSymbol = (PSYMBOL_INFOW)buffer;
+    memset(dbgSymbol, 0, sizeof(buffer));
+    dbgSymbol->SizeOfStruct = sizeof(SYMBOL_INFOW);
+    dbgSymbol->MaxNameLen = MAX_SYM_NAME;
+
+    DWORD64 offset = 0;
+    if (SymFromAddrW(hProcess, dwAddress, &offset, dbgSymbol))
+    {
+        // Clamp defensively so the copy never reads past the name buffer provided above.
+        ULONG nameLength = dbgSymbol->NameLen;
+        if (nameLength > dbgSymbol->MaxNameLen)
+            nameLength = dbgSymbol->MaxNameLen;
+
+        symbol.function.resize(nameLength);
+        if (nameLength > 0)
+            memcpy(&symbol.function[0], &dbgSymbol->Name[0], sizeof(WCHAR) * nameLength);
+    }
+
+    symbol.offset = static_cast<uintptr_t>(offset);
+#endif
+
+    return &symbol;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Returns the module list, collecting system modules first and application modules
+// second the first time it is requested (or again whenever the list is empty).
+const vector<Module>& WinSymbolEngine::GetModules()
+{
+    if (!modules.empty())
+        return modules;
+
+    InitSystemModules();
+    InitApplicationModules();
+    return modules;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// const char* USER_SYMBOL_SEARCH_PATH = "http://msdl.microsoft.com/download/symbols";
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Lazily initializes the DbgHelp symbol handler for hProcess.
+//
+// The current options and search path are saved first. If SymInitialize() fails on the
+// first attempt, needRestorePreviousSettings is set, the handler is cleaned up and
+// initialization is retried once. Modules are registered with DbgHelp only after a
+// successful initialization; on failure isInitialized stays false and the next call
+// retries. Without USE_DBG_HELP the engine is simply marked as initialized.
+void WinSymbolEngine::Init()
+{
+    if (isInitialized)
+        return;
+
+#if USE_DBG_HELP
+    previousOptions = SymGetOptions();
+
+    memset(previousSearchPath, 0, MAX_SEARCH_PATH_LENGTH);
+    SymGetSearchPath(hProcess, previousSearchPath, MAX_SEARCH_PATH_LENGTH);
+
+    SymSetOptions(SymGetOptions() | SYMOPT_LOAD_LINES | SYMOPT_DEFERRED_LOADS | SYMOPT_UNDNAME | SYMOPT_INCLUDE_32BIT_MODULES | SYMOPT_LOAD_ANYTHING);
+    if (!SymInitialize(hProcess, NULL, TRUE))
+    {
+        needRestorePreviousSettings = true;
+        SymCleanup(hProcess);
+
+        if (SymInitialize(hProcess, NULL, TRUE))
+            isInitialized = true;
+    }
+    else
+    {
+        isInitialized = true;
+    }
+
+    // Nothing can be registered without a working symbol handler.
+    if (!isInitialized)
+        return;
+
+    const vector<Module>& loadedModules = GetModules();
+    for (size_t i = 0; i < loadedModules.size(); ++i)
+    {
+        const Module& module = loadedModules[i];
+        SymLoadModule64(hProcess, NULL, module.path.c_str(), NULL, (DWORD64)module.address, (DWORD)module.size);
+    }
+#else
+    isInitialized = true;
+#endif
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+typedef DWORD(__stdcall *pZwQuerySystemInformation)(DWORD, LPVOID, DWORD, DWORD*);
+#define SystemModuleInformation 11 // SYSTEMINFOCLASS
+#define MAXIMUM_FILENAME_LENGTH 256
+
+// One entry of the buffer filled by ZwQuerySystemInformation(SystemModuleInformation).
+// InitSystemModules reads imageName, imageBase and imageSize only.
+// NOTE(review): the two DWORD placeholders presumably stand in for a pointer-sized field,
+// which would make this layout valid for 64-bit builds only - confirm.
+struct SYSTEM_MODULE_INFORMATION
+{
+ DWORD reserved1;
+ DWORD reserved2;
+ PVOID mappedBase;
+ PVOID imageBase;
+ DWORD imageSize;
+ DWORD flags;
+ WORD loadOrderIndex;
+ WORD initOrderIndex;
+ WORD loadCount;
+ WORD moduleNameOffset;
+ CHAR imageName[MAXIMUM_FILENAME_LENGTH];
+};
+
+// Warning 4200 is disabled because MODULE_LIST ends in an array declared without a size.
+#pragma warning (push)
+#pragma warning(disable : 4200)
+// Header of the SystemModuleInformation buffer: an entry count followed by that many entries.
+struct MODULE_LIST
+{
+ DWORD dwModules;
+ SYSTEM_MODULE_INFORMATION pModulesInfo[];
+};
+#pragma warning (pop)
+
+// Appends the modules reported by ZwQuerySystemInformation(SystemModuleInformation)
+// to `modules`. Image names starting with "\SystemRoot" are rewritten to start with
+// the expanded %SystemRoot% directory (or "C:\Windows" when OPTICK_PC is not set).
+// Reports OPTICK_FAILED and leaves `modules` untouched if the query cannot be performed.
+void WinSymbolEngine::InitSystemModules()
+{
+#pragma warning (push)
+#pragma warning(disable : 4191)
+    pZwQuerySystemInformation ZwQuerySystemInformation = (pZwQuerySystemInformation)GetProcAddress(GetModuleHandle(TEXT("ntdll.dll")), "ZwQuerySystemInformation");
+#pragma warning (pop)
+
+    // The export is looked up dynamically, so it may legitimately be missing.
+    if (ZwQuerySystemInformation == nullptr)
+    {
+        OPTICK_FAILED("Can't query System Module Information!");
+        return;
+    }
+
+    // First call with an empty buffer only reports the required size.
+    ULONG returnLength = 0;
+    ZwQuerySystemInformation(SystemModuleInformation, nullptr, 0, &returnLength);
+    if (returnLength == 0)
+    {
+        OPTICK_FAILED("Can't query System Module Information!");
+        return;
+    }
+
+    const ULONG systemInformationLength = returnLength;
+    MODULE_LIST* pModuleList = (MODULE_LIST*)Memory::Alloc(systemInformationLength);
+    if (pModuleList == nullptr)
+    {
+        OPTICK_FAILED("Can't query System Module Information!");
+        return;
+    }
+
+    DWORD status = ZwQuerySystemInformation(SystemModuleInformation, pModuleList, systemInformationLength, &returnLength);
+    if (status == ERROR_SUCCESS)
+    {
+        char systemRootPath[MAXIMUM_FILENAME_LENGTH] = { 0 };
+#if OPTICK_PC
+        ExpandEnvironmentStringsA("%SystemRoot%", systemRootPath, MAXIMUM_FILENAME_LENGTH);
+#else
+        strcpy_s(systemRootPath, "C:\\Windows");
+#endif
+
+        const char* systemRootPattern = "\\SystemRoot";
+        const size_t systemRootPatternLength = strlen(systemRootPattern);
+
+        modules.reserve(modules.size() + pModuleList->dwModules);
+
+        for (uint32_t i = 0; i < pModuleList->dwModules; ++i)
+        {
+            SYSTEM_MODULE_INFORMATION& module = pModuleList->pModulesInfo[i];
+
+            // Large enough for the expanded root plus a full image name, so the
+            // concatenation below cannot overflow the buffer.
+            char path[MAXIMUM_FILENAME_LENGTH * 2] = { 0 };
+
+            if (strstr(module.imageName, systemRootPattern) == module.imageName)
+            {
+                strcpy_s(path, systemRootPath);
+                strcat_s(path, module.imageName + systemRootPatternLength);
+            }
+            else
+            {
+                strcpy_s(path, module.imageName);
+            }
+
+            modules.push_back(Module(path, (void*)module.imageBase, module.imageSize));
+        }
+    }
+    else
+    {
+        OPTICK_FAILED("Can't query System Module Information!");
+    }
+
+    Memory::Free(pModuleList);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Appends every module loaded into the current process to `modules`.
+//
+// EnumProcessModulesEx reports the number of bytes *required*, which can exceed the
+// buffer that was passed in. A 256-entry stack buffer is tried first; if the process has
+// more modules the query is repeated with a heap buffer of the reported size. The number
+// of handles read is always limited to the buffer actually filled.
+void WinSymbolEngine::InitApplicationModules()
+{
+    HANDLE processHandle = GetCurrentProcess();
+
+    HMODULE stackModules[256];
+    vector<HMODULE> heapModules;
+
+    HMODULE* hModules = stackModules;
+    DWORD capacityBytes = sizeof(stackModules);
+    DWORD modulesSize = 0;
+    if (!EnumProcessModulesEx(processHandle, hModules, capacityBytes, &modulesSize, 0))
+        return;
+
+    if (modulesSize > capacityBytes)
+    {
+        heapModules.resize(modulesSize / sizeof(HMODULE));
+        hModules = &heapModules[0];
+        capacityBytes = (DWORD)(heapModules.size() * sizeof(HMODULE));
+        if (!EnumProcessModulesEx(processHandle, hModules, capacityBytes, &modulesSize, 0))
+            return;
+    }
+
+    // Modules may have been loaded between the two calls: never read past our buffer.
+    const DWORD usableBytes = (modulesSize < capacityBytes) ? modulesSize : capacityBytes;
+    const int moduleCount = (int)(usableBytes / sizeof(HMODULE));
+
+    modules.reserve(modules.size() + moduleCount);
+
+    for (int i = 0; i < moduleCount; ++i)
+    {
+        MODULEINFO info = { 0 };
+        if (GetModuleInformation(processHandle, hModules[i], &info, sizeof(MODULEINFO)))
+        {
+            // Kept as the module name if the file name query below fails.
+            char name[MAX_PATH] = "UnknownModule";
+            GetModuleFileNameExA(processHandle, hModules[i], name, MAX_PATH);
+
+            modules.push_back(Module(name, info.lpBaseOfDll, info.SizeOfImage));
+        }
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Shuts the symbol handler down if it was initialized. When Init() had to replace an
+// existing handler (needRestorePreviousSettings), the saved options and search path are
+// reapplied and the handler is initialized again. The module list is dropped either way.
+void WinSymbolEngine::Close()
+{
+    if (!isInitialized)
+        return;
+
+#if USE_DBG_HELP
+    SymCleanup(hProcess);
+    if (needRestorePreviousSettings)
+    {
+        HANDLE self = GetCurrentProcess();
+
+        SymSetOptions(previousOptions);
+        SymSetSearchPath(self, previousSearchPath);
+        SymInitialize(self, NULL, TRUE);
+
+        needRestorePreviousSettings = false;
+    }
+#endif
+
+    modules.clear();
+    isInitialized = false;
+}
+//////////////////////////////////////////////////////////////////////////
+// Returns the symbol engine, a function-local static WinSymbolEngine that is
+// constructed on the first call.
+SymbolEngine* Platform::GetSymbolEngine()
+{
+ static WinSymbolEngine pdbSymbolEngine;
+ return &pdbSymbolEngine;
+}
+//////////////////////////////////////////////////////////////////////////
+}
+#endif //OPTICK_ENABLE_TRACING
+#endif //USE_OPTICK
+#endif //_MSC_VER \ No newline at end of file
diff --git a/external/optick/optick_gpu.cpp b/external/optick/optick_gpu.cpp
new file mode 100644
index 0000000..d3610c3
--- /dev/null
+++ b/external/optick/optick_gpu.cpp
@@ -0,0 +1,136 @@
+#include "optick.config.h"
+
+#if USE_OPTICK
+#include "optick_gpu.h"
+#include "optick_core.h"
+#include "optick_memory.h"
+
+#include <thread>
+
+namespace Optick
+{
+ // Compile-time guard: 2^32 must be an exact multiple of MAX_QUERIES_COUNT (see the message below).
+ static_assert((1ULL << 32) % GPUProfiler::MAX_QUERIES_COUNT == 0, "(1 << 32) should be a multiple of MAX_QUERIES_COUNT to handle query index overflow!");
+
+
+ // Starts in STATE_OFF with node 0 selected and the frame counter at zero.
+ GPUProfiler::GPUProfiler()
+     : currentState(STATE_OFF)
+     , currentNode(0)
+     , frameNumber(0)
+ {
+ }
+
+ // Creates the Node stored at nodes[nodeIndex] and registers one event storage per GPU queue,
+ // each named "<nodeName> [<queue name>]".
+ //   nodeName  - display name of the GPU node
+ //   nodeIndex - slot in `nodes`; `nodes` must already be sized to contain it
+ void GPUProfiler::InitNode(const char *nodeName, uint32_t nodeIndex)
+ {
+     OPTICK_VERIFY(nodeIndex < nodes.size(), "GPU node index is out of range", return);
+
+     Node* node = Memory::New<Node>();
+     node->name = nodeName; // identical for every queue, so assign it once
+
+     for (int i = 0; i < GPU_QUEUE_COUNT; ++i)
+     {
+         char name[128] = { 0 };
+         sprintf_s(name, "%s [%s]", nodeName, GetGPUQueueName((GPUQueueType)i));
+         node->gpuEventStorage[i] = RegisterStorage(name, uint64_t(-1), ThreadMask::GPU);
+     }
+
+     nodes[nodeIndex] = node;
+ }
+
+ // Begins a capture: under updateLock, resets every node and switches to STATE_STARTING.
+ // The mode argument is unused.
+ void GPUProfiler::Start(uint32 /*mode*/)
+ {
+     std::lock_guard<std::recursive_mutex> guard(updateLock);
+
+     Reset();
+     currentState = STATE_STARTING;
+ }
+
+ // Ends a capture: under updateLock, switches to STATE_OFF. The mode argument is unused.
+ void GPUProfiler::Stop(uint32 /*mode*/)
+ {
+     std::lock_guard<std::recursive_mutex> guard(updateLock);
+
+     currentState = STATE_OFF;
+ }
+
+ // For every node and queue, appends the GPU events recorded in each thread's per-node/per-queue
+ // buffer to that node's event storage for the queue. The mode argument is unused.
+ void GPUProfiler::Dump(uint32 /*mode*/)
+ {
+     // The thread list does not depend on the node or queue, so look it up once
+     const vector<ThreadEntry*>& threads = Core::Get().GetThreads();
+
+     for (size_t nodeIndex = 0; nodeIndex < nodes.size(); ++nodeIndex)
+     {
+         Node* node = nodes[nodeIndex];
+
+         for (int queueIndex = 0; queueIndex < GPU_QUEUE_COUNT; ++queueIndex)
+         {
+             EventBuffer& gpuBuffer = node->gpuEventStorage[queueIndex]->eventBuffer;
+
+             for (size_t threadIndex = 0; threadIndex < threads.size(); ++threadIndex)
+             {
+                 ThreadEntry* thread = threads[threadIndex];
+                 thread->storage.gpuStorage.gpuBuffer[nodeIndex][queueIndex].ForEachChunk([&gpuBuffer](const EventData* events, int count)
+                 {
+                     gpuBuffer.AddRange(events, count);
+                 });
+             }
+         }
+     }
+ }
+
+ // Returns the name of the first node, or an empty string when there are no nodes.
+ string GPUProfiler::GetName() const
+ {
+     if (nodes.empty())
+         return string();
+
+     return nodes[0]->name;
+ }
+
+ // Destroys every Node held in `nodes` and empties the list.
+ GPUProfiler::~GPUProfiler()
+ {
+     for (size_t index = 0; index < nodes.size(); ++index)
+     {
+         Memory::Delete(nodes[index]);
+     }
+
+     nodes.clear();
+ }
+
+ // Resets the query state of every node and refreshes its CPU/GPU clock synchronization.
+ void GPUProfiler::Reset()
+ {
+     uint32_t nodeIndex = 0;
+     while (nodeIndex < nodes.size())
+     {
+         Node* node = nodes[nodeIndex];
+         node->Reset();
+         node->clock = GetClockSynchronization(nodeIndex);
+         ++nodeIndex;
+     }
+ }
+
+ // Adds a "GPU Frame" event to the current node's graphics-queue storage.
+ // Start and finish are set to INVALID_TIMESTAMP; the caller fills them in later.
+ EventData& GPUProfiler::AddFrameEvent()
+ {
+     static const EventDescription* GPUFrameDescription = EventDescription::Create("GPU Frame", __FILE__, __LINE__);
+
+     EventStorage* graphicsStorage = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_GRAPHICS];
+     EventData& frameEvent = graphicsStorage->eventBuffer.Add();
+
+     frameEvent.description = GPUFrameDescription;
+     frameEvent.start = EventTime::INVALID_TIMESTAMP;
+     frameEvent.finish = EventTime::INVALID_TIMESTAMP;
+
+     return frameEvent;
+ }
+
+ // Adds a "VSync" event to the current node's VSync-queue storage.
+ // Start and finish are set to INVALID_TIMESTAMP; the caller fills them in later.
+ EventData& GPUProfiler::AddVSyncEvent()
+ {
+     static const EventDescription* VSyncDescription = EventDescription::Create("VSync", __FILE__, __LINE__);
+
+     EventStorage* vsyncStorage = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_VSYNC];
+     EventData& vsyncEvent = vsyncStorage->eventBuffer.Add();
+
+     vsyncEvent.description = VSyncDescription;
+     vsyncEvent.start = EventTime::INVALID_TIMESTAMP;
+     vsyncEvent.finish = EventTime::INVALID_TIMESTAMP;
+
+     return vsyncEvent;
+ }
+
+ // Adds a "Frame" tag carrying Core's current frame number to the current node's
+ // graphics-queue storage. The timestamp is INVALID_TIMESTAMP until the caller sets it.
+ TagData<uint32>& GPUProfiler::AddFrameTag()
+ {
+     static const EventDescription* FrameTagDescription = EventDescription::CreateShared("Frame");
+
+     EventStorage* graphicsStorage = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_GRAPHICS];
+     TagData<uint32>& frameTag = graphicsStorage->tagU32Buffer.Add();
+
+     frameTag.description = FrameTagDescription;
+     frameTag.timestamp = EventTime::INVALID_TIMESTAMP;
+     frameTag.data = Core::Get().GetCurrentFrame();
+
+     return frameTag;
+ }
+
+ // Returns a display name for the given queue type ("Graphics", "Compute", "Transfer", "VSync"),
+ // or "Unknown" when the value is outside [0, GPU_QUEUE_COUNT).
+ const char * GetGPUQueueName(GPUQueueType queue)
+ {
+     // Built once instead of on every call
+     static const char* const GPUQueueToName[GPU_QUEUE_COUNT] = { "Graphics", "Compute", "Transfer", "VSync" };
+
+     const int queueIndex = (int)queue;
+     if (queueIndex < 0 || queueIndex >= (int)GPU_QUEUE_COUNT)
+         return "Unknown";
+
+     return GPUQueueToName[queueIndex];
+ }
+
+ // Rewinds the query counter to zero and resets every per-frame query record.
+ void GPUProfiler::Node::Reset()
+ {
+     queryIndex = 0;
+
+     for (QueryFrame& frame : queryGpuframes)
+     {
+         frame.Reset();
+     }
+ }
+}
+#endif //USE_OPTICK
+
diff --git a/external/optick/optick_gpu.d3d12.cpp b/external/optick/optick_gpu.d3d12.cpp
new file mode 100644
index 0000000..1ee4dd9
--- /dev/null
+++ b/external/optick/optick_gpu.d3d12.cpp
@@ -0,0 +1,382 @@
+#include "optick.config.h"
+#if USE_OPTICK
+#if OPTICK_ENABLE_GPU_D3D12
+
+#include "optick_common.h"
+#include "optick_memory.h"
+#include "optick_core.h"
+#include "optick_gpu.h"
+
+#include <atomic>
+#include <thread>
+
+#include <d3d12.h>
+#include <dxgi.h>
+#include <dxgi1_4.h>
+
+
+// Evaluates `args` once and asserts (through OPTICK_ASSERT) that the HRESULT is exactly S_OK.
+// There is deliberately no ';' after while(false): the caller's own ';' ends the statement,
+// which keeps the macro usable inside an unbraced if/else.
+#define OPTICK_CHECK(args) do { HRESULT optickCheckResult = (args); (void)optickCheckResult; OPTICK_ASSERT(optickCheckResult == S_OK, "Failed check"); } while(false)
+
+namespace Optick
+{
+ // Direct3D 12 implementation of GPUProfiler.
+ class GPUProfilerD3D12 : public GPUProfiler
+ {
+ // Command allocator + command list used by the profiler for one frame slot.
+ struct Frame
+ {
+ ID3D12CommandAllocator* commandAllocator;
+ ID3D12GraphicsCommandList* commandList;
+
+ Frame() : commandAllocator(nullptr), commandList(nullptr)
+ {
+ Reset();
+ }
+
+ // Currently a no-op.
+ void Reset()
+ {
+ }
+
+ // Releases both D3D12 objects (defined out of line).
+ void Shutdown();
+
+ ~Frame()
+ {
+ Shutdown();
+ }
+ };
+
+ // D3D12 objects kept for one node (one entry per command queue passed to InitDevice).
+ struct NodePayload
+ {
+ ID3D12CommandQueue* commandQueue;
+ ID3D12QueryHeap* queryHeap;
+ ID3D12Fence* syncFence;
+ array<Frame, NUM_FRAMES_DELAY> frames;
+
+ NodePayload() : commandQueue(nullptr), queryHeap(nullptr), syncFence(nullptr) {}
+ ~NodePayload();
+ };
+ vector<NodePayload*> nodePayloads;
+
+ // Buffer that ResolveQueryData writes timestamps into and that is mapped for CPU reads.
+ ID3D12Resource* queryBuffer;
+ ID3D12Device* device;
+
+ // VSync Stats
+ DXGI_FRAME_STATISTICS prevFrameStatistics;
+
+ //void UpdateRange(uint32_t start, uint32_t finish)
+ void InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue);
+
+ void ResolveTimestamps(uint32_t startIndex, uint32_t count);
+
+ void WaitForFrame(uint64_t frameNumber);
+
+ public:
+ GPUProfilerD3D12();
+ ~GPUProfilerD3D12();
+
+ void InitDevice(ID3D12Device* pDevice, ID3D12CommandQueue** pCommandQueues, uint32_t numCommandQueues);
+
+ void QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp);
+
+ void Flip(IDXGISwapChain* swapChain);
+
+
+ // Interface implementation
+ ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) override;
+
+ // Type-erased entry point: `context` is treated as an ID3D12GraphicsCommandList*.
+ void QueryTimestamp(void* context, int64_t* outCpuTimestamp) override
+ {
+ QueryTimestamp((ID3D12GraphicsCommandList*)context, outCpuTimestamp);
+ }
+
+ // Type-erased entry point: `swapChain` is treated as an IDXGISwapChain*.
+ void Flip(void* swapChain) override
+ {
+ Flip(static_cast<IDXGISwapChain*>(swapChain));
+ }
+ };
+
+ // Calls Release() on *ppT when it is non-null and then nulls the pointer; no-op otherwise.
+ template <class T> void SafeRelease(T **ppT)
+ {
+     if (*ppT == NULL)
+         return;
+
+     (*ppT)->Release();
+     *ppT = NULL;
+ }
+
+ // Creates a D3D12 GPU profiler for the given device and command queues and hands it to Core.
+ //   device    - treated as ID3D12Device*
+ //   cmdQueues - treated as an array of ID3D12CommandQueue*, numQueues entries long
+ void InitGpuD3D12(void* device, void** cmdQueues, uint32_t numQueues)
+ {
+     ID3D12Device* d3dDevice = (ID3D12Device*)device;
+     ID3D12CommandQueue** d3dQueues = (ID3D12CommandQueue**)cmdQueues;
+
+     GPUProfilerD3D12* profiler = Memory::New<GPUProfilerD3D12>();
+     profiler->InitDevice(d3dDevice, d3dQueues, numQueues);
+
+     Core::Get().InitGPUProfiler(profiler);
+ }
+
+ // Starts with no device or query buffer and zeroed VSync statistics.
+ GPUProfilerD3D12::GPUProfilerD3D12()
+     : queryBuffer(nullptr)
+     , device(nullptr)
+ {
+     prevFrameStatistics = { 0 };
+ }
+
+ // Waits for the last flipped frame, then destroys the per-node payloads, the nodes and the
+ // query read-back buffer.
+ GPUProfilerD3D12::~GPUProfilerD3D12()
+ {
+     // WaitForFrame dereferences nodePayloads[currentNode], and `frameNumber - 1` wraps around
+     // to a value the fence never reaches when no frame was flipped. Only wait when there is
+     // an initialized node and at least one frame.
+     if (frameNumber > 0 && currentNode < nodePayloads.size() && nodePayloads[currentNode] != nullptr)
+         WaitForFrame(frameNumber - 1);
+
+     for (NodePayload* payload : nodePayloads)
+         Memory::Delete(payload);
+     nodePayloads.clear();
+
+     for (Node* node : nodes)
+         Memory::Delete(node);
+     nodes.clear();
+
+     SafeRelease(&queryBuffer);
+ }
+
+ // Binds the profiler to a device: creates the shared timestamp read-back buffer, looks up the
+ // adapter description to use as the node name, and initializes one node per command queue.
+ //   pDevice          - D3D12 device used to create all profiler resources
+ //   pCommandQueues   - array of numCommandQueues queues; queue i becomes node i
+ //   numCommandQueues - number of nodes to create
+ void GPUProfilerD3D12::InitDevice(ID3D12Device* pDevice, ID3D12CommandQueue** pCommandQueues, uint32_t numCommandQueues)
+ {
+     device = pDevice;
+
+     uint32_t nodeCount = numCommandQueues; // device->GetNodeCount();
+
+     nodes.resize(nodeCount);
+     nodePayloads.resize(nodeCount);
+
+     D3D12_HEAP_PROPERTIES heapDesc;
+     heapDesc.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
+     heapDesc.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
+     heapDesc.CreationNodeMask = 0;
+     // Shifting a 32-bit value by 32 or more is undefined, so saturate the mask instead
+     heapDesc.VisibleNodeMask = (nodeCount < 32u) ? ((1u << nodeCount) - 1u) : ~0u;
+     heapDesc.Type = D3D12_HEAP_TYPE_READBACK;
+
+     D3D12_RESOURCE_DESC resourceDesc;
+     resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+     resourceDesc.Alignment = 0;
+     resourceDesc.Width = MAX_QUERIES_COUNT * sizeof(int64_t);
+     resourceDesc.Height = 1;
+     resourceDesc.DepthOrArraySize = 1;
+     resourceDesc.MipLevels = 1;
+     resourceDesc.Format = DXGI_FORMAT_UNKNOWN;
+     resourceDesc.SampleDesc.Count = 1;
+     resourceDesc.SampleDesc.Quality = 0;
+     resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+     resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
+
+     OPTICK_CHECK(device->CreateCommittedResource(
+         &heapDesc,
+         D3D12_HEAP_FLAG_NONE,
+         &resourceDesc,
+         D3D12_RESOURCE_STATE_COPY_DEST,
+         nullptr,
+         IID_PPV_ARGS(&queryBuffer)));
+
+     // Get Device Name. Every step is checked: if the lookup fails the name stays empty
+     // instead of dereferencing an uninitialized factory/adapter pointer.
+     char deviceName[128] = { 0 };
+
+     LUID adapterLUID = pDevice->GetAdapterLuid();
+
+     IDXGIFactory4* factory = nullptr;
+     OPTICK_CHECK(CreateDXGIFactory2(0, IID_PPV_ARGS(&factory)));
+
+     if (factory != nullptr)
+     {
+         IDXGIAdapter1* adapter = nullptr;
+         if (SUCCEEDED(factory->EnumAdapterByLuid(adapterLUID, IID_PPV_ARGS(&adapter))) && adapter != nullptr)
+         {
+             DXGI_ADAPTER_DESC1 desc;
+             if (SUCCEEDED(adapter->GetDesc1(&desc)))
+                 wcstombs_s(deviceName, desc.Description, OPTICK_ARRAY_SIZE(deviceName) - 1);
+
+             adapter->Release();
+         }
+
+         factory->Release();
+     }
+
+     for (uint32_t nodeIndex = 0; nodeIndex < nodeCount; ++nodeIndex)
+         InitNodeInternal(deviceName, nodeIndex, pCommandQueues[nodeIndex]);
+ }
+
+ // Initializes node `nodeIndex`: the generic Node (via GPUProfiler::InitNode) plus its D3D12
+ // payload - timestamp query heap, sync fence and one closed command list per frame slot.
+ void GPUProfilerD3D12::InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue)
+ {
+     GPUProfiler::InitNode(nodeName, nodeIndex);
+
+     NodePayload* payload = Memory::New<NodePayload>();
+     nodePayloads[nodeIndex] = payload;
+     payload->commandQueue = pCmdQueue;
+
+     const UINT nodeMask = 1u << nodeIndex;
+
+     D3D12_QUERY_HEAP_DESC queryHeapDesc;
+     queryHeapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
+     queryHeapDesc.Count = MAX_QUERIES_COUNT;
+     queryHeapDesc.NodeMask = nodeMask;
+     OPTICK_CHECK(device->CreateQueryHeap(&queryHeapDesc, IID_PPV_ARGS(&payload->queryHeap)));
+
+     OPTICK_CHECK(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&payload->syncFence)));
+
+     for (size_t frameIndex = 0; frameIndex < payload->frames.size(); ++frameIndex)
+     {
+         Frame& frame = payload->frames[frameIndex];
+         OPTICK_CHECK(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&frame.commandAllocator)));
+         OPTICK_CHECK(device->CreateCommandList(nodeMask, D3D12_COMMAND_LIST_TYPE_DIRECT, frame.commandAllocator, nullptr, IID_PPV_ARGS(&frame.commandList)));
+         // Command lists are created in the recording state; close so Flip can Reset() them
+         OPTICK_CHECK(frame.commandList->Close());
+     }
+ }
+
+ // While the profiler is running, reserves the next query slot on the current node for
+ // outCpuTimestamp and records a timestamp query for it into `context`. No-op otherwise.
+ void GPUProfilerD3D12::QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp)
+ {
+     if (currentState != STATE_RUNNING)
+         return;
+
+     const uint32_t slot = nodes[currentNode]->QueryTimestamp(outCpuTimestamp);
+     context->EndQuery(nodePayloads[currentNode]->queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, slot);
+ }
+
+ // Copies `count` resolved GPU timestamps starting at query slot `startIndex` out of the
+ // read-back buffer and writes the converted CPU timestamps to the addresses registered
+ // for those slots. Does nothing when count is 0 or the buffer can't be mapped.
+ void GPUProfilerD3D12::ResolveTimestamps(uint32_t startIndex, uint32_t count)
+ {
+     if (count)
+     {
+         Node* node = nodes[currentNode];
+
+         D3D12_RANGE readRange = { sizeof(uint64_t)*startIndex, sizeof(uint64_t)*(startIndex + count) };
+         void* pData = nullptr;
+         if (FAILED(queryBuffer->Map(0, &readRange, &pData)) || pData == nullptr)
+             return; // nothing to read; the registered timestamps keep their current values
+
+         memcpy(&node->queryGpuTimestamps[startIndex], (uint64_t*)pData + startIndex, sizeof(uint64_t) * count);
+
+         // The CPU only read from the buffer: report an empty written range rather than
+         // nullptr, which would mean "the whole subresource may have been written".
+         const D3D12_RANGE writtenRange = { 0, 0 };
+         queryBuffer->Unmap(0, &writtenRange);
+
+         // Convert GPU timestamps => CPU Timestamps
+         for (uint32_t index = startIndex; index < startIndex + count; ++index)
+             *node->queryCpuTimestamps[index] = node->clock.GetCPUTimestamp(node->queryGpuTimestamps[index]);
+     }
+ }
+
+ // Blocks, polling in 1 ms sleeps, until the current node's sync fence has reached
+ // frameNumberToWait.
+ void GPUProfilerD3D12::WaitForFrame(uint64_t frameNumberToWait)
+ {
+     OPTICK_EVENT();
+
+     NodePayload* payload = nodePayloads[currentNode];
+     for (;;)
+     {
+         if (!(frameNumberToWait > payload->syncFence->GetCompletedValue()))
+             break;
+
+         std::this_thread::sleep_for(std::chrono::milliseconds(1));
+     }
+ }
+
+ // Per-frame update. Under updateLock it promotes STATE_STARTING to STATE_RUNNING and, while
+ // running: closes the pending GPU frame event, opens the next one, resolves this frame's
+ // timestamp queries into queryBuffer, submits and signals the fence, reads back the results
+ // of the slot about to be reused, and records a VSync event. frameNumber is incremented on
+ // every call regardless of state.
+ void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain)
+ {
+ OPTICK_CATEGORY("GPUProfilerD3D12::Flip", Category::Debug);
+
+ std::lock_guard<std::recursive_mutex> lock(updateLock);
+
+ if (currentState == STATE_STARTING)
+ currentState = STATE_RUNNING;
+
+ if (currentState == STATE_RUNNING)
+ {
+ Node& node = *nodes[currentNode];
+ NodePayload& payload = *nodePayloads[currentNode];
+
+ // Frame slots form a ring of NUM_FRAMES_DELAY entries
+ uint32_t currentFrameIndex = frameNumber % NUM_FRAMES_DELAY;
+ uint32_t nextFrameIndex = (frameNumber + 1) % NUM_FRAMES_DELAY;
+
+ //Frame& currentFrame = frames[frameNumber % NUM_FRAMES_DELAY];
+ //Frame& nextFrame = frames[(frameNumber + 1) % NUM_FRAMES_DELAY];
+
+ QueryFrame& currentFrame = node.queryGpuframes[currentFrameIndex];
+ QueryFrame& nextFrame = node.queryGpuframes[nextFrameIndex];
+
+ ID3D12GraphicsCommandList* commandList = payload.frames[currentFrameIndex].commandList;
+ ID3D12CommandAllocator* commandAllocator = payload.frames[currentFrameIndex].commandAllocator;
+ commandAllocator->Reset();
+ commandList->Reset(commandAllocator, nullptr);
+
+ // Close the frame event that was opened for this slot, if any
+ if (EventData* frameEvent = currentFrame.frameEvent)
+ QueryTimestamp(commandList, &frameEvent->finish);
+
+ // Generate GPU Frame event for the next frame
+ EventData& event = AddFrameEvent();
+ QueryTimestamp(commandList, &event.start);
+ QueryTimestamp(commandList, &AddFrameTag().timestamp);
+ nextFrame.frameEvent = &event;
+
+ uint32_t queryBegin = currentFrame.queryIndexStart;
+ uint32_t queryEnd = node.queryIndex;
+
+ // (uint32_t)-1 is the "no queries recorded yet" value set by QueryFrame::Reset
+ if (queryBegin != (uint32_t)-1)
+ {
+ OPTICK_ASSERT(queryEnd - queryBegin <= MAX_QUERIES_COUNT, "Too many queries in one frame? Increase GPUProfiler::MAX_QUERIES_COUNT to fix the problem!");
+ currentFrame.queryIndexCount = queryEnd - queryBegin;
+
+ uint32_t startIndex = queryBegin % MAX_QUERIES_COUNT;
+ uint32_t finishIndex = queryEnd % MAX_QUERIES_COUNT;
+
+ if (startIndex < finishIndex)
+ {
+ commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, startIndex, queryEnd - queryBegin, queryBuffer, startIndex * sizeof(int64_t));
+ }
+ else
+ {
+ // The range wraps around the end of the query heap: resolve the tail, then the head
+ commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, startIndex, MAX_QUERIES_COUNT - startIndex, queryBuffer, startIndex * sizeof(int64_t));
+ commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, finishIndex, queryBuffer, 0);
+ }
+ }
+
+ commandList->Close();
+
+ payload.commandQueue->ExecuteCommandLists(1, (ID3D12CommandList*const*)&commandList);
+ // NOTE(review): the fence is only signaled while running, but frameNumber also advances
+ // when not running - confirm waits on frameNumber-derived values can always complete.
+ payload.commandQueue->Signal(payload.syncFence, frameNumber);
+
+ // Preparing Next Frame
+ // Try resolve timestamps for the current frame
+ if (frameNumber >= NUM_FRAMES_DELAY && nextFrame.queryIndexCount)
+ {
+ WaitForFrame(frameNumber + 1 - NUM_FRAMES_DELAY);
+
+ uint32_t resolveStart = nextFrame.queryIndexStart % MAX_QUERIES_COUNT;
+ uint32_t resolveFinish = resolveStart + nextFrame.queryIndexCount;
+ ResolveTimestamps(resolveStart, std::min<uint32_t>(resolveFinish, MAX_QUERIES_COUNT) - resolveStart);
+ if (resolveFinish > MAX_QUERIES_COUNT)
+ ResolveTimestamps(0, resolveFinish - MAX_QUERIES_COUNT);
+ }
+
+ nextFrame.queryIndexStart = queryEnd;
+ nextFrame.queryIndexCount = 0;
+
+ // Process VSync
+ DXGI_FRAME_STATISTICS currentFrameStatistics = { 0 };
+ HRESULT result = swapChain->GetFrameStatistics(&currentFrameStatistics);
+ // Only emit an event when exactly one present happened since the previous sample
+ if ((result == S_OK) && (prevFrameStatistics.PresentCount + 1 == currentFrameStatistics.PresentCount))
+ {
+ EventData& data = AddVSyncEvent();
+ data.start = prevFrameStatistics.SyncQPCTime.QuadPart;
+ data.finish = currentFrameStatistics.SyncQPCTime.QuadPart;
+ }
+ prevFrameStatistics = currentFrameStatistics;
+ }
+
+ ++frameNumber;
+ }
+
+ // Samples a matching CPU/GPU timestamp pair and both clock frequencies for the given node.
+ GPUProfiler::ClockSynchronization GPUProfilerD3D12::GetClockSynchronization(uint32_t nodeIndex)
+ {
+     ClockSynchronization sync;
+     sync.frequencyCPU = GetHighPrecisionFrequency();
+
+     ID3D12CommandQueue* queue = nodePayloads[nodeIndex]->commandQueue;
+     queue->GetTimestampFrequency((uint64_t*)&sync.frequencyGPU);
+     queue->GetClockCalibration((uint64_t*)&sync.timestampGPU, (uint64_t*)&sync.timestampCPU);
+
+     return sync;
+ }
+
+ // Releases the query heap and the sync fence. The command queue pointer is not released here.
+ GPUProfilerD3D12::NodePayload::~NodePayload()
+ {
+     SafeRelease(&queryHeap);
+
+     SafeRelease(&syncFence);
+ }
+
+ // Releases the command allocator, then the command list, nulling both pointers.
+ void GPUProfilerD3D12::Frame::Shutdown()
+ {
+     SafeRelease(&commandAllocator);
+
+     SafeRelease(&commandList);
+ }
+}
+
+#else
+#include "optick_common.h"
+
+namespace Optick
+{
+    // Stub compiled when OPTICK_ENABLE_GPU_D3D12 is off: ignores its arguments and reports a failure.
+    void InitGpuD3D12(void* /*device*/, void** /*cmdQueues*/, uint32_t /*numQueues*/)
+    {
+        OPTICK_FAILED("OPTICK_ENABLE_GPU_D3D12 is disabled! Can't initialize GPU Profiler!");
+    }
+}
+
+#endif //OPTICK_ENABLE_GPU_D3D12
+#endif //USE_OPTICK \ No newline at end of file
diff --git a/external/optick/optick_gpu.h b/external/optick/optick_gpu.h
new file mode 100644
index 0000000..f028f8a
--- /dev/null
+++ b/external/optick/optick_gpu.h
@@ -0,0 +1,129 @@
+#pragma once
+#include "optick.config.h"
+
+#if USE_OPTICK
+
+#include <atomic>
+#include <mutex>
+
+#include "optick_common.h"
+#include "optick_memory.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+namespace Optick
+{
+ // Returns a display name for a GPU queue type.
+ const char* GetGPUQueueName(GPUQueueType queue);
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // Base class for the GPU profiler backends. Owns the per-node query bookkeeping and the
+ // capture state; backends implement the three pure virtual functions at the bottom.
+ class GPUProfiler
+ {
+ public:
+     static const int MAX_FRAME_EVENTS = 1024;
+     static const int NUM_FRAMES_DELAY = 4;
+     static const int MAX_QUERIES_COUNT = (2 * MAX_FRAME_EVENTS) * NUM_FRAMES_DELAY;
+ protected:
+
+     enum State
+     {
+         STATE_OFF,
+         STATE_STARTING,
+         STATE_RUNNING,
+         STATE_FINISHING,
+     };
+
+     // A matching CPU/GPU timestamp pair plus both clock frequencies, used to convert
+     // GPU timestamps into CPU timestamps.
+     struct ClockSynchronization
+     {
+         int64_t frequencyCPU;
+         int64_t frequencyGPU;
+         int64_t timestampCPU;
+         int64_t timestampGPU;
+
+         // Converts a GPU timestamp to the CPU clock:
+         //   timestampCPU + (gpuTimestamp - timestampGPU) * frequencyCPU / frequencyGPU
+         // The delta is split into whole GPU seconds and a remainder so the multiplication
+         // by frequencyCPU does not overflow on long captures; the result is the same as the
+         // single-expression form whenever that form does not overflow.
+         // Returns timestampCPU unchanged while the clock is uncalibrated (frequencyGPU == 0).
+         int64_t GetCPUTimestamp(int64_t gpuTimestamp) const
+         {
+             if (frequencyGPU == 0)
+                 return timestampCPU;
+
+             const int64_t gpuDelta = gpuTimestamp - timestampGPU;
+             const int64_t wholeSeconds = gpuDelta / frequencyGPU;
+             const int64_t remainderTicks = gpuDelta % frequencyGPU;
+             return timestampCPU + wholeSeconds * frequencyCPU + remainderTicks * frequencyCPU / frequencyGPU;
+         }
+
+         ClockSynchronization() : frequencyCPU(0), frequencyGPU(0), timestampCPU(0), timestampGPU(0) {}
+     };
+
+     // Bookkeeping for one frame slot: its frame event and the range of query indices it used.
+     struct QueryFrame
+     {
+         EventData* frameEvent;
+         uint32_t queryIndexStart;
+         uint32_t queryIndexCount;
+
+         QueryFrame()
+         {
+             Reset();
+         }
+
+         void Reset()
+         {
+             frameEvent = nullptr;
+             queryIndexStart = (uint32_t)-1; // "no queries recorded yet"
+             queryIndexCount = 0;
+         }
+     };
+
+     // Per-node query state and event storages.
+     struct Node
+     {
+         array<QueryFrame, NUM_FRAMES_DELAY> queryGpuframes;
+         array<int64_t, MAX_QUERIES_COUNT> queryGpuTimestamps;
+         array<int64_t*, MAX_QUERIES_COUNT> queryCpuTimestamps;
+         std::atomic<uint32_t> queryIndex;
+
+         ClockSynchronization clock;
+
+         array<EventStorage*, GPU_QUEUE_COUNT> gpuEventStorage;
+
+         // Reserves the next query slot (index modulo MAX_QUERIES_COUNT), remembers where the
+         // converted CPU timestamp for that slot must be written, and returns the slot.
+         uint32_t QueryTimestamp(int64_t* outCpuTimestamp)
+         {
+             uint32_t index = queryIndex.fetch_add(1) % MAX_QUERIES_COUNT;
+             queryCpuTimestamps[index] = outCpuTimestamp;
+             return index;
+         }
+
+         string name;
+
+         void Reset();
+
+         Node() : queryIndex(0) { gpuEventStorage.fill(nullptr); }
+     };
+
+     std::recursive_mutex updateLock;
+     // NOTE(review): volatile provides no inter-thread synchronization; confirm every
+     // cross-thread access to currentState happens under updateLock.
+     volatile State currentState;
+
+     vector<Node*> nodes;
+     uint32_t currentNode;
+
+     uint32_t frameNumber;
+
+     void Reset();
+
+     EventData& AddFrameEvent();
+     EventData& AddVSyncEvent();
+     TagData<uint32>& AddFrameTag();
+
+ public:
+     GPUProfiler();
+
+     // Init
+     virtual void InitNode(const char* nodeName, uint32_t nodeIndex);
+
+     // Capture Controls
+     virtual void Start(uint32 mode);
+     virtual void Stop(uint32 mode);
+     virtual void Dump(uint32 mode);
+
+     virtual string GetName() const;
+
+     // Interface to implement
+     virtual ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) = 0;
+     virtual void QueryTimestamp(void* context, int64_t* cpuTimestampOut) = 0;
+     virtual void Flip(void* swapChain) = 0;
+
+     virtual ~GPUProfiler();
+ };
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+
+#endif //USE_OPTICK
diff --git a/external/optick/optick_gpu.vulkan.cpp b/external/optick/optick_gpu.vulkan.cpp
new file mode 100644
index 0000000..6d6f29d
--- /dev/null
+++ b/external/optick/optick_gpu.vulkan.cpp
@@ -0,0 +1,365 @@
+#include "optick.config.h"
+
+#if USE_OPTICK
+#if OPTICK_ENABLE_GPU_VULKAN
+#include <vulkan/vulkan.h>
+
+#include "optick_core.h"
+#include "optick_gpu.h"
+
+// Evaluates `args` once and asserts (through OPTICK_ASSERT) that the VkResult is VK_SUCCESS.
+// There is deliberately no ';' after while(false): the caller's own ';' ends the statement,
+// which keeps the macro usable inside an unbraced if/else.
+#define OPTICK_VK_CHECK(args) do { VkResult optickVkCheckResult = (args); OPTICK_ASSERT(optickVkCheckResult == VK_SUCCESS, "Failed check"); (void)optickVkCheckResult; } while(false)
+
+namespace Optick
+{
+ // Vulkan implementation of GPUProfiler.
+ class GPUProfilerVulkan : public GPUProfiler
+ {
+ protected:
+ // Command buffer + fence used by the profiler for one frame slot.
+ struct Frame
+ {
+ VkCommandBuffer commandBuffer;
+ VkFence fence;
+ Frame() : commandBuffer(VK_NULL_HANDLE), fence(VK_NULL_HANDLE) {}
+ };
+
+ // Vulkan handles kept for one node (one entry per queue passed to InitDevice).
+ struct NodePayload
+ {
+ VkDevice device;
+ VkPhysicalDevice physicalDevice;
+ VkQueue queue;
+ VkQueryPool queryPool;
+ VkCommandPool commandPool;
+
+ array<Frame, NUM_FRAMES_DELAY> frames;
+
+ NodePayload() : device(VK_NULL_HANDLE), physicalDevice(VK_NULL_HANDLE), queue(VK_NULL_HANDLE), queryPool(VK_NULL_HANDLE), commandPool(VK_NULL_HANDLE) {}
+ ~NodePayload();
+ };
+ vector<NodePayload*> nodePayloads;
+
+ void ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count);
+ void WaitForFrame(uint64_t frameNumber);
+
+ public:
+ GPUProfilerVulkan();
+ ~GPUProfilerVulkan();
+
+ void InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount);
+ void QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp);
+
+
+ // Interface implementation
+ ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) override;
+
+ // Type-erased entry point: `context` is treated as a VkCommandBuffer.
+ void QueryTimestamp(void* context, int64_t* outCpuTimestamp) override
+ {
+ QueryTimestamp((VkCommandBuffer)context, outCpuTimestamp);
+ }
+
+ void Flip(void* swapChain) override;
+ };
+
+ // Creates a Vulkan GPU profiler for the given per-node handles and hands it to Core.
+ // The void* arguments are treated as arrays of VkDevice, VkPhysicalDevice and VkQueue.
+ void InitGpuVulkan(void* vkDevices, void* vkPhysicalDevices, void* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues)
+ {
+     VkDevice* devices = (VkDevice*)vkDevices;
+     VkPhysicalDevice* physicalDevices = (VkPhysicalDevice*)vkPhysicalDevices;
+     VkQueue* queues = (VkQueue*)vkQueues;
+
+     GPUProfilerVulkan* profiler = Memory::New<GPUProfilerVulkan>();
+     profiler->InitDevice(devices, physicalDevices, queues, cmdQueuesFamily, numQueues);
+
+     Core::Get().InitGPUProfiler(profiler);
+ }
+
+ // Nothing to set up here; all Vulkan objects are created in InitDevice.
+ GPUProfilerVulkan::GPUProfilerVulkan()
+     : GPUProfiler()
+ {
+ }
+
+ // Creates one profiler node per entry of the parallel input arrays (all nodeCount long):
+ // a timestamp query pool, a command pool and, per frame slot, a command buffer and a fence.
+ void GPUProfilerVulkan::InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount)
+ {
+ VkQueryPoolCreateInfo queryPoolCreateInfo;
+ queryPoolCreateInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
+ queryPoolCreateInfo.pNext = 0;
+ queryPoolCreateInfo.flags = 0;
+ queryPoolCreateInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
+ queryPoolCreateInfo.queryCount = MAX_QUERIES_COUNT + 1;
+
+ VkCommandPoolCreateInfo commandPoolCreateInfo;
+ commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+ commandPoolCreateInfo.pNext = 0;
+ commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
+
+ nodes.resize(nodeCount);
+ nodePayloads.resize(nodeCount);
+
+ VkResult r;
+ for (uint32_t i = 0; i < nodeCount; ++i)
+ {
+ // The physical device name doubles as the node name
+ VkPhysicalDeviceProperties properties = { 0 };
+ vkGetPhysicalDeviceProperties(physicalDevices[i], &properties);
+ GPUProfiler::InitNode(properties.deviceName, i);
+
+ NodePayload* nodePayload = Memory::New<NodePayload>();
+ nodePayloads[i] = nodePayload;
+ nodePayload->device = devices[i];
+ nodePayload->physicalDevice = physicalDevices[i];
+ nodePayload->queue = cmdQueues[i];
+
+ r = vkCreateQueryPool(devices[i], &queryPoolCreateInfo, 0, &nodePayload->queryPool);
+ OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
+
+ commandPoolCreateInfo.queueFamilyIndex = cmdQueuesFamily[i];
+ r = vkCreateCommandPool(nodePayload->device, &commandPoolCreateInfo, 0, &nodePayload->commandPool);
+ OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
+
+ for (uint32_t j = 0; j < nodePayload->frames.size(); ++j)
+ {
+ Frame& frame = nodePayload->frames[j];
+
+ VkCommandBufferAllocateInfo allocInfo;
+ allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+ allocInfo.pNext = 0;
+ allocInfo.commandBufferCount = 1;
+ allocInfo.commandPool = nodePayload->commandPool;
+ allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+ r = vkAllocateCommandBuffers(nodePayload->device, &allocInfo, &frame.commandBuffer);
+ OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
+
+ // Slot 0's fence starts unsignaled because it is submitted and waited on just below;
+ // the other slots start signaled.
+ VkFenceCreateInfo fenceCreateInfo;
+ fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+ fenceCreateInfo.pNext = 0;
+ fenceCreateInfo.flags = j == 0 ? 0 : VK_FENCE_CREATE_SIGNALED_BIT;
+ r = vkCreateFence(nodePayload->device, &fenceCreateInfo, 0, &frame.fence);
+ OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
+ if (j == 0)
+ {
+ // One-off submission on slot 0 that resets queries [0, MAX_QUERIES_COUNT) before first use
+ VkCommandBufferBeginInfo commandBufferBeginInfo;
+ commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+ commandBufferBeginInfo.pNext = 0;
+ commandBufferBeginInfo.pInheritanceInfo = 0;
+ commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+ vkBeginCommandBuffer(frame.commandBuffer, &commandBufferBeginInfo);
+ vkCmdResetQueryPool(frame.commandBuffer, nodePayload->queryPool, 0, MAX_QUERIES_COUNT);
+ vkEndCommandBuffer(frame.commandBuffer);
+
+ VkSubmitInfo submitInfo = {};
+ submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+ submitInfo.pNext = nullptr;
+ submitInfo.waitSemaphoreCount = 0;
+ submitInfo.pWaitSemaphores = nullptr;
+ submitInfo.commandBufferCount = 1;
+ submitInfo.pCommandBuffers = &frame.commandBuffer;
+ submitInfo.signalSemaphoreCount = 0;
+ submitInfo.pSignalSemaphores = nullptr;
+ vkQueueSubmit(nodePayload->queue, 1, &submitInfo, frame.fence);
+ vkWaitForFences(nodePayload->device, 1, &frame.fence, 1, (uint64_t)-1);
+ vkResetCommandBuffer(frame.commandBuffer, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
+ }
+ }
+ }
+ }
+
+ // While the profiler is running, reserves the next query slot on the current node for
+ // outCpuTimestamp and records a timestamp write for it into commandBuffer. No-op otherwise.
+ void GPUProfilerVulkan::QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp)
+ {
+     if (currentState != STATE_RUNNING)
+         return;
+
+     const uint32_t slot = nodes[currentNode]->QueryTimestamp(outCpuTimestamp);
+     vkCmdWriteTimestamp(commandBuffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, nodePayloads[currentNode]->queryPool, slot);
+ }
+
+ // Reads `count` 64-bit query results starting at slot `startIndex` from the current node's
+ // query pool, records a reset of those slots into commandBuffer, and writes the converted
+ // CPU timestamps to the addresses registered for the slots. No-op when count is 0.
+ void GPUProfilerVulkan::ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count)
+ {
+     if (count == 0)
+         return;
+
+     Node* node = nodes[currentNode];
+     NodePayload* payload = nodePayloads[currentNode];
+
+     OPTICK_VK_CHECK(vkGetQueryPoolResults(payload->device, payload->queryPool, startIndex, count, 8 * count, &node->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT));
+     vkCmdResetQueryPool(commandBuffer, payload->queryPool, startIndex, count);
+
+     // Convert GPU timestamps => CPU Timestamps
+     const uint32_t endIndex = startIndex + count;
+     for (uint32_t slot = startIndex; slot < endIndex; ++slot)
+     {
+         *node->queryCpuTimestamps[slot] = node->clock.GetCPUTimestamp(node->queryGpuTimestamps[slot]);
+     }
+ }
+
+ // Blocks until the fence of the frame slot that frameNumberToWait maps to (on the current
+ // node) is signaled, retrying the wait until it reports VK_SUCCESS.
+ void GPUProfilerVulkan::WaitForFrame(uint64_t frameNumberToWait)
+ {
+     OPTICK_EVENT();
+
+     for (;;)
+     {
+         NodePayload& payload = *nodePayloads[currentNode];
+         Frame& frame = payload.frames[frameNumberToWait % payload.frames.size()];
+
+         int waitResult = vkWaitForFences(nodePayloads[currentNode]->device, 1, &frame.fence, 1, 1000 * 30);
+         if (waitResult == VK_SUCCESS)
+             break;
+     }
+ }
+
+ // Per-frame update (the swap chain argument is unused). Under updateLock it promotes
+ // STATE_STARTING to STATE_RUNNING and, while running: closes the pending GPU frame event,
+ // opens the next one, submits the slot's command buffer, and reads back the query results of
+ // the slot about to be reused. frameNumber is incremented on every call regardless of state.
+ void GPUProfilerVulkan::Flip(void* /*swapChain*/)
+ {
+ OPTICK_CATEGORY("GPUProfilerVulkan::Flip", Category::Debug);
+
+ std::lock_guard<std::recursive_mutex> lock(updateLock);
+
+ if (currentState == STATE_STARTING)
+ currentState = STATE_RUNNING;
+
+ if (currentState == STATE_RUNNING)
+ {
+ Node& node = *nodes[currentNode];
+ NodePayload& payload = *nodePayloads[currentNode];
+
+ // Frame slots form a ring of NUM_FRAMES_DELAY entries
+ uint32_t currentFrameIndex = frameNumber % NUM_FRAMES_DELAY;
+ uint32_t nextFrameIndex = (frameNumber + 1) % NUM_FRAMES_DELAY;
+
+ QueryFrame& currentFrame = node.queryGpuframes[currentFrameIndex];
+ QueryFrame& nextFrame = node.queryGpuframes[nextFrameIndex];
+
+ VkCommandBuffer commandBuffer = payload.frames[currentFrameIndex].commandBuffer;
+ VkFence fence = payload.frames[currentFrameIndex].fence;
+ VkDevice device = payload.device;
+ VkQueue queue = payload.queue;
+
+ // Wait for this slot's previous submission before re-recording its command buffer
+ vkWaitForFences(device, 1, &fence, 1, (uint64_t)-1);
+
+ VkCommandBufferBeginInfo commandBufferBeginInfo;
+ commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+ commandBufferBeginInfo.pNext = 0;
+ commandBufferBeginInfo.pInheritanceInfo = 0;
+ commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+ OPTICK_VK_CHECK(vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo));
+ vkResetFences(device, 1, &fence);
+
+ // Close the frame event that was opened for this slot, if any
+ if (EventData* frameEvent = currentFrame.frameEvent)
+ QueryTimestamp(commandBuffer, &frameEvent->finish);
+
+ // Generate GPU Frame event for the next frame
+ EventData& event = AddFrameEvent();
+ QueryTimestamp(commandBuffer, &event.start);
+ QueryTimestamp(commandBuffer, &AddFrameTag().timestamp);
+ nextFrame.frameEvent = &event;
+
+ OPTICK_VK_CHECK(vkEndCommandBuffer(commandBuffer));
+ VkSubmitInfo submitInfo = {};
+ submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+ submitInfo.pNext = nullptr;
+ submitInfo.waitSemaphoreCount = 0;
+ submitInfo.pWaitSemaphores = nullptr;
+ submitInfo.commandBufferCount = 1;
+ submitInfo.pCommandBuffers = &commandBuffer;
+ submitInfo.signalSemaphoreCount = 0;
+ submitInfo.pSignalSemaphores = nullptr;
+ OPTICK_VK_CHECK(vkQueueSubmit(queue, 1, &submitInfo, fence));
+
+ uint32_t queryBegin = currentFrame.queryIndexStart;
+ uint32_t queryEnd = node.queryIndex;
+
+ // (uint32_t)-1 is the "no queries recorded yet" value set by QueryFrame::Reset
+ if (queryBegin != (uint32_t)-1)
+ {
+ currentFrame.queryIndexCount = queryEnd - queryBegin;
+ }
+
+ // Preparing Next Frame
+ // Try resolve timestamps for the current frame
+ // NOTE(review): ResolveTimestamps records vkCmdResetQueryPool into commandBuffer, which
+ // was already ended and submitted above - confirm this is valid command buffer usage.
+ if (nextFrame.queryIndexStart != (uint32_t)-1)
+ {
+ uint32_t startIndex = nextFrame.queryIndexStart % MAX_QUERIES_COUNT;
+ uint32_t finishIndex = (startIndex + nextFrame.queryIndexCount) % MAX_QUERIES_COUNT;
+
+ if (startIndex < finishIndex)
+ {
+ ResolveTimestamps(commandBuffer, startIndex, finishIndex - startIndex);
+ }
+ else if (startIndex > finishIndex)
+ {
+ // The range wraps around the end of the query pool: resolve the tail, then the head
+ ResolveTimestamps(commandBuffer, startIndex, MAX_QUERIES_COUNT - startIndex);
+ ResolveTimestamps(commandBuffer, 0, finishIndex);
+ }
+ }
+
+ nextFrame.queryIndexStart = queryEnd;
+ nextFrame.queryIndexCount = 0;
+ }
+
+ ++frameNumber;
+ }
+
+ // Produces a CPU/GPU clock pair for the node: submits a single timestamp write (query 0) on
+ // the current frame slot's command buffer, waits for it, reads the GPU value back and pairs
+ // it with the CPU time sampled right after. The GPU frequency is derived from the device's
+ // timestampPeriod.
+ GPUProfiler::ClockSynchronization GPUProfilerVulkan::GetClockSynchronization(uint32_t nodeIndex)
+ {
+     GPUProfiler::ClockSynchronization clock;
+
+     NodePayload& payload = *nodePayloads[nodeIndex];
+     Frame& frame = payload.frames[frameNumber % NUM_FRAMES_DELAY];
+
+     VkDevice vkDevice = payload.device;
+     VkCommandBuffer commandBuffer = frame.commandBuffer;
+     VkFence frameFence = frame.fence;
+
+     VkCommandBufferBeginInfo beginInfo;
+     beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+     beginInfo.pNext = 0;
+     beginInfo.pInheritanceInfo = 0;
+     beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+
+     // Wait for the slot's previous submission, then record the calibration commands
+     vkWaitForFences(vkDevice, 1, &frameFence, 1, (uint64_t)-1);
+     vkResetFences(vkDevice, 1, &frameFence);
+     vkResetCommandBuffer(commandBuffer, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
+     vkBeginCommandBuffer(commandBuffer, &beginInfo);
+     vkCmdResetQueryPool(commandBuffer, payload.queryPool, 0, 1);
+     vkCmdWriteTimestamp(commandBuffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, payload.queryPool, 0);
+     vkEndCommandBuffer(commandBuffer);
+
+     VkSubmitInfo submitInfo = {};
+     submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+     submitInfo.pNext = nullptr;
+     submitInfo.waitSemaphoreCount = 0;
+     submitInfo.pWaitSemaphores = nullptr;
+     submitInfo.commandBufferCount = 1;
+     submitInfo.pCommandBuffers = &commandBuffer;
+     submitInfo.signalSemaphoreCount = 0;
+     submitInfo.pSignalSemaphores = nullptr;
+     vkQueueSubmit(payload.queue, 1, &submitInfo, frameFence);
+     vkWaitForFences(vkDevice, 1, &frameFence, 1, (uint64_t)-1);
+
+     clock.timestampGPU = 0;
+     vkGetQueryPoolResults(vkDevice, payload.queryPool, 0, 1, 8, &clock.timestampGPU, 8, VK_QUERY_RESULT_64_BIT);
+     clock.timestampCPU = GetHighPrecisionTime();
+     clock.frequencyCPU = GetHighPrecisionFrequency();
+
+     VkPhysicalDeviceProperties deviceProperties;
+     vkGetPhysicalDeviceProperties(payload.physicalDevice, &deviceProperties);
+     clock.frequencyGPU = (uint64_t)(1000000000ll / deviceProperties.limits.timestampPeriod);
+
+     return clock;
+ }
+
+ // Destroys the node's command pool, then its query pool.
+ GPUProfilerVulkan::NodePayload::~NodePayload()
+ {
+     vkDestroyCommandPool(device, commandPool, nullptr);
+
+     vkDestroyQueryPool(device, queryPool, nullptr);
+ }
+
+ // Waits for the last flipped frame slot, then destroys every node's fences and command
+ // buffers and finally the payloads themselves.
+ GPUProfilerVulkan::~GPUProfilerVulkan()
+ {
+     // WaitForFrame dereferences nodePayloads[currentNode]; skip it when InitDevice never
+     // created a payload for the current node.
+     if (currentNode < nodePayloads.size() && nodePayloads[currentNode] != nullptr)
+         WaitForFrame(frameNumber - 1);
+
+     for (NodePayload* payload : nodePayloads)
+     {
+         if (payload == nullptr)
+             continue;
+
+         for (Frame& frame : payload->frames)
+         {
+             vkDestroyFence(payload->device, frame.fence, nullptr);
+             vkFreeCommandBuffers(payload->device, payload->commandPool, 1, &frame.commandBuffer);
+         }
+
+         Memory::Delete(payload);
+     }
+
+     nodePayloads.clear();
+ }
+}
+#else
+#include "optick_common.h"
+namespace Optick
+{
+    // Stub compiled when OPTICK_ENABLE_GPU_VULKAN is off: ignores its arguments and reports a failure.
+    void InitGpuVulkan(void* /*devices*/, void* /*physicalDevices*/, void* /*cmdQueues*/, uint32_t* /*cmdQueuesFamily*/, uint32_t /*numQueues*/)
+    {
+        OPTICK_FAILED("OPTICK_ENABLE_GPU_VULKAN is disabled! Can't initialize GPU Profiler!");
+    }
+}
+#endif //OPTICK_ENABLE_GPU_VULKAN
+#endif //USE_OPTICK \ No newline at end of file
diff --git a/external/optick/optick_memory.h b/external/optick/optick_memory.h
new file mode 100644
index 0000000..45249c6
--- /dev/null
+++ b/external/optick/optick_memory.h
@@ -0,0 +1,419 @@
+#pragma once
+
+#include "optick_common.h"
+
+#if USE_OPTICK
+
+#include <cstring>
+#include <new>
+#include <stdlib.h>
+#include <atomic>
+
+#include <array>
+#include <list>
+#include <string>
+#include <sstream>
+#include <unordered_set>
+#include <unordered_map>
+#include <vector>
+
+namespace Optick
+{
+ // Allocation facade used by Optick. Every block handed out is preceded by a small
+ // Header that records the total size, which lets Free() keep the global byte counter
+ // accurate. The actual allocation is delegated to the function pointers installed
+ // through SetAllocator().
+ // NOTE(review): the user pointer is offset by sizeof(Header) (8 bytes) from whatever
+ // the installed allocator returns, so stricter alignment requirements are presumably
+ // not honoured - confirm for over-aligned types.
+ class Memory
+ {
+     // Bookkeeping record stored directly in front of each user block.
+     struct Header
+     {
+         uint64_t size;
+     };
+
+     // Total number of bytes currently allocated (headers included).
+     static std::atomic<uint64_t> memAllocated;
+
+     static void* (*allocate)(size_t);
+     static void (*deallocate)(void* p);
+ public:
+     // Allocates `size` user bytes plus the header; returns the user pointer or nullptr.
+     static OPTICK_INLINE void* Alloc(size_t size)
+     {
+         const size_t bytesWithHeader = size + sizeof(Header);
+         void* rawBlock = allocate(bytesWithHeader);
+         OPTICK_VERIFY(rawBlock, "Can't allocate memory", return nullptr);
+
+         Header* record = (Header*)rawBlock;
+         record->size = bytesWithHeader;
+         memAllocated += bytesWithHeader;
+
+         return (uint8_t*)rawBlock + sizeof(Header);
+     }
+
+     // Releases a block obtained from Alloc(); nullptr is ignored.
+     static OPTICK_INLINE void Free(void* p)
+     {
+         if (p == nullptr)
+             return;
+
+         // Step back to the header to recover the real allocation start and its size.
+         uint8_t* rawBlock = (uint8_t*)p - sizeof(Header);
+         Header* record = (Header*)rawBlock;
+         memAllocated -= record->size;
+         deallocate(rawBlock);
+     }
+
+     // Current value of the allocation counter.
+     static OPTICK_INLINE size_t GetAllocatedSize()
+     {
+         return (size_t)memAllocated;
+     }
+
+     // Placement-constructs a T in memory obtained from Alloc() (0, 1 or 2 constructor arguments).
+     template<class T>
+     static T* New()
+     {
+         void* storage = Memory::Alloc(sizeof(T));
+         return new (storage) T();
+     }
+
+     template<class T, class P1>
+     static T* New(P1 p1)
+     {
+         void* storage = Memory::Alloc(sizeof(T));
+         return new (storage) T(p1);
+     }
+
+     template<class T, class P1, class P2>
+     static T* New(P1 p1, P2 p2)
+     {
+         void* storage = Memory::Alloc(sizeof(T));
+         return new (storage) T(p1, p2);
+     }
+
+     // Destroys an object created with New() and frees its storage; nullptr is ignored.
+     template<class T>
+     static void Delete(T* p)
+     {
+         if (!p)
+             return;
+
+         p->~T();
+         Free(p);
+     }
+
+     // Installs the low-level allocation callbacks used by Alloc()/Free().
+     static void SetAllocator(void* (*allocateFn)(size_t), void(*deallocateFn)(void*))
+     {
+         allocate = allocateFn;
+         deallocate = deallocateFn;
+     }
+
+     // Standard-library compatible allocator that routes container storage through Memory.
+     template<typename T>
+     struct Allocator : public std::allocator<T>
+     {
+         Allocator() {}
+         template<class U>
+         Allocator(const Allocator<U>&) {}
+         template<typename U> struct rebind { typedef Allocator<U> other; };
+
+         typename std::allocator<T>::pointer allocate(typename std::allocator<T>::size_type n, typename std::allocator<void>::const_pointer = 0)
+         {
+             void* storage = Memory::Alloc(n * sizeof(T));
+             return reinterpret_cast<typename std::allocator<T>::pointer>(storage);
+         }
+
+         void deallocate(typename std::allocator<T>::pointer p, typename std::allocator<T>::size_type)
+         {
+             Memory::Free(p);
+         }
+     };
+ };
+
+ // std::* section: standard containers, strings and string streams bound to Memory::Allocator (array is wrapped without an allocator)
+ template <typename T, size_t _Size> class array : public std::array<T, _Size>{};
+ template <typename T> class vector : public std::vector<T, Memory::Allocator<T>>{};
+ template <typename T> class list : public std::list<T, Memory::Allocator<T>>{};
+ template <typename T> class unordered_set : public std::unordered_set<T, std::hash<T>, std::equal_to<T>, Memory::Allocator<T>>{};
+ template <typename T, typename V> class unordered_map : public std::unordered_map<T, V, std::hash<T>, std::equal_to<T>, Memory::Allocator<std::pair<const T, V>>>{};
+
+ using string = std::basic_string<char, std::char_traits<char>, Memory::Allocator<char>>; // narrow string using Memory::Allocator
+ using wstring = std::basic_string<wchar_t, std::char_traits<wchar_t>, Memory::Allocator<wchar_t>>; // wide string using Memory::Allocator
+
+ using istringstream = std::basic_istringstream<char, std::char_traits<char>, Memory::Allocator<char>>; // string streams whose buffers use Memory::Allocator
+ using ostringstream = std::basic_ostringstream<char, std::char_traits<char>, Memory::Allocator<char>>;
+ using stringstream = std::basic_stringstream<char, std::char_traits<char>, Memory::Allocator<char>>;
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ template<class T, uint32 SIZE> // Node of a doubly linked list of fixed-size element arrays (SIZE elements of T per node).
+ struct MemoryChunk
+ {
+ OPTICK_ALIGN_CACHE T data[SIZE];
+ MemoryChunk* next;
+ MemoryChunk* prev;
+
+ MemoryChunk() : next(0), prev(0) {}
+
+ ~MemoryChunk() // Deletes every chunk linked after this one, then detaches this chunk from its predecessor.
+ {
+ MemoryChunk* chunk = this;
+ while (chunk->next) // walk to the last chunk of the list
+ chunk = chunk->next;
+
+ while (chunk != this) // delete from the tail back towards this chunk
+ {
+ MemoryChunk* toDelete = chunk;
+ chunk = toDelete->prev; // remember the predecessor before toDelete is destroyed
+ Memory::Delete(toDelete); // runs ~MemoryChunk on toDelete, which clears its predecessor's `next` (see below)
+ }
+
+ if (prev != nullptr)
+ {
+ prev->next = nullptr; // unlink so the predecessor no longer points at a destroyed chunk
+ prev = nullptr;
+ }
+ }
+ };
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // Append-only pool that stores elements in a linked list of fixed-size chunks
+ // (SIZE elements each). Elements are never moved once added; Clear() can either
+ // keep the chunks for reuse or release them.
+ //
+ // Invariants: `root` is the first chunk, `chunk` the one currently being filled and
+ // `index` the next free slot inside `chunk`. A pool that has never allocated has
+ // root == chunk == nullptr and index == SIZE.
+ // NOTE(review): the pool has no destructor, so chunks are only released through
+ // Clear(false) - confirm that owners call it.
+ template<class T, uint32 SIZE = 16>
+ class MemoryPool
+ {
+     typedef MemoryChunk<T, SIZE> Chunk;
+     Chunk* root;
+     Chunk* chunk;
+     uint32 index;
+
+     // Makes the next chunk current (allocating one if there is none to reuse) and
+     // resets the write position to its first slot.
+     OPTICK_INLINE void AddChunk()
+     {
+         index = 0;
+         if (!chunk || !chunk->next)
+         {
+             Chunk* newChunk = Memory::New<Chunk>();
+             if (chunk)
+             {
+                 chunk->next = newChunk;
+                 newChunk->prev = chunk;
+                 chunk = newChunk;
+             }
+             else
+             {
+                 root = chunk = newChunk;
+             }
+         }
+         else
+         {
+             // A chunk preserved by Clear(true) is available: reuse it.
+             chunk = chunk->next;
+         }
+     }
+ public:
+     MemoryPool() : root(nullptr), chunk(nullptr), index(SIZE) {}
+
+     // Reserves the next slot and returns a reference to it.
+     OPTICK_INLINE T& Add()
+     {
+         if (index >= SIZE)
+             AddChunk();
+
+         return chunk->data[index++];
+     }
+
+     // Copies `item` into the next slot and returns a reference to the stored copy.
+     OPTICK_INLINE T& Add(const T& item)
+     {
+         return Add() = item;
+     }
+
+     // Copies `count` items into the pool with memcpy and returns a pointer to the first
+     // stored item. With allowOverlap == false the range is guaranteed to live inside a
+     // single chunk (nullptr if count > SIZE); with allowOverlap == true it may continue
+     // in following chunks. Returns nullptr when count == 0.
+     OPTICK_INLINE T* AddRange(const T* items, size_t count, bool allowOverlap = true)
+     {
+         if (count == 0 || (count > SIZE && !allowOverlap))
+             return nullptr;
+
+         // The result pointer must refer to a real slot: open a chunk when none exists or
+         // the current one is full, and also when a contiguous range would not fit in the
+         // space that is left. (An exact fit stays in the current chunk.)
+         const bool currentIsFull = (chunk == nullptr) || (index >= SIZE);
+         if (currentIsFull || (!allowOverlap && count > (size_t)(SIZE - index)))
+         {
+             AddChunk();
+         }
+
+         T* result = &chunk->data[index];
+
+         while (count)
+         {
+             size_t numLeft = SIZE - index;
+             size_t numCopy = numLeft < count ? numLeft : count;
+             std::memcpy(&chunk->data[index], items, sizeof(T) * numCopy);
+
+             count -= numCopy;
+             items += numCopy;
+             index += (uint32_t)numCopy;
+
+             if (count)
+                 AddChunk();
+         }
+
+         return result;
+     }
+
+
+     // Reserves `count` consecutive slots in the current chunk without allocating.
+     // Returns nullptr when there is no current chunk, count is negative, or the slots
+     // do not fit in the remaining space.
+     OPTICK_INLINE T* TryAdd(int count)
+     {
+         if (chunk == nullptr || count < 0)
+             return nullptr;
+
+         if (index + (uint32)count <= SIZE)
+         {
+             T* res = &chunk->data[index];
+             index += (uint32)count;
+             return res;
+         }
+
+         return nullptr;
+     }
+
+     // Returns the most recently added element, or nullptr if there is none.
+     OPTICK_INLINE T* Back()
+     {
+         if (chunk && index > 0)
+             return &chunk->data[index - 1];
+
+         if (chunk && chunk->prev != nullptr)
+             return &chunk->prev->data[SIZE - 1];
+
+         return nullptr;
+     }
+
+     // Number of slots in use (every chunk before the current one counts as full).
+     OPTICK_INLINE size_t Size() const
+     {
+         if (root == nullptr)
+             return 0;
+
+         size_t count = 0;
+
+         for (const Chunk* it = root; it != chunk; it = it->next)
+             count += SIZE;
+
+         return count + index;
+     }
+
+     OPTICK_INLINE bool IsEmpty() const
+     {
+         return (chunk == nullptr) || (chunk == root && index == 0);
+     }
+
+     // Empties the pool. With preserveMemory == true the chunks are kept and refilled
+     // from the start; otherwise the whole chunk list is released.
+     OPTICK_INLINE void Clear(bool preserveMemory = true)
+     {
+         if (!preserveMemory)
+         {
+             if (root)
+             {
+                 Memory::Delete(root);
+                 root = nullptr;
+                 chunk = nullptr;
+                 index = SIZE;
+             }
+         }
+         else if (root)
+         {
+             index = 0;
+             chunk = root;
+         }
+     }
+
+     // Forward iterator over the stored elements. A position is (chunk, slot); stepping
+     // past the last slot of a chunk moves to slot 0 of the next chunk.
+     class const_iterator
+     {
+         void advance()
+         {
+             if (chunkIndex < SIZE - 1)
+             {
+                 ++chunkIndex;
+             }
+             else
+             {
+                 chunkPtr = chunkPtr->next;
+                 chunkIndex = 0;
+             }
+         }
+     public:
+         typedef const_iterator self_type;
+         typedef T value_type;
+         typedef T& reference;
+         typedef T* pointer;
+         typedef int difference_type;
+         const_iterator(const Chunk* ptr, size_t index) : chunkPtr(ptr), chunkIndex(index) { }
+         // Pre-increment: advance, then yield the new position.
+         self_type operator++()
+         {
+             advance();
+             return *this;
+         }
+         // Post-increment: advance, but yield the position held before the step.
+         self_type operator++(int /*junk*/)
+         {
+             self_type previous = *this;
+             advance();
+             return previous;
+         }
+         // The typedefs above expose non-const access, so constness is cast away here
+         // exactly as the dereference operator always did.
+         reference operator*() const { return const_cast<reference>(chunkPtr->data[chunkIndex]); }
+         const pointer operator->() const { return const_cast<pointer>(&chunkPtr->data[chunkIndex]); }
+         bool operator==(const self_type& rhs) const { return (chunkPtr == rhs.chunkPtr) && (chunkIndex == rhs.chunkIndex); }
+         bool operator!=(const self_type& rhs) const { return (chunkPtr != rhs.chunkPtr) || (chunkIndex != rhs.chunkIndex); }
+     private:
+         const Chunk* chunkPtr;
+         size_t chunkIndex;
+     };
+
+     const_iterator begin() const
+     {
+         return const_iterator(root, 0);
+     }
+
+     // One-past-the-last position, expressed in the same normalized form that
+     // const_iterator::advance() produces so that begin() can actually reach it.
+     const_iterator end() const
+     {
+         if (chunk == nullptr)
+             return const_iterator(nullptr, 0);
+
+         if (index >= SIZE)
+             return const_iterator(chunk->next, 0);
+
+         return const_iterator(chunk, index);
+     }
+
+     // Calls func(element) for every stored element, in insertion order.
+     template<class Func>
+     void ForEach(Func func) const
+     {
+         for (const Chunk* it = root; it != chunk; it = it->next)
+             for (uint32 i = 0; i < SIZE; ++i)
+                 func(it->data[i]);
+
+         if (chunk)
+             for (uint32 i = 0; i < index; ++i)
+                 func(chunk->data[i]);
+     }
+
+     template<class Func>
+     void ForEach(Func func)
+     {
+         for (Chunk* it = root; it != chunk; it = it->next)
+             for (uint32 i = 0; i < SIZE; ++i)
+                 func(it->data[i]);
+
+         if (chunk)
+             for (uint32 i = 0; i < index; ++i)
+                 func(chunk->data[i]);
+     }
+
+     // Calls func(pointer, count) once per chunk with the used part of that chunk.
+     template<class Func>
+     void ForEachChunk(Func func) const
+     {
+         for (const Chunk* it = root; it != chunk; it = it->next)
+             func(it->data, SIZE);
+
+         if (chunk)
+             func(chunk->data, index);
+     }
+
+     // Copies all stored elements into `destination`, which must have room for Size() items.
+     void ToArray(T* destination) const
+     {
+         uint32 curIndex = 0;
+
+         for (const Chunk* it = root; it != chunk; it = it->next)
+         {
+             std::memcpy(&destination[curIndex], it->data, sizeof(T) * SIZE);
+             curIndex += SIZE;
+         }
+
+         if (chunk && index > 0)
+         {
+             std::memcpy(&destination[curIndex], chunk->data, sizeof(T) * index);
+         }
+     }
+ };
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // Byte-oriented pool: stores raw copies of arbitrary objects inside chunks of
+ // CHUNK_SIZE bytes and hands back typed pointers to the stored copies.
+ template<uint32 CHUNK_SIZE>
+ class MemoryBuffer : private MemoryPool<uint8, CHUNK_SIZE>
+ {
+ public:
+     // Copies `size` bytes starting at `data` into the buffer and returns a pointer to
+     // the copy. With allowOverlap == false the copy is kept inside a single chunk.
+     template<class U>
+     U* Add(U* data, size_t size, bool allowOverlap = true)
+     {
+         return (U*)(MemoryPool<uint8, CHUNK_SIZE>::AddRange((const uint8*)data, size, allowOverlap));
+     }
+
+     // Copies the bytes of `val` into the buffer and returns a pointer to the copy.
+     // The copy is addressed directly (rather than through the pointer overload, which
+     // would yield a const T*) so that a mutable T* can be returned.
+     template<class T>
+     T* Add(const T& val, bool allowOverlap = true)
+     {
+         return (T*)(MemoryPool<uint8, CHUNK_SIZE>::AddRange((const uint8*)&val, sizeof(T), allowOverlap));
+     }
+ };
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+
+#endif //USE_OPTICK \ No newline at end of file
diff --git a/external/optick/optick_message.cpp b/external/optick/optick_message.cpp
new file mode 100644
index 0000000..b421d50
--- /dev/null
+++ b/external/optick/optick_message.cpp
@@ -0,0 +1,172 @@
+#include "optick.config.h"
+
+#if USE_OPTICK
+#include "optick_common.h"
+#include "optick_core.h"
+#include "optick_message.h"
+#include "optick_server.h"
+
+namespace Optick
+{
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Fixed-size prefix of every incoming message: a marker followed by a length field.
+// The field order is significant because the header is read from the stream as raw bytes.
+struct MessageHeader
+{
+    uint32 mark;
+    uint32 length;
+
+    // Marker value expected in `mark` for a well-formed header.
+    static const uint32 MESSAGE_MARK = 0xB50FB50F;
+
+    MessageHeader()
+        : mark(0)
+        , length(0)
+    {
+    }
+
+    // True when the marker matches MESSAGE_MARK.
+    bool IsValid() const
+    {
+        return MESSAGE_MARK == mark;
+    }
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Singleton that maps a message type id to the function able to deserialize it.
+class MessageFactory
+{
+    typedef IMessage* (*MessageCreateFunction)(InputDataStream& str);
+    MessageCreateFunction factory[IMessage::COUNT];
+
+    template<class T>
+    void RegisterMessage()
+    {
+        factory[T::GetMessageType()] = T::Create;
+    }
+
+    MessageFactory()
+    {
+        // Clear the whole table (not just its first entry) so that the registration
+        // check below really detects a message type that was not registered.
+        memset(factory, 0, sizeof(factory));
+
+        RegisterMessage<StartMessage>();
+        RegisterMessage<StopMessage>();
+        RegisterMessage<CancelMessage>();
+        RegisterMessage<TurnSamplingMessage>();
+
+        for (uint32 msg = 0; msg < IMessage::COUNT; ++msg)
+        {
+            OPTICK_ASSERT(factory[msg] != nullptr, "Message is not registered to factory");
+        }
+    }
+public:
+    static MessageFactory& Get()
+    {
+        static MessageFactory instance;
+        return instance;
+    }
+
+    // Reads one message (header, application id, type, payload) from `str`.
+    // Returns the new message (to be released with Memory::Delete), or nullptr when the
+    // header cannot be read, the type is unknown, or the number of bytes consumed does
+    // not match the length announced in the header.
+    IMessage* Create(InputDataStream& str)
+    {
+        MessageHeader header;
+        if (!str.Read(header))
+            return nullptr;
+
+        // Bytes available right after the header; used below to verify the payload size.
+        size_t length = str.Length();
+
+        uint16 applicationID = 0;
+        uint16 messageType = IMessage::COUNT;
+
+        str >> applicationID;
+        str >> messageType;
+
+        OPTICK_VERIFY( messageType < IMessage::COUNT && factory[messageType] != nullptr, "Unknown message type!", return nullptr )
+
+        IMessage* result = factory[messageType](str);
+
+        if (header.length + str.Length() != length)
+        {
+            // The message was built from a stream that is out of sync: do not leak it.
+            Memory::Delete(result);
+            OPTICK_FAILED("Message Stream is corrupted! Invalid Protocol?")
+            return nullptr;
+        }
+
+        return result;
+    }
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes a response header: the protocol version followed by the response type widened to 32 bits.
+OutputDataStream& operator<<(OutputDataStream& os, const DataResponse& val)
+{
+    os << val.version;
+    os << (uint32)val.type;
+    return os;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Scans the stream for the next valid header and builds the corresponding message.
+// Bytes that do not start a valid header are skipped one at a time. Returns nullptr
+// when no header can be peeked or the announced message is not fully buffered yet.
+IMessage* IMessage::Create(InputDataStream& str)
+{
+    MessageHeader header;
+
+    for (;;)
+    {
+        if (!str.Peek(header))
+            return nullptr;
+
+        if (!header.IsValid())
+        {
+            // Some garbage in the stream?
+            str.Skip(1);
+            continue;
+        }
+
+        const bool isComplete = !(str.Length() < header.length + sizeof(MessageHeader));
+        if (!isComplete)
+            return nullptr; // Not enough data yet
+
+        return MessageFactory::Get().Create(str);
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Applies the received capture settings to the profiler core and starts a capture.
+void StartMessage::Apply()
+{
+    Core& profiler = Core::Get();
+
+    profiler.SetSettings(settings);
+    profiler.StartCapture();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Deserializes a StartMessage: the capture settings in wire order, followed by the
+// password, which arrives base64-encoded and is stored decoded.
+IMessage* StartMessage::Create(InputDataStream& stream)
+{
+    StartMessage* msg = Memory::New<StartMessage>();
+    CaptureSettings& settings = msg->settings;
+    stream >> settings.mode
+        >> settings.categoryMask
+        >> settings.samplingFrequency
+        >> settings.frameLimit
+        >> settings.timeLimitUs
+        >> settings.spikeLimitUs
+        >> settings.memoryLimitMb;
+
+    // Read the encoded text into the local first; decoding an empty local (as happened
+    // when the text was read straight into settings.password) always lost the password.
+    string password;
+    stream >> password;
+    settings.password = base64_decode(password);
+    return msg;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Asks the profiler core to dump the current capture.
+void StopMessage::Apply()
+{
+    Core& profiler = Core::Get();
+    profiler.DumpCapture();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// StopMessage carries no payload, so nothing is read from the stream.
+IMessage* StopMessage::Create(InputDataStream&)
+{
+    StopMessage* msg = Memory::New<StopMessage>();
+    return msg;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Asks the profiler core to cancel the current capture.
+void CancelMessage::Apply()
+{
+    Core& profiler = Core::Get();
+    profiler.CancelCapture();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// CancelMessage carries no payload, so nothing is read from the stream.
+IMessage* CancelMessage::Create(InputDataStream&)
+{
+    CancelMessage* msg = Memory::New<CancelMessage>();
+    return msg;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Deserializes a TurnSamplingMessage: the index followed by the sampling flag.
+IMessage* TurnSamplingMessage::Create( InputDataStream& stream )
+{
+    TurnSamplingMessage* result = Memory::New<TurnSamplingMessage>();
+    stream >> result->index >> result->isSampling;
+    return result;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+void TurnSamplingMessage::Apply() // Intentionally does nothing.
+{
+ // Backward compatibility: the message is still accepted and parsed, but has no effect
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+
+#endif //USE_OPTICK \ No newline at end of file
diff --git a/external/optick/optick_message.h b/external/optick/optick_message.h
new file mode 100644
index 0000000..a6d553e
--- /dev/null
+++ b/external/optick/optick_message.h
@@ -0,0 +1,130 @@
+#pragma once
+#include "optick.config.h"
+
+#if USE_OPTICK
+
+#include "optick_common.h"
+#include "optick_serialization.h"
+
+namespace Optick
+{
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+static const uint32 NETWORK_PROTOCOL_VERSION = 24;
+static const uint16 NETWORK_APPLICATION_ID = 0xB50F;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct DataResponse // Header describing a block of data sent back to the profiler client.
+{
+ enum Type : uint16
+ {
+ FrameDescriptionBoard = 0, // DescriptionBoard for Instrumental Frames
+ EventFrame = 1, // Instrumental Data
+ SamplingFrame = 2, // Sampling Data
+ NullFrame = 3, // Last Frame Mark
+ ReportProgress = 4, // Report Current Progress
+ Handshake = 5, // Handshake Response
+ Reserved_0 = 6,
+ SynchronizationData = 7, // Synchronization Data for the thread
+ TagsPack = 8, // Pack of tags
+ CallstackDescriptionBoard = 9, // DescriptionBoard with resolved function addresses
+ CallstackPack = 10, // Pack of CallStacks
+ Reserved_1 = 11,
+ Reserved_2 = 12,
+ Reserved_3 = 13,
+ Reserved_4 = 14,
+ //...
+ Reserved_255 = 255,
+
+ FiberSynchronizationData = 1 << 8, // Synchronization Data for the Fibers
+ SyscallPack, // 257 (continues from the previous enumerator)
+ SummaryPack, // 258
+ };
+
+ uint32 version; // set to NETWORK_PROTOCOL_VERSION by the constructor
+ uint32 size; // size value supplied by the caller
+ Type type;
+ uint16 application; // set to NETWORK_APPLICATION_ID by the constructor
+
+ DataResponse(Type t, uint32 s) : version(NETWORK_PROTOCOL_VERSION), size(s), type(t), application(NETWORK_APPLICATION_ID){}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+OutputDataStream& operator << (OutputDataStream& os, const DataResponse& val);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+class IMessage // Base interface of the commands received from the profiler client.
+{
+public:
+ enum Type : uint16 // Message type ids; COUNT is the number of ids, not a message
+ {
+ Start,
+ Stop,
+ Cancel,
+ TurnSampling,
+ COUNT,
+ };
+
+ virtual void Apply() = 0; // Executes the command
+ virtual ~IMessage() {}
+
+ static IMessage* Create( InputDataStream& str ); // Parses the next message from the stream; may return nullptr
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<IMessage::Type MESSAGE_TYPE> // Helper base that ties a concrete message class to its IMessage::Type id.
+class Message : public IMessage
+{
+ enum { id = MESSAGE_TYPE };
+public:
+ static uint32 GetMessageType() { return id; } // Returns the type id given as template argument
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct CaptureSettings // Capture parameters carried by StartMessage; the constructor zero-initializes every numeric field.
+{
+ // Capture Mode
+ uint32 mode;
+ // Category Filter
+ uint32 categoryMask;
+ // Tracer: Sampling Frequency
+ uint32 samplingFrequency;
+ // Max Duration for a capture (frames)
+ uint32 frameLimit;
+ // Max Duration for a capture (us)
+ uint32 timeLimitUs;
+ // Spike limit (us) - NOTE(review): the original comment repeated the time-limit text; exact meaning to be confirmed where the setting is consumed
+ uint32 spikeLimitUs;
+ // Max Memory for a capture (MB)
+ uint64 memoryLimitMb;
+ // Tracer: Root Password for the Device
+ string password;
+
+ CaptureSettings() : mode(0), categoryMask(0), samplingFrequency(0), frameLimit(0), timeLimitUs(0), spikeLimitUs(0), memoryLimitMb(0) {}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct StartMessage : public Message<IMessage::Start> // Command carrying the settings for a new capture.
+{
+ CaptureSettings settings;
+ static IMessage* Create(InputDataStream&); // Factory used to deserialize this message
+ virtual void Apply() override;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct StopMessage : public Message<IMessage::Stop> // Command without payload, registered under IMessage::Stop.
+{
+ static IMessage* Create(InputDataStream&); // Factory used to deserialize this message
+ virtual void Apply() override;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct CancelMessage : public Message<IMessage::Cancel> // Command without payload, registered under IMessage::Cancel.
+{
+ static IMessage* Create(InputDataStream&); // Factory used to deserialize this message
+ virtual void Apply() override;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct TurnSamplingMessage : public Message<IMessage::TurnSampling> // Command registered under IMessage::TurnSampling; carries an index and a flag.
+{
+ int32 index;
+ byte isSampling;
+
+ static IMessage* Create(InputDataStream& stream); // Factory used to deserialize this message
+ virtual void Apply() override;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+
+#endif //USE_OPTICK \ No newline at end of file
diff --git a/external/optick/optick_serialization.cpp b/external/optick/optick_serialization.cpp
new file mode 100644
index 0000000..a47a9b7
--- /dev/null
+++ b/external/optick/optick_serialization.cpp
@@ -0,0 +1,178 @@
+#include "optick.config.h"
+
+#if USE_OPTICK
+
+#include "optick_common.h"
+#include "optick_serialization.h"
+
+namespace Optick
+{
+ // Flushes the stream and returns a copy of everything written so far.
+ string OutputDataStream::GetData()
+ {
+     this->flush();
+     string snapshot = this->str();
+     return snapshot;
+ }
+
+ // Appends `size` raw bytes from `buffer`; returns the stream so that calls can be chained.
+ OutputDataStream & OutputDataStream::Write(const char * buffer, size_t size)
+ {
+     OutputDataStream& self = *this;
+     self.write(buffer, size);
+     return self;
+ }
+
+ OutputDataStream OutputDataStream::Empty;
+
+ // Binary serializers. Scalars are written as their in-memory bytes; strings are
+ // written as a 32-bit length followed by the raw characters.
+
+ // C string: 32-bit character count, then the characters (nullptr is written as length 0).
+ OutputDataStream &operator << ( OutputDataStream &stream, const char* val )
+ {
+     uint32 length = 0;
+     if (val != nullptr)
+         length = (uint32)strlen(val);
+
+     stream << length;
+
+     if (length > 0)
+         stream.Write(val, length);
+
+     return stream;
+ }
+
+ OutputDataStream &operator << ( OutputDataStream &stream, int val )
+ {
+     return stream.Write((const char*)&val, sizeof(int));
+ }
+
+ OutputDataStream &operator << ( OutputDataStream &stream, int64 val )
+ {
+     return stream.Write((const char*)&val, sizeof(int64));
+ }
+
+ OutputDataStream &operator << ( OutputDataStream &stream, char val )
+ {
+     return stream.Write((const char*)&val, sizeof(char));
+ }
+
+ OutputDataStream &operator << (OutputDataStream &stream, int8 val)
+ {
+     return stream.Write((const char*)&val, sizeof(val));
+ }
+
+ OutputDataStream &operator << ( OutputDataStream &stream, byte val )
+ {
+     return stream.Write((const char*)&val, sizeof(byte));
+ }
+
+ OutputDataStream & operator<<(OutputDataStream &stream, uint64 val)
+ {
+     return stream.Write((const char*)&val, sizeof(uint64));
+ }
+
+ OutputDataStream & operator<<(OutputDataStream &stream, uint32 val)
+ {
+     return stream.Write((const char*)&val, sizeof(uint32));
+ }
+
+ OutputDataStream & operator<<(OutputDataStream &stream, float val)
+ {
+     return stream.Write((const char*)&val, sizeof(float));
+ }
+
+ // String: 32-bit character count, then the characters.
+ OutputDataStream & operator<<(OutputDataStream &stream, const string& val)
+ {
+     const size_t charCount = val.length();
+     stream << (uint32)charCount;
+
+     if (charCount != 0)
+         stream.Write(&val[0], sizeof(val[0]) * charCount);
+
+     return stream;
+ }
+
+ // Wide string: 32-bit size in BYTES (characters * sizeof(wchar_t)), then the raw characters.
+ OutputDataStream & operator<<(OutputDataStream &stream, const wstring& val)
+ {
+     const size_t byteCount = sizeof(wchar_t) * val.length();
+     stream << (uint32)byteCount;
+
+     if (byteCount != 0)
+         stream.Write((const char*)(&val[0]), byteCount);
+
+     return stream;
+ }
+
+ // Binary deserializers. Scalars are read as their in-memory bytes; every overload
+ // reads exactly sizeof(its own value type) bytes.
+
+ InputDataStream &operator >> (InputDataStream &stream, int16 &val)
+ {
+     stream.read((char*)&val, sizeof(int16));
+     return stream;
+ }
+
+ InputDataStream &operator >> ( InputDataStream &stream, int32 &val )
+ {
+     stream.read( (char*)&val, sizeof(int32) );
+     return stream;
+ }
+
+ InputDataStream &operator >> ( InputDataStream &stream, int64 &val )
+ {
+     stream.read( (char*)&val, sizeof(int64) );
+     return stream;
+ }
+
+ InputDataStream & operator>>( InputDataStream &stream, byte &val )
+ {
+     stream.read( (char*)&val, sizeof(byte) );
+     return stream;
+ }
+
+ InputDataStream & operator >> (InputDataStream &stream, uint16 &val)
+ {
+     stream.read((char*)&val, sizeof(uint16));
+     return stream;
+ }
+
+ InputDataStream & operator>>( InputDataStream &stream, uint32 &val )
+ {
+     stream.read( (char*)&val, sizeof(uint32) );
+     return stream;
+ }
+
+ InputDataStream & operator>>( InputDataStream &stream, uint64 &val )
+ {
+     stream.read( (char*)&val, sizeof(uint64) );
+     return stream;
+ }
+
+ // String: a 32-bit length followed by that many characters.
+ // The length comes from the network, so it is validated before it is used: a negative
+ // value or one larger than the buffered data yields an empty string instead of a huge
+ // allocation. The result holds exactly `length` characters (no extra trailing NUL).
+ InputDataStream & operator >> ( InputDataStream &stream, string &val)
+ {
+     int32 length = 0;
+     stream >> length;
+
+     if (length < 0 || (size_t)length > stream.Length())
+     {
+         val.clear();
+         return stream;
+     }
+
+     val.resize((size_t)length);
+     if (length > 0)
+         stream.read( (char*)&val[0], length);
+     return stream;
+ }
+
+ InputDataStream::InputDataStream() : // Opens the underlying string stream for both reading and writing.
+ stringstream( ios_base::in | ios_base::out )
+ {
+ }
+
+ void InputDataStream::Append(const char *buffer, size_t length) // Writes `length` bytes from `buffer` into the stream.
+ {
+ write( buffer, length );
+ }
+
+ // Number of bytes written to the stream but not read yet (write position minus read position).
+ size_t InputDataStream::Length()
+ {
+     const auto unread = tellp() - tellg();
+     return (size_t)unread;
+ }
+
+ // Advances the read position by `length` bytes.
+ // Returns true when that many unread bytes were available and have been skipped.
+ // When fewer are available, only the available bytes are skipped and false is
+ // returned; the read position is never moved past the written data, which would put
+ // the stream into a failed state.
+ bool InputDataStream::Skip(size_t length)
+ {
+     const size_t available = Length();
+     const bool result = length <= available;
+     seekg(result ? length : available, ios_base::cur);
+     return result;
+ }
+
+
+
+}
+
+#endif //USE_OPTICK \ No newline at end of file
diff --git a/external/optick/optick_serialization.h b/external/optick/optick_serialization.h
new file mode 100644
index 0000000..91de32d
--- /dev/null
+++ b/external/optick/optick_serialization.h
@@ -0,0 +1,120 @@
+#pragma once
+#include "optick_common.h"
+
+#if USE_OPTICK
+#include "optick_memory.h"
+
+#if defined(OPTICK_MSVC)
+#pragma warning( push )
+
+//C4250. inherits 'std::basic_ostream'
+#pragma warning( disable : 4250 )
+
+//C4127. Conditional expression is constant
+#pragma warning( disable : 4127 )
+#endif
+
+namespace Optick
+{
+ class OutputDataStream : private ostringstream // Binary output stream; values are appended through the operator << overloads below.
+ {
+ public:
+ static OutputDataStream Empty; // Shared instance, used as the default argument of Server::Send
+ // Returns the data written so far as a string.
+ // Beware of one copy here (do not use it in performance-critical parts)
+ string GetData();
+
+ // Private inheritance is important here: it avoids collisions with the default (text-formatting) stream operators
+ friend OutputDataStream &operator << ( OutputDataStream &stream, const char* val );
+ friend OutputDataStream &operator << ( OutputDataStream &stream, int val );
+ friend OutputDataStream &operator << ( OutputDataStream &stream, uint64 val );
+ friend OutputDataStream &operator << ( OutputDataStream &stream, uint32 val );
+ friend OutputDataStream &operator << ( OutputDataStream &stream, int64 val );
+ friend OutputDataStream &operator << ( OutputDataStream &stream, char val );
+ friend OutputDataStream &operator << ( OutputDataStream &stream, byte val );
+ friend OutputDataStream &operator << ( OutputDataStream &stream, int8 val);
+ friend OutputDataStream &operator << ( OutputDataStream &stream, float val);
+ friend OutputDataStream &operator << ( OutputDataStream &stream, const string& val );
+ friend OutputDataStream &operator << ( OutputDataStream &stream, const wstring& val );
+
+ OutputDataStream& Write(const char* buffer, size_t size); // Appends raw bytes
+ };
+
+ // Serializes a vector: a 32-bit element count followed by every element in order.
+ template<class T>
+ OutputDataStream& operator<<(OutputDataStream &stream, const vector<T>& val)
+ {
+     stream << (uint32)val.size();
+
+     for (const T& element : val)
+         stream << element;
+
+     return stream;
+ }
+
+ // Serializes a memory pool: a 32-bit element count followed by every element in insertion order.
+ template<class T, uint32 N>
+ OutputDataStream& operator<<(OutputDataStream &stream, const MemoryPool<T, N>& val)
+ {
+     const uint32 elementCount = (uint32)val.Size();
+     stream << elementCount;
+
+     auto writeElement = [&](const T& data)
+     {
+         stream << data;
+     };
+     val.ForEach(writeElement);
+
+     return stream;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // Binary input stream: received bytes are appended at the write position and consumed
+ // from the read position through Peek/Read and the operator >> overloads.
+ class InputDataStream : private stringstream {
+ public:
+     bool CanRead() { return !eof(); }
+
+     InputDataStream();
+
+     void Append(const char *buffer, size_t length);
+     bool Skip(size_t length);
+     size_t Length();
+
+     // Copies sizeof(T) bytes into `data` without consuming them.
+     // Returns false (leaving `data` untouched) when fewer bytes are buffered.
+     template<class T>
+     bool Peek(T& data)
+     {
+         const bool hasEnough = !(Length() < sizeof(T));
+         if (hasEnough)
+         {
+             // Read, then rewind to where the read started.
+             const pos_type startPos = tellg();
+             read((char*)&data, sizeof(T));
+             seekg(startPos);
+         }
+         return hasEnough;
+     }
+
+     // Consumes sizeof(T) bytes into `data`.
+     // Returns false (leaving `data` untouched) when fewer bytes are buffered.
+     template<class T>
+     bool Read(T& data)
+     {
+         const bool hasEnough = !(Length() < sizeof(T));
+         if (hasEnough)
+             read((char*)&data, sizeof(T));
+         return hasEnough;
+     }
+
+     friend InputDataStream &operator >> (InputDataStream &stream, byte &val );
+     friend InputDataStream &operator >> (InputDataStream &stream, int16 &val);
+     friend InputDataStream &operator >> (InputDataStream &stream, uint16 &val);
+     friend InputDataStream &operator >> (InputDataStream &stream, int32 &val );
+     friend InputDataStream &operator >> (InputDataStream &stream, uint32 &val );
+     friend InputDataStream &operator >> (InputDataStream &stream, int64 &val );
+     friend InputDataStream &operator >> (InputDataStream &stream, uint64 &val );
+     friend InputDataStream &operator >> (InputDataStream &stream, string &val);
+ };
+
+
+}
+
+#if defined(OPTICK_MSVC)
+#pragma warning( pop )
+#endif
+
+#endif //USE_OPTICK \ No newline at end of file
diff --git a/external/optick/optick_server.cpp b/external/optick/optick_server.cpp
new file mode 100644
index 0000000..7596d3a
--- /dev/null
+++ b/external/optick/optick_server.cpp
@@ -0,0 +1,338 @@
+#include "optick.config.h"
+
+#if USE_OPTICK
+#include "optick_server.h"
+#include "optick_common.h"
+
+#if defined(OPTICK_MSVC)
+#define USE_WINDOWS_SOCKETS (1)
+#else
+#define USE_BERKELEY_SOCKETS (1)
+#endif
+#define SOCKET_PROTOCOL_TCP (6)
+#if defined(USE_BERKELEY_SOCKETS)
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+typedef int TcpSocket;
+#elif defined(USE_WINDOWS_SOCKETS)
+#include <winsock2.h>
+#include <basetsd.h>
+typedef UINT_PTR TcpSocket;
+#else
+#error Platform not supported
+#endif
+
+
+#if defined(OPTICK_MSVC)
+#pragma comment( lib, "ws2_32.lib" )
+#endif
+
+namespace Optick
+{
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+static const short DEFAULT_PORT = 31318;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#if defined(USE_WINDOWS_SOCKETS)
+// Starts Winsock (version 2.2) on first use and cleans it up when the process-wide
+// instance is destroyed.
+class Wsa
+{
+    bool isInitialized;
+    WSADATA data;
+
+    Wsa()
+    {
+        const int startupResult = WSAStartup(0x0202, &data);
+        isInitialized = (startupResult == ERROR_SUCCESS);
+        OPTICK_ASSERT(isInitialized, "Can't initialize WSA");
+    }
+
+    ~Wsa()
+    {
+        if (!isInitialized)
+            return;
+
+        WSACleanup();
+    }
+public:
+    // Creates the function-local instance on the first call; returns whether startup succeeded.
+    static bool Init()
+    {
+        static Wsa wsa;
+        return wsa.isInitialized;
+    }
+};
+#endif
+
+
+// True when `socket` is a usable handle: not INVALID_SOCKET on Windows sockets,
+// non-negative otherwise.
+inline bool IsValidSocket(TcpSocket socket)
+{
+#if defined(USE_WINDOWS_SOCKETS)
+    return !(socket == INVALID_SOCKET);
+#else
+    return !(socket < 0);
+#endif
+}
+
+inline void CloseSocket(TcpSocket& socket) // Closes the socket and resets the caller's handle to the platform's invalid value.
+{
+#if defined(USE_WINDOWS_SOCKETS)
+ closesocket(socket);
+ socket = INVALID_SOCKET;
+#else
+ close(socket);
+ socket = -1; // negative handles are treated as invalid by IsValidSocket
+#endif
+}
+
+// Switches `socket` between blocking and non-blocking mode; returns true on success.
+inline bool SetSocketBlockingMode(TcpSocket socket, bool isBlocking)
+{
+#if defined(USE_WINDOWS_SOCKETS)
+    unsigned long nonBlockingFlag = isBlocking ? 0 : 1;
+    return ioctlsocket(socket, FIONBIO, &nonBlockingFlag) == 0;
+#else
+#if defined(OPTICK_OSX) || defined(OPTICK_LINUX)
+    // Read the current file status flags and change only O_NONBLOCK.
+    int statusFlags = fcntl(socket, F_GETFL, 0);
+    if (statusFlags < 0)
+        return false;
+
+    if (isBlocking)
+        statusFlags = statusFlags & ~O_NONBLOCK;
+    else
+        statusFlags = statusFlags | O_NONBLOCK;
+
+    return fcntl(socket, F_SETFL, statusFlags) == 0;
+#else
+    int nonblocking = isBlocking ? 0 : 1;
+    const int optionResult = setsockopt((int)socket, SOL_SOCKET, 0x1200, (char*)&nonblocking, sizeof(nonblocking));
+    return optionResult == 0;
+#endif
+#endif
+}
+
+
+// TCP endpoint of the profiler server: a non-blocking listening socket plus at most
+// one accepted (blocking) client connection. Access to the client socket is guarded
+// by a recursive mutex.
+class Socket
+{
+    TcpSocket acceptSocket;
+    TcpSocket listenSocket;
+    sockaddr_in address;
+
+    fd_set recieveSet;
+
+    std::recursive_mutex socketLock;
+    wstring errorMessage;
+
+    // Closes the listening socket if it is open.
+    void Close()
+    {
+        if (IsValidSocket(listenSocket))
+        {
+            CloseSocket(listenSocket);
+        }
+    }
+
+    // Binds the listening socket to `port` on all interfaces; returns true on success.
+    bool Bind(short port)
+    {
+        address.sin_family = AF_INET;
+        address.sin_addr.s_addr = INADDR_ANY;
+        address.sin_port = htons(port);
+
+        return ::bind(listenSocket, (sockaddr *)&address, sizeof(address)) == 0;
+    }
+
+    // Closes the client connection if there is one.
+    void Disconnect()
+    {
+        std::lock_guard<std::recursive_mutex> lock(socketLock);
+
+        if (IsValidSocket(acceptSocket))
+        {
+            CloseSocket(acceptSocket);
+        }
+    }
+public:
+    // Creates the listening socket in non-blocking mode. `address` is value-initialized
+    // so that the fields not set in Bind() are zero.
+    Socket() : acceptSocket((TcpSocket)-1), listenSocket((TcpSocket)-1), address()
+    {
+#if defined(USE_WINDOWS_SOCKETS)
+        Wsa::Init();
+#endif
+        listenSocket = ::socket(AF_INET, SOCK_STREAM, SOCKET_PROTOCOL_TCP);
+        OPTICK_ASSERT(IsValidSocket(listenSocket), "Can't create socket");
+
+        SetSocketBlockingMode(listenSocket, false);
+    }
+
+    ~Socket()
+    {
+        Disconnect();
+        Close();
+    }
+
+    // Tries the ports startPort .. startPort + portRange - 1 in order and binds to the
+    // first one that is free. Returns false when none could be bound.
+    bool Bind(short startPort, short portRange)
+    {
+        for (short port = startPort; port < startPort + portRange; ++port)
+        {
+            if (Bind(port))
+                return true;
+        }
+
+        return false;
+    }
+
+    void Listen()
+    {
+        int result = ::listen(listenSocket, 8);
+        if (result != 0)
+        {
+            OPTICK_FAILED("Can't start listening");
+        }
+    }
+
+    // Picks up a pending connection, if any, and makes it the current client.
+    // Returns true when a client is connected after the call.
+    bool Accept()
+    {
+        TcpSocket incomingSocket = ::accept(listenSocket, nullptr, nullptr);
+
+        std::lock_guard<std::recursive_mutex> lock(socketLock);
+
+        if (IsValidSocket(incomingSocket))
+        {
+            // A new client replaces the previous one: close the old connection first
+            // so that its handle is not leaked.
+            Disconnect();
+
+            acceptSocket = incomingSocket;
+            SetSocketBlockingMode(acceptSocket, true);
+        }
+
+        return IsValidSocket(acceptSocket);
+    }
+
+    // Sends the whole buffer to the client. Returns true when every byte was handed to
+    // the socket; on a send error the connection is closed and false is returned.
+    bool Send(const char *buf, size_t len)
+    {
+        std::lock_guard<std::recursive_mutex> lock(socketLock);
+
+        if (!IsValidSocket(acceptSocket))
+            return false;
+
+        // send() may accept fewer bytes than requested, so keep going until done.
+        while (len > 0)
+        {
+            const int sent = (int)::send(acceptSocket, buf, (int)len, 0);
+            if (sent <= 0)
+            {
+                Disconnect();
+                return false;
+            }
+
+            buf += sent;
+            len -= (size_t)sent;
+        }
+
+        return true;
+    }
+
+    // Reads up to `len` bytes from the client without blocking: the socket is polled
+    // with a zero timeout and recv() is only called when data is reported as readable.
+    // Returns the recv() result, or 0 when there is no client or nothing to read.
+    int Receive(char *buf, int len)
+    {
+        std::lock_guard<std::recursive_mutex> lock(socketLock);
+
+        if (!IsValidSocket(acceptSocket))
+            return 0;
+
+        FD_ZERO(&recieveSet);
+        FD_SET(acceptSocket, &recieveSet);
+
+        static timeval lim = { 0, 0 };
+
+#if defined(USE_BERKELEY_SOCKETS)
+        if (::select(acceptSocket + 1, &recieveSet, nullptr, nullptr, &lim) == 1)
+#elif defined(USE_WINDOWS_SOCKETS)
+        if (::select(0, &recieveSet, nullptr, nullptr, &lim) == 1)
+#else
+#error Platform not supported
+#endif
+        {
+            return ::recv(acceptSocket, buf, len, 0);
+        }
+
+        return 0;
+    }
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Creates the socket, binds it to the first free port among `port` .. `port + 3`
+// and starts listening. A bind failure is reported and the server is left not listening.
+Server::Server(short port) : socket(Memory::New<Socket>())
+{
+    if (!socket->Bind(port, 4))
+    {
+        // "TCP/IP": the previous literal contained "\I", which is not a valid escape sequence.
+        OPTICK_FAILED("Failed to bind a socket! Most probably the port is blocked by anti-virus! Change the port and verify that your game has enough permissions to communicate over the TCP/IP.");
+    }
+    else
+    {
+        socket->Listen();
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Pumps the connection once: accepts a client if needed, moves all readable bytes into
+// the network stream, then applies and releases every complete message found in it.
+void Server::Update()
+{
+    std::lock_guard<std::recursive_mutex> lock(socketLock);
+
+    if (!InitConnection())
+        return;
+
+    for (;;)
+    {
+        const int received = socket->Receive( buffer, BIFFER_SIZE );
+        if (!(received > 0))
+            break;
+
+        networkStream.Append(buffer, received);
+    }
+
+    for (IMessage* message = IMessage::Create(networkStream); message; message = IMessage::Create(networkStream))
+    {
+        message->Apply();
+        Memory::Delete(message);
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Sends one response to the client: the DataResponse header as raw bytes, followed by
+// the payload accumulated in `stream`.
+void Server::Send(DataResponse::Type type, OutputDataStream& stream)
+{
+    std::lock_guard<std::recursive_mutex> lock(socketLock);
+
+    string payload = stream.GetData();
+    const size_t payloadSize = payload.size();
+
+    DataResponse header(type, (uint32)payloadSize);
+    socket->Send((char*)&header, sizeof(header));
+    socket->Send(payload.c_str(), payloadSize);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+bool Server::InitConnection() // Forwards to Socket::Accept and returns its result.
+{
+ return socket->Accept();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Returns the machine's host name, or an empty string on platforms where no lookup
+// is compiled in.
+string Server::GetHostName() const
+{
+    const uint32 HOST_NAME_LENGTH = 256;
+    char hostname[HOST_NAME_LENGTH] = { 0 };
+
+#if defined(USE_BERKELEY_SOCKETS)
+#if defined(OPTICK_LINUX) || defined(OPTICK_OSX)
+    // Leave the last (zeroed) byte untouched: a truncated name is not guaranteed to be
+    // null-terminated by gethostname.
+    gethostname(hostname, HOST_NAME_LENGTH - 1);
+#endif
+#elif defined(OPTICK_PC)
+    DWORD length = HOST_NAME_LENGTH;
+    GetComputerNameA(hostname, &length);
+#endif
+
+    return hostname;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Releases the socket object created in the constructor.
+Server::~Server()
+{
+    if (socket == nullptr)
+        return;
+
+    Memory::Delete(socket);
+    socket = nullptr;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+Server & Server::Get() // Returns the single server instance, created on first use with DEFAULT_PORT.
+{
+ static Server instance(DEFAULT_PORT);
+ return instance;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+}
+
+#endif //USE_OPTICK \ No newline at end of file
diff --git a/external/optick/optick_server.h b/external/optick/optick_server.h
new file mode 100644
index 0000000..b44153e
--- /dev/null
+++ b/external/optick/optick_server.h
@@ -0,0 +1,42 @@
+#pragma once
+#include "optick.config.h"
+
+#if USE_OPTICK
+#include "optick_message.h"
+
+#include <mutex>
+#include <thread>
+
+namespace Optick
+{
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+class Socket;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+class Server // Singleton network endpoint: receives IMessage commands and sends DataResponse packets.
+{
+ InputDataStream networkStream; // received bytes that have not been parsed into messages yet
+
+ static const int BIFFER_SIZE = 1024; // size of the receive buffer below
+ char buffer[BIFFER_SIZE];
+
+ Socket* socket; // created in the constructor, deleted in the destructor
+
+ std::recursive_mutex socketLock; // held by Update() and Send()
+
+ Server( short port ); // private: instances are obtained through Get()
+ ~Server();
+
+ bool InitConnection();
+
+public:
+ void Send(DataResponse::Type type, OutputDataStream& stream = OutputDataStream::Empty); // sends a response header followed by the stream's data
+ void Update(); // receives pending data and applies complete messages
+
+ string GetHostName() const;
+
+ static Server &Get();
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+
+#endif //USE_OPTICK \ No newline at end of file