Diffstat (limited to 'src/video_core')
-rw-r--r--  src/video_core/CMakeLists.txt | 23
-rw-r--r--  src/video_core/buffer_cache/buffer_base.h | 2
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h | 38
-rw-r--r--  src/video_core/control/channel_state.cpp | 2
-rw-r--r--  src/video_core/control/channel_state_cache.h | 2
-rw-r--r--  src/video_core/engines/engine_upload.cpp | 11
-rw-r--r--  src/video_core/engines/engine_upload.h | 2
-rw-r--r--  src/video_core/engines/fermi_2d.cpp | 26
-rw-r--r--  src/video_core/engines/fermi_2d.h | 11
-rw-r--r--  src/video_core/engines/kepler_compute.cpp | 6
-rw-r--r--  src/video_core/engines/kepler_compute.h | 14
-rw-r--r--  src/video_core/engines/kepler_memory.cpp | 6
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp | 166
-rw-r--r--  src/video_core/engines/maxwell_3d.h | 94
-rw-r--r--  src/video_core/engines/maxwell_dma.cpp | 82
-rw-r--r--  src/video_core/engines/maxwell_dma.h | 3
-rw-r--r--  src/video_core/engines/puller.cpp | 14
-rw-r--r--  src/video_core/engines/sw_blitter/blitter.cpp | 238
-rw-r--r--  src/video_core/engines/sw_blitter/blitter.h | 27
-rw-r--r--  src/video_core/engines/sw_blitter/converter.cpp | 1234
-rw-r--r--  src/video_core/engines/sw_blitter/converter.h | 36
-rw-r--r--  src/video_core/engines/sw_blitter/generate_converters.py | 136
-rw-r--r--  src/video_core/gpu.h | 18
-rw-r--r--  src/video_core/gpu_thread.cpp | 2
-rw-r--r--  src/video_core/gpu_thread.h | 1
-rw-r--r--  src/video_core/host1x/syncpoint_manager.cpp | 6
-rw-r--r--  src/video_core/host1x/syncpoint_manager.h | 12
-rw-r--r--  src/video_core/macro/macro_hle.cpp | 16
-rw-r--r--  src/video_core/macro/macro_interpreter.cpp | 2
-rw-r--r--  src/video_core/macro/macro_jit_x64.cpp | 1
-rw-r--r--  src/video_core/precompiled_headers.h | 6
-rw-r--r--  src/video_core/rasterizer_interface.h | 4
-rw-r--r--  src/video_core/renderer_null/null_rasterizer.cpp | 90
-rw-r--r--  src/video_core/renderer_null/null_rasterizer.h | 78
-rw-r--r--  src/video_core/renderer_null/renderer_null.cpp | 24
-rw-r--r--  src/video_core/renderer_null/renderer_null.h | 36
-rw-r--r--  src/video_core/renderer_opengl/gl_device.cpp | 1
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 17
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h | 4
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp | 37
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.h | 1
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp | 2
-rw-r--r--  src/video_core/renderer_opengl/maxwell_to_gl.h | 1
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.cpp | 1
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 1
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 9
-rw-r--r--  src/video_core/renderer_vulkan/pipeline_helper.h | 14
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.cpp | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.cpp | 1
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp | 12
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h | 4
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 2
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.h | 2
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.cpp | 1
-rw-r--r--  src/video_core/renderer_vulkan/vk_fsr.cpp | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 2
-rw-r--r--  src/video_core/renderer_vulkan/vk_master_semaphore.cpp | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_master_semaphore.h | 1
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 55
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.cpp | 2
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp | 22
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h | 4
-rw-r--r--  src/video_core/renderer_vulkan/vk_render_pass_cache.h | 2
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp | 7
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h | 1
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.cpp | 1
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp | 1
-rw-r--r--  src/video_core/shader_cache.h | 1
-rw-r--r--  src/video_core/shader_environment.cpp | 1
-rw-r--r--  src/video_core/shader_environment.h | 2
-rw-r--r--  src/video_core/surface.cpp | 27
-rw-r--r--  src/video_core/surface.h | 4
-rw-r--r--  src/video_core/texture_cache/formatter.cpp | 1
-rw-r--r--  src/video_core/texture_cache/formatter.h | 2
-rw-r--r--  src/video_core/texture_cache/render_targets.h | 2
-rw-r--r--  src/video_core/texture_cache/slot_vector.h | 1
-rw-r--r--  src/video_core/texture_cache/texture_cache.h | 29
-rw-r--r--  src/video_core/texture_cache/texture_cache_base.h | 11
-rw-r--r--  src/video_core/textures/astc.cpp | 1
-rw-r--r--  src/video_core/textures/decoders.cpp | 3
-rw-r--r--  src/video_core/transform_feedback.cpp | 1
-rw-r--r--  src/video_core/video_core.cpp | 4
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.cpp | 288
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.h | 18
-rw-r--r--  src/video_core/vulkan_common/vulkan_instance.cpp | 28
-rw-r--r--  src/video_core/vulkan_common/vulkan_memory_allocator.cpp | 1
-rw-r--r--  src/video_core/vulkan_common/vulkan_surface.cpp | 38
-rw-r--r--  src/video_core/vulkan_common/vulkan_wrapper.cpp | 68
-rw-r--r--  src/video_core/vulkan_common/vulkan_wrapper.h | 49
90 files changed, 2759 insertions, 522 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index d7f7d336c..b9bad63ac 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -28,6 +28,10 @@ add_library(video_core STATIC
dirty_flags.h
dma_pusher.cpp
dma_pusher.h
+ engines/sw_blitter/blitter.cpp
+ engines/sw_blitter/blitter.h
+ engines/sw_blitter/converter.cpp
+ engines/sw_blitter/converter.h
engines/const_buffer_info.h
engines/engine_interface.h
engines/engine_upload.cpp
@@ -80,6 +84,7 @@ add_library(video_core STATIC
gpu_thread.h
memory_manager.cpp
memory_manager.h
+ precompiled_headers.h
pte_kind.h
query_cache.h
rasterizer_accelerated.cpp
@@ -87,6 +92,10 @@ add_library(video_core STATIC
rasterizer_interface.h
renderer_base.cpp
renderer_base.h
+ renderer_null/null_rasterizer.cpp
+ renderer_null/null_rasterizer.h
+ renderer_null/renderer_null.cpp
+ renderer_null/renderer_null.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_compute_pipeline.cpp
@@ -275,9 +284,15 @@ if (MSVC)
/we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data
)
else()
- target_compile_options(video_core PRIVATE
- -Werror=conversion
+ if (APPLE)
+ # error: declaration shadows a typedef in 'interval_base_set<SubType, DomainT, Compare, Interval, Alloc>'
+ # error: implicit conversion loses integer precision: 'int' to 'boost::icl::bound_type' (aka 'unsigned char')
+ target_compile_options(video_core PRIVATE -Wno-shadow -Wno-unused-local-typedef)
+ else()
+ target_compile_options(video_core PRIVATE -Werror=conversion)
+ endif()
+ target_compile_options(video_core PRIVATE
-Wno-sign-conversion
)
@@ -296,3 +311,7 @@ endif()
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
target_link_libraries(video_core PRIVATE dynarmic)
endif()
+
+if (YUZU_USE_PRECOMPILED_HEADERS)
+ target_precompile_headers(video_core PRIVATE precompiled_headers.h)
+endif()
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index f9a6472cf..92d77eef2 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -535,7 +535,7 @@ private:
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
- const u64 word_end = std::min(word_begin + num_query_words, NumWords());
+ const u64 word_end = std::min<u64>(word_begin + num_query_words, NumWords());
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
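Note on the change above: the u64 template argument is spelled out because the two operands of std::min have different integer types, so deduction of a single T would otherwise fail or force a narrowing. A minimal standalone sketch of the pattern (the actual return type of NumWords() is not shown in this hunk, u32 is assumed here):

#include <algorithm>
#include <cstdint>

// With mixed widths, std::min(a, b) cannot deduce one T; naming the wider type
// keeps the comparison in 64 bits and avoids an implicit narrowing.
std::uint64_t ClampWordEnd(std::uint64_t word_begin, std::uint64_t num_query_words,
                           std::uint32_t num_words) {
    return std::min<std::uint64_t>(word_begin + num_query_words, num_words);
}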
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 2ba33543c..6881b34c4 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -19,6 +19,7 @@
#include "common/literals.h"
#include "common/lru_cache.h"
#include "common/microprofile.h"
+#include "common/polyfill_ranges.h"
#include "common/settings.h"
#include "core/memory.h"
#include "video_core/buffer_cache/buffer_base.h"
@@ -992,7 +993,20 @@ void BufferCache<P>::BindHostIndexBuffer() {
TouchBuffer(buffer, index_buffer.buffer_id);
const u32 offset = buffer.Offset(index_buffer.cpu_addr);
const u32 size = index_buffer.size;
- SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
+ if (maxwell3d->inline_index_draw_indexes.size()) {
+ if constexpr (USE_MEMORY_MAPS) {
+ auto upload_staging = runtime.UploadStagingBuffer(size);
+ std::array<BufferCopy, 1> copies{
+ {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
+ std::memcpy(upload_staging.mapped_span.data(),
+ maxwell3d->inline_index_draw_indexes.data(), size);
+ runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+ } else {
+ buffer.ImmediateUpload(0, maxwell3d->inline_index_draw_indexes);
+ }
+ } else {
+ SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
+ }
if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
const u32 new_offset = offset + maxwell3d->regs.index_buffer.first *
maxwell3d->regs.index_buffer.FormatSizeInBytes();
@@ -1275,7 +1289,15 @@ void BufferCache<P>::UpdateIndexBuffer() {
}
flags[Dirty::IndexBuffer] = false;
last_index_count = index_array.count;
-
+ if (maxwell3d->inline_index_draw_indexes.size()) {
+ auto inline_index_size = static_cast<u32>(maxwell3d->inline_index_draw_indexes.size());
+ index_buffer = Binding{
+ .cpu_addr = 0,
+ .size = inline_index_size,
+ .buffer_id = CreateBuffer(0, inline_index_size),
+ };
+ return;
+ }
const GPUVAddr gpu_addr_begin = index_array.StartAddress();
const GPUVAddr gpu_addr_end = index_array.EndAddress();
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
@@ -1491,6 +1513,14 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
VAddr end = cpu_addr + wanted_size;
int stream_score = 0;
bool has_stream_leap = false;
+ if (begin == 0) {
+ return OverlapResult{
+ .ids = std::move(overlap_ids),
+ .begin = begin,
+ .end = end,
+ .has_stream_leap = has_stream_leap,
+ };
+ }
for (; cpu_addr >> YUZU_PAGEBITS < Common::DivCeil(end, YUZU_PAGESIZE);
cpu_addr += YUZU_PAGESIZE) {
const BufferId overlap_id = page_table[cpu_addr >> YUZU_PAGEBITS];
@@ -1713,12 +1743,12 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
if constexpr (USE_MEMORY_MAPS) {
+ auto upload_staging = runtime.UploadStagingBuffer(copy_size);
std::array copies{BufferCopy{
- .src_offset = 0,
+ .src_offset = upload_staging.offset,
.dst_offset = buffer.Offset(dest_address),
.size = copy_size,
}};
- auto upload_staging = runtime.UploadStagingBuffer(copy_size);
u8* const src_pointer = upload_staging.mapped_span.data();
std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
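The reordering in this hunk exists so the copy descriptor can reference the staging allocation's offset instead of hard-coding 0. A minimal sketch of that upload pattern, with stand-in types rather than yuzu's real buffer-cache API:

#include <cstdint>
#include <cstring>
#include <span>

struct StagingRef {
    std::span<std::uint8_t> mapped_span; // CPU-visible mapping of the staging slice
    std::uint64_t offset;                // where the slice lives inside the staging buffer
};

struct Copy {
    std::uint64_t src_offset;
    std::uint64_t dst_offset;
    std::uint64_t size;
};

// Allocate the staging slice first, then describe the copy relative to it.
template <typename Runtime, typename Buffer>
void UploadInlined(Runtime& runtime, Buffer& dest, std::span<const std::uint8_t> data,
                   std::uint64_t dst_offset) {
    StagingRef staging = runtime.UploadStagingBuffer(data.size());
    std::memcpy(staging.mapped_span.data(), data.data(), data.size());
    const Copy copy{.src_offset = staging.offset, .dst_offset = dst_offset, .size = data.size()};
    // The runtime resolves the staging slice to its backing buffer.
    runtime.CopyBuffer(dest, staging, std::span{&copy, 1});
}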
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp
index cdecc3a91..832025d75 100644
--- a/src/video_core/control/channel_state.cpp
+++ b/src/video_core/control/channel_state.cpp
@@ -20,7 +20,7 @@ void ChannelState::Init(Core::System& system, GPU& gpu) {
ASSERT(memory_manager);
dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager);
- fermi_2d = std::make_unique<Engines::Fermi2D>();
+ fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h
index 584a0c26c..cdaf4f8d5 100644
--- a/src/video_core/control/channel_state_cache.h
+++ b/src/video_core/control/channel_state_cache.h
@@ -35,8 +35,6 @@ public:
explicit ChannelInfo(Tegra::Control::ChannelState& state);
ChannelInfo(const ChannelInfo& state) = delete;
ChannelInfo& operator=(const ChannelInfo&) = delete;
- ChannelInfo(ChannelInfo&& other) = default;
- ChannelInfo& operator=(ChannelInfo&& other) = default;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index a34819234..e4f8331ab 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -49,13 +49,12 @@ void State::ProcessData(std::span<const u8> read_buffer) {
if (regs.line_count == 1) {
rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer);
} else {
- for (u32 line = 0; line < regs.line_count; ++line) {
- const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch;
- memory_manager.WriteBlockUnsafe(
- dest_line, read_buffer.data() + static_cast<size_t>(line) * regs.line_length_in,
- regs.line_length_in);
+ for (size_t line = 0; line < regs.line_count; ++line) {
+ const GPUVAddr dest_line = address + line * regs.dest.pitch;
+ std::span<const u8> buffer(read_buffer.data() + line * regs.line_length_in,
+ regs.line_length_in);
+ rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer);
}
- memory_manager.InvalidateRegion(address, regs.dest.pitch * regs.line_count);
}
} else {
u32 width = regs.dest.width;
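Each uploaded line now goes through the rasterizer instead of raw WriteBlockUnsafe calls, so the caches observe the writes and the manual InvalidateRegion is no longer needed. A small standalone sketch of the pitched-line addressing involved (the helper and its callback are hypothetical, not engine code):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <span>

// Line n of a pitched upload starts at n * line_length in the source span and
// at base + n * dest_pitch in GPU memory.
void ForEachLine(std::uint64_t dest_base, std::uint32_t dest_pitch,
                 std::span<const std::uint8_t> src, std::uint32_t line_length,
                 std::uint32_t line_count,
                 const std::function<void(std::uint64_t, std::span<const std::uint8_t>)>& copy) {
    for (std::uint32_t line = 0; line < line_count; ++line) {
        const std::uint64_t dest_line = dest_base + std::uint64_t{line} * dest_pitch;
        copy(dest_line, src.subspan(std::size_t{line} * line_length, line_length));
    }
}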
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index f08f6e36a..94fafd9dc 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -39,7 +39,7 @@ struct Registers {
u32 y;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
u32 BlockWidth() const {
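This Address() rewrite is applied to dozens of register structs throughout the commit. A minimal standalone illustration of the pattern (u64/u32 stand in for yuzu's GPUVAddr/u32 aliases):

#include <cstdint>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

// Brace-initializing each 32-bit half into the 64-bit type makes the widening
// explicit, so the nested static_casts are unnecessary and the strict
// conversion warnings stay quiet.
constexpr u64 ComposeAddress(u32 address_high, u32 address_low) {
    return (u64{address_high} << 32) | u64{address_low};
}
static_assert(ComposeAddress(0x1U, 0x2000'0000U) == 0x1'2000'0000ULL);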
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 453e0fb01..c6478ae85 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -3,17 +3,25 @@
#include "common/assert.h"
#include "common/logging/log.h"
+#include "common/microprofile.h"
#include "video_core/engines/fermi_2d.h"
-#include "video_core/memory_manager.h"
+#include "video_core/engines/sw_blitter/blitter.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
+#include "video_core/textures/decoders.h"
+
+MICROPROFILE_DECLARE(GPU_BlitEngine);
+MICROPROFILE_DEFINE(GPU_BlitEngine, "GPU", "Blit Engine", MP_RGB(224, 224, 128));
using VideoCore::Surface::BytesPerBlock;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
namespace Tegra::Engines {
-Fermi2D::Fermi2D() {
+using namespace Texture;
+
+Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
+ sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_);
// Nvidia's OpenGL driver seems to assume these values
regs.src.depth = 1;
regs.dst.depth = 1;
@@ -42,6 +50,7 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32
}
void Fermi2D::Blit() {
+ MICROPROFILE_SCOPE(GPU_BlitEngine);
LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
regs.src.Address(), regs.dst.Address());
@@ -52,9 +61,16 @@ void Fermi2D::Blit() {
UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
const auto& args = regs.pixels_from_memory;
+ constexpr s64 null_derivate = 1ULL << 32;
+ Surface src = regs.src;
+ const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
+ const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 &&
+ src.format != regs.dst.format;
Config config{
.operation = regs.operation,
.filter = args.sample_mode.filter,
+ .must_accelerate =
+ args.du_dx != null_derivate || args.dv_dy != null_derivate || delegate_to_gpu,
.dst_x0 = args.dst_x0,
.dst_y0 = args.dst_y0,
.dst_x1 = args.dst_x0 + args.dst_width,
@@ -64,8 +80,7 @@ void Fermi2D::Blit() {
.src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
.src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
};
- Surface src = regs.src;
- const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
+
const auto need_align_to_pitch =
src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch &&
static_cast<s32>(src.width) == config.src_x1 &&
@@ -78,8 +93,9 @@ void Fermi2D::Blit() {
config.src_x1 -= config.src_x0;
config.src_x0 = 0;
}
+
if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
- UNIMPLEMENTED();
+ sw_blitter->Blit(src, regs.dst, config);
}
}
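Blit() now classifies the copy before handing it off: du_dx and dv_dy are 32.32 fixed-point derivatives, so 1 << 32 means a 1:1 blit, and large format-converting copies are also flagged for the accelerated path. If the rasterizer declines, the new software blitter performs the copy instead of hitting UNIMPLEMENTED(). A condensed sketch of that decision (illustrative only; the 512/8 thresholds come straight from the hunk above):

#include <cstdint>

constexpr std::int64_t kUnitDerivative = std::int64_t{1} << 32; // 1.0 in 32.32 fixed point

// True when the 2D engine should insist on the accelerated (GPU) path.
bool MustAccelerate(std::int64_t du_dx, std::int64_t dv_dy, std::uint32_t src_width,
                    std::uint32_t src_height, std::uint32_t bytes_per_pixel,
                    bool formats_differ) {
    const bool is_scaled = du_dx != kUnitDerivative || dv_dy != kUnitDerivative;
    const bool large_converting_copy =
        src_width > 512 && src_height > 512 && bytes_per_pixel <= 8 && formats_differ;
    return is_scaled || large_converting_copy;
}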
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 1229aa35b..100b21bac 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -5,6 +5,7 @@
#include <array>
#include <cstddef>
+#include <memory>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -21,6 +22,10 @@ class RasterizerInterface;
namespace Tegra::Engines {
+namespace Blitter {
+class SoftwareBlitEngine;
+}
+
/**
* This Engine is known as G80_2D. Documentation can be found in:
* https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
@@ -32,7 +37,7 @@ namespace Tegra::Engines {
class Fermi2D final : public EngineInterface {
public:
- explicit Fermi2D();
+ explicit Fermi2D(MemoryManager& memory_manager_);
~Fermi2D() override;
/// Binds a rasterizer to this engine.
@@ -92,7 +97,7 @@ public:
u32 addr_lower;
[[nodiscard]] constexpr GPUVAddr Address() const noexcept {
- return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower);
+ return (GPUVAddr{addr_upper} << 32) | GPUVAddr{addr_lower};
}
};
static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
@@ -286,6 +291,7 @@ public:
struct Config {
Operation operation;
Filter filter;
+ bool must_accelerate;
s32 dst_x0;
s32 dst_y0;
s32 dst_x1;
@@ -298,6 +304,7 @@ public:
private:
VideoCore::RasterizerInterface* rasterizer = nullptr;
+ std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter;
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 7c50bdbe0..e5c622155 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -50,11 +50,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
u32 methods_pending) {
switch (method) {
case KEPLER_COMPUTE_REG_INDEX(data_upload):
- upload_state.ProcessData(base_start, static_cast<size_t>(amount));
+ upload_state.ProcessData(base_start, amount);
return;
default:
- for (std::size_t i = 0; i < amount; i++) {
- CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+ for (u32 i = 0; i < amount; i++) {
+ CallMethod(method, base_start[i], methods_pending - i <= 1);
}
break;
}
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index aab309ecc..e154e3f06 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -68,7 +68,7 @@ public:
struct {
u32 address;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
+ return GPUVAddr{address} << 8;
}
} launch_desc_loc;
@@ -83,8 +83,7 @@ public:
u32 address_low;
u32 limit;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
} tsc;
@@ -95,8 +94,7 @@ public:
u32 address_low;
u32 limit;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
} tic;
@@ -106,8 +104,7 @@ public:
u32 address_high;
u32 address_low;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
} code_loc;
@@ -162,8 +159,7 @@ public:
BitField<15, 17, u32> size;
};
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
- address_low);
+ return (GPUVAddr{address_high.Value()} << 32) | GPUVAddr{address_low};
}
};
std::array<ConstBufferConfig, NumConstBuffers> const_buffer_config;
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index a3fbab1e5..08045d1cf 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -42,11 +42,11 @@ void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount
u32 methods_pending) {
switch (method) {
case KEPLERMEMORY_REG_INDEX(data):
- upload_state.ProcessData(base_start, static_cast<size_t>(amount));
+ upload_state.ProcessData(base_start, amount);
return;
default:
- for (std::size_t i = 0; i < amount; i++) {
- CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+ for (u32 i = 0; i < amount; i++) {
+ CallMethod(method, base_start[i], methods_pending - i <= 1);
}
break;
}
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index d502d181c..34bbc72cf 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -217,22 +217,25 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
regs.index_buffer.count = regs.index_buffer32_first.count;
regs.index_buffer.first = regs.index_buffer32_first.first;
dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ draw_indexed = true;
return ProcessDraw();
case MAXWELL3D_REG_INDEX(index_buffer16_first):
regs.index_buffer.count = regs.index_buffer16_first.count;
regs.index_buffer.first = regs.index_buffer16_first.first;
dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ draw_indexed = true;
return ProcessDraw();
case MAXWELL3D_REG_INDEX(index_buffer8_first):
regs.index_buffer.count = regs.index_buffer8_first.count;
regs.index_buffer.first = regs.index_buffer8_first.first;
dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ draw_indexed = true;
return ProcessDraw();
case MAXWELL3D_REG_INDEX(topology_override):
use_topology_override = true;
return;
case MAXWELL3D_REG_INDEX(clear_surface):
- return ProcessClearBuffers();
+ return ProcessClearBuffers(1);
case MAXWELL3D_REG_INDEX(report_semaphore.query):
return ProcessQueryGet();
case MAXWELL3D_REG_INDEX(render_enable.mode):
@@ -249,9 +252,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
return;
case MAXWELL3D_REG_INDEX(fragment_barrier):
return rasterizer->FragmentBarrier();
- case MAXWELL3D_REG_INDEX(invalidate_texture_data_cache):
- rasterizer->InvalidateGPUCache();
- return rasterizer->WaitForIdle();
case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
return rasterizer->TiledCacheBarrier();
}
@@ -288,31 +288,63 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");
+ const u32 argument = ProcessShadowRam(method, method_argument);
+ ProcessDirtyRegisters(method, argument);
+
if (draw_command[method]) {
regs.reg_array[method] = method_argument;
deferred_draw_method.push_back(method);
- auto u32_to_u8 = [&](const u32 argument) {
- inline_index_draw_indexes.push_back(static_cast<u8>(argument & 0x000000ff));
- inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0x0000ff00) >> 8));
- inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0x00ff0000) >> 16));
- inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0xff000000) >> 24));
+ auto update_inline_index = [&](const u32 index) {
+ inline_index_draw_indexes.push_back(static_cast<u8>(index & 0x000000ff));
+ inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x0000ff00) >> 8));
+ inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x00ff0000) >> 16));
+ inline_index_draw_indexes.push_back(static_cast<u8>((index & 0xff000000) >> 24));
+ draw_mode = DrawMode::InlineIndex;
};
- if (MAXWELL3D_REG_INDEX(draw_inline_index) == method) {
- u32_to_u8(method_argument);
- } else if (MAXWELL3D_REG_INDEX(inline_index_2x16.even) == method) {
- u32_to_u8(regs.inline_index_2x16.even);
- u32_to_u8(regs.inline_index_2x16.odd);
- } else if (MAXWELL3D_REG_INDEX(inline_index_4x8.index0) == method) {
- u32_to_u8(regs.inline_index_4x8.index0);
- u32_to_u8(regs.inline_index_4x8.index1);
- u32_to_u8(regs.inline_index_4x8.index2);
- u32_to_u8(regs.inline_index_4x8.index3);
+ switch (method) {
+ case MAXWELL3D_REG_INDEX(draw.begin): {
+ draw_mode =
+ (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) ||
+ (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged)
+ ? DrawMode::Instance
+ : DrawMode::General;
+ break;
+ }
+ case MAXWELL3D_REG_INDEX(draw.end):
+ switch (draw_mode) {
+ case DrawMode::General:
+ ProcessDraw();
+ break;
+ case DrawMode::InlineIndex:
+ regs.index_buffer.count = static_cast<u32>(inline_index_draw_indexes.size() / 4);
+ regs.index_buffer.format = Regs::IndexFormat::UnsignedInt;
+ draw_indexed = true;
+ ProcessDraw();
+ inline_index_draw_indexes.clear();
+ break;
+ case DrawMode::Instance:
+ break;
+ }
+ break;
+ case MAXWELL3D_REG_INDEX(index_buffer.count):
+ draw_indexed = true;
+ break;
+ case MAXWELL3D_REG_INDEX(draw_inline_index):
+ update_inline_index(method_argument);
+ break;
+ case MAXWELL3D_REG_INDEX(inline_index_2x16.even):
+ update_inline_index(regs.inline_index_2x16.even);
+ update_inline_index(regs.inline_index_2x16.odd);
+ break;
+ case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
+ update_inline_index(regs.inline_index_4x8.index0);
+ update_inline_index(regs.inline_index_4x8.index1);
+ update_inline_index(regs.inline_index_4x8.index2);
+ update_inline_index(regs.inline_index_4x8.index3);
+ break;
}
} else {
ProcessDeferredDraw();
-
- const u32 argument = ProcessShadowRam(method, method_argument);
- ProcessDirtyRegisters(method, argument);
ProcessMethodCall(method, argument, method_argument, is_last_call);
}
}
@@ -345,11 +377,11 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
ProcessCBMultiData(base_start, amount);
break;
case MAXWELL3D_REG_INDEX(inline_data):
- upload_state.ProcessData(base_start, static_cast<size_t>(amount));
+ upload_state.ProcessData(base_start, amount);
return;
default:
- for (std::size_t i = 0; i < amount; i++) {
- CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+ for (u32 i = 0; i < amount; i++) {
+ CallMethod(method, base_start[i], methods_pending - i <= 1);
}
break;
}
@@ -511,10 +543,7 @@ void Maxwell3D::ProcessCounterReset() {
void Maxwell3D::ProcessSyncPoint() {
const u32 sync_point = regs.sync_info.sync_point.Value();
- const u32 cache_flush = regs.sync_info.clean_l2.Value();
- if (cache_flush != 0) {
- rasterizer->InvalidateGPUCache();
- }
+ [[maybe_unused]] const u32 cache_flush = regs.sync_info.clean_l2.Value();
rasterizer->SignalSyncPoint(sync_point);
}
@@ -596,87 +625,44 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
return regs.reg_array[method];
}
-void Maxwell3D::ProcessClearBuffers() {
- rasterizer->Clear();
+void Maxwell3D::ProcessClearBuffers(u32 layer_count) {
+ rasterizer->Clear(layer_count);
}
void Maxwell3D::ProcessDraw(u32 instance_count) {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
- regs.vertex_buffer.count);
-
- ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), "Both indexed and direct?");
-
- // Both instance configuration registers can not be set at the same time.
- ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First ||
- regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged,
- "Illegal combination of instancing parameters");
+ draw_indexed ? regs.index_buffer.count : regs.vertex_buffer.count);
ProcessTopologyOverride();
- const bool is_indexed = regs.index_buffer.count && !regs.vertex_buffer.count;
if (ShouldExecute()) {
- rasterizer->Draw(is_indexed, instance_count);
+ rasterizer->Draw(draw_indexed, instance_count);
}
- if (is_indexed) {
- regs.index_buffer.count = 0;
- } else {
- regs.vertex_buffer.count = 0;
- }
+ draw_indexed = false;
+ deferred_draw_method.clear();
}
void Maxwell3D::ProcessDeferredDraw() {
- if (deferred_draw_method.empty()) {
+ if (draw_mode != DrawMode::Instance || deferred_draw_method.empty()) {
return;
}
- enum class DrawMode {
- Undefined,
- General,
- Instance,
- };
- DrawMode draw_mode{DrawMode::Undefined};
- u32 method_count = static_cast<u32>(deferred_draw_method.size());
- u32 method = deferred_draw_method[method_count - 1];
- if (MAXWELL3D_REG_INDEX(draw.end) != method) {
- return;
- }
- draw_mode = (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) ||
- (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged)
- ? DrawMode::Instance
- : DrawMode::General;
- u32 instance_count = 0;
- if (draw_mode == DrawMode::Instance) {
- u32 vertex_buffer_count = 0;
- u32 index_buffer_count = 0;
- for (u32 index = 0; index < method_count; ++index) {
- method = deferred_draw_method[index];
- if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count)) {
- instance_count = ++vertex_buffer_count;
- } else if (method == MAXWELL3D_REG_INDEX(index_buffer.count)) {
- instance_count = ++index_buffer_count;
- }
- }
- ASSERT_MSG(!(vertex_buffer_count && index_buffer_count),
- "Instance both indexed and direct?");
- } else {
- instance_count = 1;
- for (u32 index = 0; index < method_count; ++index) {
- method = deferred_draw_method[index];
- if (MAXWELL3D_REG_INDEX(draw_inline_index) == method ||
- MAXWELL3D_REG_INDEX(inline_index_2x16.even) == method ||
- MAXWELL3D_REG_INDEX(inline_index_4x8.index0) == method) {
- regs.index_buffer.count = static_cast<u32>(inline_index_draw_indexes.size() / 4);
- regs.index_buffer.format = Regs::IndexFormat::UnsignedInt;
- break;
- }
+ const auto method_count = deferred_draw_method.size();
+ u32 instance_count = 1;
+ u32 vertex_buffer_count = 0;
+ u32 index_buffer_count = 0;
+ for (size_t index = 0; index < method_count; ++index) {
+ const u32 method = deferred_draw_method[index];
+ if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count)) {
+ instance_count = ++vertex_buffer_count;
+ } else if (method == MAXWELL3D_REG_INDEX(index_buffer.count)) {
+ instance_count = ++index_buffer_count;
}
}
+ ASSERT_MSG(!(vertex_buffer_count && index_buffer_count), "Instance both indexed and direct?");
ProcessDraw(instance_count);
-
- deferred_draw_method.clear();
- inline_index_draw_indexes.clear();
}
} // namespace Tegra::Engines
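The draw handling above replaces the old end-of-batch scan with a small state machine: draw.begin selects General or Instance mode, inline index writes switch to InlineIndex mode, and draw.end fires the draw. Inline indexes are accumulated as little-endian bytes and later consumed as an unsigned-int index buffer. A standalone sketch of that packing (not the engine code itself):

#include <cstdint>
#include <vector>

// Append one 32-bit inline index as four little-endian bytes, matching how the
// accumulated vector is later treated as an IndexFormat::UnsignedInt buffer.
void PushInlineIndex(std::vector<std::uint8_t>& bytes, std::uint32_t index) {
    bytes.push_back(static_cast<std::uint8_t>(index & 0xff));
    bytes.push_back(static_cast<std::uint8_t>((index >> 8) & 0xff));
    bytes.push_back(static_cast<std::uint8_t>((index >> 16) & 0xff));
    bytes.push_back(static_cast<std::uint8_t>((index >> 24) & 0xff));
}

// At draw.end: index_buffer.count = static_cast<std::uint32_t>(bytes.size() / 4);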
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 34b085388..a541cd95f 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -96,8 +96,7 @@ public:
u32 type;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
};
@@ -106,8 +105,7 @@ public:
u32 address_low;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
};
@@ -124,8 +122,7 @@ public:
Mode mode;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(offset_high) << 32) |
- offset_low);
+ return (GPUVAddr{offset_high} << 32) | GPUVAddr{offset_low};
}
};
@@ -187,7 +184,7 @@ public:
default:
// Thresholds begin at 0x10 (1 << 4)
// Threshold is in the range 0x1 to 0x13
- return 1 << (4 + threshold.Value() - 1);
+ return 1U << (4 + threshold.Value() - 1);
}
}
};
@@ -468,8 +465,7 @@ public:
INSERT_PADDING_BYTES_NOINIT(0xC);
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
};
static_assert(sizeof(Buffer) == 0x20);
@@ -511,12 +507,11 @@ public:
u32 default_size_per_warp;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
u64 Size() const {
- return (static_cast<u64>(size_high) << 32) | size_low;
+ return (u64{size_high} << 32) | u64{size_low};
}
};
@@ -538,13 +533,11 @@ public:
u32 storage_limit_address_low;
GPUVAddr StorageAddress() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(storage_address_high) << 32) |
- storage_address_low);
+ return (GPUVAddr{storage_address_high} << 32) | GPUVAddr{storage_address_low};
}
GPUVAddr StorageLimitAddress() const {
- return static_cast<GPUVAddr>(
- (static_cast<GPUVAddr>(storage_limit_address_high) << 32) |
- storage_limit_address_low);
+ return (GPUVAddr{storage_limit_address_high} << 32) |
+ GPUVAddr{storage_limit_address_low};
}
};
@@ -829,11 +822,11 @@ public:
struct CompressionThresholdSamples {
u32 samples;
- u32 Samples() {
+ u32 Samples() const {
if (samples == 0) {
return 0;
}
- return 1 << (samples - 1);
+ return 1U << (samples - 1);
}
};
@@ -1138,8 +1131,7 @@ public:
INSERT_PADDING_BYTES_NOINIT(0x18);
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
};
static_assert(sizeof(RenderTargetConfig) == 0x40);
@@ -1482,8 +1474,7 @@ public:
u32 address_low;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
};
@@ -1533,8 +1524,7 @@ public:
u32 address_low;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
};
@@ -1561,8 +1551,7 @@ public:
u32 array_pitch;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
};
@@ -1910,8 +1899,7 @@ public:
Mode mode;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
};
@@ -1921,8 +1909,7 @@ public:
u32 limit;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
};
@@ -1932,8 +1919,7 @@ public:
u32 limit;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
};
@@ -1981,8 +1967,7 @@ public:
u32 address_low;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
};
@@ -2027,8 +2012,7 @@ public:
u32 address_low;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
};
@@ -2224,19 +2208,16 @@ public:
}
GPUVAddr StartAddress() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_addr_high) << 32) |
- start_addr_low);
+ return (GPUVAddr{start_addr_high} << 32) | GPUVAddr{start_addr_low};
}
GPUVAddr EndAddress() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_addr_high) << 32) |
- limit_addr_low);
+ return (GPUVAddr{limit_addr_high} << 32) | GPUVAddr{limit_addr_low};
}
/// Adjust the index buffer offset so it points to the first desired index.
GPUVAddr IndexStart() const {
- return StartAddress() +
- static_cast<size_t>(first) * static_cast<size_t>(FormatSizeInBytes());
+ return StartAddress() + size_t{first} * size_t{FormatSizeInBytes()};
}
};
@@ -2464,8 +2445,7 @@ public:
} query;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
};
@@ -2479,8 +2459,7 @@ public:
u32 frequency;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
bool IsEnabled() const {
@@ -2494,8 +2473,7 @@ public:
u32 address_low;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
};
static_assert(sizeof(VertexStreamLimit) == 0x8);
@@ -2543,8 +2521,7 @@ public:
std::array<u32, NumCBData> buffer;
GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
+ return (GPUVAddr{address_high} << 32) | GPUVAddr{address_low};
}
};
@@ -3086,6 +3063,9 @@ public:
std::vector<u8> inline_index_draw_indexes;
+ /// Handles a write to the CLEAR_BUFFERS register.
+ void ProcessClearBuffers(u32 layer_count);
+
private:
void InitializeRegisterDefaults();
@@ -3120,9 +3100,6 @@ private:
/// Handles firmware blob 4
void ProcessFirmwareCall4();
- /// Handles a write to the CLEAR_BUFFERS register.
- void ProcessClearBuffers();
-
/// Handles a write to the QUERY_GET register.
void ProcessQueryGet();
@@ -3148,10 +3125,12 @@ private:
/// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro)
void ProcessTopologyOverride();
- void ProcessDraw(u32 instance_count = 1);
-
+ /// Handles deferred draw(e.g., instance draw).
void ProcessDeferredDraw();
+ /// Handles a draw.
+ void ProcessDraw(u32 instance_count = 1);
+
/// Returns a query's value or an empty object if the value will be deferred through a cache.
std::optional<u64> GetQueryResult();
@@ -3178,6 +3157,9 @@ private:
std::array<bool, Regs::NUM_REGS> draw_command{};
std::vector<u32> deferred_draw_method;
+ enum class DrawMode : u32 { General = 0, Instance, InlineIndex };
+ DrawMode draw_mode{DrawMode::General};
+ bool draw_indexed{};
};
#define ASSERT_REG_POSITION(field_name, position) \
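The 1 << n shifts in this header become 1U << n because a plain int shift would then be converted back into the u32 result, which the stricter conversion warnings enabled for this library can flag. A standalone illustration (mirrors the threshold helper above):

#include <cstdint>

// Starting from an unsigned literal keeps the whole expression in u32, so no
// implicit int -> u32 conversion is involved.
constexpr std::uint32_t ThresholdBytes(std::uint32_t threshold) {
    // Thresholds begin at 0x10 (1 << 4); threshold is in the range 0x1 to 0x13.
    return 1U << (4 + threshold - 1);
}
static_assert(ThresholdBytes(0x1) == 0x10);
static_assert(ThresholdBytes(0x13) == 0x400000);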
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 54523a4b2..a189e60ae 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -41,8 +41,8 @@ void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call)
void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) {
- for (size_t i = 0; i < amount; ++i) {
- CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+ for (u32 i = 0; i < amount; ++i) {
+ CallMethod(method, base_start[i], methods_pending - i <= 1);
}
}
@@ -62,7 +62,8 @@ void MaxwellDMA::Launch() {
if (!is_src_pitch && !is_dst_pitch) {
// If both the source and the destination are in block layout, assert.
- UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented");
+ CopyBlockLinearToBlockLinear();
+ ReleaseSemaphore();
return;
}
@@ -93,14 +94,14 @@ void MaxwellDMA::Launch() {
reinterpret_cast<u8*>(tmp_buffer.data()),
regs.line_length_in * sizeof(u32));
} else {
- auto convert_linear_2_blocklinear_addr = [](u64 address) {
+ const auto convert_linear_2_blocklinear_addr = [](u64 address) {
return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
((address & 0x180) >> 1) | ((address & 0x20) << 3);
};
- auto src_kind = memory_manager.GetPageKind(regs.offset_in);
- auto dst_kind = memory_manager.GetPageKind(regs.offset_out);
- const bool is_src_pitch = IsPitchKind(static_cast<PTEKind>(src_kind));
- const bool is_dst_pitch = IsPitchKind(static_cast<PTEKind>(dst_kind));
+ const auto src_kind = memory_manager.GetPageKind(regs.offset_in);
+ const auto dst_kind = memory_manager.GetPageKind(regs.offset_out);
+ const bool is_src_pitch = IsPitchKind(src_kind);
+ const bool is_dst_pitch = IsPitchKind(dst_kind);
if (!is_src_pitch && is_dst_pitch) {
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
@@ -291,6 +292,70 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
+void MaxwellDMA::CopyBlockLinearToBlockLinear() {
+ UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
+
+ const bool is_remapping = regs.launch_dma.remap_enable != 0;
+
+ // Deswizzle the input and copy it over.
+ const Parameters& src = regs.src_params;
+ const Parameters& dst = regs.dst_params;
+
+ const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
+ const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
+
+ const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
+
+ u32 src_width = src.width;
+ u32 dst_width = dst.width;
+ u32 x_elements = regs.line_length_in;
+ u32 src_x_offset = src.origin.x;
+ u32 dst_x_offset = dst.origin.x;
+ u32 bpp_shift = 0U;
+ if (!is_remapping) {
+ bpp_shift = Common::FoldRight(
+ 4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
+ src_width, dst_width, x_elements, src_x_offset, dst_x_offset,
+ static_cast<u32>(regs.offset_in), static_cast<u32>(regs.offset_out));
+ src_width >>= bpp_shift;
+ dst_width >>= bpp_shift;
+ x_elements >>= bpp_shift;
+ src_x_offset >>= bpp_shift;
+ dst_x_offset >>= bpp_shift;
+ }
+
+ const u32 bytes_per_pixel = base_bpp << bpp_shift;
+ const size_t src_size = CalculateSize(true, bytes_per_pixel, src_width, src.height, src.depth,
+ src.block_size.height, src.block_size.depth);
+ const size_t dst_size = CalculateSize(true, bytes_per_pixel, dst_width, dst.height, dst.depth,
+ dst.block_size.height, dst.block_size.depth);
+
+ const u32 pitch = x_elements * bytes_per_pixel;
+ const size_t mid_buffer_size = pitch * regs.line_count;
+
+ if (read_buffer.size() < src_size) {
+ read_buffer.resize(src_size);
+ }
+ if (write_buffer.size() < dst_size) {
+ write_buffer.resize(dst_size);
+ }
+
+ intermediate_buffer.resize(mid_buffer_size);
+
+ memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
+ memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
+
+ UnswizzleSubrect(intermediate_buffer, read_buffer, bytes_per_pixel, src_width, src.height,
+ src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count,
+ src.block_size.height, src.block_size.depth, pitch);
+
+ SwizzleSubrect(write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height,
+ dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
+ dst.block_size.height, dst.block_size.depth, pitch);
+
+ memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
+}
+
void MaxwellDMA::ReleaseSemaphore() {
const auto type = regs.launch_dma.semaphore_type;
const GPUVAddr address = regs.semaphore.address;
@@ -314,6 +379,7 @@ void MaxwellDMA::ReleaseSemaphore() {
}
default:
ASSERT_MSG(false, "Unknown semaphore type: {}", static_cast<u32>(type.Value()));
+ break;
}
}
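CopyBlockLinearToBlockLinear avoids a dedicated tiled-to-tiled swizzler: it reads both surfaces, deswizzles the source subrectangle into a pitch-linear intermediate buffer, then swizzles that buffer into the destination layout and writes it back (reading the destination first preserves texels outside the copied subrectangle). The bpp_shift fold is the interesting detail: when no component remap is active, the common power-of-two factor of every width, offset and address lets the copy run with wider pixels. A standalone sketch of that fold, assuming Common::FoldRight behaves like a plain fold over its trailing arguments:

#include <algorithm>
#include <bit>
#include <cstdint>
#include <initializer_list>

// Largest k (capped at 4) such that every value is a multiple of 2^k; the copy
// can then treat (1 << k) consecutive bytes as one pixel.
std::uint32_t CommonPow2Shift(std::initializer_list<std::uint32_t> values) {
    std::uint32_t shift = 4U;
    for (const std::uint32_t value : values) {
        shift = std::min(shift, static_cast<std::uint32_t>(std::countr_zero(value)));
    }
    return shift;
}
// e.g. if widths, offsets and addresses are all multiples of 4 but not 8, the
// result is 2 and the transfer runs with 4-byte pixels instead of 1-byte ones.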
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 953e34adc..d40d3d302 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -223,6 +223,8 @@ private:
void CopyPitchToBlockLinear();
+ void CopyBlockLinearToBlockLinear();
+
void FastCopyBlockLinearToPitch();
void ReleaseSemaphore();
@@ -234,6 +236,7 @@ private:
std::vector<u8> read_buffer;
std::vector<u8> write_buffer;
+ std::vector<u8> intermediate_buffer;
static constexpr std::size_t NUM_REGS = 0x800;
struct Regs {
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index 3977bb0fb..7718a09b3 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -31,7 +31,7 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) {
LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
method_call.argument);
const auto engine_id = static_cast<EngineID>(method_call.argument);
- bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
+ bound_engines[method_call.subchannel] = engine_id;
switch (engine_id) {
case EngineID::FERMI_TWOD_A:
dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel);
@@ -50,6 +50,7 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) {
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
+ break;
}
}
@@ -65,6 +66,7 @@ void Puller::ProcessFenceActionMethod() {
break;
default:
UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
+ break;
}
}
@@ -116,7 +118,7 @@ void Puller::ProcessSemaphoreRelease() {
std::function<void()> operation([this, sequence_address, payload] {
memory_manager.Write<u32>(sequence_address, payload);
});
- rasterizer->SyncOperation(std::move(operation));
+ rasterizer->SignalFence(std::move(operation));
}
void Puller::ProcessSemaphoreAcquire() {
@@ -149,8 +151,8 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
case BufferMethods::SemaphoreAddressLow:
case BufferMethods::SemaphoreSequencePayload:
case BufferMethods::SyncpointPayload:
- break;
case BufferMethods::WrcacheFlush:
+ break;
case BufferMethods::RefCnt:
rasterizer->SignalReference();
break;
@@ -228,6 +230,7 @@ void Puller::CallEngineMethod(const MethodCall& method_call) {
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine");
+ break;
}
}
@@ -254,6 +257,7 @@ void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_s
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine");
+ break;
}
}
@@ -281,12 +285,12 @@ void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start,
if (ExecuteMethodOnEngine(method)) {
CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
} else {
- for (std::size_t i = 0; i < amount; i++) {
+ for (u32 i = 0; i < amount; i++) {
CallPullerMethod(MethodCall{
method,
base_start[i],
subchannel,
- methods_pending - static_cast<u32>(i),
+ methods_pending - i,
});
}
}
diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp
new file mode 100644
index 000000000..2f1ea4626
--- /dev/null
+++ b/src/video_core/engines/sw_blitter/blitter.cpp
@@ -0,0 +1,238 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include "video_core/engines/sw_blitter/blitter.h"
+#include "video_core/engines/sw_blitter/converter.h"
+#include "video_core/memory_manager.h"
+#include "video_core/surface.h"
+#include "video_core/textures/decoders.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+using VideoCore::Surface::BytesPerBlock;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+
+namespace Tegra::Engines::Blitter {
+
+using namespace Texture;
+
+namespace {
+
+constexpr size_t ir_components = 4;
+
+void NearestNeighbor(std::span<const u8> input, std::span<u8> output, u32 src_width, u32 src_height,
+ u32 dst_width, u32 dst_height, size_t bpp) {
+ const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32));
+ const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32));
+ size_t src_y = 0;
+ for (u32 y = 0; y < dst_height; y++) {
+ size_t src_x = 0;
+ for (u32 x = 0; x < dst_width; x++) {
+ const size_t read_from = ((src_y * src_width + src_x) >> 32) * bpp;
+ const size_t write_to = (y * dst_width + x) * bpp;
+
+ std::memcpy(&output[write_to], &input[read_from], bpp);
+ src_x += dx_du;
+ }
+ src_y += dy_dv;
+ }
+}
+
+void NearestNeighborFast(std::span<const f32> input, std::span<f32> output, u32 src_width,
+ u32 src_height, u32 dst_width, u32 dst_height) {
+ const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32));
+ const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32));
+ size_t src_y = 0;
+ for (u32 y = 0; y < dst_height; y++) {
+ size_t src_x = 0;
+ for (u32 x = 0; x < dst_width; x++) {
+ const size_t read_from = ((src_y * src_width + src_x) >> 32) * ir_components;
+ const size_t write_to = (y * dst_width + x) * ir_components;
+
+ std::memcpy(&output[write_to], &input[read_from], sizeof(f32) * ir_components);
+ src_x += dx_du;
+ }
+ src_y += dy_dv;
+ }
+}
+
+void Bilinear(std::span<const f32> input, std::span<f32> output, size_t src_width,
+ size_t src_height, size_t dst_width, size_t dst_height) {
+ const auto bilinear_sample = [](std::span<const f32> x0_y0, std::span<const f32> x1_y0,
+ std::span<const f32> x0_y1, std::span<const f32> x1_y1,
+ f32 weight_x, f32 weight_y) {
+ std::array<f32, ir_components> result{};
+ for (size_t i = 0; i < ir_components; i++) {
+ const f32 a = std::lerp(x0_y0[i], x1_y0[i], weight_x);
+ const f32 b = std::lerp(x0_y1[i], x1_y1[i], weight_x);
+ result[i] = std::lerp(a, b, weight_y);
+ }
+ return result;
+ };
+ const f32 dx_du =
+ dst_width > 1 ? static_cast<f32>(src_width - 1) / static_cast<f32>(dst_width - 1) : 0.f;
+ const f32 dy_dv =
+ dst_height > 1 ? static_cast<f32>(src_height - 1) / static_cast<f32>(dst_height - 1) : 0.f;
+ for (u32 y = 0; y < dst_height; y++) {
+ for (u32 x = 0; x < dst_width; x++) {
+ const f32 x_low = std::floor(static_cast<f32>(x) * dx_du);
+ const f32 y_low = std::floor(static_cast<f32>(y) * dy_dv);
+ const f32 x_high = std::ceil(static_cast<f32>(x) * dx_du);
+ const f32 y_high = std::ceil(static_cast<f32>(y) * dy_dv);
+ const f32 weight_x = (static_cast<f32>(x) * dx_du) - x_low;
+ const f32 weight_y = (static_cast<f32>(y) * dy_dv) - y_low;
+
+ const auto read_src = [&](f32 in_x, f32 in_y) {
+ const size_t read_from =
+ ((static_cast<size_t>(in_x) * src_width + static_cast<size_t>(in_y)) >> 32) *
+ ir_components;
+ return std::span<const f32>(&input[read_from], ir_components);
+ };
+
+ auto x0_y0 = read_src(x_low, y_low);
+ auto x1_y0 = read_src(x_high, y_low);
+ auto x0_y1 = read_src(x_low, y_high);
+ auto x1_y1 = read_src(x_high, y_high);
+
+ const auto result = bilinear_sample(x0_y0, x1_y0, x0_y1, x1_y1, weight_x, weight_y);
+
+ const size_t write_to = (y * dst_width + x) * ir_components;
+
+ std::memcpy(&output[write_to], &result, sizeof(f32) * ir_components);
+ }
+ }
+}
+
+} // namespace
+
+struct SoftwareBlitEngine::BlitEngineImpl {
+ std::vector<u8> tmp_buffer;
+ std::vector<u8> src_buffer;
+ std::vector<u8> dst_buffer;
+ std::vector<f32> intermediate_src;
+ std::vector<f32> intermediate_dst;
+ ConverterFactory converter_factory;
+};
+
+SoftwareBlitEngine::SoftwareBlitEngine(MemoryManager& memory_manager_)
+ : memory_manager{memory_manager_} {
+ impl = std::make_unique<BlitEngineImpl>();
+}
+
+SoftwareBlitEngine::~SoftwareBlitEngine() = default;
+
+bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
+ Fermi2D::Config& config) {
+ const auto get_surface_size = [](Fermi2D::Surface& surface, u32 bytes_per_pixel) {
+ if (surface.linear == Fermi2D::MemoryLayout::BlockLinear) {
+ return CalculateSize(true, bytes_per_pixel, surface.width, surface.height,
+ surface.depth, surface.block_height, surface.block_depth);
+ }
+ return static_cast<size_t>(surface.pitch * surface.height);
+ };
+ const auto process_pitch_linear = [](bool unpack, std::span<const u8> input,
+ std::span<u8> output, u32 extent_x, u32 extent_y,
+ u32 pitch, u32 x0, u32 y0, size_t bpp) {
+ const size_t base_offset = x0 * bpp;
+ const size_t copy_size = extent_x * bpp;
+ for (u32 y = y0; y < extent_y; y++) {
+ const size_t first_offset = y * pitch + base_offset;
+ const size_t second_offset = y * extent_x * bpp;
+ u8* write_to = unpack ? &output[first_offset] : &output[second_offset];
+ const u8* read_from = unpack ? &input[second_offset] : &input[first_offset];
+ std::memcpy(write_to, read_from, copy_size);
+ }
+ };
+
+ const u32 src_extent_x = config.src_x1 - config.src_x0;
+ const u32 src_extent_y = config.src_y1 - config.src_y0;
+
+ const u32 dst_extent_x = config.dst_x1 - config.dst_x0;
+ const u32 dst_extent_y = config.dst_y1 - config.dst_y0;
+ const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
+ const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format));
+ const size_t src_size = get_surface_size(src, src_bytes_per_pixel);
+ impl->tmp_buffer.resize(src_size);
+ memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size);
+
+ const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel;
+
+ const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel;
+
+ impl->src_buffer.resize(src_copy_size);
+
+ const bool no_passthrough =
+ src.format != dst.format || src_extent_x != dst_extent_x || src_extent_y != dst_extent_y;
+
+ const auto convertion_phase_same_format = [&]() {
+ NearestNeighbor(impl->src_buffer, impl->dst_buffer, src_extent_x, src_extent_y,
+ dst_extent_x, dst_extent_y, dst_bytes_per_pixel);
+ };
+
+ const auto convertion_phase_ir = [&]() {
+ auto* input_converter = impl->converter_factory.GetFormatConverter(src.format);
+ impl->intermediate_src.resize((src_copy_size / src_bytes_per_pixel) * ir_components);
+ impl->intermediate_dst.resize((dst_copy_size / dst_bytes_per_pixel) * ir_components);
+ input_converter->ConvertTo(impl->src_buffer, impl->intermediate_src);
+
+ if (config.filter != Fermi2D::Filter::Bilinear) {
+ NearestNeighborFast(impl->intermediate_src, impl->intermediate_dst, src_extent_x,
+ src_extent_y, dst_extent_x, dst_extent_y);
+ } else {
+ Bilinear(impl->intermediate_src, impl->intermediate_dst, src_extent_x, src_extent_y,
+ dst_extent_x, dst_extent_y);
+ }
+
+ auto* output_converter = impl->converter_factory.GetFormatConverter(dst.format);
+ output_converter->ConvertFrom(impl->intermediate_dst, impl->dst_buffer);
+ };
+
+ // Do the actual blit

+
+ impl->dst_buffer.resize(dst_copy_size);
+ if (src.linear == Fermi2D::MemoryLayout::BlockLinear) {
+ UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width,
+ src.height, src.depth, config.src_x0, config.src_y0, src_extent_x,
+ src_extent_y, src.block_height, src.block_depth,
+ src_extent_x * src_bytes_per_pixel);
+ } else {
+ process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y,
+ src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel);
+ }
+
+ // Conversion Phase
+ if (no_passthrough) {
+ if (src.format != dst.format || config.filter == Fermi2D::Filter::Bilinear) {
+ convertion_phase_ir();
+ } else {
+ convertion_phase_same_format();
+ }
+ } else {
+ impl->dst_buffer.swap(impl->src_buffer);
+ }
+
+ const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel);
+ impl->tmp_buffer.resize(dst_size);
+ memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
+
+ if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) {
+ SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width,
+ dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x,
+ dst_extent_y, dst.block_height, dst.block_depth,
+ dst_extent_x * dst_bytes_per_pixel);
+ } else {
+ process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y,
+ dst.pitch, config.dst_x0, config.dst_y0,
+ static_cast<size_t>(dst_bytes_per_pixel));
+ }
+ memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
+ return true;
+}
+
+} // namespace Tegra::Engines::Blitter
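NearestNeighbor above walks the source image in 32.32 fixed point: the per-destination-pixel step is round((src_extent / dst_extent) * 2^32), an accumulator advances by that step, and shifting right by 32 recovers the integer source coordinate, so the inner loop needs no divides or floating-point math. A standalone illustration of the stepping:

#include <cmath>
#include <cstdint>

// 32.32 fixed-point source step per destination pixel.
std::uint64_t FixedStep(std::uint32_t src_extent, std::uint32_t dst_extent) {
    return static_cast<std::uint64_t>(
        std::llround((static_cast<double>(src_extent) / dst_extent) * (1ULL << 32)));
}

// Integer source coordinate for the current accumulator value.
std::uint32_t SourceIndex(std::uint64_t accumulator) {
    return static_cast<std::uint32_t>(accumulator >> 32);
}
// Upscaling 256 -> 512: FixedStep(256, 512) == 0x80000000, so destination pixels
// 0 and 1 sample source pixel 0, pixels 2 and 3 sample source pixel 1, and so on.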
diff --git a/src/video_core/engines/sw_blitter/blitter.h b/src/video_core/engines/sw_blitter/blitter.h
new file mode 100644
index 000000000..85b55c836
--- /dev/null
+++ b/src/video_core/engines/sw_blitter/blitter.h
@@ -0,0 +1,27 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include "video_core/engines/fermi_2d.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace Tegra::Engines::Blitter {
+
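+// Software (CPU-side) implementation of Fermi2D blit operations. Minimal usage sketch (the
+// surrounding call site is an assumption, not part of this header):
+//   SoftwareBlitEngine engine{memory_manager};
+//   engine.Blit(src_surface, dst_surface, fermi_2d_config);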
+class SoftwareBlitEngine {
+public:
+ explicit SoftwareBlitEngine(MemoryManager& memory_manager_);
+ ~SoftwareBlitEngine();
+
+ bool Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, Fermi2D::Config& copy_config);
+
+private:
+ MemoryManager& memory_manager;
+ struct BlitEngineImpl;
+ std::unique_ptr<BlitEngineImpl> impl;
+};
+
+} // namespace Tegra::Engines::Blitter
diff --git a/src/video_core/engines/sw_blitter/converter.cpp b/src/video_core/engines/sw_blitter/converter.cpp
new file mode 100644
index 000000000..2419b5632
--- /dev/null
+++ b/src/video_core/engines/sw_blitter/converter.cpp
@@ -0,0 +1,1234 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <algorithm>
+#include <array>
+#include <bit>
+#include <cmath>
+#include <cstring>
+#include <span>
+#include <unordered_map>
+
+#include "common/assert.h"
+#include "common/bit_cast.h"
+#include "video_core/engines/sw_blitter/converter.h"
+#include "video_core/surface.h"
+#include "video_core/textures/decoders.h"
+
+#ifdef _MSC_VER
+#define FORCE_INLINE __forceinline
+#else
+#define FORCE_INLINE inline __attribute__((always_inline))
+#endif
+
+namespace Tegra::Engines::Blitter {
+
+enum class Swizzle : size_t {
+ R = 0,
+ G = 1,
+ B = 2,
+ A = 3,
+ None,
+};
+
+enum class ComponentType : u32 {
+ SNORM = 1,
+ UNORM = 2,
+ SINT = 3,
+ UINT = 4,
+ SNORM_FORCE_FP16 = 5,
+ UNORM_FORCE_FP16 = 6,
+ FLOAT = 7,
+ SRGB = 8,
+};
+
+namespace {
+
+/*
+ * Note: Use generate_converters.py to generate the traits structs and switch cases for new
+ * render target formats, then copy-paste them into this file to update it. Simply run
+ * "python generate_converters.py" and take the code from its output; modify that script to add
+ * new formats.
+ */
+
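+// The two 256-entry tables below sample the standard sRGB transfer functions at i / 255:
+// SRGB_TO_RGB_LUT decodes an 8-bit sRGB value to linear, and RGB_TO_SRGB_LUT encodes a linear
+// value (quantized to 8 bits) back to sRGB.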
+constexpr std::array<f32, 256> SRGB_TO_RGB_LUT = {
+ 0.000000e+00f, 3.035270e-04f, 6.070540e-04f, 9.105810e-04f, 1.214108e-03f, 1.517635e-03f,
+ 1.821162e-03f, 2.124689e-03f, 2.428216e-03f, 2.731743e-03f, 3.035270e-03f, 3.346536e-03f,
+ 3.676507e-03f, 4.024717e-03f, 4.391442e-03f, 4.776953e-03f, 5.181517e-03f, 5.605392e-03f,
+ 6.048833e-03f, 6.512091e-03f, 6.995410e-03f, 7.499032e-03f, 8.023193e-03f, 8.568126e-03f,
+ 9.134059e-03f, 9.721218e-03f, 1.032982e-02f, 1.096009e-02f, 1.161224e-02f, 1.228649e-02f,
+ 1.298303e-02f, 1.370208e-02f, 1.444384e-02f, 1.520851e-02f, 1.599629e-02f, 1.680738e-02f,
+ 1.764195e-02f, 1.850022e-02f, 1.938236e-02f, 2.028856e-02f, 2.121901e-02f, 2.217389e-02f,
+ 2.315337e-02f, 2.415763e-02f, 2.518686e-02f, 2.624122e-02f, 2.732089e-02f, 2.842604e-02f,
+ 2.955684e-02f, 3.071344e-02f, 3.189603e-02f, 3.310477e-02f, 3.433981e-02f, 3.560131e-02f,
+ 3.688945e-02f, 3.820437e-02f, 3.954624e-02f, 4.091520e-02f, 4.231141e-02f, 4.373503e-02f,
+ 4.518620e-02f, 4.666509e-02f, 4.817183e-02f, 4.970657e-02f, 5.126946e-02f, 5.286065e-02f,
+ 5.448028e-02f, 5.612849e-02f, 5.780543e-02f, 5.951124e-02f, 6.124605e-02f, 6.301001e-02f,
+ 6.480327e-02f, 6.662594e-02f, 6.847817e-02f, 7.036009e-02f, 7.227185e-02f, 7.421357e-02f,
+ 7.618538e-02f, 7.818742e-02f, 8.021982e-02f, 8.228271e-02f, 8.437621e-02f, 8.650046e-02f,
+ 8.865558e-02f, 9.084171e-02f, 9.305897e-02f, 9.530747e-02f, 9.758735e-02f, 9.989873e-02f,
+ 1.022417e-01f, 1.046165e-01f, 1.070231e-01f, 1.094617e-01f, 1.119324e-01f, 1.144354e-01f,
+ 1.169707e-01f, 1.195384e-01f, 1.221388e-01f, 1.247718e-01f, 1.274377e-01f, 1.301365e-01f,
+ 1.328683e-01f, 1.356333e-01f, 1.384316e-01f, 1.412633e-01f, 1.441285e-01f, 1.470273e-01f,
+ 1.499598e-01f, 1.529261e-01f, 1.559265e-01f, 1.589608e-01f, 1.620294e-01f, 1.651322e-01f,
+ 1.682694e-01f, 1.714411e-01f, 1.746474e-01f, 1.778884e-01f, 1.811642e-01f, 1.844750e-01f,
+ 1.878208e-01f, 1.912017e-01f, 1.946178e-01f, 1.980693e-01f, 2.015563e-01f, 2.050787e-01f,
+ 2.086369e-01f, 2.122308e-01f, 2.158605e-01f, 2.195262e-01f, 2.232280e-01f, 2.269659e-01f,
+ 2.307401e-01f, 2.345506e-01f, 2.383976e-01f, 2.422811e-01f, 2.462013e-01f, 2.501583e-01f,
+ 2.541521e-01f, 2.581829e-01f, 2.622507e-01f, 2.663556e-01f, 2.704978e-01f, 2.746773e-01f,
+ 2.788943e-01f, 2.831487e-01f, 2.874408e-01f, 2.917706e-01f, 2.961383e-01f, 3.005438e-01f,
+ 3.049873e-01f, 3.094689e-01f, 3.139887e-01f, 3.185468e-01f, 3.231432e-01f, 3.277781e-01f,
+ 3.324515e-01f, 3.371636e-01f, 3.419144e-01f, 3.467041e-01f, 3.515326e-01f, 3.564001e-01f,
+ 3.613068e-01f, 3.662526e-01f, 3.712377e-01f, 3.762621e-01f, 3.813260e-01f, 3.864294e-01f,
+ 3.915725e-01f, 3.967552e-01f, 4.019778e-01f, 4.072402e-01f, 4.125426e-01f, 4.178851e-01f,
+ 4.232677e-01f, 4.286905e-01f, 4.341536e-01f, 4.396572e-01f, 4.452012e-01f, 4.507858e-01f,
+ 4.564110e-01f, 4.620770e-01f, 4.677838e-01f, 4.735315e-01f, 4.793202e-01f, 4.851499e-01f,
+ 4.910209e-01f, 4.969330e-01f, 5.028865e-01f, 5.088813e-01f, 5.149177e-01f, 5.209956e-01f,
+ 5.271151e-01f, 5.332764e-01f, 5.394795e-01f, 5.457245e-01f, 5.520114e-01f, 5.583404e-01f,
+ 5.647115e-01f, 5.711249e-01f, 5.775805e-01f, 5.840784e-01f, 5.906188e-01f, 5.972018e-01f,
+ 6.038274e-01f, 6.104956e-01f, 6.172066e-01f, 6.239604e-01f, 6.307572e-01f, 6.375968e-01f,
+ 6.444797e-01f, 6.514056e-01f, 6.583748e-01f, 6.653873e-01f, 6.724432e-01f, 6.795425e-01f,
+ 6.866853e-01f, 6.938717e-01f, 7.011019e-01f, 7.083758e-01f, 7.156935e-01f, 7.230551e-01f,
+ 7.304608e-01f, 7.379104e-01f, 7.454042e-01f, 7.529422e-01f, 7.605245e-01f, 7.681512e-01f,
+ 7.758222e-01f, 7.835378e-01f, 7.912979e-01f, 7.991027e-01f, 8.069522e-01f, 8.148466e-01f,
+ 8.227857e-01f, 8.307699e-01f, 8.387990e-01f, 8.468732e-01f, 8.549926e-01f, 8.631572e-01f,
+ 8.713671e-01f, 8.796224e-01f, 8.879231e-01f, 8.962694e-01f, 9.046612e-01f, 9.130986e-01f,
+ 9.215819e-01f, 9.301109e-01f, 9.386857e-01f, 9.473065e-01f, 9.559733e-01f, 9.646863e-01f,
+ 9.734453e-01f, 9.822506e-01f, 9.911021e-01f, 1.000000e+00f};
+
+constexpr std::array<f32, 256> RGB_TO_SRGB_LUT = {
+ 0.000000e+00f, 4.984009e-02f, 8.494473e-02f, 1.107021e-01f, 1.318038e-01f, 1.500052e-01f,
+ 1.661857e-01f, 1.808585e-01f, 1.943532e-01f, 2.068957e-01f, 2.186491e-01f, 2.297351e-01f,
+ 2.402475e-01f, 2.502604e-01f, 2.598334e-01f, 2.690152e-01f, 2.778465e-01f, 2.863614e-01f,
+ 2.945889e-01f, 3.025538e-01f, 3.102778e-01f, 3.177796e-01f, 3.250757e-01f, 3.321809e-01f,
+ 3.391081e-01f, 3.458689e-01f, 3.524737e-01f, 3.589320e-01f, 3.652521e-01f, 3.714419e-01f,
+ 3.775084e-01f, 3.834581e-01f, 3.892968e-01f, 3.950301e-01f, 4.006628e-01f, 4.061998e-01f,
+ 4.116451e-01f, 4.170030e-01f, 4.222770e-01f, 4.274707e-01f, 4.325873e-01f, 4.376298e-01f,
+ 4.426010e-01f, 4.475037e-01f, 4.523403e-01f, 4.571131e-01f, 4.618246e-01f, 4.664766e-01f,
+ 4.710712e-01f, 4.756104e-01f, 4.800958e-01f, 4.845292e-01f, 4.889122e-01f, 4.932462e-01f,
+ 4.975329e-01f, 5.017734e-01f, 5.059693e-01f, 5.101216e-01f, 5.142317e-01f, 5.183006e-01f,
+ 5.223295e-01f, 5.263194e-01f, 5.302714e-01f, 5.341862e-01f, 5.380651e-01f, 5.419087e-01f,
+ 5.457181e-01f, 5.494938e-01f, 5.532369e-01f, 5.569480e-01f, 5.606278e-01f, 5.642771e-01f,
+ 5.678965e-01f, 5.714868e-01f, 5.750484e-01f, 5.785821e-01f, 5.820884e-01f, 5.855680e-01f,
+ 5.890211e-01f, 5.924487e-01f, 5.958509e-01f, 5.992285e-01f, 6.025819e-01f, 6.059114e-01f,
+ 6.092176e-01f, 6.125010e-01f, 6.157619e-01f, 6.190008e-01f, 6.222180e-01f, 6.254140e-01f,
+ 6.285890e-01f, 6.317436e-01f, 6.348780e-01f, 6.379926e-01f, 6.410878e-01f, 6.441637e-01f,
+ 6.472208e-01f, 6.502595e-01f, 6.532799e-01f, 6.562824e-01f, 6.592672e-01f, 6.622347e-01f,
+ 6.651851e-01f, 6.681187e-01f, 6.710356e-01f, 6.739363e-01f, 6.768209e-01f, 6.796897e-01f,
+ 6.825429e-01f, 6.853807e-01f, 6.882034e-01f, 6.910111e-01f, 6.938041e-01f, 6.965826e-01f,
+ 6.993468e-01f, 7.020969e-01f, 7.048331e-01f, 7.075556e-01f, 7.102645e-01f, 7.129600e-01f,
+ 7.156424e-01f, 7.183118e-01f, 7.209683e-01f, 7.236121e-01f, 7.262435e-01f, 7.288625e-01f,
+ 7.314693e-01f, 7.340640e-01f, 7.366470e-01f, 7.392181e-01f, 7.417776e-01f, 7.443256e-01f,
+ 7.468624e-01f, 7.493880e-01f, 7.519025e-01f, 7.544061e-01f, 7.568989e-01f, 7.593810e-01f,
+ 7.618526e-01f, 7.643137e-01f, 7.667645e-01f, 7.692052e-01f, 7.716358e-01f, 7.740564e-01f,
+ 7.764671e-01f, 7.788681e-01f, 7.812595e-01f, 7.836413e-01f, 7.860138e-01f, 7.883768e-01f,
+ 7.907307e-01f, 7.930754e-01f, 7.954110e-01f, 7.977377e-01f, 8.000556e-01f, 8.023647e-01f,
+ 8.046651e-01f, 8.069569e-01f, 8.092403e-01f, 8.115152e-01f, 8.137818e-01f, 8.160402e-01f,
+ 8.182903e-01f, 8.205324e-01f, 8.227665e-01f, 8.249926e-01f, 8.272109e-01f, 8.294214e-01f,
+ 8.316242e-01f, 8.338194e-01f, 8.360070e-01f, 8.381871e-01f, 8.403597e-01f, 8.425251e-01f,
+ 8.446831e-01f, 8.468339e-01f, 8.489776e-01f, 8.511142e-01f, 8.532437e-01f, 8.553662e-01f,
+ 8.574819e-01f, 8.595907e-01f, 8.616927e-01f, 8.637881e-01f, 8.658767e-01f, 8.679587e-01f,
+ 8.700342e-01f, 8.721032e-01f, 8.741657e-01f, 8.762218e-01f, 8.782716e-01f, 8.803151e-01f,
+ 8.823524e-01f, 8.843835e-01f, 8.864085e-01f, 8.884274e-01f, 8.904402e-01f, 8.924471e-01f,
+ 8.944480e-01f, 8.964431e-01f, 8.984324e-01f, 9.004158e-01f, 9.023935e-01f, 9.043654e-01f,
+ 9.063318e-01f, 9.082925e-01f, 9.102476e-01f, 9.121972e-01f, 9.141413e-01f, 9.160800e-01f,
+ 9.180133e-01f, 9.199412e-01f, 9.218637e-01f, 9.237810e-01f, 9.256931e-01f, 9.276000e-01f,
+ 9.295017e-01f, 9.313982e-01f, 9.332896e-01f, 9.351761e-01f, 9.370575e-01f, 9.389339e-01f,
+ 9.408054e-01f, 9.426719e-01f, 9.445336e-01f, 9.463905e-01f, 9.482424e-01f, 9.500897e-01f,
+ 9.519322e-01f, 9.537700e-01f, 9.556032e-01f, 9.574316e-01f, 9.592555e-01f, 9.610748e-01f,
+ 9.628896e-01f, 9.646998e-01f, 9.665055e-01f, 9.683068e-01f, 9.701037e-01f, 9.718961e-01f,
+ 9.736842e-01f, 9.754679e-01f, 9.772474e-01f, 9.790225e-01f, 9.807934e-01f, 9.825601e-01f,
+ 9.843225e-01f, 9.860808e-01f, 9.878350e-01f, 9.895850e-01f, 9.913309e-01f, 9.930727e-01f,
+ 9.948106e-01f, 9.965444e-01f, 9.982741e-01f, 1.000000e+00f};
+
+} // namespace
+
+struct R32G32B32A32_FLOATTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT};
+ static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
+};
+
+struct R32G32B32A32_SINTTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
+};
+
+struct R32G32B32A32_UINTTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
+};
+
+struct R32G32B32X32_FLOATTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT};
+ static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None};
+};
+
+struct R32G32B32X32_SINTTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None};
+};
+
+struct R32G32B32X32_UINTTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None};
+};
+
+struct R16G16B16A16_UNORMTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
+};
+
+struct R16G16B16A16_SNORMTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
+};
+
+struct R16G16B16A16_SINTTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
+};
+
+struct R16G16B16A16_UINTTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
+};
+
+struct R16G16B16A16_FLOATTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT};
+ static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A};
+};
+
+struct R32G32_FLOATTraits {
+ static constexpr size_t num_components = 2;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::FLOAT, ComponentType::FLOAT};
+ static constexpr std::array<size_t, num_components> component_sizes = {32, 32};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
+ Swizzle::G};
+};
+
+struct R32G32_SINTTraits {
+ static constexpr size_t num_components = 2;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SINT, ComponentType::SINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {32, 32};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
+ Swizzle::G};
+};
+
+struct R32G32_UINTTraits {
+ static constexpr size_t num_components = 2;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UINT, ComponentType::UINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {32, 32};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
+ Swizzle::G};
+};
+
+struct R16G16B16X16_FLOATTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT};
+ static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None};
+};
+
+struct A8R8G8B8_UNORMTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B};
+};
+
+struct A8R8G8B8_SRGBTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB};
+ static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B};
+};
+
+struct A2B10G10R10_UNORMTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {2, 10, 10, 10};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
+};
+
+struct A2B10G10R10_UINTTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {2, 10, 10, 10};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
+};
+
+struct A2R10G10B10_UNORMTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {2, 10, 10, 10};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B};
+};
+
+struct A8B8G8R8_UNORMTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
+};
+
+struct A8B8G8R8_SRGBTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB};
+ static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
+};
+
+struct A8B8G8R8_SNORMTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
+};
+
+struct A8B8G8R8_SINTTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
+};
+
+struct A8B8G8R8_UINTTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R};
+};
+
+struct R16G16_UNORMTraits {
+ static constexpr size_t num_components = 2;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UNORM, ComponentType::UNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {16, 16};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
+ Swizzle::G};
+};
+
+struct R16G16_SNORMTraits {
+ static constexpr size_t num_components = 2;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SNORM, ComponentType::SNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {16, 16};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
+ Swizzle::G};
+};
+
+struct R16G16_SINTTraits {
+ static constexpr size_t num_components = 2;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SINT, ComponentType::SINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {16, 16};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
+ Swizzle::G};
+};
+
+struct R16G16_UINTTraits {
+ static constexpr size_t num_components = 2;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UINT, ComponentType::UINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {16, 16};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
+ Swizzle::G};
+};
+
+struct R16G16_FLOATTraits {
+ static constexpr size_t num_components = 2;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::FLOAT, ComponentType::FLOAT};
+ static constexpr std::array<size_t, num_components> component_sizes = {16, 16};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
+ Swizzle::G};
+};
+
+struct B10G11R11_FLOATTraits {
+ static constexpr size_t num_components = 3;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT};
+ static constexpr std::array<size_t, num_components> component_sizes = {10, 11, 11};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::B, Swizzle::G, Swizzle::R};
+};
+
+struct R32_SINTTraits {
+ static constexpr size_t num_components = 1;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {32};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
+};
+
+struct R32_UINTTraits {
+ static constexpr size_t num_components = 1;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {32};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
+};
+
+struct R32_FLOATTraits {
+ static constexpr size_t num_components = 1;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::FLOAT};
+ static constexpr std::array<size_t, num_components> component_sizes = {32};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
+};
+
+struct X8R8G8B8_UNORMTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::None, Swizzle::R, Swizzle::G, Swizzle::B};
+};
+
+struct X8R8G8B8_SRGBTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB};
+ static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::None, Swizzle::R, Swizzle::G, Swizzle::B};
+};
+
+struct R5G6B5_UNORMTraits {
+ static constexpr size_t num_components = 3;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {5, 6, 5};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::R, Swizzle::G, Swizzle::B};
+};
+
+struct A1R5G5B5_UNORMTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {1, 5, 5, 5};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B};
+};
+
+struct R8G8_UNORMTraits {
+ static constexpr size_t num_components = 2;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UNORM, ComponentType::UNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {8, 8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
+ Swizzle::G};
+};
+
+struct R8G8_SNORMTraits {
+ static constexpr size_t num_components = 2;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SNORM, ComponentType::SNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {8, 8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
+ Swizzle::G};
+};
+
+struct R8G8_SINTTraits {
+ static constexpr size_t num_components = 2;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SINT, ComponentType::SINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {8, 8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
+ Swizzle::G};
+};
+
+struct R8G8_UINTTraits {
+ static constexpr size_t num_components = 2;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UINT, ComponentType::UINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {8, 8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R,
+ Swizzle::G};
+};
+
+struct R16_UNORMTraits {
+ static constexpr size_t num_components = 1;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {16};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
+};
+
+struct R16_SNORMTraits {
+ static constexpr size_t num_components = 1;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {16};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
+};
+
+struct R16_SINTTraits {
+ static constexpr size_t num_components = 1;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {16};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
+};
+
+struct R16_UINTTraits {
+ static constexpr size_t num_components = 1;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {16};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
+};
+
+struct R16_FLOATTraits {
+ static constexpr size_t num_components = 1;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::FLOAT};
+ static constexpr std::array<size_t, num_components> component_sizes = {16};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
+};
+
+struct R8_UNORMTraits {
+ static constexpr size_t num_components = 1;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
+};
+
+struct R8_SNORMTraits {
+ static constexpr size_t num_components = 1;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
+};
+
+struct R8_SINTTraits {
+ static constexpr size_t num_components = 1;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
+};
+
+struct R8_UINTTraits {
+ static constexpr size_t num_components = 1;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UINT};
+ static constexpr std::array<size_t, num_components> component_sizes = {8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R};
+};
+
+struct X1R5G5B5_UNORMTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {1, 5, 5, 5};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::None, Swizzle::R, Swizzle::G, Swizzle::B};
+};
+
+struct X8B8G8R8_UNORMTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM};
+ static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::None, Swizzle::B, Swizzle::G, Swizzle::R};
+};
+
+struct X8B8G8R8_SRGBTraits {
+ static constexpr size_t num_components = 4;
+ static constexpr std::array<ComponentType, num_components> component_types = {
+ ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB};
+ static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8};
+ static constexpr std::array<Swizzle, num_components> component_swizzle = {
+ Swizzle::None, Swizzle::B, Swizzle::G, Swizzle::R};
+};
+
+template <class ConverterTraits>
+class ConverterImpl : public Converter {
+private:
+ static constexpr size_t num_components = ConverterTraits::num_components;
+ static constexpr std::array<ComponentType, num_components> component_types =
+ ConverterTraits::component_types;
+ static constexpr std::array<size_t, num_components> component_sizes =
+ ConverterTraits::component_sizes;
+ static constexpr std::array<Swizzle, num_components> component_swizzle =
+ ConverterTraits::component_swizzle;
+
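+    // Rounds the summed component bits up to the next power of two and returns that size in
+    // bytes, e.g. R5G6B5 (16 bits) -> 2 bytes, B10G11R11 (32 bits) -> 4 bytes; a hypothetical
+    // 24-bit format would be padded to 4 bytes.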
+ static constexpr size_t CalculateByteSize() {
+ size_t size = 0;
+ for (const size_t component_size : component_sizes) {
+ size += component_size;
+ }
+ const size_t power = (sizeof(size_t) * 8) - std::countl_zero(size) - 1ULL;
+ const size_t base_size = 1ULL << power;
+ const size_t mask = base_size - 1ULL;
+ return ((size & mask) != 0 ? base_size << 1ULL : base_size) / 8;
+ }
+
+ static constexpr size_t total_bytes_per_pixel = CalculateByteSize();
+ static constexpr size_t total_words_per_pixel =
+ (total_bytes_per_pixel + sizeof(u32) - 1U) / sizeof(u32);
+ static constexpr size_t components_per_ir_rep = 4;
+
+ template <bool get_offsets>
+ static constexpr std::array<size_t, num_components> GetBoundWordsOffsets() {
+ std::array<size_t, num_components> result;
+ result.fill(0);
+ constexpr size_t total_bits_per_word = sizeof(u32) * 8;
+ size_t accumulated_size = 0;
+ size_t count = 0;
+ for (size_t i = 0; i < num_components; i++) {
+ if constexpr (get_offsets) {
+ result[i] = accumulated_size;
+ } else {
+ result[i] = count;
+ }
+ accumulated_size += component_sizes[i];
+ if (accumulated_size > total_bits_per_word) {
+ if constexpr (get_offsets) {
+ result[i] = 0;
+ } else {
+ result[i]++;
+ }
+ count++;
+ accumulated_size = component_sizes[i];
+ }
+ }
+ return result;
+ }
+
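+    // bound_words holds the 32-bit word each component lives in and bound_offsets its bit offset
+    // within that word, e.g. for A2B10G10R10 the words are {0, 0, 0, 0} and the offsets
+    // {0, 2, 12, 22}, while for R32G32 the words are {0, 1} with offsets {0, 0}.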
+ static constexpr std::array<size_t, num_components> bound_words = GetBoundWordsOffsets<false>();
+ static constexpr std::array<size_t, num_components> bound_offsets =
+ GetBoundWordsOffsets<true>();
+
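+    // Builds a per-component mask already shifted into position, e.g. for A2B10G10R10 the masks
+    // are {0x00000003, 0x00000FFC, 0x003FF000, 0xFFC00000}.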
+ static constexpr std::array<u32, num_components> GetComponentsMask() {
+ std::array<u32, num_components> result;
+ for (size_t i = 0; i < num_components; i++) {
+ result[i] = (((u32)~0) >> (8 * sizeof(u32) - component_sizes[i])) << bound_offsets[i];
+ }
+ return result;
+ }
+
+ static constexpr std::array<u32, num_components> component_mask = GetComponentsMask();
+
+    // Force inlining so the compiler can vectorize the conversions; since up to 4 of these calls
+    // are made per pixel, it may otherwise fail to detect the benefit of inlining.
+ template <size_t which_component>
+ FORCE_INLINE void ConvertToComponent(u32 which_word, f32& out_component) {
+ const u32 value = (which_word >> bound_offsets[which_component]) &
+ static_cast<u32>((1ULL << component_sizes[which_component]) - 1ULL);
+ const auto sign_extend = [](u32 base_value, size_t bits) {
+ const size_t shift_amount = sizeof(u32) * 8 - bits;
+ s32 shifted_value = static_cast<s32>(base_value << shift_amount);
+ return shifted_value >> shift_amount;
+ };
+ const auto force_to_fp16 = [](f32 base_value) {
+ u32 tmp = Common::BitCast<u32>(base_value);
+ constexpr size_t fp32_mantissa_bits = 23;
+ constexpr size_t fp16_mantissa_bits = 10;
+ constexpr size_t mantissa_mask =
+ ~((1ULL << (fp32_mantissa_bits - fp16_mantissa_bits)) - 1ULL);
+ tmp = tmp & static_cast<u32>(mantissa_mask);
+ // TODO: force the exponent within the range of half float. Not needed in UNORM / SNORM
+ return Common::BitCast<f32>(tmp);
+ };
+ const auto from_fp_n = [&sign_extend](u32 base_value, size_t bits, size_t mantissa) {
+ constexpr size_t fp32_mantissa_bits = 23;
+ size_t shift_towards = fp32_mantissa_bits - mantissa;
+ const u32 new_value =
+ static_cast<u32>(sign_extend(base_value, bits) << shift_towards) & (~(1U << 31));
+ return Common::BitCast<f32>(new_value);
+ };
+ const auto calculate_snorm = [&]() {
+ return static_cast<f32>(
+ static_cast<f32>(sign_extend(value, component_sizes[which_component])) /
+ static_cast<f32>((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL));
+ };
+ const auto calculate_unorm = [&]() {
+ return static_cast<f32>(
+ static_cast<f32>(value) /
+ static_cast<f32>((1ULL << (component_sizes[which_component])) - 1ULL));
+ };
+ if constexpr (component_types[which_component] == ComponentType::SNORM) {
+ out_component = calculate_snorm();
+ } else if constexpr (component_types[which_component] == ComponentType::UNORM) {
+ out_component = calculate_unorm();
+ } else if constexpr (component_types[which_component] == ComponentType::SINT) {
+ out_component = static_cast<f32>(
+ static_cast<s32>(sign_extend(value, component_sizes[which_component])));
+ } else if constexpr (component_types[which_component] == ComponentType::UINT) {
+ out_component = static_cast<f32>(
+ static_cast<s32>(sign_extend(value, component_sizes[which_component])));
+ } else if constexpr (component_types[which_component] == ComponentType::SNORM_FORCE_FP16) {
+ out_component = calculate_snorm();
+ out_component = force_to_fp16(out_component);
+ } else if constexpr (component_types[which_component] == ComponentType::UNORM_FORCE_FP16) {
+ out_component = calculate_unorm();
+ out_component = force_to_fp16(out_component);
+ } else if constexpr (component_types[which_component] == ComponentType::FLOAT) {
+ if constexpr (component_sizes[which_component] == 32) {
+ out_component = Common::BitCast<f32>(value);
+ } else if constexpr (component_sizes[which_component] == 16) {
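+                // Expand IEEE half to single precision: re-bias the exponent by +112
+                // (0x1C000 is 112 << 10, aligned with the half's exponent field) and shift sign,
+                // exponent and mantissa into place. Zero/denormal and Inf/NaN inputs are not
+                // special-cased here.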
+ static constexpr u32 sign_mask = 0x8000;
+                static constexpr u32 mantissa_mask = 0x03ff;
+ out_component = Common::BitCast<f32>(((value & sign_mask) << 16) |
+ (((value & 0x7c00) + 0x1C000) << 13) |
+ ((value & mantissa_mask) << 13));
+ } else {
+ out_component = from_fp_n(value, component_sizes[which_component],
+ component_sizes[which_component] - 5);
+ }
+ } else if constexpr (component_types[which_component] == ComponentType::SRGB) {
+ if constexpr (component_swizzle[which_component] == Swizzle::A) {
+ out_component = calculate_unorm();
+ } else if constexpr (component_sizes[which_component] == 8) {
+ out_component = SRGB_TO_RGB_LUT[value];
+ } else {
+ out_component = calculate_unorm();
+ UNIMPLEMENTED_MSG("SRGB Conversion with component sizes of {} is unimplemented",
+ component_sizes[which_component]);
+ }
+ }
+ }
+
+    // Force inlining so the compiler can vectorize the conversions; since up to 4 of these calls
+    // are made per pixel, it may otherwise fail to detect the benefit of inlining.
+ template <size_t which_component>
+ FORCE_INLINE void ConvertFromComponent(u32& which_word, f32 in_component) {
+ const auto insert_to_word = [&]<typename T>(T new_word) {
+ which_word |= (static_cast<u32>(new_word) << bound_offsets[which_component]) &
+ component_mask[which_component];
+ };
+ const auto to_fp_n = [](f32 base_value, size_t bits, size_t mantissa) {
+ constexpr size_t fp32_mantissa_bits = 23;
+ u32 tmp_value = Common::BitCast<u32>(std::max(base_value, 0.0f));
+ size_t shift_towards = fp32_mantissa_bits - mantissa;
+ return tmp_value >> shift_towards;
+ };
+ const auto calculate_unorm = [&]() {
+ return static_cast<u32>(
+ static_cast<f32>(in_component) *
+ static_cast<f32>((1ULL << (component_sizes[which_component])) - 1ULL));
+ };
+ if constexpr (component_types[which_component] == ComponentType::SNORM ||
+ component_types[which_component] == ComponentType::SNORM_FORCE_FP16) {
+ s32 tmp_word = static_cast<s32>(
+ static_cast<f32>(in_component) *
+ static_cast<f32>((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL));
+ insert_to_word(tmp_word);
+
+ } else if constexpr (component_types[which_component] == ComponentType::UNORM ||
+ component_types[which_component] == ComponentType::UNORM_FORCE_FP16) {
+ u32 tmp_word = calculate_unorm();
+ insert_to_word(tmp_word);
+ } else if constexpr (component_types[which_component] == ComponentType::SINT) {
+ s32 tmp_word = static_cast<s32>(in_component);
+ insert_to_word(tmp_word);
+ } else if constexpr (component_types[which_component] == ComponentType::UINT) {
+ u32 tmp_word = static_cast<u32>(in_component);
+ insert_to_word(tmp_word);
+ } else if constexpr (component_types[which_component] == ComponentType::FLOAT) {
+ if constexpr (component_sizes[which_component] == 32) {
+ u32 tmp_word = Common::BitCast<u32>(in_component);
+ insert_to_word(tmp_word);
+ } else if constexpr (component_sizes[which_component] == 16) {
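+                // Pack single precision back into IEEE half: re-bias the exponent by -112
+                // (0x38000000 is 112 << 23, aligned with the float's exponent field) and truncate
+                // the mantissa to 10 bits; out-of-range values are not clamped here.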
+ static constexpr u32 sign_mask = 0x8000;
+ static constexpr u32 mantissa_mask = 0x03ff;
+ static constexpr u32 exponent_mask = 0x7c00;
+ const u32 tmp_word = Common::BitCast<u32>(in_component);
+ const u32 half = ((tmp_word >> 16) & sign_mask) |
+ ((((tmp_word & 0x7f800000) - 0x38000000) >> 13) & exponent_mask) |
+ ((tmp_word >> 13) & mantissa_mask);
+ insert_to_word(half);
+ } else {
+ insert_to_word(to_fp_n(in_component, component_sizes[which_component],
+ component_sizes[which_component] - 5));
+ }
+ } else if constexpr (component_types[which_component] == ComponentType::SRGB) {
+ if constexpr (component_swizzle[which_component] != Swizzle::A) {
+ if constexpr (component_sizes[which_component] == 8) {
+ const u32 index = calculate_unorm();
+ in_component = RGB_TO_SRGB_LUT[index];
+ } else {
+ UNIMPLEMENTED_MSG("SRGB Conversion with component sizes of {} is unimplemented",
+ component_sizes[which_component]);
+ }
+ }
+ const u32 tmp_word = calculate_unorm();
+ insert_to_word(tmp_word);
+ }
+ }
+
+public:
+ void ConvertTo(std::span<const u8> input, std::span<f32> output) override {
+ const size_t num_pixels = output.size() / components_per_ir_rep;
+ for (size_t pixel = 0; pixel < num_pixels; pixel++) {
+ std::array<u32, total_words_per_pixel> words{};
+
+ std::memcpy(words.data(), &input[pixel * total_bytes_per_pixel], total_bytes_per_pixel);
+ std::span<f32> new_components(&output[pixel * components_per_ir_rep],
+ components_per_ir_rep);
+ if constexpr (component_swizzle[0] != Swizzle::None) {
+ ConvertToComponent<0>(words[bound_words[0]],
+ new_components[static_cast<size_t>(component_swizzle[0])]);
+ } else {
+ new_components[0] = 0.0f;
+ }
+ if constexpr (num_components >= 2) {
+ if constexpr (component_swizzle[1] != Swizzle::None) {
+ ConvertToComponent<1>(
+ words[bound_words[1]],
+ new_components[static_cast<size_t>(component_swizzle[1])]);
+ } else {
+ new_components[1] = 0.0f;
+ }
+ } else {
+ new_components[1] = 0.0f;
+ }
+ if constexpr (num_components >= 3) {
+ if constexpr (component_swizzle[2] != Swizzle::None) {
+ ConvertToComponent<2>(
+ words[bound_words[2]],
+ new_components[static_cast<size_t>(component_swizzle[2])]);
+ } else {
+ new_components[2] = 0.0f;
+ }
+ } else {
+ new_components[2] = 0.0f;
+ }
+ if constexpr (num_components >= 4) {
+ if constexpr (component_swizzle[3] != Swizzle::None) {
+ ConvertToComponent<3>(
+ words[bound_words[3]],
+ new_components[static_cast<size_t>(component_swizzle[3])]);
+ } else {
+ new_components[3] = 0.0f;
+ }
+ } else {
+ new_components[3] = 0.0f;
+ }
+ }
+ }
+
+ void ConvertFrom(std::span<const f32> input, std::span<u8> output) override {
+ const size_t num_pixels = output.size() / total_bytes_per_pixel;
+ for (size_t pixel = 0; pixel < num_pixels; pixel++) {
+ std::span<const f32> old_components(&input[pixel * components_per_ir_rep],
+ components_per_ir_rep);
+ std::array<u32, total_words_per_pixel> words{};
+ if constexpr (component_swizzle[0] != Swizzle::None) {
+ ConvertFromComponent<0>(words[bound_words[0]],
+ old_components[static_cast<size_t>(component_swizzle[0])]);
+ }
+ if constexpr (num_components >= 2) {
+ if constexpr (component_swizzle[1] != Swizzle::None) {
+ ConvertFromComponent<1>(
+ words[bound_words[1]],
+ old_components[static_cast<size_t>(component_swizzle[1])]);
+ }
+ }
+ if constexpr (num_components >= 3) {
+ if constexpr (component_swizzle[2] != Swizzle::None) {
+ ConvertFromComponent<2>(
+ words[bound_words[2]],
+ old_components[static_cast<size_t>(component_swizzle[2])]);
+ }
+ }
+ if constexpr (num_components >= 4) {
+ if constexpr (component_swizzle[3] != Swizzle::None) {
+ ConvertFromComponent<3>(
+ words[bound_words[3]],
+ old_components[static_cast<size_t>(component_swizzle[3])]);
+ }
+ }
+ std::memcpy(&output[pixel * total_bytes_per_pixel], words.data(),
+ total_bytes_per_pixel);
+ }
+ }
+
+ ConverterImpl() = default;
+ ~ConverterImpl() override = default;
+};
+
+struct ConverterFactory::ConverterFactoryImpl {
+ std::unordered_map<RenderTargetFormat, std::unique_ptr<Converter>> converters_cache;
+};
+
+ConverterFactory::ConverterFactory() {
+ impl = std::make_unique<ConverterFactoryImpl>();
+}
+
+ConverterFactory::~ConverterFactory() = default;
+
+Converter* ConverterFactory::GetFormatConverter(RenderTargetFormat format) {
+ auto it = impl->converters_cache.find(format);
+ if (it == impl->converters_cache.end()) [[unlikely]] {
+ return BuildConverter(format);
+ }
+ return it->second.get();
+}
+
+class NullConverter : public Converter {
+public:
+ void ConvertTo([[maybe_unused]] std::span<const u8> input, std::span<f32> output) override {
+ std::fill(output.begin(), output.end(), 0.0f);
+ }
+ void ConvertFrom([[maybe_unused]] std::span<const f32> input, std::span<u8> output) override {
+ const u8 fill_value = 0U;
+ std::fill(output.begin(), output.end(), fill_value);
+ }
+ NullConverter() = default;
+ ~NullConverter() = default;
+};
+
+Converter* ConverterFactory::BuildConverter(RenderTargetFormat format) {
+ switch (format) {
+ case RenderTargetFormat::R32G32B32A32_FLOAT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R32G32B32A32_FLOATTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R32G32B32A32_SINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R32G32B32A32_SINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R32G32B32A32_UINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R32G32B32A32_UINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R32G32B32X32_FLOAT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R32G32B32X32_FLOATTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R32G32B32X32_SINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R32G32B32X32_SINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R32G32B32X32_UINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R32G32B32X32_UINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R16G16B16A16_UNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_UNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R16G16B16A16_SNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_SNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R16G16B16A16_SINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_SINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R16G16B16A16_UINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_UINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R16G16B16A16_FLOAT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_FLOATTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R32G32_FLOAT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R32G32_FLOATTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R32G32_SINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R32G32_SINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R32G32_UINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R32G32_UINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R16G16B16X16_FLOAT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R16G16B16X16_FLOATTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::A8R8G8B8_UNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<A8R8G8B8_UNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::A8R8G8B8_SRGB:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<A8R8G8B8_SRGBTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::A2B10G10R10_UNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<A2B10G10R10_UNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::A2B10G10R10_UINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<A2B10G10R10_UINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::A2R10G10B10_UNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<A2R10G10B10_UNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::A8B8G8R8_UNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_UNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::A8B8G8R8_SRGB:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SRGBTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::A8B8G8R8_SNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::A8B8G8R8_SINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::A8B8G8R8_UINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_UINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R16G16_UNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R16G16_UNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R16G16_SNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R16G16_SNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R16G16_SINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R16G16_SINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R16G16_UINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R16G16_UINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R16G16_FLOAT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R16G16_FLOATTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::B10G11R11_FLOAT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<B10G11R11_FLOATTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R32_SINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R32_SINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R32_UINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R32_UINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R32_FLOAT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R32_FLOATTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::X8R8G8B8_UNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<X8R8G8B8_UNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::X8R8G8B8_SRGB:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<X8R8G8B8_SRGBTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R5G6B5_UNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R5G6B5_UNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::A1R5G5B5_UNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<A1R5G5B5_UNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R8G8_UNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R8G8_UNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R8G8_SNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R8G8_SNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R8G8_SINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R8G8_SINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R8G8_UINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R8G8_UINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R16_UNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R16_UNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R16_SNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R16_SNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R16_SINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R16_SINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R16_UINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R16_UINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R16_FLOAT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R16_FLOATTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R8_UNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R8_UNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R8_SNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R8_SNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R8_SINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R8_SINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::R8_UINT:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<R8_UINTTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::X1R5G5B5_UNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<X1R5G5B5_UNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::X8B8G8R8_UNORM:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<X8B8G8R8_UNORMTraits>>())
+ .first->second.get();
+ break;
+ case RenderTargetFormat::X8B8G8R8_SRGB:
+ return impl->converters_cache
+ .emplace(format, std::make_unique<ConverterImpl<X8B8G8R8_SRGBTraits>>())
+ .first->second.get();
+ break;
+ default: {
+        UNIMPLEMENTED_MSG("Converter for format {} is not implemented", format);
+ return impl->converters_cache.emplace(format, std::make_unique<NullConverter>())
+ .first->second.get();
+ }
+ }
+}
+
+} // namespace Tegra::Engines::Blitter
diff --git a/src/video_core/engines/sw_blitter/converter.h b/src/video_core/engines/sw_blitter/converter.h
new file mode 100644
index 000000000..f9bdc516e
--- /dev/null
+++ b/src/video_core/engines/sw_blitter/converter.h
@@ -0,0 +1,36 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <memory>
+#include <span>
+
+#include "common/common_types.h"
+
+#include "video_core/gpu.h"
+
+namespace Tegra::Engines::Blitter {
+
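+// Usage sketch (the format value is just an example):
+//   ConverterFactory factory;
+//   Converter* conv = factory.GetFormatConverter(RenderTargetFormat::A8B8G8R8_UNORM);
+//   conv->ConvertTo(packed_pixels, rgba_f32);   // packed bytes -> 4 f32 components per pixel
+//   conv->ConvertFrom(rgba_f32, packed_pixels); // 4 f32 components per pixel -> packed bytes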
+class Converter {
+public:
+ virtual void ConvertTo(std::span<const u8> input, std::span<f32> output) = 0;
+ virtual void ConvertFrom(std::span<const f32> input, std::span<u8> output) = 0;
+ virtual ~Converter() = default;
+};
+
+class ConverterFactory {
+public:
+ ConverterFactory();
+ ~ConverterFactory();
+
+ Converter* GetFormatConverter(RenderTargetFormat format);
+
+private:
+ Converter* BuildConverter(RenderTargetFormat format);
+
+ struct ConverterFactoryImpl;
+ std::unique_ptr<ConverterFactoryImpl> impl;
+};
+
+} // namespace Tegra::Engines::Blitter
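A rough usage sketch of the interface declared here, assuming that ConvertTo unpacks render-target texels into one f32 per component, that ConvertFrom performs the inverse packing, and that GetFormatConverter builds and caches a converter on the first request for a format; none of these contracts are spelled out in the header, so treat the snippet as illustrative only:

// Hypothetical caller; relies on the includes already pulled in by converter.h.
using namespace Tegra::Engines::Blitter;

void RoundTrip(ConverterFactory& factory, Tegra::RenderTargetFormat format,
               std::span<const u8> src_texels, std::span<f32> scratch,
               std::span<u8> dst_texels) {
    Converter* const converter = factory.GetFormatConverter(format);
    converter->ConvertTo(src_texels, scratch);   // packed bytes -> float components
    converter->ConvertFrom(scratch, dst_texels); // float components -> packed bytes
}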
diff --git a/src/video_core/engines/sw_blitter/generate_converters.py b/src/video_core/engines/sw_blitter/generate_converters.py
new file mode 100644
index 000000000..f641564f7
--- /dev/null
+++ b/src/video_core/engines/sw_blitter/generate_converters.py
@@ -0,0 +1,136 @@
+# SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import re
+
+class Format:
+ def __init__(self, string_value):
+ self.name = string_value
+ tmp = string_value.split('_')
+ self.component_type = tmp[1]
+ component_data = re.findall(r"\w\d+", tmp[0])
+ self.num_components = len(component_data)
+ sizes = []
+ swizzle = []
+ for data in component_data:
+ swizzle.append(data[0])
+ sizes.append(int(data[1:]))
+ self.sizes = sizes
+ self.swizzle = swizzle
+
+ def build_component_type_array(self):
+ result = "{ "
+ b = False
+ for i in range(0, self.num_components):
+ if b:
+ result += ", "
+ b = True
+ result += "ComponentType::" + self.component_type
+ result += " }"
+ return result
+
+ def build_component_sizes_array(self):
+ result = "{ "
+ b = False
+ for i in range(0, self.num_components):
+ if b:
+ result += ", "
+ b = True
+ result += str(self.sizes[i])
+ result += " }"
+ return result
+
+ def build_component_swizzle_array(self):
+ result = "{ "
+ b = False
+ for i in range(0, self.num_components):
+ if b:
+ result += ", "
+ b = True
+ swizzle = self.swizzle[i]
+ if swizzle == "X":
+ swizzle = "None"
+ result += "Swizzle::" + swizzle
+ result += " }"
+ return result
+
+ def print_declaration(self):
+ print("struct " + self.name + "Traits {")
+ print(" static constexpr size_t num_components = " + str(self.num_components) + ";")
+ print(" static constexpr std::array<ComponentType, num_components> component_types = " + self.build_component_type_array() + ";")
+ print(" static constexpr std::array<size_t, num_components> component_sizes = " + self.build_component_sizes_array() + ";")
+ print(" static constexpr std::array<Swizzle, num_components> component_swizzle = " + self.build_component_swizzle_array() + ";")
+ print("};\n")
+
+ def print_case(self):
+ print("case RenderTargetFormat::" + self.name + ":")
+ print(" return impl->converters_cache")
+ print(" .emplace(format, std::make_unique<ConverterImpl<" + self.name + "Traits>>())")
+ print(" .first->second.get();")
+ print(" break;")
+
+txt = """
+R32G32B32A32_FLOAT
+R32G32B32A32_SINT
+R32G32B32A32_UINT
+R32G32B32X32_FLOAT
+R32G32B32X32_SINT
+R32G32B32X32_UINT
+R16G16B16A16_UNORM
+R16G16B16A16_SNORM
+R16G16B16A16_SINT
+R16G16B16A16_UINT
+R16G16B16A16_FLOAT
+R32G32_FLOAT
+R32G32_SINT
+R32G32_UINT
+R16G16B16X16_FLOAT
+A8R8G8B8_UNORM
+A8R8G8B8_SRGB
+A2B10G10R10_UNORM
+A2B10G10R10_UINT
+A2R10G10B10_UNORM
+A8B8G8R8_UNORM
+A8B8G8R8_SRGB
+A8B8G8R8_SNORM
+A8B8G8R8_SINT
+A8B8G8R8_UINT
+R16G16_UNORM
+R16G16_SNORM
+R16G16_SINT
+R16G16_UINT
+R16G16_FLOAT
+B10G11R11_FLOAT
+R32_SINT
+R32_UINT
+R32_FLOAT
+X8R8G8B8_UNORM
+X8R8G8B8_SRGB
+R5G6B5_UNORM
+A1R5G5B5_UNORM
+R8G8_UNORM
+R8G8_SNORM
+R8G8_SINT
+R8G8_UINT
+R16_UNORM
+R16_SNORM
+R16_SINT
+R16_UINT
+R16_FLOAT
+R8_UNORM
+R8_SNORM
+R8_SINT
+R8_UINT
+X1R5G5B5_UNORM
+X8B8G8R8_UNORM
+X8B8G8R8_SRGB
+"""
+
+x = txt.split()
+formats = [Format(name) for name in x]
+for format in formats:
+ format.print_declaration()
+
+for format in formats:
+ format.print_case()
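As a worked example of what the generator emits: "A2B10G10R10_UNORM" splits into component type UNORM, swizzle letters A, B, G, R and bit sizes 2, 10, 10, 10, so print_declaration() produces the struct below (ComponentType and Swizzle are assumed to be the enums converter.cpp compiles this output against):

struct A2B10G10R10_UNORMTraits {
    static constexpr size_t num_components = 4;
    static constexpr std::array<ComponentType, num_components> component_types = { ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM };
    static constexpr std::array<size_t, num_components> component_sizes = { 2, 10, 10, 10 };
    static constexpr std::array<Swizzle, num_components> component_swizzle = { Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R };
};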
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index d0709dc69..8a871593a 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -27,12 +27,12 @@ struct CommandList;
// TODO: Implement the commented ones
enum class RenderTargetFormat : u32 {
NONE = 0x0,
- R32B32G32A32_FLOAT = 0xC0,
+ R32G32B32A32_FLOAT = 0xC0,
R32G32B32A32_SINT = 0xC1,
R32G32B32A32_UINT = 0xC2,
- // R32G32B32X32_FLOAT = 0xC3,
- // R32G32B32X32_SINT = 0xC4,
- // R32G32B32X32_UINT = 0xC5,
+ R32G32B32X32_FLOAT = 0xC3,
+ R32G32B32X32_SINT = 0xC4,
+ R32G32B32X32_UINT = 0xC5,
R16G16B16A16_UNORM = 0xC6,
R16G16B16A16_SNORM = 0xC7,
R16G16B16A16_SINT = 0xC8,
@@ -56,13 +56,13 @@ enum class RenderTargetFormat : u32 {
R16G16_SINT = 0xDC,
R16G16_UINT = 0xDD,
R16G16_FLOAT = 0xDE,
- // A2R10G10B10_UNORM = 0xDF,
+ A2R10G10B10_UNORM = 0xDF,
B10G11R11_FLOAT = 0xE0,
R32_SINT = 0xE3,
R32_UINT = 0xE4,
R32_FLOAT = 0xE5,
- // X8R8G8B8_UNORM = 0xE6,
- // X8R8G8B8_SRGB = 0xE7,
+ X8R8G8B8_UNORM = 0xE6,
+ X8R8G8B8_SRGB = 0xE7,
R5G6B5_UNORM = 0xE8,
A1R5G5B5_UNORM = 0xE9,
R8G8_UNORM = 0xEA,
@@ -79,11 +79,11 @@ enum class RenderTargetFormat : u32 {
R8_SINT = 0xF5,
R8_UINT = 0xF6,
- /*
- A8_UNORM = 0xF7,
+ // A8_UNORM = 0xF7,
X1R5G5B5_UNORM = 0xF8,
X8B8G8R8_UNORM = 0xF9,
X8B8G8R8_SRGB = 0xFA,
+ /*
Z1R5G5B5_UNORM = 0xFB,
O1R5G5B5_UNORM = 0xFC,
Z8R8G8B8_UNORM = 0xFD,
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 1bd477011..164a5252a 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -125,7 +125,7 @@ u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
state.queue.Push(CommandDataContainer(std::move(command_data), fence, block));
if (block) {
- state.cv.wait(lk, thread.get_stop_token(), [this, fence] {
+ Common::CondvarWait(state.cv, lk, thread.get_stop_token(), [this, fence] {
return fence <= state.signaled_fence.load(std::memory_order_relaxed);
});
}
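Common::CondvarWait itself is not part of this diff; judging from the call site it waits on a predicate while also waking when the stop token is triggered, for standard libraries that lack the stop_token-aware wait overloads. One possible shape for such a helper, sketched as an assumption rather than the actual contents of common/polyfill_thread.h:

#include <condition_variable>
#include <mutex>
#include <stop_token>

namespace Common {

// A stop_callback pokes the condition variable so the wait also returns when stop is
// requested; the predicate handed to wait() re-checks both conditions before blocking.
template <typename Condvar, typename Lock, typename Pred>
void CondvarWait(Condvar& cv, std::unique_lock<Lock>& lk, std::stop_token token, Pred&& pred) {
    std::stop_callback callback(token, [&cv] { cv.notify_all(); });
    cv.wait(lk, [&] { return token.stop_requested() || pred(); });
}

} // namespace Common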
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 64628d3e3..c71a419c7 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -10,6 +10,7 @@
#include <thread>
#include <variant>
+#include "common/polyfill_thread.h"
#include "common/threadsafe_queue.h"
#include "video_core/framebuffer_config.h"
diff --git a/src/video_core/host1x/syncpoint_manager.cpp b/src/video_core/host1x/syncpoint_manager.cpp
index a44fc83d3..8f23ce527 100644
--- a/src/video_core/host1x/syncpoint_manager.cpp
+++ b/src/video_core/host1x/syncpoint_manager.cpp
@@ -34,7 +34,7 @@ SyncpointManager::ActionHandle SyncpointManager::RegisterAction(
}
void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_storage,
- ActionHandle& handle) {
+ const ActionHandle& handle) {
std::unique_lock lk(guard);
// We want to ensure the iterator still exists prior to erasing it
@@ -49,11 +49,11 @@ void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_stor
}
}
-void SyncpointManager::DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle) {
+void SyncpointManager::DeregisterGuestAction(u32 syncpoint_id, const ActionHandle& handle) {
DeregisterAction(guest_action_storage[syncpoint_id], handle);
}
-void SyncpointManager::DeregisterHostAction(u32 syncpoint_id, ActionHandle& handle) {
+void SyncpointManager::DeregisterHostAction(u32 syncpoint_id, const ActionHandle& handle) {
DeregisterAction(host_action_storage[syncpoint_id], handle);
}
diff --git a/src/video_core/host1x/syncpoint_manager.h b/src/video_core/host1x/syncpoint_manager.h
index 50a264e23..847ed20c8 100644
--- a/src/video_core/host1x/syncpoint_manager.h
+++ b/src/video_core/host1x/syncpoint_manager.h
@@ -36,21 +36,19 @@ public:
template <typename Func>
ActionHandle RegisterGuestAction(u32 syncpoint_id, u32 expected_value, Func&& action) {
- std::function<void()> func(action);
return RegisterAction(syncpoints_guest[syncpoint_id], guest_action_storage[syncpoint_id],
- expected_value, std::move(func));
+ expected_value, std::move(action));
}
template <typename Func>
ActionHandle RegisterHostAction(u32 syncpoint_id, u32 expected_value, Func&& action) {
- std::function<void()> func(action);
return RegisterAction(syncpoints_host[syncpoint_id], host_action_storage[syncpoint_id],
- expected_value, std::move(func));
+ expected_value, std::move(action));
}
- void DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle);
+ void DeregisterGuestAction(u32 syncpoint_id, const ActionHandle& handle);
- void DeregisterHostAction(u32 syncpoint_id, ActionHandle& handle);
+ void DeregisterHostAction(u32 syncpoint_id, const ActionHandle& handle);
void IncrementGuest(u32 syncpoint_id);
@@ -76,7 +74,7 @@ private:
std::list<RegisteredAction>& action_storage, u32 expected_value,
std::function<void()>&& action);
- void DeregisterAction(std::list<RegisteredAction>& action_storage, ActionHandle& handle);
+ void DeregisterAction(std::list<RegisteredAction>& action_storage, const ActionHandle& handle);
void Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, u32 expected_value);
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index f896591bf..0f3262edb 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -126,11 +126,25 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
}
}
-constexpr std::array<std::pair<u64, HLEFunction>, 4> hle_funcs{{
+// Multi-layer Clear
+void HLE_EAD26C3E2109B06B(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
+ ASSERT(parameters.size() == 1);
+
+ const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]};
+ const u32 rt_index = clear_params.RT;
+ const u32 num_layers = maxwell3d.regs.rt[rt_index].depth;
+ ASSERT(clear_params.layer == 0);
+
+ maxwell3d.regs.clear_surface.raw = clear_params.raw;
+ maxwell3d.ProcessClearBuffers(num_layers);
+}
+
+constexpr std::array<std::pair<u64, HLEFunction>, 5> hle_funcs{{
{0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
{0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD},
{0x0217920100488FF7, &HLE_0217920100488FF7},
{0x3F5E74B9C9A50164, &HLE_3F5E74B9C9A50164},
+ {0xEAD26C3E2109B06B, &HLE_EAD26C3E2109B06B},
}};
class HLEMacroImpl final : public CachedMacro {
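The new HLE entry reads parameters[0] as the raw CLEAR_SURFACE value, takes the layer count from the depth of the selected render target, and asks Maxwell3D to clear that many layers in one go. ProcessClearBuffers is not shown in this excerpt; presumably it ends in the rasterizer's new Clear(u32 layer_count) entry point, roughly like the hypothetical forwarding below, which the Vulkan backend then feeds into VkClearRect::layerCount:

// Illustrative only; the real body lives in maxwell_3d.cpp and is not part of this hunk.
void Maxwell3D::ProcessClearBuffers(u32 layer_count) {
    rasterizer->Clear(layer_count);
}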
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index c0d32c112..0d63495a9 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -201,6 +201,7 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) {
}
default:
UNIMPLEMENTED_MSG("Unimplemented macro operation {}", opcode.operation.Value());
+ break;
}
// An instruction with the Exit flag will not actually
@@ -297,6 +298,7 @@ void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 r
break;
default:
UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation);
+ break;
}
}
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index 25c1ce798..7347cbd88 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -652,6 +652,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
break;
default:
UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation);
+ break;
}
}
diff --git a/src/video_core/precompiled_headers.h b/src/video_core/precompiled_headers.h
new file mode 100644
index 000000000..aabae730b
--- /dev/null
+++ b/src/video_core/precompiled_headers.h
@@ -0,0 +1,6 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/common_precompiled_headers.h"
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 1cbfef090..b6907463c 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,8 +6,8 @@
#include <functional>
#include <optional>
#include <span>
-#include <stop_token>
#include "common/common_types.h"
+#include "common/polyfill_thread.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
@@ -43,7 +43,7 @@ public:
virtual void Draw(bool is_indexed, u32 instance_count) = 0;
/// Clear the current framebuffer
- virtual void Clear() = 0;
+ virtual void Clear(u32 layer_count) = 0;
/// Dispatches a compute shader invocation
virtual void DispatchCompute() = 0;
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp
new file mode 100644
index 000000000..9734d84bc
--- /dev/null
+++ b/src/video_core/renderer_null/null_rasterizer.cpp
@@ -0,0 +1,90 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "video_core/host1x/host1x.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_null/null_rasterizer.h"
+
+namespace Null {
+
+AccelerateDMA::AccelerateDMA() = default;
+
+bool AccelerateDMA::BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) {
+ return true;
+}
+bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) {
+ return true;
+}
+
+RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu)
+ : RasterizerAccelerated(cpu_memory_), m_gpu{gpu} {}
+RasterizerNull::~RasterizerNull() = default;
+
+void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {}
+void RasterizerNull::Clear(u32 layer_count) {}
+void RasterizerNull::DispatchCompute() {}
+void RasterizerNull::ResetCounter(VideoCore::QueryType type) {}
+void RasterizerNull::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
+ std::optional<u64> timestamp) {
+ if (!gpu_memory) {
+ return;
+ }
+
+ gpu_memory->Write(gpu_addr, u64{0});
+ if (timestamp) {
+ gpu_memory->Write(gpu_addr + 8, *timestamp);
+ }
+}
+void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+ u32 size) {}
+void RasterizerNull::DisableGraphicsUniformBuffer(size_t stage, u32 index) {}
+void RasterizerNull::FlushAll() {}
+void RasterizerNull::FlushRegion(VAddr addr, u64 size) {}
+bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size) {
+ return false;
+}
+void RasterizerNull::InvalidateRegion(VAddr addr, u64 size) {}
+void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {}
+void RasterizerNull::InvalidateGPUCache() {}
+void RasterizerNull::UnmapMemory(VAddr addr, u64 size) {}
+void RasterizerNull::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {}
+void RasterizerNull::SignalFence(std::function<void()>&& func) {
+ func();
+}
+void RasterizerNull::SyncOperation(std::function<void()>&& func) {
+ func();
+}
+void RasterizerNull::SignalSyncPoint(u32 value) {
+ auto& syncpoint_manager = m_gpu.Host1x().GetSyncpointManager();
+ syncpoint_manager.IncrementGuest(value);
+ syncpoint_manager.IncrementHost(value);
+}
+void RasterizerNull::SignalReference() {}
+void RasterizerNull::ReleaseFences() {}
+void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size) {}
+void RasterizerNull::WaitForIdle() {}
+void RasterizerNull::FragmentBarrier() {}
+void RasterizerNull::TiledCacheBarrier() {}
+void RasterizerNull::FlushCommands() {}
+void RasterizerNull::TickFrame() {}
+Tegra::Engines::AccelerateDMAInterface& RasterizerNull::AccessAccelerateDMA() {
+ return m_accelerate_dma;
+}
+bool RasterizerNull::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Surface& dst,
+ const Tegra::Engines::Fermi2D::Config& copy_config) {
+ return true;
+}
+void RasterizerNull::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+ std::span<const u8> memory) {}
+bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config,
+ VAddr framebuffer_addr, u32 pixel_stride) {
+ return true;
+}
+void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback) {}
+void RasterizerNull::InitializeChannel(Tegra::Control::ChannelState& channel) {}
+void RasterizerNull::BindChannel(Tegra::Control::ChannelState& channel) {}
+void RasterizerNull::ReleaseChannel(s32 channel_id) {}
+
+} // namespace Null
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h
new file mode 100644
index 000000000..ecf77ba42
--- /dev/null
+++ b/src/video_core/renderer_null/null_rasterizer.h
@@ -0,0 +1,78 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/common_types.h"
+#include "video_core/control/channel_state_cache.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/rasterizer_accelerated.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace Core {
+class System;
+}
+
+namespace Null {
+
+class RasterizerNull;
+
+class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface {
+public:
+ explicit AccelerateDMA();
+ bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override;
+ bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override;
+};
+
+class RasterizerNull final : public VideoCore::RasterizerAccelerated,
+ protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
+public:
+ explicit RasterizerNull(Core::Memory::Memory& cpu_memory, Tegra::GPU& gpu);
+ ~RasterizerNull() override;
+
+ void Draw(bool is_indexed, u32 instance_count) override;
+ void Clear(u32 layer_count) override;
+ void DispatchCompute() override;
+ void ResetCounter(VideoCore::QueryType type) override;
+ void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
+ void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
+ void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
+ void FlushAll() override;
+ void FlushRegion(VAddr addr, u64 size) override;
+ bool MustFlushRegion(VAddr addr, u64 size) override;
+ void InvalidateRegion(VAddr addr, u64 size) override;
+ void OnCPUWrite(VAddr addr, u64 size) override;
+ void InvalidateGPUCache() override;
+ void UnmapMemory(VAddr addr, u64 size) override;
+ void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
+ void SignalFence(std::function<void()>&& func) override;
+ void SyncOperation(std::function<void()>&& func) override;
+ void SignalSyncPoint(u32 value) override;
+ void SignalReference() override;
+ void ReleaseFences() override;
+ void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+ void WaitForIdle() override;
+ void FragmentBarrier() override;
+ void TiledCacheBarrier() override;
+ void FlushCommands() override;
+ void TickFrame() override;
+ bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Surface& dst,
+ const Tegra::Engines::Fermi2D::Config& copy_config) override;
+ Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
+ void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+ std::span<const u8> memory) override;
+ bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
+ u32 pixel_stride) override;
+ void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback) override;
+ void InitializeChannel(Tegra::Control::ChannelState& channel) override;
+ void BindChannel(Tegra::Control::ChannelState& channel) override;
+ void ReleaseChannel(s32 channel_id) override;
+
+private:
+ Tegra::GPU& m_gpu;
+ AccelerateDMA m_accelerate_dma;
+};
+
+} // namespace Null
diff --git a/src/video_core/renderer_null/renderer_null.cpp b/src/video_core/renderer_null/renderer_null.cpp
new file mode 100644
index 000000000..e2a189b63
--- /dev/null
+++ b/src/video_core/renderer_null/renderer_null.cpp
@@ -0,0 +1,24 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "video_core/renderer_null/renderer_null.h"
+
+namespace Null {
+
+RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory,
+ Tegra::GPU& gpu,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context_)
+ : RendererBase(emu_window, std::move(context_)), m_gpu(gpu), m_rasterizer(cpu_memory, gpu) {}
+
+RendererNull::~RendererNull() = default;
+
+void RendererNull::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+ if (!framebuffer) {
+ return;
+ }
+
+ m_gpu.RendererFrameEndNotify();
+ render_window.OnFrameDisplayed();
+}
+
+} // namespace Null
diff --git a/src/video_core/renderer_null/renderer_null.h b/src/video_core/renderer_null/renderer_null.h
new file mode 100644
index 000000000..967ff5645
--- /dev/null
+++ b/src/video_core/renderer_null/renderer_null.h
@@ -0,0 +1,36 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "video_core/renderer_base.h"
+#include "video_core/renderer_null/null_rasterizer.h"
+
+namespace Null {
+
+class RendererNull final : public VideoCore::RendererBase {
+public:
+ explicit RendererNull(Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory,
+ Tegra::GPU& gpu,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context);
+ ~RendererNull() override;
+
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
+
+ VideoCore::RasterizerInterface* ReadRasterizer() override {
+ return &m_rasterizer;
+ }
+
+ [[nodiscard]] std::string GetDeviceVendor() const override {
+ return "NULL";
+ }
+
+private:
+ Tegra::GPU& m_gpu;
+ RasterizerNull m_rasterizer;
+};
+
+} // namespace Null
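The null backend stubs every rasterizer entry point but keeps guest-visible synchronization moving: SignalSyncPoint bumps both the guest and host syncpoints, fences and sync operations run their callbacks immediately, and queries write back zero. Wiring it up only needs the objects named in the constructor; hypothetically:

// Hypothetical construction, for illustration; the actual backend selection happens in
// video_core's renderer factory, which is not part of this excerpt.
std::unique_ptr<VideoCore::RendererBase> MakeNullRenderer(
    Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, Tegra::GPU& gpu,
    std::unique_ptr<Core::Frontend::GraphicsContext> context) {
    return std::make_unique<Null::RendererNull>(emu_window, cpu_memory, gpu, std::move(context));
}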
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 1663e277d..e2e3dac34 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -14,6 +14,7 @@
#include "common/literals.h"
#include "common/logging/log.h"
+#include "common/polyfill_ranges.h"
#include "common/settings.h"
#include "shader_recompiler/stage.h"
#include "video_core/renderer_opengl/gl_device.h"
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index d05a5f60b..f71a316b6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -136,7 +136,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_load
shader_cache.LoadDiskResources(title_id, stop_loading, callback);
}
-void RasterizerOpenGL::Clear() {
+void RasterizerOpenGL::Clear(u32 layer_count) {
MICROPROFILE_SCOPE(OpenGL_Clears);
if (!maxwell3d->ShouldExecute()) {
return;
@@ -222,8 +222,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
pipeline->SetEngine(maxwell3d, gpu_memory);
pipeline->Configure(is_indexed);
- BindInlineIndexBuffer();
-
SyncState();
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d->regs.draw.topology);
@@ -468,8 +466,7 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf
const Tegra::Engines::Fermi2D::Config& copy_config) {
MICROPROFILE_SCOPE(OpenGL_Blits);
std::scoped_lock lock{texture_cache.mutex};
- texture_cache.BlitImage(dst, src, copy_config);
- return true;
+ return texture_cache.BlitImage(dst, src, copy_config);
}
Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() {
@@ -1140,16 +1137,6 @@ void RasterizerOpenGL::ReleaseChannel(s32 channel_id) {
query_cache.EraseChannel(channel_id);
}
-void RasterizerOpenGL::BindInlineIndexBuffer() {
- if (maxwell3d->inline_index_draw_indexes.empty()) {
- return;
- }
- const auto data_count = static_cast<u32>(maxwell3d->inline_index_draw_indexes.size());
- auto buffer = Buffer(buffer_cache_runtime, *this, 0, data_count);
- buffer.ImmediateUpload(0, maxwell3d->inline_index_draw_indexes);
- buffer_cache_runtime.BindIndexBuffer(buffer, 0, data_count);
-}
-
AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {}
bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 793e0d608..fc183c3ca 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -69,7 +69,7 @@ public:
~RasterizerOpenGL() override;
void Draw(bool is_indexed, u32 instance_count) override;
- void Clear() override;
+ void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
@@ -199,8 +199,6 @@ private:
/// End a transform feedback
void EndTransformFeedback();
- void BindInlineIndexBuffer();
-
Tegra::GPU& gpu;
const Device& device;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 3fe04a115..a38060100 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -39,6 +39,7 @@ using Shader::Backend::GLASM::EmitGLASM;
using Shader::Backend::GLSL::EmitGLSL;
using Shader::Backend::SPIRV::EmitSPIRV;
using Shader::Maxwell::ConvertLegacyToGeneric;
+using Shader::Maxwell::GenerateGeometryPassthrough;
using Shader::Maxwell::MergeDualVertexPrograms;
using Shader::Maxwell::TranslateProgram;
using VideoCommon::ComputeEnvironment;
@@ -56,6 +57,17 @@ auto MakeSpan(Container& container) {
return std::span(container.data(), container.size());
}
+Shader::OutputTopology MaxwellToOutputTopology(Maxwell::PrimitiveTopology topology) {
+ switch (topology) {
+ case Maxwell::PrimitiveTopology::Points:
+ return Shader::OutputTopology::PointList;
+ case Maxwell::PrimitiveTopology::LineStrip:
+ return Shader::OutputTopology::LineStrip;
+ default:
+ return Shader::OutputTopology::TriangleStrip;
+ }
+}
+
Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
const Shader::IR::Program& program,
const Shader::IR::Program* previous_program,
@@ -220,6 +232,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.support_int64 = device.HasShaderInt64(),
.needs_demote_reorder = device.IsAmd(),
.support_snorm_render_buffer = false,
+ .support_viewport_index_layer = device.HasVertexViewportLayer(),
} {
if (use_asynchronous_shaders) {
workers = CreateWorkers();
@@ -314,9 +327,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
const auto& regs{maxwell3d->regs};
graphics_key.raw = 0;
graphics_key.early_z.Assign(regs.mandated_early_z != 0 ? 1 : 0);
- graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
- ? regs.draw.topology.Value()
- : Maxwell::PrimitiveTopology{});
+ graphics_key.gs_input_topology.Assign(regs.draw.topology.Value());
graphics_key.tessellation_primitive.Assign(regs.tessellation.params.domain_type.Value());
graphics_key.tessellation_spacing.Assign(regs.tessellation.params.spacing.Value());
graphics_key.tessellation_clockwise.Assign(
@@ -415,7 +426,19 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
const bool uses_vertex_a{key.unique_hashes[0] != 0};
const bool uses_vertex_b{key.unique_hashes[1] != 0};
+
+ // Layer passthrough generation for devices without GL_ARB_shader_viewport_layer_array
+ Shader::IR::Program* layer_source_program{};
+
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ const bool is_emulated_stage = layer_source_program != nullptr &&
+ index == static_cast<u32>(Maxwell::ShaderType::Geometry);
+ if (key.unique_hashes[index] == 0 && is_emulated_stage) {
+ auto topology = MaxwellToOutputTopology(key.gs_input_topology);
+ programs[index] = GenerateGeometryPassthrough(pools.inst, pools.block, host_info,
+ *layer_source_program, topology);
+ continue;
+ }
if (key.unique_hashes[index] == 0) {
continue;
}
@@ -443,6 +466,10 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
Shader::NumDescriptors(program_vb.info.storage_buffers_descriptors);
programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
}
+
+ if (programs[index].info.requires_layer_emulation) {
+ layer_source_program = &programs[index];
+ }
}
const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()};
const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit};
@@ -456,7 +483,9 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
const bool use_glasm{device.UseAssemblyShaders()};
const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0;
for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) {
- if (key.unique_hashes[index] == 0) {
+ const bool is_emulated_stage = layer_source_program != nullptr &&
+ index == static_cast<u32>(Maxwell::ShaderType::Geometry);
+ if (key.unique_hashes[index] == 0 && !is_emulated_stage) {
continue;
}
UNIMPLEMENTED_IF(index == 0);
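Taken together, the hunks above implement layer emulation in two passes: the first translation pass remembers the last program whose info.requires_layer_emulation is set, and the second pass no longer skips a geometry slot with a zero hash when an emulated stage is needed, filling it via GenerateGeometryPassthrough with the topology derived from the pipeline key. Condensed into a sketch (GeometryIndex and the surrounding scaffolding are illustrative names, not identifiers from the file):

// Not verbatim code from gl_shader_cache.cpp; a compressed restatement of the logic.
constexpr size_t GeometryIndex = static_cast<size_t>(Maxwell::ShaderType::Geometry);

Shader::IR::Program* layer_source{}; // set during the first pass when a stage
                                     // reports info.requires_layer_emulation
if (layer_source != nullptr && key.unique_hashes[GeometryIndex] == 0) {
    programs[GeometryIndex] = GenerateGeometryPassthrough(
        pools.inst, pools.block, host_info, *layer_source,
        MaxwellToOutputTopology(key.gs_input_topology));
}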
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 89f181fe3..53ffea904 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -4,7 +4,6 @@
#pragma once
#include <filesystem>
-#include <stop_token>
#include <unordered_map>
#include "common/common_types.h"
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 99cd11d1e..9f7ce7414 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -891,6 +891,7 @@ void Image::CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t b
break;
default:
ASSERT(false);
+ break;
}
}
@@ -927,6 +928,7 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b
break;
default:
ASSERT(false);
+ break;
}
// Compressed formats don't have a pixel format or type
const bool is_compressed = gl_format == GL_NONE;
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index e14f9b2db..ef1190e1f 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -28,6 +28,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
{GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
{GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
+ {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2R10G10B10_UNORM
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // A5B5G5R1_UNORM
{GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 8bd5eba7e..f29462f7c 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -340,6 +340,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
// UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
// static_cast<u32>(framebuffer.pixel_format));
+ break;
}
texture.resource.Release();
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 98cc26679..f3f08b42c 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -7,6 +7,7 @@
#include "common/bit_cast.h"
#include "common/cityhash.h"
#include "common/common_types.h"
+#include "common/polyfill_ranges.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 5c156087b..3e03c5cd6 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -58,7 +58,7 @@ VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wra
case Tegra::Texture::WrapMode::Border:
return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
case Tegra::Texture::WrapMode::Clamp:
- if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
+ if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY) {
// Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, we can hack this
// by sending an invalid enumeration.
return static_cast<VkSamplerAddressMode>(0xcafe);
@@ -125,6 +125,7 @@ struct FormatTuple {
{VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM
{VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
{VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT
+ {VK_FORMAT_A2R10G10B10_UNORM_PACK32, Attachable | Storage}, // A2R10G10B10_UNORM
{VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle)
{VK_FORMAT_R5G5B5A1_UNORM_PACK16}, // A5B5G5R1_UNORM (specially swizzled)
{VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM
@@ -149,7 +150,7 @@ struct FormatTuple {
{VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UFLOAT
{VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SFLOAT
{VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4_UNORM
- {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // B8G8R8A8_UNORM
+ {VK_FORMAT_B8G8R8A8_UNORM, Attachable | Storage}, // B8G8R8A8_UNORM
{VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // R32G32B32A32_FLOAT
{VK_FORMAT_R32G32B32A32_SINT, Attachable | Storage}, // R32G32B32A32_SINT
{VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // R32G32_FLOAT
@@ -159,7 +160,7 @@ struct FormatTuple {
{VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM
{VK_FORMAT_R16_SNORM, Attachable | Storage}, // R16_SNORM
{VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT
- {VK_FORMAT_UNDEFINED}, // R16_SINT
+ {VK_FORMAT_R16_SINT, Attachable | Storage}, // R16_SINT
{VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM
{VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT
{VK_FORMAT_R16G16_UINT, Attachable | Storage}, // R16G16_UINT
@@ -183,7 +184,7 @@ struct FormatTuple {
{VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB
{VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB
{VK_FORMAT_BC7_SRGB_BLOCK}, // BC7_SRGB
- {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // A4B4G4R4_UNORM
+ {VK_FORMAT_R4G4B4A4_UNORM_PACK16}, // A4B4G4R4_UNORM
{VK_FORMAT_R4G4_UNORM_PACK8}, // G4R4_UNORM
{VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB
{VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
index b7843e995..28b893e25 100644
--- a/src/video_core/renderer_vulkan/pipeline_helper.h
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -44,17 +44,17 @@ public:
});
}
- vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout,
- VkPipelineLayout pipeline_layout,
- bool use_push_descriptor) const {
+ vk::DescriptorUpdateTemplate CreateTemplate(VkDescriptorSetLayout descriptor_set_layout,
+ VkPipelineLayout pipeline_layout,
+ bool use_push_descriptor) const {
if (entries.empty()) {
return nullptr;
}
const VkDescriptorUpdateTemplateType type =
use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR
- : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
- return device->GetLogical().CreateDescriptorUpdateTemplateKHR({
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
+ : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
+ return device->GetLogical().CreateDescriptorUpdateTemplate({
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
@@ -129,7 +129,7 @@ private:
const Device* device{};
bool is_compute{};
boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
- boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
+ boost::container::small_vector<VkDescriptorUpdateTemplateEntry, 32> entries;
u32 binding{};
u32 num_descriptors{};
size_t offset{};
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index d8131232a..18be54729 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -45,14 +45,14 @@ std::string GetDriverVersion(const Device& device) {
// https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314
const u32 version = device.GetDriverVersion();
- if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
+ if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY) {
const u32 major = (version >> 22) & 0x3ff;
const u32 minor = (version >> 14) & 0x0ff;
const u32 secondary = (version >> 6) & 0x0ff;
const u32 tertiary = version & 0x003f;
return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary);
}
- if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) {
+ if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
const u32 major = version >> 14;
const u32 minor = version & 0x3fff;
return fmt::format("{}.{}", major, minor);
@@ -172,6 +172,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
}
void RendererVulkan::Report() const {
+ using namespace Common::Literals;
const std::string vendor_name{device.GetVendorName()};
const std::string model_name{device.GetModelName()};
const std::string driver_version = GetDriverVersion(device);
@@ -181,9 +182,12 @@ void RendererVulkan::Report() const {
const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions());
+ const auto available_vram = static_cast<f64>(device.GetDeviceLocalMemory()) / f64{1_GiB};
+
LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
LOG_INFO(Render_Vulkan, "Device: {}", model_name);
LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version);
+ LOG_INFO(Render_Vulkan, "Available VRAM: {:.2f} GiB", available_vram);
static constexpr auto field = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(field, "GPU_Vendor", vendor_name);
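As a worked example of the packing handled above: the NVIDIA scheme stores major, minor, secondary and tertiary in 10, 8, 8 and 6 bits respectively, so driver 516.94.0.0 is reported as 0x81178000, and the new VRAM line divides the byte count by 1_GiB (2^30), so a device exposing 6442450944 bytes logs "Available VRAM: 6.00 GiB".

#include <cstdint>

// Sanity check of the NVIDIA bit layout used in GetDriverVersion().
constexpr std::uint32_t packed = (516u << 22) | (94u << 14) | (0u << 6) | 0u; // 516.94.0.0
static_assert(packed == 0x81178000u);
static_assert(((packed >> 22) & 0x3ffu) == 516u);
static_assert(((packed >> 14) & 0x0ffu) == 94u);
static_assert(((packed >> 6) & 0x0ffu) == 0u);
static_assert((packed & 0x003fu) == 0u);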
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 89426121f..6e5abade4 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -10,6 +10,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/math_util.h"
+#include "common/polyfill_ranges.h"
#include "common/settings.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 241d7573e..2c00979d7 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -93,7 +93,7 @@ constexpr DescriptorBankInfo ASTC_BANK_INFO{
.score = 2,
};
-constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
+constexpr VkDescriptorUpdateTemplateEntry INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 2,
@@ -102,7 +102,7 @@ constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMP
.stride = sizeof(DescriptorUpdateEntry),
};
-constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
+constexpr std::array<VkDescriptorUpdateTemplateEntry, ASTC_NUM_BINDINGS>
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{
{
.dstBinding = ASTC_BINDING_INPUT_BUFFER,
@@ -134,7 +134,7 @@ struct AstcPushConstants {
ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
- vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
+ vk::Span<VkDescriptorUpdateTemplateEntry> templates,
const DescriptorBankInfo& bank_info,
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code)
: device{device_} {
@@ -155,13 +155,13 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
.pPushConstantRanges = push_constants.data(),
});
if (!templates.empty()) {
- descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
+ descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplate({
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.descriptorUpdateEntryCount = templates.size(),
.pDescriptorUpdateEntries = templates.data(),
- .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
+ .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET,
.descriptorSetLayout = *descriptor_set_layout,
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.pipelineLayout = *layout,
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index dcc691a8e..5d32e3caf 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -29,14 +29,14 @@ class ComputePass {
public:
explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
- vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
+ vk::Span<VkDescriptorUpdateTemplateEntry> templates,
const DescriptorBankInfo& bank_info,
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
~ComputePass();
protected:
const Device& device;
- vk::DescriptorUpdateTemplateKHR descriptor_template;
+ vk::DescriptorUpdateTemplate descriptor_template;
vk::PipelineLayout layout;
vk::Pipeline pipeline;
vk::DescriptorSetLayout descriptor_set_layout;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 7906e11a8..04a3a861e 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -53,7 +53,7 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript
.requiredSubgroupSize = GuestWarpSize,
};
VkPipelineCreateFlags flags{};
- if (device.IsKhrPipelineEexecutablePropertiesEnabled()) {
+ if (device.IsKhrPipelineExecutablePropertiesEnabled()) {
flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
}
pipeline = device.GetLogical().CreateComputePipeline({
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 9879735fe..d70837fc5 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -55,7 +55,7 @@ private:
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
vk::PipelineLayout pipeline_layout;
- vk::DescriptorUpdateTemplateKHR descriptor_update_template;
+ vk::DescriptorUpdateTemplate descriptor_update_template;
vk::Pipeline pipeline;
std::condition_variable build_condvar;
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index c7196b64e..b5ae6443c 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -7,6 +7,7 @@
#include <vector>
#include "common/common_types.h"
+#include "common/polyfill_ranges.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp
index dd450169e..33daa8c1c 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.cpp
+++ b/src/video_core/renderer_vulkan/vk_fsr.cpp
@@ -5,6 +5,7 @@
#include "common/bit_cast.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
+#include "common/settings.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h"
@@ -227,7 +228,10 @@ VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView imag
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline);
- FsrRcasCon(push_constants.data(), 0.25f);
+ const float sharpening =
+ static_cast<float>(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f;
+
+ FsrRcasCon(push_constants.data(), sharpening);
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
{
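Since the RCAS constant is now just fsr_sharpening_slider divided by 100, a slider value of 25 reproduces the previously hard-coded 0.25f handed to FsrRcasCon; any other position v maps linearly to v / 100.0f.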
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index ef75c126c..e77a57a4a 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -830,7 +830,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
*/
}
VkPipelineCreateFlags flags{};
- if (device.IsKhrPipelineEexecutablePropertiesEnabled()) {
+ if (device.IsKhrPipelineExecutablePropertiesEnabled()) {
flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
}
pipeline = device.GetLogical().CreateGraphicsPipeline({
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 6bf577d25..1ed2967be 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -151,7 +151,7 @@ private:
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
vk::PipelineLayout pipeline_layout;
- vk::DescriptorUpdateTemplateKHR descriptor_update_template;
+ vk::DescriptorUpdateTemplate descriptor_update_template;
vk::Pipeline pipeline;
std::condition_variable build_condvar;
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index 4e81d3d28..8aa07ef9d 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -11,10 +11,10 @@
namespace Vulkan {
MasterSemaphore::MasterSemaphore(const Device& device) {
- static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
+ static constexpr VkSemaphoreTypeCreateInfo semaphore_type_ci{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
.pNext = nullptr,
- .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR,
+ .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
.initialValue = 0,
};
static constexpr VkSemaphoreCreateInfo semaphore_ci{
@@ -28,7 +28,7 @@ MasterSemaphore::MasterSemaphore(const Device& device) {
return;
}
// Validation layers have a bug where they fail to track resource usage when using timeline
- // semaphores and synchronizing with GetSemaphoreCounterValueKHR. To workaround this issue, have
+    // semaphores and synchronizing with GetSemaphoreCounterValue. To work around this issue, have
// a separate thread waiting for each timeline semaphore value.
debug_thread = std::jthread([this](std::stop_token stop_token) {
u64 counter = 0;
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 362ed579a..689f02ea5 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -7,6 +7,7 @@
#include <thread>
#include "common/common_types.h"
+#include "common/polyfill_thread.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index d4b0a542a..38a6b7488 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -46,6 +46,7 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache);
namespace {
using Shader::Backend::SPIRV::EmitSPIRV;
using Shader::Maxwell::ConvertLegacyToGeneric;
+using Shader::Maxwell::GenerateGeometryPassthrough;
using Shader::Maxwell::MergeDualVertexPrograms;
using Shader::Maxwell::TranslateProgram;
using VideoCommon::ComputeEnvironment;
@@ -53,13 +54,24 @@ using VideoCommon::FileEnvironment;
using VideoCommon::GenericEnvironment;
using VideoCommon::GraphicsEnvironment;
-constexpr u32 CACHE_VERSION = 7;
+constexpr u32 CACHE_VERSION = 8;
template <typename Container>
auto MakeSpan(Container& container) {
return std::span(container.data(), container.size());
}
+Shader::OutputTopology MaxwellToOutputTopology(Maxwell::PrimitiveTopology topology) {
+ switch (topology) {
+ case Maxwell::PrimitiveTopology::Points:
+ return Shader::OutputTopology::PointList;
+ case Maxwell::PrimitiveTopology::LineStrip:
+ return Shader::OutputTopology::LineStrip;
+ default:
+ return Shader::OutputTopology::TriangleStrip;
+ }
+}
+
Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) {
switch (comparison) {
case Maxwell::ComparisonOp::Never_D3D:
@@ -275,9 +287,9 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
serialization_thread(1, "VkPipelineSerialization") {
const auto& float_control{device.FloatControlProperties()};
- const VkDriverIdKHR driver_id{device.GetDriverID()};
+ const VkDriverId driver_id{device.GetDriverID()};
profile = Shader::Profile{
- .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U,
+ .supported_spirv = device.SupportedSpirvVersion(),
.unified_descriptor_binding = true,
.support_descriptor_aliasing = true,
.support_int8 = device.IsInt8Supported(),
@@ -285,10 +297,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.support_int64 = device.IsShaderInt64Supported(),
.support_vertex_instance_id = false,
.support_float_controls = true,
- .support_separate_denorm_behavior = float_control.denormBehaviorIndependence ==
- VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
+ .support_separate_denorm_behavior =
+ float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.support_separate_rounding_mode =
- float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
+ float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
.support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
.support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
@@ -315,18 +327,19 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.lower_left_origin_mode = false,
.need_declared_frag_colors = false,
- .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
+ .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS,
.has_broken_unsigned_image_offsets = false,
.has_broken_signed_operations = false,
- .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR,
+ .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY,
.ignore_nan_fp_comparisons = false,
};
host_info = Shader::HostTranslateInfo{
.support_float16 = device.IsFloat16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
- .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR ||
- driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR,
+ .needs_demote_reorder =
+ driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE,
.support_snorm_render_buffer = true,
+ .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
};
}
@@ -395,7 +408,7 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
std::unique_ptr<PipelineStatistics> statistics;
} state;
- if (device.IsKhrPipelineEexecutablePropertiesEnabled()) {
+ if (device.IsKhrPipelineExecutablePropertiesEnabled()) {
state.statistics = std::make_unique<PipelineStatistics>(device);
}
const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
@@ -509,7 +522,19 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
const bool uses_vertex_a{key.unique_hashes[0] != 0};
const bool uses_vertex_b{key.unique_hashes[1] != 0};
+
+ // Layer passthrough generation for devices without VK_EXT_shader_viewport_index_layer
+ Shader::IR::Program* layer_source_program{};
+
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ const bool is_emulated_stage = layer_source_program != nullptr &&
+ index == static_cast<u32>(Maxwell::ShaderType::Geometry);
+ if (key.unique_hashes[index] == 0 && is_emulated_stage) {
+ auto topology = MaxwellToOutputTopology(key.state.topology);
+ programs[index] = GenerateGeometryPassthrough(pools.inst, pools.block, host_info,
+ *layer_source_program, topology);
+ continue;
+ }
if (key.unique_hashes[index] == 0) {
continue;
}
@@ -530,6 +555,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
}
+
+ if (programs[index].info.requires_layer_emulation) {
+ layer_source_program = &programs[index];
+ }
}
std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;
@@ -538,7 +567,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
Shader::Backend::Bindings binding;
for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram;
++index) {
- if (key.unique_hashes[index] == 0) {
+ const bool is_emulated_stage = layer_source_program != nullptr &&
+ index == static_cast<u32>(Maxwell::ShaderType::Geometry);
+ if (key.unique_hashes[index] == 0 && !is_emulated_stage) {
continue;
}
UNIMPLEMENTED_IF(index == 0);
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 4b15c0f85..929c8ece6 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -98,7 +98,7 @@ HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> depend
query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} {
const vk::Device* logical = &cache.GetDevice().GetLogical();
cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
- logical->ResetQueryPoolEXT(query.first, query.second, 1);
+ logical->ResetQueryPool(query.first, query.second, 1);
cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT);
});
}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f69c0c50f..d8ad8815c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -191,8 +191,6 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
pipeline->SetEngine(maxwell3d, gpu_memory);
pipeline->Configure(is_indexed);
- BindInlineIndexBuffer();
-
BeginTransformFeedback();
UpdateDynamicStates();
@@ -213,7 +211,7 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
EndTransformFeedback();
}
-void RasterizerVulkan::Clear() {
+void RasterizerVulkan::Clear(u32 layer_count) {
MICROPROFILE_SCOPE(Vulkan_Clearing);
if (!maxwell3d->ShouldExecute()) {
@@ -256,7 +254,7 @@ void RasterizerVulkan::Clear() {
.rect = regs.clear_control.use_scissor ? GetScissorState(regs, 0, up_scale, down_shift)
: default_scissor,
.baseArrayLayer = regs.clear_surface.layer,
- .layerCount = 1,
+ .layerCount = layer_count,
};
if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) {
return;
@@ -544,8 +542,7 @@ bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
std::scoped_lock lock{texture_cache.mutex};
- texture_cache.BlitImage(dst, src, copy_config);
- return true;
+ return texture_cache.BlitImage(dst, src, copy_config);
}
Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() {
@@ -1029,17 +1026,4 @@ void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
query_cache.EraseChannel(channel_id);
}
-void RasterizerVulkan::BindInlineIndexBuffer() {
- if (maxwell3d->inline_index_draw_indexes.empty()) {
- return;
- }
- const auto data_count = static_cast<u32>(maxwell3d->inline_index_draw_indexes.size());
- auto buffer = buffer_cache_runtime.UploadStagingBuffer(data_count);
- std::memcpy(buffer.mapped_span.data(), maxwell3d->inline_index_draw_indexes.data(), data_count);
- buffer_cache_runtime.BindIndexBuffer(
- maxwell3d->regs.draw.topology, maxwell3d->regs.index_buffer.format,
- maxwell3d->regs.index_buffer.first, maxwell3d->regs.index_buffer.count, buffer.buffer,
- static_cast<u32>(buffer.offset), data_count);
-}
-
} // namespace Vulkan
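
Editor's note: Clear() now receives the attachment's layer count and forwards it to VkClearRect, so layered render targets are cleared in one call instead of only layer 0. A sketch of that propagation, with placeholder clear values:

    #include <vulkan/vulkan.h>

    void ClearColorLayers(VkCommandBuffer cmdbuf, VkRect2D scissor, uint32_t base_layer,
                          uint32_t layer_count) {
        const VkClearAttachment attachment{
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .colorAttachment = 0,
            .clearValue = {.color = {.float32 = {0.0f, 0.0f, 0.0f, 1.0f}}},
        };
        const VkClearRect rect{
            .rect = scissor,
            .baseArrayLayer = base_layer,
            .layerCount = layer_count,  // previously hard-coded to 1
        };
        vkCmdClearAttachments(cmdbuf, 1, &attachment, 1, &rect);
    }
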
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index b0bc306f5..ee483cfd9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -65,7 +65,7 @@ public:
~RasterizerVulkan() override;
void Draw(bool is_indexed, u32 instance_count) override;
- void Clear() override;
+ void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
@@ -141,8 +141,6 @@ private:
void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
- void BindInlineIndexBuffer();
-
Tegra::GPU& gpu;
ScreenInfo& screen_info;
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
index dc21b7e69..91ad4bf57 100644
--- a/src/video_core/renderer_vulkan/vk_render_pass_cache.h
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
@@ -12,7 +12,7 @@
namespace Vulkan {
struct RenderPassKey {
- auto operator<=>(const RenderPassKey&) const noexcept = default;
+ bool operator==(const RenderPassKey&) const noexcept = default;
std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
VideoCore::Surface::PixelFormat depth_format;
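
Editor's note: RenderPassKey is used as a hashed lookup key, so defaulted equality is all the cache needs; ordering buys nothing here. A small sketch of the pattern, with illustrative members:

    #include <cstddef>
    #include <functional>
    #include <unordered_map>

    struct Key {
        bool operator==(const Key&) const noexcept = default;
        int format = 0;
        int samples = 1;
    };

    struct KeyHash {
        std::size_t operator()(const Key& k) const noexcept {
            return std::hash<int>{}(k.format) ^ (std::hash<int>{}(k.samples) << 1);
        }
    };

    using RenderPassMap = std::unordered_map<Key, int, KeyHash>;
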
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 7934f2a51..c2e53a5d5 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -145,7 +145,7 @@ void Scheduler::WorkerThread(std::stop_token stop_token) {
if (work_queue.empty()) {
wait_cv.notify_all();
}
- work_cv.wait(lock, stop_token, [this] { return !work_queue.empty(); });
+ Common::CondvarWait(work_cv, lock, stop_token, [&] { return !work_queue.empty(); });
if (stop_token.stop_requested()) {
continue;
}
@@ -194,8 +194,8 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
};
- const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
- .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
+ const VkTimelineSemaphoreSubmitInfo timeline_si{
+ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.pNext = nullptr,
.waitSemaphoreValueCount = num_wait_semaphores,
.pWaitSemaphoreValues = wait_values.data(),
@@ -221,6 +221,7 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
[[fallthrough]];
default:
vk::Check(result);
+ break;
}
});
chunk->MarkSubmit();
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 929216749..3858c506c 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -12,6 +12,7 @@
#include "common/alignment.h"
#include "common/common_types.h"
+#include "common/polyfill_thread.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 706d9ba74..d7be417f5 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -7,6 +7,7 @@
#include <vector>
#include "common/logging/log.h"
+#include "common/polyfill_ranges.h"
#include "common/settings.h"
#include "core/core.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 853b80d8a..a65bbeb1c 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -108,6 +108,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
break;
default:
ASSERT_MSG(false, "Invalid surface type");
+ break;
}
}
if (info.storage) {
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index a4391202d..f3cc4c70b 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -12,6 +12,7 @@
#include <vector>
#include "common/common_types.h"
+#include "common/polyfill_ranges.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/shader_environment.h"
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index f24f320b6..958810747 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -15,6 +15,7 @@
#include "common/fs/fs.h"
#include "common/fs/path_util.h"
#include "common/logging/log.h"
+#include "common/polyfill_ranges.h"
#include "shader_recompiler/environment.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/memory_manager.h"
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
index bb55b029f..1342fab1e 100644
--- a/src/video_core/shader_environment.h
+++ b/src/video_core/shader_environment.h
@@ -10,12 +10,12 @@
#include <memory>
#include <optional>
#include <span>
-#include <stop_token>
#include <type_traits>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
+#include "common/polyfill_thread.h"
#include "common/unique_function.h"
#include "shader_recompiler/environment.h"
#include "video_core/engines/maxwell_3d.h"
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 6bd133d10..1a76d4178 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -93,11 +93,14 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
switch (format) {
- case Tegra::RenderTargetFormat::R32B32G32A32_FLOAT:
+ case Tegra::RenderTargetFormat::R32G32B32A32_FLOAT:
+ case Tegra::RenderTargetFormat::R32G32B32X32_FLOAT:
return PixelFormat::R32G32B32A32_FLOAT;
case Tegra::RenderTargetFormat::R32G32B32A32_SINT:
+ case Tegra::RenderTargetFormat::R32G32B32X32_SINT:
return PixelFormat::R32G32B32A32_SINT;
case Tegra::RenderTargetFormat::R32G32B32A32_UINT:
+ case Tegra::RenderTargetFormat::R32G32B32X32_UINT:
return PixelFormat::R32G32B32A32_UINT;
case Tegra::RenderTargetFormat::R16G16B16A16_UNORM:
return PixelFormat::R16G16B16A16_UNORM;
@@ -118,16 +121,22 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
case Tegra::RenderTargetFormat::R16G16B16X16_FLOAT:
return PixelFormat::R16G16B16X16_FLOAT;
case Tegra::RenderTargetFormat::A8R8G8B8_UNORM:
+ case Tegra::RenderTargetFormat::X8R8G8B8_UNORM:
return PixelFormat::B8G8R8A8_UNORM;
case Tegra::RenderTargetFormat::A8R8G8B8_SRGB:
+ case Tegra::RenderTargetFormat::X8R8G8B8_SRGB:
return PixelFormat::B8G8R8A8_SRGB;
case Tegra::RenderTargetFormat::A2B10G10R10_UNORM:
return PixelFormat::A2B10G10R10_UNORM;
case Tegra::RenderTargetFormat::A2B10G10R10_UINT:
return PixelFormat::A2B10G10R10_UINT;
+ case Tegra::RenderTargetFormat::A2R10G10B10_UNORM:
+ return PixelFormat::A2R10G10B10_UNORM;
case Tegra::RenderTargetFormat::A8B8G8R8_UNORM:
+ case Tegra::RenderTargetFormat::X8B8G8R8_UNORM:
return PixelFormat::A8B8G8R8_UNORM;
case Tegra::RenderTargetFormat::A8B8G8R8_SRGB:
+ case Tegra::RenderTargetFormat::X8B8G8R8_SRGB:
return PixelFormat::A8B8G8R8_SRGB;
case Tegra::RenderTargetFormat::A8B8G8R8_SNORM:
return PixelFormat::A8B8G8R8_SNORM;
@@ -156,6 +165,7 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
case Tegra::RenderTargetFormat::R5G6B5_UNORM:
return PixelFormat::R5G6B5_UNORM;
case Tegra::RenderTargetFormat::A1R5G5B5_UNORM:
+ case Tegra::RenderTargetFormat::X1R5G5B5_UNORM:
return PixelFormat::A1R5G5B5_UNORM;
case Tegra::RenderTargetFormat::R8G8_UNORM:
return PixelFormat::R8G8_UNORM;
@@ -204,23 +214,16 @@ PixelFormat PixelFormatFromGPUPixelFormat(Service::android::PixelFormat format)
}
SurfaceType GetFormatType(PixelFormat pixel_format) {
- if (static_cast<std::size_t>(pixel_format) <
- static_cast<std::size_t>(PixelFormat::MaxColorFormat)) {
+ if (pixel_format < PixelFormat::MaxColorFormat) {
return SurfaceType::ColorTexture;
}
-
- if (static_cast<std::size_t>(pixel_format) <
- static_cast<std::size_t>(PixelFormat::MaxDepthFormat)) {
+ if (pixel_format < PixelFormat::MaxDepthFormat) {
return SurfaceType::Depth;
}
-
- if (static_cast<std::size_t>(pixel_format) <
- static_cast<std::size_t>(PixelFormat::MaxStencilFormat)) {
+ if (pixel_format < PixelFormat::MaxStencilFormat) {
return SurfaceType::Stencil;
}
-
- if (static_cast<std::size_t>(pixel_format) <
- static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) {
+ if (pixel_format < PixelFormat::MaxDepthStencilFormat) {
return SurfaceType::DepthStencil;
}
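
Editor's note: the simplified comparisons in GetFormatType rely on PixelFormat enumerators being grouped, with each Max*Format sentinel closing its group, so a plain relational compare on the scoped enum classifies the format. A compressed sketch of that layout assumption:

    // Illustrative layout only; the real enum lists every format per group.
    enum class PixelFormatSketch {
        A8B8G8R8_UNORM,        // ...color formats...
        MaxColorFormat,
        D32_FLOAT,             // ...depth formats...
        MaxDepthFormat,
        S8_UINT,               // ...stencil formats...
        MaxStencilFormat,
        D32_FLOAT_S8_UINT,     // ...depth-stencil formats...
        MaxDepthStencilFormat,
    };

    constexpr bool IsColor(PixelFormatSketch f) {
        return f < PixelFormatSketch::MaxColorFormat;  // compares enumerator order
    }
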
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 57ca7f597..44b79af20 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -23,6 +23,7 @@ enum class PixelFormat {
A1R5G5B5_UNORM,
A2B10G10R10_UNORM,
A2B10G10R10_UINT,
+ A2R10G10B10_UNORM,
A1B5G5R5_UNORM,
A5B5G5R1_UNORM,
R8_UNORM,
@@ -159,6 +160,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
1, // A1R5G5B5_UNORM
1, // A2B10G10R10_UNORM
1, // A2B10G10R10_UINT
+ 1, // A2R10G10B10_UNORM
1, // A1B5G5R5_UNORM
1, // A5B5G5R1_UNORM
1, // R8_UNORM
@@ -264,6 +266,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
1, // A1R5G5B5_UNORM
1, // A2B10G10R10_UNORM
1, // A2B10G10R10_UINT
+ 1, // A2R10G10B10_UNORM
1, // A1B5G5R5_UNORM
1, // A5B5G5R1_UNORM
1, // R8_UNORM
@@ -369,6 +372,7 @@ constexpr std::array<u8, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
16, // A1R5G5B5_UNORM
32, // A2B10G10R10_UNORM
32, // A2B10G10R10_UINT
+ 32, // A2R10G10B10_UNORM
16, // A1B5G5R5_UNORM
16, // A5B5G5R1_UNORM
8, // R8_UNORM
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
index ee4f2d406..418890126 100644
--- a/src/video_core/texture_cache/formatter.cpp
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -4,6 +4,7 @@
#include <algorithm>
#include <string>
+#include "common/polyfill_ranges.h"
#include "video_core/texture_cache/formatter.h"
#include "video_core/texture_cache/image_base.h"
#include "video_core/texture_cache/image_info.h"
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
index acc854715..f1f0a057b 100644
--- a/src/video_core/texture_cache/formatter.h
+++ b/src/video_core/texture_cache/formatter.h
@@ -35,6 +35,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
return "A2B10G10R10_UNORM";
case PixelFormat::A2B10G10R10_UINT:
return "A2B10G10R10_UINT";
+ case PixelFormat::A2R10G10B10_UNORM:
+ return "A2R10G10B10_UNORM";
case PixelFormat::A1B5G5R5_UNORM:
return "A1B5G5R5_UNORM";
case PixelFormat::A5B5G5R1_UNORM:
diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h
index 1efbd6507..0829d773a 100644
--- a/src/video_core/texture_cache/render_targets.h
+++ b/src/video_core/texture_cache/render_targets.h
@@ -13,7 +13,7 @@ namespace VideoCommon {
/// Framebuffer properties used to lookup a framebuffer
struct RenderTargets {
- constexpr auto operator<=>(const RenderTargets&) const noexcept = default;
+ constexpr bool operator==(const RenderTargets&) const noexcept = default;
constexpr bool Contains(std::span<const ImageViewId> elements) const noexcept {
const auto contains = [elements](ImageViewId item) {
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
index 46e8a86e6..1e2aad76a 100644
--- a/src/video_core/texture_cache/slot_vector.h
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -12,6 +12,7 @@
#include "common/assert.h"
#include "common/common_types.h"
+#include "common/polyfill_ranges.h"
namespace VideoCommon {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 8ef75fe73..8e68a2e53 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -506,10 +506,14 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
}
template <class P>
-void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
+bool TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Config& copy) {
- const BlitImages images = GetBlitImages(dst, src, copy);
+ const auto result = GetBlitImages(dst, src, copy);
+ if (!result) {
+ return false;
+ }
+ const BlitImages images = *result;
const ImageId dst_id = images.dst_id;
const ImageId src_id = images.src_id;
@@ -596,6 +600,7 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
copy.operation);
}
+ return true;
}
template <class P>
@@ -1133,7 +1138,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
}
template <class P>
-typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
+std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImages(
const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Config& copy) {
@@ -1154,6 +1159,20 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
has_deleted_images = false;
src_id = FindImage(src_info, src_addr, try_options);
dst_id = FindImage(dst_info, dst_addr, try_options);
+ if (!copy.must_accelerate) {
+ do {
+ if (!src_id && !dst_id) {
+ return std::nullopt;
+ }
+ if (src_id && True(slot_images[src_id].flags & ImageFlagBits::GpuModified)) {
+ break;
+ }
+ if (dst_id && True(slot_images[dst_id].flags & ImageFlagBits::GpuModified)) {
+ break;
+ }
+ return std::nullopt;
+ } while (false);
+ }
const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
if (src_image && src_image->info.num_samples > 1) {
RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews};
@@ -1194,12 +1213,12 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{});
} while (has_deleted_images);
}
- return BlitImages{
+ return {BlitImages{
.dst_id = dst_id,
.src_id = src_id,
.dst_format = dst_info.format,
.src_format = src_info.format,
- };
+ }};
}
template <class P>
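
Editor's note: BlitImage now reports whether the copy was actually accelerated: GetBlitImages may decline when acceleration is not mandatory and neither surface is GPU-modified, and the false return lets Fermi2D fall back to the software blitter. A compact sketch of the accelerate-or-reject flow, with illustrative types:

    #include <optional>

    struct BlitPair {
        int dst_id;
        int src_id;
    };

    std::optional<BlitPair> FindBlitPair(bool must_accelerate, bool src_gpu_modified,
                                         bool dst_gpu_modified) {
        if (!must_accelerate && !src_gpu_modified && !dst_gpu_modified) {
            return std::nullopt;  // data only lives on the CPU; no point involving the GPU
        }
        return BlitPair{.dst_id = 1, .src_id = 2};  // placeholder ids
    }

    bool TryAccelerateBlit(bool must_accelerate, bool src_mod, bool dst_mod) {
        const auto pair = FindBlitPair(must_accelerate, src_mod, dst_mod);
        if (!pair) {
            return false;  // caller runs the software blitter instead
        }
        // ...record the GPU blit using pair->dst_id / pair->src_id...
        return true;
    }
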
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 2fa8445eb..587339a31 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -16,6 +16,7 @@
#include "common/hash.h"
#include "common/literals.h"
#include "common/lru_cache.h"
+#include "common/polyfill_ranges.h"
#include "video_core/compatible_formats.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/delayed_destruction_ring.h"
@@ -60,8 +61,6 @@ public:
TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept;
TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete;
TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete;
- TextureCacheChannelInfo(TextureCacheChannelInfo&& other) noexcept = default;
- TextureCacheChannelInfo& operator=(TextureCacheChannelInfo&& other) noexcept = default;
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
@@ -174,7 +173,7 @@ public:
void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size);
/// Blit an image with the given parameters
- void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
+ bool BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Config& copy);
@@ -285,9 +284,9 @@ private:
[[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
/// Return a blit image pair from the given guest blit parameters
- [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
- const Tegra::Engines::Fermi2D::Surface& src,
- const Tegra::Engines::Fermi2D::Config& copy);
+ [[nodiscard]] std::optional<BlitImages> GetBlitImages(
+ const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Config& copy);
/// Find or create a sampler from a guest descriptor sampler
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 69a32819a..e8d7c7863 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -15,6 +15,7 @@
#include "common/alignment.h"
#include "common/common_types.h"
+#include "common/polyfill_ranges.h"
#include "common/thread_worker.h"
#include "video_core/textures/astc.h"
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index fd1a4b987..59120cd09 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -170,6 +170,7 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe
#undef BPP_CASE
default:
ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel);
+ break;
}
}
@@ -217,6 +218,7 @@ void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_p
#undef BPP_CASE
default:
ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel);
+ break;
}
}
@@ -240,6 +242,7 @@ void UnswizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes
#undef BPP_CASE
default:
ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel);
+ break;
}
}
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp
index 45071185a..155599316 100644
--- a/src/video_core/transform_feedback.cpp
+++ b/src/video_core/transform_feedback.cpp
@@ -7,6 +7,7 @@
#include "common/alignment.h"
#include "common/assert.h"
+#include "common/polyfill_ranges.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/transform_feedback.h"
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 04ac4af11..fedb4a7bb 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -7,6 +7,7 @@
#include "common/settings.h"
#include "core/core.h"
#include "video_core/renderer_base.h"
+#include "video_core/renderer_null/renderer_null.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/video_core.h"
@@ -26,6 +27,9 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
case Settings::RendererBackend::Vulkan:
return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory,
gpu, std::move(context));
+ case Settings::RendererBackend::Null:
+ return std::make_unique<Null::RendererNull>(emu_window, cpu_memory, gpu,
+ std::move(context));
default:
return nullptr;
}
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index ddecfca13..33856fe59 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -12,6 +12,7 @@
#include "common/assert.h"
#include "common/literals.h"
+#include "common/polyfill_ranges.h"
#include "common/settings.h"
#include "video_core/vulkan_common/nsight_aftermath_tracker.h"
#include "video_core/vulkan_common/vulkan_device.h"
@@ -74,23 +75,8 @@ enum class NvidiaArchitecture {
};
constexpr std::array REQUIRED_EXTENSIONS{
- VK_KHR_MAINTENANCE1_EXTENSION_NAME,
- VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
- VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
- VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
- VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
- VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
- VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
- VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
- VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
- VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME,
- VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME,
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
- VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
- VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
VK_EXT_ROBUSTNESS_2_EXTENSION_NAME,
- VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
- VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME,
#ifdef _WIN32
VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
#endif
@@ -99,6 +85,17 @@ constexpr std::array REQUIRED_EXTENSIONS{
#endif
};
+constexpr std::array REQUIRED_EXTENSIONS_BEFORE_1_2{
+ VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
+ VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME,
+ VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
+ VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
+};
+
+constexpr std::array REQUIRED_EXTENSIONS_BEFORE_1_3{
+ VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME,
+};
+
template <typename T>
void SetNext(void**& next, T& data) {
*next = &data;
@@ -308,10 +305,10 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{};
shading_rate_props.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR;
- VkPhysicalDeviceProperties2KHR physical_properties{};
- physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
+ VkPhysicalDeviceProperties2 physical_properties{};
+ physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
physical_properties.pNext = &shading_rate_props;
- physical.GetProperties2KHR(physical_properties);
+ physical.GetProperties2(physical_properties);
if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) {
// Only Ampere and newer support this feature
return NvidiaArchitecture::AmpereOrNewer;
@@ -327,7 +324,8 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface,
const vk::InstanceDispatch& dld_)
: instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
- supported_extensions{GetSupportedExtensions(physical)},
+ instance_version{properties.apiVersion}, supported_extensions{GetSupportedExtensions(
+ physical)},
format_properties(GetFormatProperties(physical)) {
CheckSuitability(surface != nullptr);
SetupFamilies(surface);
@@ -401,15 +399,15 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
const void* first_next = &features2;
void** next = &features2.pNext;
- VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
+ VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
.pNext = nullptr,
.timelineSemaphore = true,
};
SetNext(next, timeline_semaphore);
- VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR,
+ VkPhysicalDevice16BitStorageFeatures bit16_storage{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES,
.pNext = nullptr,
.storageBuffer16BitAccess = true,
.uniformAndStorageBuffer16BitAccess = true,
@@ -418,8 +416,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
};
SetNext(next, bit16_storage);
- VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage{
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR,
+ VkPhysicalDevice8BitStorageFeatures bit8_storage{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES,
.pNext = nullptr,
.storageBuffer8BitAccess = false,
.uniformAndStorageBuffer8BitAccess = true,
@@ -436,32 +434,39 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
};
SetNext(next, robustness2);
- VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset{
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT,
+ VkPhysicalDeviceHostQueryResetFeatures host_query_reset{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES,
.pNext = nullptr,
.hostQueryReset = true,
};
SetNext(next, host_query_reset);
- VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR,
+ VkPhysicalDeviceVariablePointerFeatures variable_pointers{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES,
.pNext = nullptr,
.variablePointersStorageBuffer = VK_TRUE,
.variablePointers = VK_TRUE,
};
SetNext(next, variable_pointers);
- VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT,
+ VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures demote{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES,
.pNext = nullptr,
.shaderDemoteToHelperInvocation = true,
};
SetNext(next, demote);
- VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
+ VkPhysicalDeviceShaderDrawParametersFeatures draw_parameters{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES,
+ .pNext = nullptr,
+ .shaderDrawParameters = true,
+ };
+ SetNext(next, draw_parameters);
+
+ VkPhysicalDeviceShaderFloat16Int8Features float16_int8;
if (is_int8_supported || is_float16_supported) {
float16_int8 = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR,
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES,
.pNext = nullptr,
.shaderFloat16 = is_float16_supported,
.shaderInt8 = is_int8_supported,
@@ -487,10 +492,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders");
}
- VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout;
+ VkPhysicalDeviceUniformBufferStandardLayoutFeatures std430_layout;
if (khr_uniform_buffer_standard_layout) {
std430_layout = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR,
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES,
.pNext = nullptr,
.uniformBufferStandardLayout = true,
};
@@ -608,10 +613,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state");
}
- VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64;
+ VkPhysicalDeviceShaderAtomicInt64Features atomic_int64;
if (ext_shader_atomic_int64) {
atomic_int64 = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR,
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES,
.pNext = nullptr,
.shaderBufferInt64Atomics = VK_TRUE,
.shaderSharedInt64Atomics = VK_TRUE,
@@ -896,28 +901,51 @@ std::string Device::GetDriverName() const {
}
}
+static std::vector<const char*> ExtensionsRequiredForInstanceVersion(u32 available_version) {
+ std::vector<const char*> extensions{REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()};
+
+ if (available_version < VK_API_VERSION_1_2) {
+ extensions.insert(extensions.end(), REQUIRED_EXTENSIONS_BEFORE_1_2.begin(),
+ REQUIRED_EXTENSIONS_BEFORE_1_2.end());
+ }
+
+ if (available_version < VK_API_VERSION_1_3) {
+ extensions.insert(extensions.end(), REQUIRED_EXTENSIONS_BEFORE_1_3.begin(),
+ REQUIRED_EXTENSIONS_BEFORE_1_3.end());
+ }
+
+ return extensions;
+}
+
void Device::CheckSuitability(bool requires_swapchain) const {
- std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;
- bool has_swapchain = false;
- for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) {
- const std::string_view name{property.extensionName};
- for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
- if (available_extensions[i]) {
- continue;
- }
- available_extensions[i] = name == REQUIRED_EXTENSIONS[i];
- }
- has_swapchain = has_swapchain || name == VK_KHR_SWAPCHAIN_EXTENSION_NAME;
+ std::vector<const char*> required_extensions =
+ ExtensionsRequiredForInstanceVersion(instance_version);
+ std::vector<const char*> available_extensions;
+
+ if (requires_swapchain) {
+ required_extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
}
- for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
- if (available_extensions[i]) {
- continue;
+
+ auto extension_properties = physical.EnumerateDeviceExtensionProperties();
+
+ for (const VkExtensionProperties& property : extension_properties) {
+ available_extensions.push_back(property.extensionName);
+ }
+
+ bool has_all_required_extensions = true;
+ for (const char* requirement_name : required_extensions) {
+ const bool found =
+ std::ranges::any_of(available_extensions, [&](const char* extension_name) {
+ return std::strcmp(requirement_name, extension_name) == 0;
+ });
+
+ if (!found) {
+ LOG_ERROR(Render_Vulkan, "Missing required extension: {}", requirement_name);
+ has_all_required_extensions = false;
}
- LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]);
- throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
}
- if (requires_swapchain && !has_swapchain) {
- LOG_ERROR(Render_Vulkan, "Missing required extension: VK_KHR_swapchain");
+
+ if (!has_all_required_extensions) {
throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
}
@@ -940,27 +968,46 @@ void Device::CheckSuitability(bool requires_swapchain) const {
throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
}
}
- VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{};
- demote.sType =
- VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT;
+ VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures demote{};
+ demote.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES;
demote.pNext = nullptr;
- VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{};
- variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR;
+ VkPhysicalDeviceVariablePointerFeatures variable_pointers{};
+ variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES;
variable_pointers.pNext = &demote;
VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
robustness2.pNext = &variable_pointers;
- VkPhysicalDeviceFeatures2KHR features2{};
+ VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore{};
+ timeline_semaphore.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES;
+ timeline_semaphore.pNext = &robustness2;
+
+ VkPhysicalDevice16BitStorageFeatures bit16_storage{};
+ bit16_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES;
+ bit16_storage.pNext = &timeline_semaphore;
+
+ VkPhysicalDevice8BitStorageFeatures bit8_storage{};
+ bit8_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES;
+ bit8_storage.pNext = &bit16_storage;
+
+ VkPhysicalDeviceHostQueryResetFeatures host_query_reset{};
+ host_query_reset.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES;
+ host_query_reset.pNext = &bit8_storage;
+
+ VkPhysicalDeviceShaderDrawParametersFeatures draw_parameters{};
+ draw_parameters.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES;
+ draw_parameters.pNext = &host_query_reset;
+
+ VkPhysicalDeviceFeatures2 features2{};
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
- features2.pNext = &robustness2;
+ features2.pNext = &draw_parameters;
- physical.GetFeatures2KHR(features2);
+ physical.GetFeatures2(features2);
const VkPhysicalDeviceFeatures& features{features2.features};
- const std::array feature_report{
+ std::array feature_report{
std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
std::make_pair(features.imageCubeArray, "imageCubeArray"),
@@ -976,6 +1023,7 @@ void Device::CheckSuitability(bool requires_swapchain) const {
std::make_pair(features.tessellationShader, "tessellationShader"),
std::make_pair(features.sampleRateShading, "sampleRateShading"),
std::make_pair(features.dualSrcBlend, "dualSrcBlend"),
+ std::make_pair(features.logicOp, "logicOp"),
std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
@@ -983,27 +1031,38 @@ void Device::CheckSuitability(bool requires_swapchain) const {
"shaderStorageImageWriteWithoutFormat"),
std::make_pair(features.shaderClipDistance, "shaderClipDistance"),
std::make_pair(features.shaderCullDistance, "shaderCullDistance"),
- std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"),
std::make_pair(variable_pointers.variablePointers, "variablePointers"),
std::make_pair(variable_pointers.variablePointersStorageBuffer,
"variablePointersStorageBuffer"),
std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"),
std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"),
std::make_pair(robustness2.nullDescriptor, "nullDescriptor"),
+ std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"),
+ std::make_pair(timeline_semaphore.timelineSemaphore, "timelineSemaphore"),
+ std::make_pair(bit16_storage.storageBuffer16BitAccess, "storageBuffer16BitAccess"),
+ std::make_pair(bit16_storage.uniformAndStorageBuffer16BitAccess,
+ "uniformAndStorageBuffer16BitAccess"),
+ std::make_pair(bit8_storage.uniformAndStorageBuffer8BitAccess,
+ "uniformAndStorageBuffer8BitAccess"),
+ std::make_pair(host_query_reset.hostQueryReset, "hostQueryReset"),
+ std::make_pair(draw_parameters.shaderDrawParameters, "shaderDrawParameters"),
};
+
+ bool has_all_required_features = true;
for (const auto& [is_supported, name] : feature_report) {
- if (is_supported) {
- continue;
+ if (!is_supported) {
+ LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name);
+ has_all_required_features = false;
}
- LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name);
+ }
+
+ if (!has_all_required_features) {
throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
}
}
std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
- std::vector<const char*> extensions;
- extensions.reserve(8 + REQUIRED_EXTENSIONS.size());
- extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end());
+ std::vector<const char*> extensions = ExtensionsRequiredForInstanceVersion(instance_version);
if (requires_surface) {
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
}
@@ -1079,37 +1138,37 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME, false);
}
}
- VkPhysicalDeviceFeatures2KHR features{};
- features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
+ VkPhysicalDeviceFeatures2 features{};
+ features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
- VkPhysicalDeviceProperties2KHR physical_properties{};
- physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
+ VkPhysicalDeviceProperties2 physical_properties{};
+ physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
if (has_khr_shader_float16_int8) {
- VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8_features;
- float16_int8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
+ VkPhysicalDeviceShaderFloat16Int8Features float16_int8_features;
+ float16_int8_features.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
float16_int8_features.pNext = nullptr;
features.pNext = &float16_int8_features;
- physical.GetFeatures2KHR(features);
+ physical.GetFeatures2(features);
is_float16_supported = float16_int8_features.shaderFloat16;
is_int8_supported = float16_int8_features.shaderInt8;
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
if (has_ext_subgroup_size_control) {
- VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features;
- subgroup_features.sType =
- VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
+ VkPhysicalDeviceSubgroupSizeControlFeatures subgroup_features;
+ subgroup_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES;
subgroup_features.pNext = nullptr;
features.pNext = &subgroup_features;
- physical.GetFeatures2KHR(features);
+ physical.GetFeatures2(features);
- VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_properties;
+ VkPhysicalDeviceSubgroupSizeControlProperties subgroup_properties;
subgroup_properties.sType =
- VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES;
subgroup_properties.pNext = nullptr;
physical_properties.pNext = &subgroup_properties;
- physical.GetProperties2KHR(physical_properties);
+ physical.GetProperties2(physical_properties);
is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize;
@@ -1128,7 +1187,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT;
provoking_vertex.pNext = nullptr;
features.pNext = &provoking_vertex;
- physical.GetFeatures2KHR(features);
+ physical.GetFeatures2(features);
if (provoking_vertex.provokingVertexLast &&
provoking_vertex.transformFeedbackPreservesProvokingVertex) {
@@ -1142,7 +1201,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT;
vertex_input.pNext = nullptr;
features.pNext = &vertex_input;
- physical.GetFeatures2KHR(features);
+ physical.GetFeatures2(features);
if (vertex_input.vertexInputDynamicState) {
extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
@@ -1154,7 +1213,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES;
atomic_int64.pNext = nullptr;
features.pNext = &atomic_int64;
- physical.GetFeatures2KHR(features);
+ physical.GetFeatures2(features);
if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) {
extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
@@ -1166,13 +1225,13 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
tfb_features.pNext = nullptr;
features.pNext = &tfb_features;
- physical.GetFeatures2KHR(features);
+ physical.GetFeatures2(features);
VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties;
tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT;
tfb_properties.pNext = nullptr;
physical_properties.pNext = &tfb_properties;
- physical.GetProperties2KHR(physical_properties);
+ physical.GetProperties2(physical_properties);
if (tfb_features.transformFeedback && tfb_features.geometryStreams &&
tfb_properties.maxTransformFeedbackStreams >= 4 &&
@@ -1187,7 +1246,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
border_features.pNext = nullptr;
features.pNext = &border_features;
- physical.GetFeatures2KHR(features);
+ physical.GetFeatures2(features);
if (border_features.customBorderColors && border_features.customBorderColorWithoutFormat) {
extensions.push_back(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
@@ -1200,7 +1259,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
extended_dynamic_state.pNext = nullptr;
features.pNext = &extended_dynamic_state;
- physical.GetFeatures2KHR(features);
+ physical.GetFeatures2(features);
if (extended_dynamic_state.extendedDynamicState) {
extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
@@ -1212,7 +1271,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT;
line_raster.pNext = nullptr;
features.pNext = &line_raster;
- physical.GetFeatures2KHR(features);
+ physical.GetFeatures2(features);
if (line_raster.rectangularLines && line_raster.smoothLines) {
extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME);
ext_line_rasterization = true;
@@ -1224,7 +1283,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR;
layout.pNext = nullptr;
features.pNext = &layout;
- physical.GetFeatures2KHR(features);
+ physical.GetFeatures2(features);
if (layout.workgroupMemoryExplicitLayout &&
layout.workgroupMemoryExplicitLayout8BitAccess &&
@@ -1240,7 +1299,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR;
executable_properties.pNext = nullptr;
features.pNext = &executable_properties;
- physical.GetFeatures2KHR(features);
+ physical.GetFeatures2(features);
if (executable_properties.pipelineExecutableInfo) {
extensions.push_back(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME);
@@ -1253,7 +1312,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT;
primitive_topology_list_restart.pNext = nullptr;
features.pNext = &primitive_topology_list_restart;
- physical.GetFeatures2KHR(features);
+ physical.GetFeatures2(features);
is_topology_list_restart_supported =
primitive_topology_list_restart.primitiveTopologyListRestart;
@@ -1271,7 +1330,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
push_descriptor.pNext = nullptr;
physical_properties.pNext = &push_descriptor;
- physical.GetProperties2KHR(physical_properties);
+ physical.GetProperties2(physical_properties);
max_push_descriptors = push_descriptor.maxPushDescriptors;
}
@@ -1322,18 +1381,18 @@ void Device::SetupFeatures() {
}
void Device::SetupProperties() {
- float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR;
+ float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES;
VkPhysicalDeviceProperties2KHR properties2{};
- properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
+ properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
properties2.pNext = &float_controls;
- physical.GetProperties2KHR(properties2);
+ physical.GetProperties2(properties2);
}
void Device::CollectTelemetryParameters() {
- VkPhysicalDeviceDriverPropertiesKHR driver{
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR,
+ VkPhysicalDeviceDriverProperties driver{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
.pNext = nullptr,
.driverID = {},
.driverName = {},
@@ -1341,12 +1400,12 @@ void Device::CollectTelemetryParameters() {
.conformanceVersion = {},
};
- VkPhysicalDeviceProperties2KHR device_properties{
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
+ VkPhysicalDeviceProperties2 device_properties{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
.pNext = &driver,
.properties = {},
};
- physical.GetProperties2KHR(device_properties);
+ physical.GetProperties2(device_properties);
driver_id = driver.driverID;
vendor_name = driver.driverName;
@@ -1402,23 +1461,10 @@ void Device::CollectToolingInfo() {
if (!ext_tooling_info) {
return;
}
- const auto vkGetPhysicalDeviceToolPropertiesEXT =
- reinterpret_cast<PFN_vkGetPhysicalDeviceToolPropertiesEXT>(
- dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT"));
- if (!vkGetPhysicalDeviceToolPropertiesEXT) {
- return;
- }
- u32 tool_count = 0;
- if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) {
- return;
- }
- std::vector<VkPhysicalDeviceToolPropertiesEXT> tools(tool_count);
- if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) {
- return;
- }
- for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) {
+ auto tools{physical.GetPhysicalDeviceToolProperties()};
+ for (const VkPhysicalDeviceToolProperties& tool : tools) {
const std::string_view name = tool.name;
- LOG_INFO(Render_Vulkan, "{}", name);
+ LOG_INFO(Render_Vulkan, "Attached debugging tool: {}", name);
has_renderdoc = has_renderdoc || name == "RenderDoc";
has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics";
}
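
Editor's note: the device code now targets core Vulkan entry points and only demands an extension when the reported API version predates its promotion. A sketch of that version gating, using real extension name macros but an illustrative selection:

    #include <cstdint>
    #include <vector>
    #include <vulkan/vulkan_core.h>

    std::vector<const char*> RequiredExtensionsFor(std::uint32_t api_version) {
        std::vector<const char*> names{VK_EXT_ROBUSTNESS_2_EXTENSION_NAME};  // always required
        if (api_version < VK_API_VERSION_1_2) {
            names.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);  // core since 1.2
        }
        if (api_version < VK_API_VERSION_1_3) {
            // core since 1.3
            names.push_back(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME);
        }
        return names;
    }
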
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index d7cc6c593..db802437c 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -211,18 +211,13 @@ public:
return khr_uniform_buffer_standard_layout;
}
- /// Returns true if the device supports VK_KHR_spirv_1_4.
- bool IsKhrSpirv1_4Supported() const {
- return khr_spirv_1_4;
- }
-
/// Returns true if the device supports VK_KHR_push_descriptor.
bool IsKhrPushDescriptorSupported() const {
return khr_push_descriptor;
}
/// Returns true if VK_KHR_pipeline_executable_properties is enabled.
- bool IsKhrPipelineEexecutablePropertiesEnabled() const {
+ bool IsKhrPipelineExecutablePropertiesEnabled() const {
return khr_pipeline_executable_properties;
}
@@ -316,6 +311,17 @@ public:
return ext_shader_atomic_int64;
}
+ /// Returns the minimum supported version of SPIR-V.
+ u32 SupportedSpirvVersion() const {
+ if (instance_version >= VK_API_VERSION_1_3) {
+ return 0x00010600U;
+ }
+ if (khr_spirv_1_4) {
+ return 0x00010400U;
+ }
+ return 0x00010000U;
+ }
+
/// Returns true when a known debugging tool is attached.
bool HasDebuggingToolAttached() const {
return has_renderdoc || has_nsight_graphics;
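
Editor's note: SupportedSpirvVersion() packs the SPIR-V version as 0x00MMmm00. A tiny decoding helper for logging, purely illustrative:

    #include <cstdint>
    #include <string>

    std::string SpirvVersionName(std::uint32_t version) {
        const std::uint32_t major = (version >> 16) & 0xffu;
        const std::uint32_t minor = (version >> 8) & 0xffu;
        return std::to_string(major) + "." + std::to_string(minor);  // 0x00010600 -> "1.6"
    }
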
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp
index a082e3059..562039b56 100644
--- a/src/video_core/vulkan_common/vulkan_instance.cpp
+++ b/src/video_core/vulkan_common/vulkan_instance.cpp
@@ -9,18 +9,21 @@
#include "common/common_types.h"
#include "common/dynamic_library.h"
#include "common/logging/log.h"
+#include "common/polyfill_ranges.h"
#include "core/frontend/emu_window.h"
#include "video_core/vulkan_common/vulkan_instance.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
// Include these late to avoid polluting previous headers
-#ifdef _WIN32
+#if defined(_WIN32)
#include <windows.h>
// ensure include order
#include <vulkan/vulkan_win32.h>
-#endif
-
-#if !defined(_WIN32) && !defined(__APPLE__)
+#elif defined(__APPLE__)
+#include <vulkan/vulkan_macos.h>
+#elif defined(__ANDROID__)
+#include <vulkan/vulkan_android.h>
+#else
#include <X11/Xlib.h>
#include <vulkan/vulkan_wayland.h>
#include <vulkan/vulkan_xlib.h>
@@ -39,8 +42,15 @@ namespace {
case Core::Frontend::WindowSystemType::Windows:
extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
break;
-#endif
-#if !defined(_WIN32) && !defined(__APPLE__)
+#elif defined(__APPLE__)
+ case Core::Frontend::WindowSystemType::Cocoa:
+ extensions.push_back(VK_MVK_MACOS_SURFACE_EXTENSION_NAME);
+ break;
+#elif defined(__ANDROID__)
+ case Core::Frontend::WindowSystemType::Android:
+ extensions.push_back(VK_KHR_ANDROID_SURFACE_EXTENSION_NAME);
+ break;
+#else
case Core::Frontend::WindowSystemType::X11:
extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
break;
@@ -59,6 +69,10 @@ namespace {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
+
+#ifdef __APPLE__
+ extensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME);
+#endif
return extensions;
}
@@ -140,7 +154,7 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD
}
vk::Instance instance =
std::async([&] {
- return vk::Instance::Create(required_version, layers, extensions, dld);
+ return vk::Instance::Create(available_version, layers, extensions, dld);
}).get();
if (!vk::Load(*instance, dld)) {
LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers");
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index 6442898bd..1732866e0 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -12,6 +12,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
+#include "common/polyfill_ranges.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
diff --git a/src/video_core/vulkan_common/vulkan_surface.cpp b/src/video_core/vulkan_common/vulkan_surface.cpp
index 69f9c494b..fa9bafa20 100644
--- a/src/video_core/vulkan_common/vulkan_surface.cpp
+++ b/src/video_core/vulkan_common/vulkan_surface.cpp
@@ -11,9 +11,11 @@
#include <windows.h>
// ensure include order
#include <vulkan/vulkan_win32.h>
-#endif
-
-#if !defined(_WIN32) && !defined(__APPLE__)
+#elif defined(__APPLE__)
+#include <vulkan/vulkan_macos.h>
+#elif defined(__ANDROID__)
+#include <vulkan/vulkan_android.h>
+#else
#include <X11/Xlib.h>
#include <vulkan/vulkan_wayland.h>
#include <vulkan/vulkan_xlib.h>
@@ -40,8 +42,33 @@ vk::SurfaceKHR CreateSurface(const vk::Instance& instance,
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
}
}
-#endif
-#if !defined(_WIN32) && !defined(__APPLE__)
+#elif defined(__APPLE__)
+ if (window_info.type == Core::Frontend::WindowSystemType::Cocoa) {
+ const VkMacOSSurfaceCreateInfoMVK mvk_ci{VK_STRUCTURE_TYPE_MACOS_SURFACE_CREATE_INFO_MVK,
+ nullptr, 0, window_info.render_surface};
+ const auto vkCreateMacOSSurfaceMVK = reinterpret_cast<PFN_vkCreateMacOSSurfaceMVK>(
+ dld.vkGetInstanceProcAddr(*instance, "vkCreateMacOSSurfaceMVK"));
+ if (!vkCreateMacOSSurfaceMVK ||
+ vkCreateMacOSSurfaceMVK(*instance, &mvk_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
+ LOG_ERROR(Render_Vulkan, "Failed to initialize Metal surface");
+ throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+ }
+ }
+#elif defined(__ANDROID__)
+ if (window_info.type == Core::Frontend::WindowSystemType::Android) {
+ const VkAndroidSurfaceCreateInfoKHR android_ci{
+ VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR, nullptr, 0,
+ reinterpret_cast<ANativeWindow*>(window_info.render_surface)};
+ const auto vkCreateAndroidSurfaceKHR = reinterpret_cast<PFN_vkCreateAndroidSurfaceKHR>(
+ dld.vkGetInstanceProcAddr(*instance, "vkCreateAndroidSurfaceKHR"));
+ if (!vkCreateAndroidSurfaceKHR ||
+ vkCreateAndroidSurfaceKHR(*instance, &android_ci, nullptr, &unsafe_surface) !=
+ VK_SUCCESS) {
+ LOG_ERROR(Render_Vulkan, "Failed to initialize Android surface");
+ throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+ }
+ }
+#else
if (window_info.type == Core::Frontend::WindowSystemType::X11) {
const VkXlibSurfaceCreateInfoKHR xlib_ci{
VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0,
@@ -70,6 +97,7 @@ vk::SurfaceKHR CreateSurface(const vk::Instance& instance,
}
}
#endif
+
if (!unsafe_surface) {
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 2ad98dcfe..e4a07813f 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -130,7 +130,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCreateComputePipelines);
X(vkCreateDescriptorPool);
X(vkCreateDescriptorSetLayout);
- X(vkCreateDescriptorUpdateTemplateKHR);
+ X(vkCreateDescriptorUpdateTemplate);
X(vkCreateEvent);
X(vkCreateFence);
X(vkCreateFramebuffer);
@@ -149,7 +149,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkDestroyCommandPool);
X(vkDestroyDescriptorPool);
X(vkDestroyDescriptorSetLayout);
- X(vkDestroyDescriptorUpdateTemplateKHR);
+ X(vkDestroyDescriptorUpdateTemplate);
X(vkDestroyEvent);
X(vkDestroyFence);
X(vkDestroyFramebuffer);
@@ -180,18 +180,29 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkGetQueryPoolResults);
X(vkGetPipelineExecutablePropertiesKHR);
X(vkGetPipelineExecutableStatisticsKHR);
- X(vkGetSemaphoreCounterValueKHR);
+ X(vkGetSemaphoreCounterValue);
X(vkMapMemory);
X(vkQueueSubmit);
X(vkResetFences);
- X(vkResetQueryPoolEXT);
+ X(vkResetQueryPool);
X(vkSetDebugUtilsObjectNameEXT);
X(vkSetDebugUtilsObjectTagEXT);
X(vkUnmapMemory);
- X(vkUpdateDescriptorSetWithTemplateKHR);
+ X(vkUpdateDescriptorSetWithTemplate);
X(vkUpdateDescriptorSets);
X(vkWaitForFences);
- X(vkWaitSemaphoresKHR);
+ X(vkWaitSemaphores);
+
+ // Support for timeline semaphores is mandatory in Vulkan 1.2
+ if (!dld.vkGetSemaphoreCounterValue) {
+ Proc(dld.vkGetSemaphoreCounterValue, dld, "vkGetSemaphoreCounterValueKHR", device);
+ Proc(dld.vkWaitSemaphores, dld, "vkWaitSemaphoresKHR", device);
+ }
+
+ // Support for host query reset is mandatory in Vulkan 1.2
+ if (!dld.vkResetQueryPool) {
+ Proc(dld.vkResetQueryPool, dld, "vkResetQueryPoolEXT", device);
+ }
#undef X
}
@@ -224,12 +235,13 @@ bool Load(VkInstance instance, InstanceDispatch& dld) noexcept {
X(vkCreateDebugUtilsMessengerEXT);
X(vkDestroyDebugUtilsMessengerEXT);
X(vkDestroySurfaceKHR);
- X(vkGetPhysicalDeviceFeatures2KHR);
- X(vkGetPhysicalDeviceProperties2KHR);
+ X(vkGetPhysicalDeviceFeatures2);
+ X(vkGetPhysicalDeviceProperties2);
X(vkGetPhysicalDeviceSurfaceCapabilitiesKHR);
X(vkGetPhysicalDeviceSurfaceFormatsKHR);
X(vkGetPhysicalDeviceSurfacePresentModesKHR);
X(vkGetPhysicalDeviceSurfaceSupportKHR);
+ X(vkGetPhysicalDeviceToolProperties);
X(vkGetSwapchainImagesKHR);
X(vkQueuePresentKHR);
@@ -359,9 +371,9 @@ void Destroy(VkDevice device, VkDescriptorSetLayout handle, const DeviceDispatch
dld.vkDestroyDescriptorSetLayout(device, handle, nullptr);
}
-void Destroy(VkDevice device, VkDescriptorUpdateTemplateKHR handle,
+void Destroy(VkDevice device, VkDescriptorUpdateTemplate handle,
const DeviceDispatch& dld) noexcept {
- dld.vkDestroyDescriptorUpdateTemplateKHR(device, handle, nullptr);
+ dld.vkDestroyDescriptorUpdateTemplate(device, handle, nullptr);
}
void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld) noexcept {
@@ -737,11 +749,11 @@ CommandPool Device::CreateCommandPool(const VkCommandPoolCreateInfo& ci) const {
return CommandPool(object, handle, *dld);
}
-DescriptorUpdateTemplateKHR Device::CreateDescriptorUpdateTemplateKHR(
- const VkDescriptorUpdateTemplateCreateInfoKHR& ci) const {
- VkDescriptorUpdateTemplateKHR object;
- Check(dld->vkCreateDescriptorUpdateTemplateKHR(handle, &ci, nullptr, &object));
- return DescriptorUpdateTemplateKHR(object, handle, *dld);
+DescriptorUpdateTemplate Device::CreateDescriptorUpdateTemplate(
+ const VkDescriptorUpdateTemplateCreateInfo& ci) const {
+ VkDescriptorUpdateTemplate object;
+ Check(dld->vkCreateDescriptorUpdateTemplate(handle, &ci, nullptr, &object));
+ return DescriptorUpdateTemplate(object, handle, *dld);
}
QueryPool Device::CreateQueryPool(const VkQueryPoolCreateInfo& ci) const {
@@ -857,20 +869,20 @@ VkPhysicalDeviceProperties PhysicalDevice::GetProperties() const noexcept {
return properties;
}
-void PhysicalDevice::GetProperties2KHR(VkPhysicalDeviceProperties2KHR& properties) const noexcept {
- dld->vkGetPhysicalDeviceProperties2KHR(physical_device, &properties);
+void PhysicalDevice::GetProperties2(VkPhysicalDeviceProperties2& properties) const noexcept {
+ dld->vkGetPhysicalDeviceProperties2(physical_device, &properties);
}
VkPhysicalDeviceFeatures PhysicalDevice::GetFeatures() const noexcept {
- VkPhysicalDeviceFeatures2KHR features2;
- features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
+ VkPhysicalDeviceFeatures2 features2;
+ features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
features2.pNext = nullptr;
- dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &features2);
+ dld->vkGetPhysicalDeviceFeatures2(physical_device, &features2);
return features2.features;
}
-void PhysicalDevice::GetFeatures2KHR(VkPhysicalDeviceFeatures2KHR& features) const noexcept {
- dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &features);
+void PhysicalDevice::GetFeatures2(VkPhysicalDeviceFeatures2& features) const noexcept {
+ dld->vkGetPhysicalDeviceFeatures2(physical_device, &features);
}
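
GetFeatures2/GetProperties2 exist so callers can chain extension or core-version feature structs through pNext and fill them in one call. A hedged sketch of that idiom, using the statically exported symbol rather than the dispatch table; SupportsTimelineSemaphores is an illustrative helper (assumes a Vulkan 1.2 instance):

    #include <vulkan/vulkan.h>

    bool SupportsTimelineSemaphores(VkPhysicalDevice physical_device) {
        VkPhysicalDeviceVulkan12Features vk12_features{
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
            .pNext = nullptr,
        };
        VkPhysicalDeviceFeatures2 features2{
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
            .pNext = &vk12_features,
        };
        // One call fills both the base features and the chained 1.2 struct.
        vkGetPhysicalDeviceFeatures2(physical_device, &features2);
        return vk12_features.timelineSemaphore == VK_TRUE;
    }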
VkFormatProperties PhysicalDevice::GetFormatProperties(VkFormat format) const noexcept {
@@ -895,6 +907,18 @@ std::vector<VkQueueFamilyProperties> PhysicalDevice::GetQueueFamilyProperties()
return properties;
}
+std::vector<VkPhysicalDeviceToolProperties> PhysicalDevice::GetPhysicalDeviceToolProperties()
+ const {
+ u32 num = 0;
+ if (!dld->vkGetPhysicalDeviceToolProperties) {
+ return {};
+ }
+ dld->vkGetPhysicalDeviceToolProperties(physical_device, &num, nullptr);
+ std::vector<VkPhysicalDeviceToolProperties> properties(num);
+ dld->vkGetPhysicalDeviceToolProperties(physical_device, &num, properties.data());
+ return properties;
+}
+
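
The new tool-properties query follows the standard Vulkan two-call enumeration idiom: first call for the count, second call to fill the array, with an early return when the entry point was not resolved. A hedged usage sketch; LogAttachedTools is an illustrative caller, not wrapper API, and assumes the pointer was loaded from Vulkan 1.3 core or VK_EXT_tooling_info:

    #include <cstdio>
    #include <vector>
    #include <vulkan/vulkan.h>

    void LogAttachedTools(VkPhysicalDevice physical_device,
                          PFN_vkGetPhysicalDeviceToolProperties get_tools) {
        if (!get_tools) {
            return; // Not available on this driver.
        }
        uint32_t num = 0;
        get_tools(physical_device, &num, nullptr);      // First call: query the count.
        std::vector<VkPhysicalDeviceToolProperties> tools(num);
        get_tools(physical_device, &num, tools.data()); // Second call: fill the array.
        for (const auto& tool : tools) {
            std::printf("Attached Vulkan tool: %s %s\n", tool.name, tool.version);
        }
    }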
bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR surface) const {
VkBool32 supported;
Check(dld->vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, queue_family_index, surface,
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 1b3f493bd..8395ff2cb 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -168,12 +168,13 @@ struct InstanceDispatch {
PFN_vkEnumerateDeviceExtensionProperties vkEnumerateDeviceExtensionProperties{};
PFN_vkEnumeratePhysicalDevices vkEnumeratePhysicalDevices{};
PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr{};
- PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR{};
+ PFN_vkGetPhysicalDeviceFeatures2 vkGetPhysicalDeviceFeatures2{};
PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties{};
PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties{};
PFN_vkGetPhysicalDeviceMemoryProperties2 vkGetPhysicalDeviceMemoryProperties2{};
PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties{};
- PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR{};
+ PFN_vkGetPhysicalDeviceProperties2 vkGetPhysicalDeviceProperties2{};
+ PFN_vkGetPhysicalDeviceToolProperties vkGetPhysicalDeviceToolProperties{};
PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties{};
PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR{};
PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR{};
@@ -247,7 +248,7 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkCreateComputePipelines vkCreateComputePipelines{};
PFN_vkCreateDescriptorPool vkCreateDescriptorPool{};
PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout{};
- PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR{};
+ PFN_vkCreateDescriptorUpdateTemplate vkCreateDescriptorUpdateTemplate{};
PFN_vkCreateEvent vkCreateEvent{};
PFN_vkCreateFence vkCreateFence{};
PFN_vkCreateFramebuffer vkCreateFramebuffer{};
@@ -266,7 +267,7 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkDestroyCommandPool vkDestroyCommandPool{};
PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool{};
PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout{};
- PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR{};
+ PFN_vkDestroyDescriptorUpdateTemplate vkDestroyDescriptorUpdateTemplate{};
PFN_vkDestroyEvent vkDestroyEvent{};
PFN_vkDestroyFence vkDestroyFence{};
PFN_vkDestroyFramebuffer vkDestroyFramebuffer{};
@@ -297,18 +298,18 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkGetPipelineExecutablePropertiesKHR vkGetPipelineExecutablePropertiesKHR{};
PFN_vkGetPipelineExecutableStatisticsKHR vkGetPipelineExecutableStatisticsKHR{};
PFN_vkGetQueryPoolResults vkGetQueryPoolResults{};
- PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR{};
+ PFN_vkGetSemaphoreCounterValue vkGetSemaphoreCounterValue{};
PFN_vkMapMemory vkMapMemory{};
PFN_vkQueueSubmit vkQueueSubmit{};
PFN_vkResetFences vkResetFences{};
- PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT{};
+ PFN_vkResetQueryPool vkResetQueryPool{};
PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT{};
PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT{};
PFN_vkUnmapMemory vkUnmapMemory{};
- PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR{};
+ PFN_vkUpdateDescriptorSetWithTemplate vkUpdateDescriptorSetWithTemplate{};
PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets{};
PFN_vkWaitForFences vkWaitForFences{};
- PFN_vkWaitSemaphoresKHR vkWaitSemaphoresKHR{};
+ PFN_vkWaitSemaphores vkWaitSemaphores{};
};
/// Loads instance agnostic function pointers.
@@ -327,7 +328,7 @@ void Destroy(VkDevice, VkBufferView, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkCommandPool, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkDescriptorPool, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept;
-void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkDescriptorUpdateTemplate, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkEvent, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept;
@@ -559,7 +560,7 @@ private:
using DebugUtilsMessenger = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>;
using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>;
-using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>;
+using DescriptorUpdateTemplate = Handle<VkDescriptorUpdateTemplate, VkDevice, DeviceDispatch>;
using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>;
using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>;
using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>;
@@ -766,7 +767,7 @@ public:
[[nodiscard]] u64 GetCounter() const {
u64 value;
- Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value));
+ Check(dld->vkGetSemaphoreCounterValue(owner, handle, &value));
return value;
}
@@ -778,15 +779,15 @@ public:
* @return True on successful wait, false on timeout
*/
bool Wait(u64 value, u64 timeout = std::numeric_limits<u64>::max()) const {
- const VkSemaphoreWaitInfoKHR wait_info{
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR,
+ const VkSemaphoreWaitInfo wait_info{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
.pNext = nullptr,
.flags = 0,
.semaphoreCount = 1,
.pSemaphores = &handle,
.pValues = &value,
};
- const VkResult result = dld->vkWaitSemaphoresKHR(owner, &wait_info, timeout);
+ const VkResult result = dld->vkWaitSemaphores(owner, &wait_info, timeout);
switch (result) {
case VK_SUCCESS:
return true;
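
Wait() now targets the core VkSemaphoreWaitInfo / vkWaitSemaphores names. For reference, a self-contained sketch of the core timeline-semaphore API outside the wrapper; it assumes a Vulkan 1.2 device with loader-exported entry points, and the helper names are illustrative:

    #include <cstdint>
    #include <limits>
    #include <vulkan/vulkan.h>

    VkSemaphore CreateTimelineSemaphore(VkDevice device) {
        const VkSemaphoreTypeCreateInfo type_info{
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
            .pNext = nullptr,
            .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
            .initialValue = 0,
        };
        const VkSemaphoreCreateInfo create_info{
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
            .pNext = &type_info,
            .flags = 0,
        };
        VkSemaphore semaphore = VK_NULL_HANDLE;
        vkCreateSemaphore(device, &create_info, nullptr, &semaphore);
        return semaphore;
    }

    bool WaitForValue(VkDevice device, VkSemaphore semaphore, uint64_t value,
                      uint64_t timeout_ns = std::numeric_limits<uint64_t>::max()) {
        // Same wait-info layout as the wrapper's Wait(), using the non-KHR names.
        const VkSemaphoreWaitInfo wait_info{
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
            .pNext = nullptr,
            .flags = 0,
            .semaphoreCount = 1,
            .pSemaphores = &semaphore,
            .pValues = &value,
        };
        return vkWaitSemaphores(device, &wait_info, timeout_ns) == VK_SUCCESS;
    }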
@@ -840,8 +841,8 @@ public:
CommandPool CreateCommandPool(const VkCommandPoolCreateInfo& ci) const;
- DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplateKHR(
- const VkDescriptorUpdateTemplateCreateInfoKHR& ci) const;
+ DescriptorUpdateTemplate CreateDescriptorUpdateTemplate(
+ const VkDescriptorUpdateTemplateCreateInfo& ci) const;
QueryPool CreateQueryPool(const VkQueryPoolCreateInfo& ci) const;
@@ -869,9 +870,9 @@ public:
void UpdateDescriptorSets(Span<VkWriteDescriptorSet> writes,
Span<VkCopyDescriptorSet> copies) const noexcept;
- void UpdateDescriptorSet(VkDescriptorSet set, VkDescriptorUpdateTemplateKHR update_template,
+ void UpdateDescriptorSet(VkDescriptorSet set, VkDescriptorUpdateTemplate update_template,
const void* data) const noexcept {
- dld->vkUpdateDescriptorSetWithTemplateKHR(handle, set, update_template, data);
+ dld->vkUpdateDescriptorSetWithTemplate(handle, set, update_template, data);
}
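
UpdateDescriptorSet drives the descriptor-update-template path: a template describes where each packed entry lands, and the raw data pointer is interpreted against it. A minimal sketch under the assumption of a Vulkan 1.1+ device, with one uniform-buffer entry at binding 0; MakeTemplate and WriteUniformBuffer are illustrative helpers, not wrapper API:

    #include <vulkan/vulkan.h>

    VkDescriptorUpdateTemplate MakeTemplate(VkDevice device, VkDescriptorSetLayout set_layout) {
        const VkDescriptorUpdateTemplateEntry entry{
            .dstBinding = 0,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .offset = 0,
            .stride = sizeof(VkDescriptorBufferInfo),
        };
        const VkDescriptorUpdateTemplateCreateInfo ci{
            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
            .pNext = nullptr,
            .flags = 0,
            .descriptorUpdateEntryCount = 1,
            .pDescriptorUpdateEntries = &entry,
            .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET,
            .descriptorSetLayout = set_layout,
            // The remaining fields only matter for push-descriptor templates.
            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
            .pipelineLayout = VK_NULL_HANDLE,
            .set = 0,
        };
        VkDescriptorUpdateTemplate tmpl = VK_NULL_HANDLE;
        vkCreateDescriptorUpdateTemplate(device, &ci, nullptr, &tmpl);
        return tmpl;
    }

    void WriteUniformBuffer(VkDevice device, VkDescriptorSet set,
                            VkDescriptorUpdateTemplate tmpl, VkBuffer buffer,
                            VkDeviceSize size) {
        // The packed payload matches the single template entry above.
        const VkDescriptorBufferInfo info{.buffer = buffer, .offset = 0, .range = size};
        vkUpdateDescriptorSetWithTemplate(device, set, tmpl, &info);
    }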
VkResult AcquireNextImageKHR(VkSwapchainKHR swapchain, u64 timeout, VkSemaphore semaphore,
@@ -884,8 +885,8 @@ public:
return dld->vkDeviceWaitIdle(handle);
}
- void ResetQueryPoolEXT(VkQueryPool query_pool, u32 first, u32 count) const noexcept {
- dld->vkResetQueryPoolEXT(handle, query_pool, first, count);
+ void ResetQueryPool(VkQueryPool query_pool, u32 first, u32 count) const noexcept {
+ dld->vkResetQueryPool(handle, query_pool, first, count);
}
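
ResetQueryPool wraps host query reset (VK_EXT_host_query_reset, core since Vulkan 1.2), which recycles queries from the CPU without recording a reset command. A short sketch under the assumption of a device with the hostQueryReset feature enabled; RecycleQueries is an illustrative helper, not wrapper API:

    #include <vulkan/vulkan.h>

    void RecycleQueries(VkDevice device, VkQueryPool pool, uint32_t first, uint32_t count) {
        // After this call the queries are back in the unavailable state and can
        // be begun again; on pre-1.2 drivers the wrapper routes the same request
        // through vkResetQueryPoolEXT.
        vkResetQueryPool(device, pool, first, count);
    }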
VkResult GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size,
@@ -910,11 +911,11 @@ public:
VkPhysicalDeviceProperties GetProperties() const noexcept;
- void GetProperties2KHR(VkPhysicalDeviceProperties2KHR&) const noexcept;
+ void GetProperties2(VkPhysicalDeviceProperties2&) const noexcept;
VkPhysicalDeviceFeatures GetFeatures() const noexcept;
- void GetFeatures2KHR(VkPhysicalDeviceFeatures2KHR&) const noexcept;
+ void GetFeatures2(VkPhysicalDeviceFeatures2&) const noexcept;
VkFormatProperties GetFormatProperties(VkFormat) const noexcept;
@@ -922,6 +923,8 @@ public:
std::vector<VkQueueFamilyProperties> GetQueueFamilyProperties() const;
+ std::vector<VkPhysicalDeviceToolProperties> GetPhysicalDeviceToolProperties() const;
+
bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const;
VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const;
@@ -980,7 +983,7 @@ public:
dynamic_offsets.size(), dynamic_offsets.data());
}
- void PushDescriptorSetWithTemplateKHR(VkDescriptorUpdateTemplateKHR update_template,
+ void PushDescriptorSetWithTemplateKHR(VkDescriptorUpdateTemplate update_template,
VkPipelineLayout layout, u32 set,
const void* data) const noexcept {
dld->vkCmdPushDescriptorSetWithTemplateKHR(handle, update_template, layout, set, data);